diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py
index a66f03d..b941ba1 100644
--- a/parso/pgen2/grammar.py
+++ b/parso/pgen2/grammar.py
@@ -59,7 +59,7 @@ class Grammar(object):
 
         self._nonterminal_to_dfas = rule_to_dfas
         self.labels = [(0, "EMPTY")]
-        self.keywords = {}
+        self.reserved_syntax_strings = {}
         self.tokens = {}
         self.start_nonterminal = start_nonterminal
 
@@ -104,7 +104,6 @@ class Grammar(object):
 
     #@_cache_labels
     def _make_label(self, label):
-        # XXX Maybe this should be a method on a subclass of converter?
         ilabel = len(self.labels)
         if label[0].isalpha():
             # Either a nonterminal name or a named token
@@ -124,23 +123,12 @@ class Grammar(object):
             assert label[0] in ('"', "'"), label
             # TODO use literal_eval instead of a simple eval.
             value = eval(label)
-            if value[0].isalpha():
-                # A keyword
-                if value in self.keywords:
-                    return self.keywords[value]
-                else:
-                    self.labels.append((token.NAME, value))
-                    self.keywords[value] = ilabel
-                    return ilabel
+            if value in self.reserved_syntax_strings:
+                return self.reserved_syntax_strings[value]
             else:
-                # An operator (any non-numeric token)
-                itoken = self._token_namespace.generate_token_id(value)
-                if itoken in self.tokens:
-                    return self.tokens[itoken]
-                else:
-                    self.labels.append((itoken, None))
-                    self.tokens[itoken] = ilabel
-                    return ilabel
+                self.labels.append((token.NAME, value))
+                self.reserved_syntax_strings[value] = ilabel
+                return self.reserved_syntax_strings[value]
 
     def _calculate_first_terminals(self, nonterminal):
         dfas = self._nonterminal_to_dfas[nonterminal]
diff --git a/parso/pgen2/grammar_parser.py b/parso/pgen2/grammar_parser.py
index 17aac0c..80b4e20 100644
--- a/parso/pgen2/grammar_parser.py
+++ b/parso/pgen2/grammar_parser.py
@@ -30,7 +30,7 @@ class GrammarParser():
 
             # rule: NAME ':' rhs NEWLINE
             self._current_rule_name = self._expect(token.NAME)
-            self._expect(token.COLON)
+            self._expect(token.OP, ':')
 
             a, z = self._parse_rhs()
             self._expect(token.NEWLINE)
@@ -60,7 +60,7 @@ class GrammarParser():
     def _parse_items(self):
         # items: item+
         a, b = self._parse_item()
-        while self.type in (token.NAME, token.STRING, token.LPAR, token.LSQB):
+        while self.type in (token.NAME, token.STRING) or self.value in ('(', '['):
             c, d = self._parse_item()
             # Need to end on the next item.
             b.add_arc(c)
@@ -72,7 +72,7 @@ class GrammarParser():
         if self.value == "[":
             self._gettoken()
             a, z = self._parse_rhs()
-            self._expect(token.RSQB)
+            self._expect(token.OP, ']')
             # Make it also possible that there is no token and change the
             # state.
             a.add_arc(z)
@@ -97,7 +97,7 @@ class GrammarParser():
         if self.value == "(":
             self._gettoken()
             a, z = self._parse_rhs()
-            self._expect(token.RPAR)
+            self._expect(token.OP, ')')
             return a, z
         elif self.type in (token.NAME, token.STRING):
             a = NFAState(self._current_rule_name)
@@ -110,10 +110,12 @@ class GrammarParser():
             self._raise_error("expected (...) or NAME or STRING, got %s/%s",
                               self.type, self.value)
 
-    def _expect(self, type):
+    def _expect(self, type, value=None):
         if self.type != type:
             self._raise_error("expected %s(%s), got %s(%s)",
                               type, token.tok_name[type], self.type, self.value)
+        if value is not None and self.value != value:
+            self._raise_error("expected %s, got %s", value, self.value)
         value = self.value
         self._gettoken()
         return value
diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py
index ac925a7..43edc92 100644
--- a/parso/pgen2/parse.py
+++ b/parso/pgen2/parse.py
@@ -71,10 +71,10 @@ def token_to_ilabel(grammar, type_, value):
     # Map from token to label
     # TODO this is not good, shouldn't use tokenize.NAME, but somehow use the
     # grammar.
-    if type_ == tokenize.NAME:
+    if type_ in (tokenize.NAME, tokenize.OP):
         # Check for reserved words (keywords)
         try:
-            return grammar.keywords[value]
+            return grammar.reserved_syntax_strings[value]
         except KeyError:
             pass
 
diff --git a/parso/python/parser.py b/parso/python/parser.py
index 7728121..2ebd63d 100644
--- a/parso/python/parser.py
+++ b/parso/python/parser.py
@@ -127,7 +127,7 @@ class Parser(BaseParser):
     def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
         # print('leaf', repr(value), token.tok_name[type])
         if type == NAME:
-            if value in pgen_grammar.keywords:
+            if value in pgen_grammar.reserved_syntax_strings:
                 return tree.Keyword(value, start_pos, prefix)
             else:
                 return tree.Name(value, start_pos, prefix)
diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py
index 0ac8a8d..1d6e981 100644
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -21,7 +21,7 @@ from codecs import BOM_UTF8
 
 from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
                                 NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
                                 ERROR_DEDENT, FSTRING_STRING, FSTRING_START,
-                                FSTRING_END)
+                                FSTRING_END, OP)
 from parso._compatibility import py_version
 from parso.utils import split_lines
@@ -574,7 +574,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
             try:
                 # This check is needed in any case to check if it's a valid
                 # operator or just some random unicode character.
-                typ = opmap[token]
+                opmap[token]
+                typ = OP
             except KeyError:
                 typ = ERRORTOKEN
             yield PythonToken(typ, token, spos, prefix)
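
Notes on the change:

The core of the patch is in grammar.py: _make_label used to intern quoted grammar strings through two tables (keywords for alphabetic strings, tokens for operators); it now funnels both through a single reserved_syntax_strings table, always labeled as NAME. A minimal standalone sketch of that interning, using simplified stand-in names rather than parso's real API:

    # Illustrative sketch only -- LabelTable and NAME are stand-ins,
    # not parso's classes or token numbering.
    NAME = 1

    class LabelTable:
        def __init__(self):
            self.labels = [(0, "EMPTY")]
            self.reserved_syntax_strings = {}

        def make_label(self, quoted):
            # `quoted` is a grammar literal such as "'if'" or "':'".
            value = eval(quoted)  # the patch keeps eval(); literal_eval is a TODO
            if value in self.reserved_syntax_strings:
                return self.reserved_syntax_strings[value]
            ilabel = len(self.labels)
            self.labels.append((NAME, value))
            self.reserved_syntax_strings[value] = ilabel
            return ilabel

    table = LabelTable()
    assert table.make_label("'if'") == 1   # keyword: new label 1
    assert table.make_label('":"') == 2    # operator: same code path now
    assert table.make_label("'if'") == 1   # repeated strings hit the cache

The practical effect is that keywords and operators stop being distinguishable at the grammar level; both are just reserved syntax strings.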
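
In grammar_parser.py, the _expect change is small but load-bearing: since ':', '[', ']' and ')' no longer arrive as dedicated COLON/LSQB/RSQB/RPAR token types but as generic OP, _expect must be able to check a token's value in addition to its type. A sketch of the extended check, assuming a hypothetical minimal parser class (not the real GrammarParser):

    # Illustrative sketch only -- MiniParser is a stand-in for GrammarParser.
    OP = 54
    tok_name = {OP: "OP"}

    class MiniParser:
        def __init__(self, tokens):
            self._tokens = iter(tokens)   # iterable of (type, value) pairs
            self._gettoken()

        def _gettoken(self):
            self.type, self.value = next(self._tokens, (None, None))

        def _expect(self, type, value=None):
            # Check the token type first, then the new optional value check.
            if self.type != type:
                raise SyntaxError("expected %s(%s), got %s(%s)"
                                  % (type, tok_name.get(type), self.type, self.value))
            if value is not None and self.value != value:
                raise SyntaxError("expected %s, got %s" % (value, self.value))
            value = self.value
            self._gettoken()
            return value

    p = MiniParser([(OP, ":"), (OP, "[")])
    assert p._expect(OP, ":") == ":"    # type and value both match
    try:
        p._expect(OP, "]")              # next token is '[', so the value check fires
    except SyntaxError as e:
        assert "expected ], got [" in str(e)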
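
On the consuming side the two pieces meet: tokenize.py now emits every known operator as the generic OP type (opmap is consulted only to validate the string), and parse.py's token_to_ilabel therefore checks both NAME and OP tokens against the shared table. A sketch of that dispatch, again with stand-in token numbers rather than parso's real ones:

    # Illustrative sketch only -- token type numbers are stand-ins.
    NAME, OP = 1, 54

    def token_to_ilabel(reserved_syntax_strings, type_, value):
        # Names and operators are both checked against the reserved strings.
        if type_ in (NAME, OP):
            try:
                return reserved_syntax_strings[value]
            except KeyError:
                pass
        # The real function falls through to per-token-type labels here.
        return None

    reserved = {"if": 1, ":": 2}
    assert token_to_ilabel(reserved, NAME, "if") == 1    # keyword
    assert token_to_ilabel(reserved, OP, ":") == 2       # operator
    assert token_to_ilabel(reserved, NAME, "x") is None  # plain identifier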