Use token.OP and use reserved words

This change breaks the tokenizer's backwards compatibility a bit. The details of
individual operators are now handled by the parser rather than the tokenizer. The
parser has to do this work anyway, so we don't need the extra complexity in the
tokenizer.
Author: Dave Halter
Date:   2018-06-24 11:28:23 +02:00
Parent: 33e321a539
Commit: b5378e4602
5 changed files with 19 additions and 28 deletions
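
To make the new split concrete, here is a minimal sketch of the interface change (the PythonToken namedtuple, the string token types and the label numbers below are simplified stand-ins, not parso's real objects): the tokenizer stops picking a dedicated type per operator and just reports OP, and the parser recovers which operator it is from the token's string value, the same way it already did for keywords.

    # Simplified before/after picture; everything here is a stand-in for the
    # real parso classes, and the label numbers are invented.
    from collections import namedtuple

    PythonToken = namedtuple('PythonToken', ['type', 'value'])

    # Before: the tokenizer chose a dedicated type per operator (via opmap).
    old_stream = [PythonToken('NAME', 'if'), PythonToken('COLON', ':')]

    # After: every operator is reported with the generic OP type ...
    new_stream = [PythonToken('NAME', 'if'), PythonToken('OP', ':')]

    # ... and the parser maps the token *string* to a grammar label through a
    # single table that now covers keywords and operators alike.
    reserved_syntax_strings = {'if': 7, ':': 12}

    for tok in new_stream:
        if tok.type in ('NAME', 'OP'):
            print(tok.value, '->', reserved_syntax_strings[tok.value])
    # prints: if -> 7
    #         : -> 12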

View File

@@ -59,7 +59,7 @@ class Grammar(object):
         self._nonterminal_to_dfas = rule_to_dfas
         self.labels = [(0, "EMPTY")]
-        self.keywords = {}
+        self.reserved_syntax_strings = {}
         self.tokens = {}
         self.start_nonterminal = start_nonterminal
@@ -104,7 +104,6 @@ class Grammar(object):
     #@_cache_labels
     def _make_label(self, label):
-        # XXX Maybe this should be a method on a subclass of converter?
         ilabel = len(self.labels)
         if label[0].isalpha():
             # Either a nonterminal name or a named token
@@ -124,23 +123,12 @@ class Grammar(object):
             assert label[0] in ('"', "'"), label
             # TODO use literal_eval instead of a simple eval.
             value = eval(label)
-            if value[0].isalpha():
-                # A keyword
-                if value in self.keywords:
-                    return self.keywords[value]
-                else:
-                    self.labels.append((token.NAME, value))
-                    self.keywords[value] = ilabel
-                    return ilabel
+            if value in self.reserved_syntax_strings:
+                return self.reserved_syntax_strings[value]
             else:
-                # An operator (any non-numeric token)
-                itoken = self._token_namespace.generate_token_id(value)
-                if itoken in self.tokens:
-                    return self.tokens[itoken]
-                else:
-                    self.labels.append((itoken, None))
-                    self.tokens[itoken] = ilabel
-                    return ilabel
+                self.labels.append((token.NAME, value))
+                self.reserved_syntax_strings[value] = ilabel
+                return self.reserved_syntax_strings[value]

     def _calculate_first_terminals(self, nonterminal):
         dfas = self._nonterminal_to_dfas[nonterminal]
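
The effect on the Grammar side can be seen in a tiny stand-alone sketch of the quoted-string branch above (this is not parso's Grammar class, and token.NAME is replaced by a plain constant): keyword-like strings and operator strings now take exactly the same path.

    # Minimal sketch of the new quoted-string handling in _make_label().
    NAME = 1                       # stand-in for token.NAME

    labels = [(0, "EMPTY")]
    reserved_syntax_strings = {}

    def make_reserved_label(value):
        if value in reserved_syntax_strings:
            return reserved_syntax_strings[value]
        ilabel = len(labels)
        labels.append((NAME, value))
        reserved_syntax_strings[value] = ilabel
        return ilabel

    # A rule like  if_stmt: 'if' test ':' suite  registers both quoted strings:
    assert make_reserved_label('if') == 1
    assert make_reserved_label(':') == 2
    assert make_reserved_label('if') == 1      # already known, just looked up
    assert labels[2] == (NAME, ':')            # operators get NAME-style labels too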

View File

@@ -30,7 +30,7 @@ class GrammarParser():
             # rule: NAME ':' rhs NEWLINE
             self._current_rule_name = self._expect(token.NAME)
-            self._expect(token.COLON)
+            self._expect(token.OP, ':')
             a, z = self._parse_rhs()
             self._expect(token.NEWLINE)
@@ -60,7 +60,7 @@ class GrammarParser():
     def _parse_items(self):
         # items: item+
         a, b = self._parse_item()
-        while self.type in (token.NAME, token.STRING, token.LPAR, token.LSQB):
+        while self.type in (token.NAME, token.STRING) or self.value in ('(', '['):
             c, d = self._parse_item()
             # Need to end on the next item.
             b.add_arc(c)
@@ -72,7 +72,7 @@ class GrammarParser():
         if self.value == "[":
             self._gettoken()
             a, z = self._parse_rhs()
-            self._expect(token.RSQB)
+            self._expect(token.OP, ']')
             # Make it also possible that there is no token and change the
             # state.
             a.add_arc(z)
@@ -97,7 +97,7 @@ class GrammarParser():
         if self.value == "(":
             self._gettoken()
             a, z = self._parse_rhs()
-            self._expect(token.RPAR)
+            self._expect(token.OP, ')')
             return a, z
         elif self.type in (token.NAME, token.STRING):
             a = NFAState(self._current_rule_name)
@@ -110,10 +110,12 @@ class GrammarParser():
             self._raise_error("expected (...) or NAME or STRING, got %s/%s",
                               self.type, self.value)

-    def _expect(self, type):
+    def _expect(self, type, value=None):
         if self.type != type:
             self._raise_error("expected %s(%s), got %s(%s)",
                               type, token.tok_name[type], self.type, self.value)
+        if value is not None and self.value != value:
+            self._raise_error("expected %s, got %s", value, self.value)
         value = self.value
         self._gettoken()
         return value
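
The extended _expect() can be exercised in isolation with a small mock parser (MiniParser and the Token tuple below are assumptions for the example, not parso code): the type must match as before, and when a value is supplied it must match too, which is what replaces the old token.COLON/RSQB/RPAR checks.

    from collections import namedtuple

    Token = namedtuple('Token', ['type', 'value'])

    class MiniParser:
        def __init__(self, tokens):
            self._tokens = iter(tokens)
            self._gettoken()

        def _gettoken(self):
            self.type, self.value = next(self._tokens)

        def _expect(self, type, value=None):
            if self.type != type:
                raise SyntaxError('expected %s, got %s' % (type, self.type))
            if value is not None and self.value != value:
                raise SyntaxError('expected %r, got %r' % (value, self.value))
            value = self.value
            self._gettoken()
            return value

    p = MiniParser([Token('NAME', 'rule'), Token('OP', ':'), Token('NEWLINE', '\n')])
    p._expect('NAME')           # type-only check, as before
    p._expect('OP', ':')        # type *and* value, replacing token.COLON etc.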

View File

@@ -71,10 +71,10 @@ def token_to_ilabel(grammar, type_, value):
     # Map from token to label
     # TODO this is not good, shouldn't use tokenize.NAME, but somehow use the
     # grammar.
-    if type_ == tokenize.NAME:
+    if type_ in (tokenize.NAME, tokenize.OP):
         # Check for reserved words (keywords)
         try:
-            return grammar.keywords[value]
+            return grammar.reserved_syntax_strings[value]
         except KeyError:
             pass

View File

@@ -127,7 +127,7 @@ class Parser(BaseParser):
     def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
         # print('leaf', repr(value), token.tok_name[type])
         if type == NAME:
-            if value in pgen_grammar.keywords:
+            if value in pgen_grammar.reserved_syntax_strings:
                 return tree.Keyword(value, start_pos, prefix)
             else:
                 return tree.Name(value, start_pos, prefix)
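
A point worth noting about convert_leaf: reserved_syntax_strings now also contains operator strings such as ':', but that cannot turn operators into Keyword leaves, because the lookup only happens for NAME tokens, whose values are always identifier-like. A toy version (the tuples stand in for parso's tree.Keyword/tree.Name nodes):

    def convert_leaf(reserved_syntax_strings, type, value):
        # Only NAME tokens are checked against the reserved-string table.
        if type == 'NAME':
            if value in reserved_syntax_strings:
                return ('Keyword', value)
            return ('Name', value)
        return ('Operator', value)

    table = {'if', 'else', ':'}        # ':' is in the table now, but harmless
    assert convert_leaf(table, 'NAME', 'if') == ('Keyword', 'if')
    assert convert_leaf(table, 'NAME', 'foo') == ('Name', 'foo')
    assert convert_leaf(table, 'OP', ':') == ('Operator', ':')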

View File

@@ -21,7 +21,7 @@ from codecs import BOM_UTF8
 from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
                                 NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
                                 ERROR_DEDENT, FSTRING_STRING, FSTRING_START,
-                                FSTRING_END)
+                                FSTRING_END, OP)
 from parso._compatibility import py_version
 from parso.utils import split_lines
@@ -574,7 +574,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                 try:
                     # This check is needed in any case to check if it's a valid
                     # operator or just some random unicode character.
-                    typ = opmap[token]
+                    opmap[token]
+                    typ = OP
                 except KeyError:
                     typ = ERRORTOKEN
                 yield PythonToken(typ, token, spos, prefix)
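
In the tokenizer itself, opmap is kept only as a validity check. A sketch of the resulting classification (opmap's contents here are a made-up subset with invented ids):

    OP, ERRORTOKEN = 'OP', 'ERRORTOKEN'
    opmap = {':': 11, '(': 7, ')': 8}   # only membership matters now

    def classify(token):
        try:
            opmap[token]                # still rejects random unicode characters
            typ = OP                    # but the specific operator id is unused
        except KeyError:
            typ = ERRORTOKEN
        return typ

    assert classify(':') == OP
    assert classify('§') == ERRORTOKEN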