Mirror of https://github.com/davidhalter/parso.git, synced 2025-12-09 14:14:53 +08:00
Use token.OP and use reserved words
This change breaks the tokenizer's backwards compatibility a bit. The details of operators are now part of the parser, not the tokenizer. The parser does this work anyway, so we don't need the complexity in the tokenizer.
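In other words, the tokenizer now tags everything it finds in opmap with the single generic token.OP type, and the grammar decides what a given ':' or '(' means. CPython's stdlib tokenize module works the same way, which this standalone snippet (stdlib only, no parso) illustrates:

    import io
    import tokenize

    # Every operator comes back with the generic OP type; the precise
    # kind (COLON, LPAR, ...) is only available via exact_type.
    for tok in tokenize.generate_tokens(io.StringIO("x = (1 + 2)").readline):
        if tok.type == tokenize.OP:
            print(tok.string, tokenize.tok_name[tok.exact_type])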
@@ -59,7 +59,7 @@ class Grammar(object):
         self._nonterminal_to_dfas = rule_to_dfas

         self.labels = [(0, "EMPTY")]
-        self.keywords = {}
+        self.reserved_syntax_strings = {}
         self.tokens = {}
         self.start_nonterminal = start_nonterminal

@@ -104,7 +104,6 @@ class Grammar(object):

     #@_cache_labels
     def _make_label(self, label):
-        # XXX Maybe this should be a method on a subclass of converter?
         ilabel = len(self.labels)
         if label[0].isalpha():
             # Either a nonterminal name or a named token
@@ -124,23 +123,12 @@ class Grammar(object):
             assert label[0] in ('"', "'"), label
             # TODO use literal_eval instead of a simple eval.
             value = eval(label)
-            if value[0].isalpha():
-                # A keyword
-                if value in self.keywords:
-                    return self.keywords[value]
-                else:
-                    self.labels.append((token.NAME, value))
-                    self.keywords[value] = ilabel
-                    return ilabel
+            if value in self.reserved_syntax_strings:
+                return self.reserved_syntax_strings[value]
             else:
-                # An operator (any non-numeric token)
-                itoken = self._token_namespace.generate_token_id(value)
-                if itoken in self.tokens:
-                    return self.tokens[itoken]
-                else:
-                    self.labels.append((itoken, None))
-                    self.tokens[itoken] = ilabel
-                    return ilabel
+                self.labels.append((token.NAME, value))
+                self.reserved_syntax_strings[value] = ilabel
+                return self.reserved_syntax_strings[value]

     def _calculate_first_terminals(self, nonterminal):
         dfas = self._nonterminal_to_dfas[nonterminal]
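The rewritten branch above treats keywords and operators uniformly: any quoted string from the grammar becomes a reserved syntax string that is interned once and then reused. A minimal standalone sketch of that interning pattern (class and method names here are illustrative, not parso's):

    class LabelTable:
        def __init__(self):
            self.labels = []
            self.reserved_syntax_strings = {}

        def label_for(self, value):
            # Keywords ('if') and operators (':') take the same path now.
            try:
                return self.reserved_syntax_strings[value]
            except KeyError:
                ilabel = len(self.labels)
                self.labels.append(value)
                self.reserved_syntax_strings[value] = ilabel
                return ilabel

    table = LabelTable()
    assert table.label_for(':') == table.label_for(':')   # interned once
    assert table.label_for('if') != table.label_for(':')  # distinct labels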
@@ -30,7 +30,7 @@ class GrammarParser():

         # rule: NAME ':' rhs NEWLINE
         self._current_rule_name = self._expect(token.NAME)
-        self._expect(token.COLON)
+        self._expect(token.OP, ':')

         a, z = self._parse_rhs()
         self._expect(token.NEWLINE)
@@ -60,7 +60,7 @@ class GrammarParser():
     def _parse_items(self):
         # items: item+
         a, b = self._parse_item()
-        while self.type in (token.NAME, token.STRING, token.LPAR, token.LSQB):
+        while self.type in (token.NAME, token.STRING) or self.value in ('(', '['):
             c, d = self._parse_item()
             # Need to end on the next item.
             b.add_arc(c)
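Since '(' and '[' no longer arrive as distinct LPAR/LSQB token types, the loop has to compare string values instead. A tiny sketch with hypothetical simplified tokens shows what the new condition accepts:

    from collections import namedtuple

    # Hypothetical simplified token, mirroring the generic-OP world.
    Tok = namedtuple('Tok', ['type', 'value'])
    NAME, STRING, OP = 'NAME', 'STRING', 'OP'

    def starts_item(tok):
        return tok.type in (NAME, STRING) or tok.value in ('(', '[')

    assert starts_item(Tok(OP, '('))
    assert not starts_item(Tok(OP, ':'))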
@@ -72,7 +72,7 @@ class GrammarParser():
         if self.value == "[":
             self._gettoken()
             a, z = self._parse_rhs()
-            self._expect(token.RSQB)
+            self._expect(token.OP, ']')
             # Make it also possible that there is no token and change the
             # state.
             a.add_arc(z)
@@ -97,7 +97,7 @@ class GrammarParser():
         if self.value == "(":
             self._gettoken()
             a, z = self._parse_rhs()
-            self._expect(token.RPAR)
+            self._expect(token.OP, ')')
             return a, z
         elif self.type in (token.NAME, token.STRING):
             a = NFAState(self._current_rule_name)
@@ -110,10 +110,12 @@ class GrammarParser():
         self._raise_error("expected (...) or NAME or STRING, got %s/%s",
                           self.type, self.value)

-    def _expect(self, type):
+    def _expect(self, type, value=None):
         if self.type != type:
             self._raise_error("expected %s(%s), got %s(%s)",
                               type, token.tok_name[type], self.type, self.value)
+        if value is not None and self.value != value:
+            self._raise_error("expected %s, got %s", value, self.value)
         value = self.value
         self._gettoken()
         return value
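With the new value parameter, call sites can assert both the coarse token type and the exact operator string, as in self._expect(token.OP, ':') above. A standalone sketch of the two-level check (a simplified free function, not parso's method):

    def expect(current_type, current_value, type, value=None):
        if current_type != type:
            raise SyntaxError('expected %s, got %s' % (type, current_type))
        if value is not None and current_value != value:
            raise SyntaxError('expected %r, got %r' % (value, current_value))
        return current_value

    expect('OP', ':', 'OP', ':')    # passes
    # expect('OP', ';', 'OP', ':')  # right type, wrong value -> SyntaxError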
@@ -71,10 +71,10 @@ def token_to_ilabel(grammar, type_, value):
     # Map from token to label
     # TODO this is not good, shouldn't use tokenize.NAME, but somehow use the
     # grammar.
-    if type_ == tokenize.NAME:
+    if type_ in (tokenize.NAME, tokenize.OP):
         # Check for reserved words (keywords)
         try:
-            return grammar.keywords[value]
+            return grammar.reserved_syntax_strings[value]
         except KeyError:
             pass

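token_to_ilabel now funnels both NAME and OP tokens through the reserved-string table first and only then falls back to the per-type label. A condensed sketch of that lookup order (attribute names as in the hunks above; the fallback is simplified):

    def token_to_ilabel_sketch(grammar, type_, value, NAME='NAME', OP='OP'):
        # Reserved strings win: 'if' (a NAME) and ':' (an OP) both map
        # through grammar.reserved_syntax_strings.
        if type_ in (NAME, OP):
            try:
                return grammar.reserved_syntax_strings[value]
            except KeyError:
                pass
        # Otherwise fall back to the generic label for the token type.
        return grammar.tokens.get(type_)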
@@ -127,7 +127,7 @@ class Parser(BaseParser):
     def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
         # print('leaf', repr(value), token.tok_name[type])
         if type == NAME:
-            if value in pgen_grammar.keywords:
+            if value in pgen_grammar.reserved_syntax_strings:
                 return tree.Keyword(value, start_pos, prefix)
             else:
                 return tree.Name(value, start_pos, prefix)
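The effect is visible through parso's public API: a reserved word comes back as a Keyword leaf, an ordinary identifier as a Name leaf. A quick check (assuming a current parso install; the tree layout shown holds for this simple input):

    import parso

    module = parso.parse("if x:\n    pass\n")
    if_stmt = module.children[0]
    print(type(if_stmt.children[0]).__name__)  # Keyword ('if')
    print(type(if_stmt.children[1]).__name__)  # Name ('x')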
@@ -21,7 +21,7 @@ from codecs import BOM_UTF8
 from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
                                 NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
                                 ERROR_DEDENT, FSTRING_STRING, FSTRING_START,
-                                FSTRING_END)
+                                FSTRING_END, OP)
 from parso._compatibility import py_version
 from parso.utils import split_lines

@@ -574,7 +574,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                 try:
                     # This check is needed in any case to check if it's a valid
                     # operator or just some random unicode character.
-                    typ = opmap[token]
+                    opmap[token]
+                    typ = OP
                 except KeyError:
                     typ = ERRORTOKEN
                 yield PythonToken(typ, token, spos, prefix)
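After this change opmap only serves as a validity check; every recognized operator is emitted with the single OP type, and anything else becomes ERRORTOKEN. A standalone sketch of that classification step (OPERATORS is a stand-in for parso's opmap):

    OPERATORS = {'+', '-', '*', ':', '(', ')', '[', ']'}  # stand-in for opmap

    def classify(candidate):
        # Membership decides validity; the emitted type is always OP.
        return 'OP' if candidate in OPERATORS else 'ERRORTOKEN'

    assert classify(':') == 'OP'
    assert classify('§') == 'ERRORTOKEN'  # "some random unicode character"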