Introduce TokenTypes

Dave Halter
2018-06-24 16:24:09 +02:00
parent 6098d89150
commit 03de9cebb8
12 changed files with 117 additions and 169 deletions

View File

@@ -6,7 +6,7 @@ from parso.pgen2.pgen import generate_grammar
from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string
from parso.python.diff import DiffParser
from parso.python.tokenize import tokenize_lines, tokenize
from parso.python import token
from parso.python.token import PythonTokenTypes
from parso.cache import parser_cache, load_module, save_module
from parso.parser import BaseParser
from parso.python.parser import Parser as PythonParser
@@ -193,7 +193,7 @@ class Grammar(object):
class PythonGrammar(Grammar):
_error_normalizer_config = ErrorFinderConfig()
_token_namespace = token
_token_namespace = PythonTokenTypes
_start_nonterminal = 'file_input'
def __init__(self, version_info, bnf_text):

View File

@@ -16,9 +16,6 @@ fallback token code OP, but the parser needs the actual token code.
"""
from parso.python import token
class DFAPlan(object):
def __init__(self, next_dfa, dfa_pushes=[]):
self.next_dfa = next_dfa
@@ -111,7 +108,6 @@ class Grammar(object):
# A named token (e.g. NAME, NUMBER, STRING)
itoken = getattr(self._token_namespace, label, None)
assert isinstance(itoken, int), label
if itoken in self.tokens:
return self.tokens[itoken]
else:
@@ -126,7 +122,7 @@ class Grammar(object):
if value in self.reserved_syntax_strings:
return self.reserved_syntax_strings[value]
else:
self.labels.append((token.NAME, value))
self.labels.append(('XXX', value))
self.reserved_syntax_strings[value] = ilabel
return self.reserved_syntax_strings[value]
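With PythonTokenTypes as the token namespace, the pgen2 grammar resolves a named label to whatever object getattr returns, which is why the old isinstance(itoken, int) assertion was dropped. A minimal sketch of the idea, assuming the post-commit parso is importable; the resolve_label helper below is hypothetical and heavily simplified, not parso API:

    from parso.python.token import PythonTokenTypes

    def resolve_label(token_namespace, label):
        # Hypothetical, simplified form of the label lookup in pgen2/grammar.py.
        if label[0].isalpha():
            # A named token (e.g. NAME, NUMBER, STRING): now a TokenType
            # object rather than an int.
            return getattr(token_namespace, label)
        # Everything else ("'if'", "':'", ...) is a reserved syntax string.
        return label.strip('\'"')

    assert resolve_label(PythonTokenTypes, 'NAME') is PythonTokenTypes.NAME
    assert resolve_label(PythonTokenTypes, "':'") == ':'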

View File

@@ -5,9 +5,9 @@
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
from parso.python import tokenize
from parso.python.tokenize import tokenize
from parso.utils import parse_version_string
from parso.python import token
from parso.python.token import PythonTokenTypes
class GrammarParser():
@@ -16,7 +16,7 @@ class GrammarParser():
"""
def __init__(self, bnf_grammar):
self._bnf_grammar = bnf_grammar
self.generator = tokenize.tokenize(
self.generator = tokenize(
bnf_grammar,
version_info=parse_version_string('3.6')
)
@@ -24,16 +24,16 @@ class GrammarParser():
def parse(self):
# grammar: (NEWLINE | rule)* ENDMARKER
while self.type != token.ENDMARKER:
while self.type == token.NEWLINE:
while self.type != PythonTokenTypes.ENDMARKER:
while self.type == PythonTokenTypes.NEWLINE:
self._gettoken()
# rule: NAME ':' rhs NEWLINE
self._current_rule_name = self._expect(token.NAME)
self._expect(token.OP, ':')
self._current_rule_name = self._expect(PythonTokenTypes.NAME)
self._expect(PythonTokenTypes.OP, ':')
a, z = self._parse_rhs()
self._expect(token.NEWLINE)
self._expect(PythonTokenTypes.NEWLINE)
yield a, z
@@ -60,7 +60,8 @@ class GrammarParser():
def _parse_items(self):
# items: item+
a, b = self._parse_item()
while self.type in (token.NAME, token.STRING) or self.value in ('(', '['):
while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \
or self.value in ('(', '['):
c, d = self._parse_item()
# Need to end on the next item.
b.add_arc(c)
@@ -72,7 +73,7 @@ class GrammarParser():
if self.value == "[":
self._gettoken()
a, z = self._parse_rhs()
self._expect(token.OP, ']')
self._expect(PythonTokenTypes.OP, ']')
# Make it also possible that there is no token and change the
# state.
a.add_arc(z)
@@ -97,9 +98,9 @@ class GrammarParser():
if self.value == "(":
self._gettoken()
a, z = self._parse_rhs()
self._expect(token.OP, ')')
self._expect(PythonTokenTypes.OP, ')')
return a, z
elif self.type in (token.NAME, token.STRING):
elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING):
a = NFAState(self._current_rule_name)
z = NFAState(self._current_rule_name)
# Make it clear that the state transition requires that value.
@@ -110,10 +111,10 @@ class GrammarParser():
self._raise_error("expected (...) or NAME or STRING, got %s/%s",
self.type, self.value)
def _expect(self, type, value=None):
if self.type != type:
self._raise_error("expected %s(%s), got %s(%s)",
type, token.tok_name[type], self.type, self.value)
def _expect(self, type_, value=None):
if self.type != type_:
self._raise_error("expected %s, got %s [%s]",
type_, self.type, self.value)
if value is not None and self.value != value:
self._raise_error("expected %s, got %s", value, self.value)
value = self.value

View File

@@ -14,8 +14,6 @@ See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.
"""
from parso.python import tokenize
class InternalParseError(Exception):
"""
@@ -24,9 +22,9 @@ class InternalParseError(Exception):
wrong.
"""
def __init__(self, msg, type, value, start_pos):
def __init__(self, msg, type_, value, start_pos):
Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
(msg, tokenize.tok_name[type], value, start_pos))
(msg, type_.name, value, start_pos))
self.msg = msg
self.type = type
self.value = value
@@ -69,9 +67,7 @@ class StackNode(object):
def token_to_ilabel(grammar, type_, value):
# Map from token to label
# TODO this is not good, shouldn't use tokenize.NAME, but somehow use the
# grammar.
if type_ in (tokenize.NAME, tokenize.OP):
if type_.contains_syntax:
# Check for reserved words (keywords)
try:
return grammar.reserved_syntax_strings[value]
@@ -196,6 +192,7 @@ class PgenParser(object):
# creating a new node. We still create expr_stmt and
# file_input though, because a lot of Jedi depends on its
# logic.
print(tos.nodes)
if len(tos.nodes) == 1:
new_node = tos.nodes[0]
else:

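The removed TODO in token_to_ilabel hard-coded tokenize.NAME and tokenize.OP; after this change the type itself answers that question through contains_syntax. A sketch of the whole function under that reading, assuming the usual pgen2 shape for the fallback (grammar.tokens keyed by token type), which is not visible in the hunk above:

    def token_to_ilabel(grammar, type_, value):
        # Map a (token type, value) pair to a grammar label.
        if type_.contains_syntax:
            # Only NAME and OP carry syntax: check keywords/operators first.
            try:
                return grammar.reserved_syntax_strings[value]
            except KeyError:
                pass
        # Fall back to the plain token type (assumed to be the key of
        # grammar.tokens after this commit).
        try:
            return grammar.tokens[type_]
        except KeyError:
            return None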
View File

@@ -13,8 +13,8 @@ import logging
from parso.utils import split_lines
from parso.python.parser import Parser
from parso.python.tree import EndMarker
from parso.python.tokenize import (NEWLINE, PythonToken, ERROR_DEDENT,
ENDMARKER, INDENT, DEDENT)
from parso.python.tokenize import PythonToken
from parso.python.token import PythonTokenTypes
LOG = logging.getLogger(__name__)
@@ -29,7 +29,7 @@ def _get_last_line(node_or_leaf):
def _ends_with_newline(leaf, suffix=''):
if leaf.type == 'error_leaf':
typ = leaf.original_type
typ = leaf.token_type.lower()
else:
typ = leaf.type
@@ -167,8 +167,7 @@ class DiffParser(object):
def _enabled_debugging(self, old_lines, lines_new):
if self._module.get_code() != ''.join(lines_new):
LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines),
''.join(lines_new))
LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new))
def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
copied_nodes = [None]
@@ -272,7 +271,6 @@ class DiffParser(object):
# memoryview?
parsed_until_line = self._nodes_stack.parsed_until_line
lines_after = self._parser_lines_new[parsed_until_line:]
#print('parse_content', parsed_until_line, lines_after, until_line)
tokens = self._diff_tokenize(
lines_after,
until_line,
@@ -292,7 +290,7 @@ class DiffParser(object):
stack = self._active_parser.pgen_parser.stack
for typ, string, start_pos, prefix in tokens:
start_pos = start_pos[0] + line_offset, start_pos[1]
if typ == INDENT:
if typ == PythonTokenTypes.INDENT:
indents.append(start_pos[1])
if is_first_token:
omitted_first_indent = True
@@ -305,8 +303,9 @@ class DiffParser(object):
# In case of omitted_first_indent, it might not be dedented fully.
# However this is a sign for us that a dedent happened.
if typ == DEDENT \
or typ == ERROR_DEDENT and omitted_first_indent and len(indents) == 1:
if typ == PythonTokenTypes.DEDENT \
or typ == PythonTokenTypes.ERROR_DEDENT \
and omitted_first_indent and len(indents) == 1:
indents.pop()
if omitted_first_indent and not indents:
# We are done here, only thing that can come now is an
@@ -316,18 +315,22 @@ class DiffParser(object):
prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix)
else:
prefix = ''
yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
yield PythonToken(
PythonTokenTypes.ENDMARKER, '',
(start_pos[0] + line_offset, 0),
prefix
)
break
elif typ == NEWLINE and start_pos[0] >= until_line:
elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line:
yield PythonToken(typ, string, start_pos, prefix)
# Check if the parser is actually in a valid suite state.
if suite_or_file_input_is_valid(self._pgen_grammar, stack):
start_pos = start_pos[0] + 1, 0
while len(indents) > int(omitted_first_indent):
indents.pop()
yield PythonToken(DEDENT, '', start_pos, '')
yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '')
yield PythonToken(ENDMARKER, '', start_pos, '')
yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '')
break
else:
continue

View File

@@ -306,12 +306,12 @@ class ErrorFinder(Normalizer):
def visit_leaf(self, leaf):
if leaf.type == 'error_leaf':
if leaf.original_type in ('indent', 'error_dedent'):
if leaf.token_type in ('INDENT', 'ERROR_DEDENT'):
# Indents/Dedents itself never have a prefix. They are just
# "pseudo" tokens that get removed by the syntax tree later.
# Therefore in case of an error we also have to check for this.
spacing = list(leaf.get_next_leaf()._split_prefix())[-1]
if leaf.original_type == 'indent':
if leaf.token_type == 'INDENT':
message = 'unexpected indent'
else:
message = 'unindent does not match any outer indentation level'

View File

@@ -1,7 +1,5 @@
from parso.python import tree
from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
STRING, tok_name, NAME, FSTRING_STRING,
FSTRING_START, FSTRING_END)
from parso.python.token import PythonTokenTypes
from parso.parser import BaseParser
from parso.pgen2.parse import token_to_ilabel
@@ -53,17 +51,18 @@ class Parser(BaseParser):
# Names/Keywords are handled separately
_leaf_map = {
STRING: tree.String,
NUMBER: tree.Number,
NEWLINE: tree.Newline,
ENDMARKER: tree.EndMarker,
FSTRING_STRING: tree.FStringString,
FSTRING_START: tree.FStringStart,
FSTRING_END: tree.FStringEnd,
PythonTokenTypes.STRING: tree.String,
PythonTokenTypes.NUMBER: tree.Number,
PythonTokenTypes.NEWLINE: tree.Newline,
PythonTokenTypes.ENDMARKER: tree.EndMarker,
PythonTokenTypes.FSTRING_STRING: tree.FStringString,
PythonTokenTypes.FSTRING_START: tree.FStringStart,
PythonTokenTypes.FSTRING_END: tree.FStringEnd,
}
def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
super(Parser, self).__init__(pgen_grammar, start_nonterminal, error_recovery=error_recovery)
super(Parser, self).__init__(pgen_grammar, start_nonterminal,
error_recovery=error_recovery)
self.syntax_errors = []
self._omit_dedent_list = []
@@ -126,7 +125,7 @@ class Parser(BaseParser):
def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
# print('leaf', repr(value), token.tok_name[type])
if type == NAME:
if type == PythonTokenTypes.NAME:
if value in pgen_grammar.reserved_syntax_strings:
return tree.Keyword(value, start_pos, prefix)
else:
@@ -143,7 +142,8 @@ class Parser(BaseParser):
last_leaf = None
if self._start_nonterminal == 'file_input' and \
(typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value):
(typ == PythonTokenTypes.ENDMARKER or
typ == PythonTokenTypes.DEDENT and '\n' not in last_leaf.value):
def reduce_stack(states, newstate):
# reduce
state = newstate
@@ -158,7 +158,7 @@ class Parser(BaseParser):
# end of a file, we have to recover even if the user doesn't want
# error recovery.
if stack[-1].dfa.from_rule == 'simple_stmt':
ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)
ilabel = token_to_ilabel(pgen_grammar, PythonTokenTypes.NEWLINE, value)
try:
plan = stack[-1].dfa.ilabel_to_plan[ilabel]
except KeyError:
@@ -199,12 +199,12 @@ class Parser(BaseParser):
if self._stack_removal(stack, until_index + 1):
add_token_callback(typ, value, start_pos, prefix)
else:
if typ == INDENT:
if typ == PythonTokenTypes.INDENT:
# For every deleted INDENT we have to delete a DEDENT as well.
# Otherwise the parser will get into trouble and DEDENT too early.
self._omit_dedent_list.append(self._indent_counter)
error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
stack[-1].nodes.append(error_leaf)
tos = stack[-1]
@@ -230,7 +230,7 @@ class Parser(BaseParser):
def _recovery_tokenize(self, tokens):
for typ, value, start_pos, prefix in tokens:
# print(tok_name[typ], repr(value), start_pos, repr(prefix))
if typ == DEDENT:
if typ == PythonTokenTypes.DEDENT:
# We need to count indents, because if we just omit any DEDENT,
# we might omit them in the wrong place.
o = self._omit_dedent_list
@@ -239,6 +239,6 @@ class Parser(BaseParser):
continue
self._indent_counter -= 1
elif typ == INDENT:
elif typ == PythonTokenTypes.INDENT:
self._indent_counter += 1
yield typ, value, start_pos, prefix
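Because _leaf_map is now keyed by TokenType objects, convert_leaf can dispatch on the type directly. A rough sketch of that method body, relying on the imports shown in this file's diff; only the NAME branch is visible in the hunk above, and the tree.Operator fallback is an assumption:

    def convert_leaf(self, pgen_grammar, type_, value, prefix, start_pos):
        if type_ == PythonTokenTypes.NAME:
            # Keywords and plain names share the NAME token type.
            if value in pgen_grammar.reserved_syntax_strings:
                return tree.Keyword(value, start_pos, prefix)
            return tree.Name(value, start_pos, prefix)
        # STRING, NUMBER, NEWLINE, ... are looked up by their TokenType;
        # the Operator fallback is assumed, not shown in the diff.
        return self._leaf_map.get(type_, tree.Operator)(value, start_pos, prefix)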

View File

@@ -1,47 +1,4 @@
from __future__ import absolute_import
from itertools import count
from token import *
from parso._compatibility import py_version
# Don't mutate the standard library dict
tok_name = tok_name.copy()
_counter = count(N_TOKENS)
# Never want to see this thing again.
del N_TOKENS
COMMENT = next(_counter)
tok_name[COMMENT] = 'COMMENT'
NL = next(_counter)
tok_name[NL] = 'NL'
# Sets the attributes that don't exist in these tok_name versions.
if py_version >= 30:
BACKQUOTE = next(_counter)
tok_name[BACKQUOTE] = 'BACKQUOTE'
else:
RARROW = next(_counter)
tok_name[RARROW] = 'RARROW'
ELLIPSIS = next(_counter)
tok_name[ELLIPSIS] = 'ELLIPSIS'
if py_version < 35:
ATEQUAL = next(_counter)
tok_name[ATEQUAL] = 'ATEQUAL'
ERROR_DEDENT = next(_counter)
tok_name[ERROR_DEDENT] = 'ERROR_DEDENT'
FSTRING_START = next(_counter)
tok_name[FSTRING_START] = 'FSTRING_START'
FSTRING_END = next(_counter)
tok_name[FSTRING_END] = 'FSTRING_END'
FSTRING_STRING = next(_counter)
tok_name[FSTRING_STRING] = 'FSTRING_STRING'
EXCLAMATION = next(_counter)
tok_name[EXCLAMATION] = 'EXCLAMATION'
# Map from operator to number (since tokenize doesn't do this)
@@ -100,7 +57,7 @@ opmap_raw = """\
opmap = {}
for line in opmap_raw.splitlines():
op, name = line.split()
opmap[op] = globals()[name]
opmap[op] = name
def generate_token_id(string):
@@ -115,26 +72,25 @@ def generate_token_id(string):
return globals()[string]
class Token(object):
def __init__(self, name):
class TokenType(object):
def __init__(self, name, contains_syntax=False):
self.name = name
self.contains_syntax = contains_syntax
def __repr__(self):
return '%s(%s)' % (self.__class__.__name__, self.name)
class Tokens(object):
class TokenTypes(object):
"""
Basically an enum, but Python 2 doesn't have enums in the standard library.
"""
def __init__(self, names, contains_syntax):
for name in names:
setattr(self, name, Token(name))
self.contains_syntax = [getattr(self, name) for name in contains_syntax]
setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax))
PythonTokens = Tokens((
PythonTokenTypes = TokenTypes((
'STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT',
'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP',
'ENDMARKER'),

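Taken together, TokenType and TokenTypes act as a small enum substitute. Roughly how the new objects are meant to be used, assuming the contains_syntax tuple passed to PythonTokenTypes (cut off above) is ('NAME', 'OP'), which matches the contains_syntax check in pgen2/parse.py:

    from parso.python.token import PythonTokenTypes

    name = PythonTokenTypes.NAME                     # a TokenType, not an int
    print(name)                                      # TokenType(NAME)
    print(name.name)                                 # NAME
    print(name.contains_syntax)                      # True: NAME can be a keyword
    print(PythonTokenTypes.NUMBER.contains_syntax)   # False

    # TokenType instances are plain objects, so they hash by identity and can
    # be used directly as dict keys (e.g. the parser's _leaf_map).
    leaf_map = {PythonTokenTypes.ENDMARKER: 'EndMarker'}
    assert leaf_map[PythonTokenTypes.ENDMARKER] == 'EndMarker'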
View File

@@ -18,10 +18,7 @@ from collections import namedtuple
import itertools as _itertools
from codecs import BOM_UTF8
from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
ERROR_DEDENT, FSTRING_STRING, FSTRING_START,
FSTRING_END, OP)
from parso.python.token import PythonTokenTypes, opmap
from parso._compatibility import py_version
from parso.utils import split_lines
@@ -242,12 +239,9 @@ class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
class PythonToken(Token):
def _get_type_name(self, exact=True):
return tok_name[self.type]
def __repr__(self):
return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
self._replace(type=self._get_type_name()))
self._replace(type=self.type.name))
class FStringNode(object):
@@ -396,7 +390,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
endmatch = endprog.match(line)
if endmatch:
pos = endmatch.end(0)
yield PythonToken(STRING, contstr + line[:pos], contstr_start, prefix)
yield PythonToken(
PythonTokenTypes.STRING, contstr + line[:pos],
contstr_start, prefix)
contstr = ''
contline = None
else:
@@ -409,7 +405,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
string, pos = _find_fstring_string(fstring_stack, line, lnum, pos)
if string:
yield PythonToken(
FSTRING_STRING, string,
PythonTokenTypes.FSTRING_STRING, string,
fstring_stack[-1].last_string_start_pos,
# Never has a prefix because it can start anywhere and
# include whitespace.
@@ -426,7 +422,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
if fstring_index is not None:
yield PythonToken(
FSTRING_END,
PythonTokenTypes.FSTRING_END,
fstring_stack[fstring_index].quote,
(lnum, pos),
prefix=additional_prefix,
@@ -443,7 +439,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
match = whitespace.match(line, pos)
pos = match.end()
yield PythonToken(
ERRORTOKEN, line[pos:], (lnum, pos),
PythonTokenTypes.ERRORTOKEN, line[pos:], (lnum, pos),
additional_prefix + match.group(0)
)
additional_prefix = ''
@@ -471,24 +467,24 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
# TODO don't we need to change spos as well?
start -= 1
if start > indents[-1]:
yield PythonToken(INDENT, '', spos, '')
yield PythonToken(PythonTokenTypes.INDENT, '', spos, '')
indents.append(start)
while start < indents[-1]:
if start > indents[-2]:
yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
yield PythonToken(PythonTokenTypes.ERROR_DEDENT, '', (lnum, 0), '')
break
yield PythonToken(DEDENT, '', spos, '')
yield PythonToken(PythonTokenTypes.DEDENT, '', spos, '')
indents.pop()
if fstring_stack:
fstring_index, end = _check_fstring_ending(fstring_stack, token)
if fstring_index is not None:
if end != 0:
yield PythonToken(ERRORTOKEN, token[:end], spos, prefix)
yield PythonToken(PythonTokenTypes.ERRORTOKEN, token[:end], spos, prefix)
prefix = ''
yield PythonToken(
FSTRING_END,
PythonTokenTypes.FSTRING_END,
fstring_stack[fstring_index].quote,
(lnum, spos[1] + 1),
prefix=prefix
@@ -499,7 +495,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
if (initial in numchars or # ordinary number
(initial == '.' and token != '.' and token != '...')):
yield PythonToken(NUMBER, token, spos, prefix)
yield PythonToken(PythonTokenTypes.NUMBER, token, spos, prefix)
elif initial in '\r\n':
if any(not f.allow_multiline() for f in fstring_stack):
# Would use fstring_stack.clear, but that's not available
@@ -507,7 +503,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
fstring_stack[:] = []
if not new_line and paren_level == 0 and not fstring_stack:
yield PythonToken(NEWLINE, token, spos, prefix)
yield PythonToken(PythonTokenTypes.NEWLINE, token, spos, prefix)
else:
additional_prefix = prefix + token
new_line = True
@@ -520,7 +516,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
if endmatch: # all on one line
pos = endmatch.end(0)
token = line[start:pos]
yield PythonToken(STRING, token, spos, prefix)
yield PythonToken(PythonTokenTypes.STRING, token, spos, prefix)
else:
contstr_start = (lnum, start) # multiple lines
contstr = line[start:]
@@ -537,10 +533,10 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
contline = line
break
else: # ordinary string
yield PythonToken(STRING, token, spos, prefix)
yield PythonToken(PythonTokenTypes.STRING, token, spos, prefix)
elif token in fstring_pattern_map: # The start of an fstring.
fstring_stack.append(FStringNode(fstring_pattern_map[token]))
yield PythonToken(FSTRING_START, token, spos, prefix)
yield PythonToken(PythonTokenTypes.FSTRING_START, token, spos, prefix)
elif is_identifier(initial): # ordinary name
if token in always_break_tokens:
fstring_stack[:] = []
@@ -548,11 +544,11 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
while True:
indent = indents.pop()
if indent > start:
yield PythonToken(DEDENT, '', spos, '')
yield PythonToken(PythonTokenTypes.DEDENT, '', spos, '')
else:
indents.append(indent)
break
yield PythonToken(NAME, token, spos, prefix)
yield PythonToken(PythonTokenTypes.NAME, token, spos, prefix)
elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt
additional_prefix += prefix + line[start:]
break
@@ -575,13 +571,13 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
# This check is needed in any case to check if it's a valid
# operator or just some random unicode character.
opmap[token]
typ = OP
typ = PythonTokenTypes.OP
except KeyError:
typ = ERRORTOKEN
typ = PythonTokenTypes.ERRORTOKEN
yield PythonToken(typ, token, spos, prefix)
if contstr:
yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
yield PythonToken(PythonTokenTypes.ERRORTOKEN, contstr, contstr_start, prefix)
if contstr.endswith('\n'):
new_line = True
@@ -589,8 +585,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
# As the last position we just take the maximally possible position. We
# remove -1 for the last new line.
for indent in indents[1:]:
yield PythonToken(DEDENT, '', end_pos, '')
yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)
yield PythonToken(PythonTokenTypes.DEDENT, '', end_pos, '')
yield PythonToken(PythonTokenTypes.ENDMARKER, '', end_pos, additional_prefix)
if __name__ == "__main__":

View File

@@ -124,7 +124,7 @@ class PythonLeaf(PythonMixin, Leaf):
# indent error leafs somehow? No idea how, though.
previous_leaf = self.get_previous_leaf()
if previous_leaf is not None and previous_leaf.type == 'error_leaf' \
and previous_leaf.original_type in ('indent', 'error_dedent'):
and previous_leaf.token_type in ('INDENT', 'ERROR_DEDENT'):
previous_leaf = previous_leaf.get_previous_leaf()
if previous_leaf is None:

View File

@@ -229,6 +229,7 @@ class Leaf(NodeOrLeaf):
class TypedLeaf(Leaf):
__slots__ = ('type',)
def __init__(self, type, value, start_pos, prefix=''):
super(TypedLeaf, self).__init__(value, start_pos, prefix)
self.type = type
@@ -351,13 +352,13 @@ class ErrorLeaf(Leaf):
A leaf that is either completely invalid in a language (like `$` in Python)
or is invalid at that position. Like the star in `1 +* 1`.
"""
__slots__ = ('original_type',)
__slots__ = ('token_type',)
type = 'error_leaf'
def __init__(self, original_type, value, start_pos, prefix=''):
def __init__(self, token_type, value, start_pos, prefix=''):
super(ErrorLeaf, self).__init__(value, start_pos, prefix)
self.original_type = original_type
self.token_type = token_type
def __repr__(self):
return "<%s: %s:%s, %s>" % \
(type(self).__name__, self.original_type, repr(self.value), self.start_pos)
(type(self).__name__, self.token_type, repr(self.value), self.start_pos)
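ErrorLeaf keeps the same behaviour; only the attribute name changes from original_type to token_type, and callers (see python/parser.py above) now store the upper-case token name there. A small illustration, constructing one directly; in parso itself these leaves are created during error recovery, usually as PythonErrorLeaf:

    from parso.tree import ErrorLeaf

    leaf = ErrorLeaf('INDENT', '    ', (1, 0))
    print(leaf.type)        # error_leaf
    print(leaf.token_type)  # INDENT (was leaf.original_type before this commit)
    print(repr(leaf))       # <ErrorLeaf: INDENT:'    ', (1, 0)>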

View File

@@ -1,20 +1,29 @@
# -*- coding: utf-8 # This file contains Unicode characters.
from textwrap import dedent
import tokenize as stdlib_tokenize
import pytest
from parso._compatibility import py_version
from parso.utils import split_lines, parse_version_string
from parso.python.token import (
NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT,
FSTRING_START)
from parso.python.token import PythonTokenTypes
from parso.python import tokenize
from parso import parse
from parso.python.tokenize import PythonToken
# To make it easier to access some of the token types, just put them here.
NAME = PythonTokenTypes.NAME
NEWLINE = PythonTokenTypes.NEWLINE
STRING = PythonTokenTypes.STRING
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
ENDMARKER = PythonTokenTypes.ENDMARKER
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
FSTRING_START = PythonTokenTypes.FSTRING_START
def _get_token_list(string):
# Load the current version.
version_info = parse_version_string()
@@ -128,7 +137,7 @@ def test_identifier_contains_unicode():
else:
# Unicode tokens in Python 2 seem to be identified as operators.
# They will be ignored in the parser, that's ok.
assert unicode_token[0] == tokenize.ERRORTOKEN
assert unicode_token[0] == ERRORTOKEN
def test_quoted_strings():
@@ -188,17 +197,17 @@ def test_ur_literals():
def test_error_literal():
error_token, endmarker = _get_token_list('"\n')
assert error_token.type == tokenize.ERRORTOKEN
assert error_token.type == ERRORTOKEN
assert endmarker.prefix == ''
assert error_token.string == '"\n'
assert endmarker.type == tokenize.ENDMARKER
assert endmarker.type == ENDMARKER
assert endmarker.prefix == ''
bracket, error_token, endmarker = _get_token_list('( """')
assert error_token.type == tokenize.ERRORTOKEN
assert error_token.type == ERRORTOKEN
assert error_token.prefix == ' '
assert error_token.string == '"""'
assert endmarker.type == tokenize.ENDMARKER
assert endmarker.type == ENDMARKER
assert endmarker.prefix == ''
@@ -236,14 +245,3 @@ def test_error_string():
assert t1.prefix == ' '
assert t1.string == '"\n'
assert endmarker.string == ''
def test_tok_name_copied():
# Make sure parso doesn't mutate the standard library
tok_len = len(stdlib_tokenize.tok_name)
correct_len = stdlib_tokenize.N_TOKENS
if 'N_TOKENS' in stdlib_tokenize.tok_name.values(): # Python 3.7
correct_len += 1
if 'NT_OFFSET' in stdlib_tokenize.tok_name.values(): # Not there in PyPy
correct_len += 1
assert tok_len == correct_len