Introduce TokenTypes

Dave Halter
2018-06-24 16:24:09 +02:00
parent 6098d89150
commit 03de9cebb8
12 changed files with 117 additions and 169 deletions

View File

@@ -6,7 +6,7 @@ from parso.pgen2.pgen import generate_grammar
 from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string
 from parso.python.diff import DiffParser
 from parso.python.tokenize import tokenize_lines, tokenize
-from parso.python import token
+from parso.python.token import PythonTokenTypes
 from parso.cache import parser_cache, load_module, save_module
 from parso.parser import BaseParser
 from parso.python.parser import Parser as PythonParser
@@ -193,7 +193,7 @@ class Grammar(object):
 class PythonGrammar(Grammar):
     _error_normalizer_config = ErrorFinderConfig()
-    _token_namespace = token
+    _token_namespace = PythonTokenTypes
     _start_nonterminal = 'file_input'
     def __init__(self, version_info, bnf_text):

View File

@@ -16,9 +16,6 @@ fallback token code OP, but the parser needs the actual token code.
 """
-from parso.python import token
 class DFAPlan(object):
     def __init__(self, next_dfa, dfa_pushes=[]):
         self.next_dfa = next_dfa
@@ -111,7 +108,6 @@ class Grammar(object):
            # A named token (e.g. NAME, NUMBER, STRING)
            itoken = getattr(self._token_namespace, label, None)
-           assert isinstance(itoken, int), label
            if itoken in self.tokens:
                return self.tokens[itoken]
            else:
@@ -126,7 +122,7 @@ class Grammar(object):
            if value in self.reserved_syntax_strings:
                return self.reserved_syntax_strings[value]
            else:
-               self.labels.append((token.NAME, value))
+               self.labels.append(('XXX', value))
                self.reserved_syntax_strings[value] = ilabel
                return self.reserved_syntax_strings[value]

View File

@@ -5,9 +5,9 @@
 # Copyright David Halter and Contributors
 # Modifications are dual-licensed: MIT and PSF.
-from parso.python import tokenize
+from parso.python.tokenize import tokenize
 from parso.utils import parse_version_string
-from parso.python import token
+from parso.python.token import PythonTokenTypes
 class GrammarParser():
@@ -16,7 +16,7 @@ class GrammarParser():
     """
     def __init__(self, bnf_grammar):
         self._bnf_grammar = bnf_grammar
-        self.generator = tokenize.tokenize(
+        self.generator = tokenize(
             bnf_grammar,
             version_info=parse_version_string('3.6')
         )
@@ -24,16 +24,16 @@ class GrammarParser():
     def parse(self):
         # grammar: (NEWLINE | rule)* ENDMARKER
-        while self.type != token.ENDMARKER:
-            while self.type == token.NEWLINE:
+        while self.type != PythonTokenTypes.ENDMARKER:
+            while self.type == PythonTokenTypes.NEWLINE:
                 self._gettoken()
             # rule: NAME ':' rhs NEWLINE
-            self._current_rule_name = self._expect(token.NAME)
-            self._expect(token.OP, ':')
+            self._current_rule_name = self._expect(PythonTokenTypes.NAME)
+            self._expect(PythonTokenTypes.OP, ':')
             a, z = self._parse_rhs()
-            self._expect(token.NEWLINE)
+            self._expect(PythonTokenTypes.NEWLINE)
             yield a, z
@@ -60,7 +60,8 @@ class GrammarParser():
     def _parse_items(self):
         # items: item+
         a, b = self._parse_item()
-        while self.type in (token.NAME, token.STRING) or self.value in ('(', '['):
+        while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \
+                or self.value in ('(', '['):
             c, d = self._parse_item()
             # Need to end on the next item.
             b.add_arc(c)
@@ -72,7 +73,7 @@ class GrammarParser():
         if self.value == "[":
             self._gettoken()
             a, z = self._parse_rhs()
-            self._expect(token.OP, ']')
+            self._expect(PythonTokenTypes.OP, ']')
             # Make it also possible that there is no token and change the
             # state.
             a.add_arc(z)
@@ -97,9 +98,9 @@ class GrammarParser():
         if self.value == "(":
             self._gettoken()
             a, z = self._parse_rhs()
-            self._expect(token.OP, ')')
+            self._expect(PythonTokenTypes.OP, ')')
             return a, z
-        elif self.type in (token.NAME, token.STRING):
+        elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING):
             a = NFAState(self._current_rule_name)
             z = NFAState(self._current_rule_name)
             # Make it clear that the state transition requires that value.
@@ -110,10 +111,10 @@ class GrammarParser():
             self._raise_error("expected (...) or NAME or STRING, got %s/%s",
                               self.type, self.value)
-    def _expect(self, type, value=None):
-        if self.type != type:
-            self._raise_error("expected %s(%s), got %s(%s)",
-                              type, token.tok_name[type], self.type, self.value)
+    def _expect(self, type_, value=None):
+        if self.type != type_:
+            self._raise_error("expected %s, got %s [%s]",
+                              type_, self.type, self.value)
         if value is not None and self.value != value:
             self._raise_error("expected %s, got %s", value, self.value)
         value = self.value

View File

@@ -14,8 +14,6 @@ See Parser/parser.c in the Python distribution for additional info on
 how this parsing engine works.
 """
-from parso.python import tokenize
 class InternalParseError(Exception):
     """
@@ -24,9 +22,9 @@ class InternalParseError(Exception):
     wrong.
     """
-    def __init__(self, msg, type, value, start_pos):
+    def __init__(self, msg, type_, value, start_pos):
         Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
-                           (msg, tokenize.tok_name[type], value, start_pos))
+                           (msg, type_.name, value, start_pos))
         self.msg = msg
         self.type = type
         self.value = value
@@ -69,9 +67,7 @@ class StackNode(object):
 def token_to_ilabel(grammar, type_, value):
     # Map from token to label
-    # TODO this is not good, shouldn't use tokenize.NAME, but somehow use the
-    # grammar.
-    if type_ in (tokenize.NAME, tokenize.OP):
+    if type_.contains_syntax:
         # Check for reserved words (keywords)
         try:
             return grammar.reserved_syntax_strings[value]
@@ -196,6 +192,7 @@ class PgenParser(object):
        # creating a new node. We still create expr_stmt and
        # file_input though, because a lot of Jedi depends on its
        # logic.
+       print(tos.nodes)
        if len(tos.nodes) == 1:
            new_node = tos.nodes[0]
        else:
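
As a rough orientation for the `contains_syntax` change above: NAME and OP tokens can carry reserved syntax (keywords and operators), so they are looked up by value first. The fall-through to `grammar.tokens` after the keyword lookup is not part of the hunk shown here, so the sketch below is a simplified stand-in, not the exact parso code:

    # Hedged sketch of the lookup logic after this commit.
    def token_to_ilabel_sketch(grammar, type_, value):
        if type_.contains_syntax:   # previously: type_ in (tokenize.NAME, tokenize.OP)
            try:
                # Reserved words and operators, e.g. 'if', 'def', '+', ':'
                return grammar.reserved_syntax_strings[value]
            except KeyError:
                pass
        try:
            # Assumed fall-through for plain token types (NUMBER, NEWLINE, ...);
            # this part is not visible in the diff above.
            return grammar.tokens[type_]
        except KeyError:
            return None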

View File

@@ -13,8 +13,8 @@ import logging
 from parso.utils import split_lines
 from parso.python.parser import Parser
 from parso.python.tree import EndMarker
-from parso.python.tokenize import (NEWLINE, PythonToken, ERROR_DEDENT,
-                                   ENDMARKER, INDENT, DEDENT)
+from parso.python.tokenize import PythonToken
+from parso.python.token import PythonTokenTypes
 LOG = logging.getLogger(__name__)
@@ -29,7 +29,7 @@ def _get_last_line(node_or_leaf):
 def _ends_with_newline(leaf, suffix=''):
     if leaf.type == 'error_leaf':
-        typ = leaf.original_type
+        typ = leaf.token_type.lower()
     else:
         typ = leaf.type
@@ -167,8 +167,7 @@ class DiffParser(object):
     def _enabled_debugging(self, old_lines, lines_new):
         if self._module.get_code() != ''.join(lines_new):
-            LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines),
-                        ''.join(lines_new))
+            LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new))
     def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
         copied_nodes = [None]
@@ -272,7 +271,6 @@ class DiffParser(object):
         # memoryview?
         parsed_until_line = self._nodes_stack.parsed_until_line
         lines_after = self._parser_lines_new[parsed_until_line:]
-        #print('parse_content', parsed_until_line, lines_after, until_line)
         tokens = self._diff_tokenize(
             lines_after,
             until_line,
@@ -292,7 +290,7 @@ class DiffParser(object):
         stack = self._active_parser.pgen_parser.stack
         for typ, string, start_pos, prefix in tokens:
             start_pos = start_pos[0] + line_offset, start_pos[1]
-            if typ == INDENT:
+            if typ == PythonTokenTypes.INDENT:
                 indents.append(start_pos[1])
                 if is_first_token:
                     omitted_first_indent = True
@@ -305,8 +303,9 @@ class DiffParser(object):
            # In case of omitted_first_indent, it might not be dedented fully.
            # However this is a sign for us that a dedent happened.
-           if typ == DEDENT \
-                   or typ == ERROR_DEDENT and omitted_first_indent and len(indents) == 1:
+           if typ == PythonTokenTypes.DEDENT \
+                   or typ == PythonTokenTypes.ERROR_DEDENT \
+                   and omitted_first_indent and len(indents) == 1:
                indents.pop()
                if omitted_first_indent and not indents:
                    # We are done here, only thing that can come now is an
@@ -316,18 +315,22 @@ class DiffParser(object):
                        prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix)
                    else:
                        prefix = ''
-                   yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
+                   yield PythonToken(
+                       PythonTokenTypes.ENDMARKER, '',
+                       (start_pos[0] + line_offset, 0),
+                       prefix
+                   )
                    break
-           elif typ == NEWLINE and start_pos[0] >= until_line:
+           elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line:
                yield PythonToken(typ, string, start_pos, prefix)
                # Check if the parser is actually in a valid suite state.
                if suite_or_file_input_is_valid(self._pgen_grammar, stack):
                    start_pos = start_pos[0] + 1, 0
                    while len(indents) > int(omitted_first_indent):
                        indents.pop()
-                       yield PythonToken(DEDENT, '', start_pos, '')
-                   yield PythonToken(ENDMARKER, '', start_pos, '')
+                       yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '')
+                   yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '')
                    break
                else:
                    continue
@@ -536,7 +539,7 @@ class _NodesStack(object):
            line_offset_index = -2
        elif (new_nodes[-1].type in ('error_leaf', 'error_node') or
              _is_flow_node(new_nodes[-1])):
            # Error leafs/nodes don't have a defined start/end. Error
            # nodes might not end with a newline (e.g. if there's an
            # open `(`). Therefore ignore all of them unless they are

View File

@@ -306,12 +306,12 @@ class ErrorFinder(Normalizer):
     def visit_leaf(self, leaf):
         if leaf.type == 'error_leaf':
-            if leaf.original_type in ('indent', 'error_dedent'):
+            if leaf.token_type in ('INDENT', 'ERROR_DEDENT'):
                 # Indents/Dedents itself never have a prefix. They are just
                 # "pseudo" tokens that get removed by the syntax tree later.
                 # Therefore in case of an error we also have to check for this.
                 spacing = list(leaf.get_next_leaf()._split_prefix())[-1]
-                if leaf.original_type == 'indent':
+                if leaf.token_type == 'INDENT':
                     message = 'unexpected indent'
                 else:
                     message = 'unindent does not match any outer indentation level'

View File

@@ -1,7 +1,5 @@
 from parso.python import tree
-from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
-                                STRING, tok_name, NAME, FSTRING_STRING,
-                                FSTRING_START, FSTRING_END)
+from parso.python.token import PythonTokenTypes
 from parso.parser import BaseParser
 from parso.pgen2.parse import token_to_ilabel
@@ -53,17 +51,18 @@ class Parser(BaseParser):
     # Names/Keywords are handled separately
     _leaf_map = {
-        STRING: tree.String,
-        NUMBER: tree.Number,
-        NEWLINE: tree.Newline,
-        ENDMARKER: tree.EndMarker,
-        FSTRING_STRING: tree.FStringString,
-        FSTRING_START: tree.FStringStart,
-        FSTRING_END: tree.FStringEnd,
+        PythonTokenTypes.STRING: tree.String,
+        PythonTokenTypes.NUMBER: tree.Number,
+        PythonTokenTypes.NEWLINE: tree.Newline,
+        PythonTokenTypes.ENDMARKER: tree.EndMarker,
+        PythonTokenTypes.FSTRING_STRING: tree.FStringString,
+        PythonTokenTypes.FSTRING_START: tree.FStringStart,
+        PythonTokenTypes.FSTRING_END: tree.FStringEnd,
     }
     def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
-        super(Parser, self).__init__(pgen_grammar, start_nonterminal, error_recovery=error_recovery)
+        super(Parser, self).__init__(pgen_grammar, start_nonterminal,
+                                     error_recovery=error_recovery)
         self.syntax_errors = []
         self._omit_dedent_list = []
@@ -126,7 +125,7 @@ class Parser(BaseParser):
     def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
         # print('leaf', repr(value), token.tok_name[type])
-        if type == NAME:
+        if type == PythonTokenTypes.NAME:
             if value in pgen_grammar.reserved_syntax_strings:
                 return tree.Keyword(value, start_pos, prefix)
             else:
@@ -143,7 +142,8 @@ class Parser(BaseParser):
            last_leaf = None
        if self._start_nonterminal == 'file_input' and \
-               (typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value):
+               (typ == PythonTokenTypes.ENDMARKER or
+                typ == PythonTokenTypes.DEDENT and '\n' not in last_leaf.value):
            def reduce_stack(states, newstate):
                # reduce
                state = newstate
@@ -158,7 +158,7 @@ class Parser(BaseParser):
            # end of a file, we have to recover even if the user doesn't want
            # error recovery.
            if stack[-1].dfa.from_rule == 'simple_stmt':
-               ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)
+               ilabel = token_to_ilabel(pgen_grammar, PythonTokenTypes.NEWLINE, value)
                try:
                    plan = stack[-1].dfa.ilabel_to_plan[ilabel]
                except KeyError:
@@ -199,12 +199,12 @@ class Parser(BaseParser):
            if self._stack_removal(stack, until_index + 1):
                add_token_callback(typ, value, start_pos, prefix)
        else:
-           if typ == INDENT:
+           if typ == PythonTokenTypes.INDENT:
                # For every deleted INDENT we have to delete a DEDENT as well.
                # Otherwise the parser will get into trouble and DEDENT too early.
                self._omit_dedent_list.append(self._indent_counter)
-           error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
+           error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
            stack[-1].nodes.append(error_leaf)
        tos = stack[-1]
@@ -230,7 +230,7 @@ class Parser(BaseParser):
     def _recovery_tokenize(self, tokens):
         for typ, value, start_pos, prefix in tokens:
             # print(tok_name[typ], repr(value), start_pos, repr(prefix))
-            if typ == DEDENT:
+            if typ == PythonTokenTypes.DEDENT:
                 # We need to count indents, because if we just omit any DEDENT,
                 # we might omit them in the wrong place.
                 o = self._omit_dedent_list
@@ -239,6 +239,6 @@ class Parser(BaseParser):
                     continue
                 self._indent_counter -= 1
-            elif typ == INDENT:
+            elif typ == PythonTokenTypes.INDENT:
                 self._indent_counter += 1
             yield typ, value, start_pos, prefix
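
For illustration, the re-keyed `_leaf_map` is used by `convert_leaf` roughly as sketched below. The `tree.Name` branch and the `tree.Operator` fallback are not part of the hunks shown here, so treat them as assumptions about the surrounding code; `convert_leaf_sketch` is a hypothetical stand-alone version of that dispatch:

    from parso.python import tree
    from parso.python.token import PythonTokenTypes

    def convert_leaf_sketch(pgen_grammar, leaf_map, type_, value, prefix, start_pos):
        # Keywords are NAME tokens whose value is reserved syntax.
        if type_ == PythonTokenTypes.NAME:
            if value in pgen_grammar.reserved_syntax_strings:
                return tree.Keyword(value, start_pos, prefix)
            return tree.Name(value, start_pos, prefix)
        # Every other token type dispatches through the TokenType-keyed map,
        # falling back to an operator leaf (assumed, not shown in the diff).
        return leaf_map.get(type_, tree.Operator)(value, start_pos, prefix)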

View File

@@ -1,47 +1,4 @@
 from __future__ import absolute_import
-from itertools import count
-from token import *
-from parso._compatibility import py_version
-# Don't mutate the standard library dict
-tok_name = tok_name.copy()
-_counter = count(N_TOKENS)
-# Never want to see this thing again.
-del N_TOKENS
-COMMENT = next(_counter)
-tok_name[COMMENT] = 'COMMENT'
-NL = next(_counter)
-tok_name[NL] = 'NL'
-# Sets the attributes that don't exist in these tok_name versions.
-if py_version >= 30:
-    BACKQUOTE = next(_counter)
-    tok_name[BACKQUOTE] = 'BACKQUOTE'
-else:
-    RARROW = next(_counter)
-    tok_name[RARROW] = 'RARROW'
-    ELLIPSIS = next(_counter)
-    tok_name[ELLIPSIS] = 'ELLIPSIS'
-if py_version < 35:
-    ATEQUAL = next(_counter)
-    tok_name[ATEQUAL] = 'ATEQUAL'
-ERROR_DEDENT = next(_counter)
-tok_name[ERROR_DEDENT] = 'ERROR_DEDENT'
-FSTRING_START = next(_counter)
-tok_name[FSTRING_START] = 'FSTRING_START'
-FSTRING_END = next(_counter)
-tok_name[FSTRING_END] = 'FSTRING_END'
-FSTRING_STRING = next(_counter)
-tok_name[FSTRING_STRING] = 'FSTRING_STRING'
-EXCLAMATION = next(_counter)
-tok_name[EXCLAMATION] = 'EXCLAMATION'
 # Map from operator to number (since tokenize doesn't do this)
@@ -100,7 +57,7 @@ opmap_raw = """\
 opmap = {}
 for line in opmap_raw.splitlines():
     op, name = line.split()
-    opmap[op] = globals()[name]
+    opmap[op] = name
 def generate_token_id(string):
@@ -115,26 +72,25 @@ def generate_token_id(string):
     return globals()[string]
-class Token(object):
-    def __init__(self, name):
+class TokenType(object):
+    def __init__(self, name, contains_syntax=False):
         self.name = name
+        self.contains_syntax = contains_syntax
     def __repr__(self):
         return '%s(%s)' % (self.__class__.__name__, self.name)
-class Tokens(object):
+class TokenTypes(object):
     """
     Basically an enum, but Python 2 doesn't have enums in the standard library.
     """
     def __init__(self, names, contains_syntax):
         for name in names:
-            setattr(self, name, Token(name))
-        self.contains_syntax = [getattr(self, name) for name in contains_syntax]
+            setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax))
-PythonTokens = Tokens((
+PythonTokenTypes = TokenTypes((
     'STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT',
     'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP',
     'ENDMARKER'),
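
The two classes above are the core of this commit: token types become singleton objects with a `name` and a `contains_syntax` flag instead of integer codes. A minimal usage sketch; the `contains_syntax` argument of the `PythonTokenTypes` constructor is cut off in this hunk, so treating NAME (and OP) as the syntax-carrying types is inferred from the parse.py change above, not shown here:

    from parso.python.token import PythonTokenTypes

    name_type = PythonTokenTypes.NAME
    print(name_type)                    # TokenType(NAME)
    print(name_type.name)               # 'NAME'
    print(name_type.contains_syntax)    # True: NAME values may be keywords (inferred)
    print(PythonTokenTypes.NUMBER.contains_syntax)   # False

    # Comparisons now use the singleton objects rather than integer token codes.
    assert PythonTokenTypes.NAME is getattr(PythonTokenTypes, 'NAME')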

View File

@@ -18,10 +18,7 @@ from collections import namedtuple
 import itertools as _itertools
 from codecs import BOM_UTF8
-from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
-                                NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
-                                ERROR_DEDENT, FSTRING_STRING, FSTRING_START,
-                                FSTRING_END, OP)
+from parso.python.token import PythonTokenTypes, opmap
 from parso._compatibility import py_version
 from parso.utils import split_lines
@@ -242,12 +239,9 @@ class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
 class PythonToken(Token):
-    def _get_type_name(self, exact=True):
-        return tok_name[self.type]
     def __repr__(self):
         return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
-                self._replace(type=self._get_type_name()))
+                self._replace(type=self.type.name))
 class FStringNode(object):
@@ -396,7 +390,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
            endmatch = endprog.match(line)
            if endmatch:
                pos = endmatch.end(0)
-               yield PythonToken(STRING, contstr + line[:pos], contstr_start, prefix)
+               yield PythonToken(
+                   PythonTokenTypes.STRING, contstr + line[:pos],
+                   contstr_start, prefix)
                contstr = ''
                contline = None
            else:
@@ -409,7 +405,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
            string, pos = _find_fstring_string(fstring_stack, line, lnum, pos)
            if string:
                yield PythonToken(
-                   FSTRING_STRING, string,
+                   PythonTokenTypes.FSTRING_STRING, string,
                    fstring_stack[-1].last_string_start_pos,
                    # Never has a prefix because it can start anywhere and
                    # include whitespace.
@@ -426,7 +422,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
            if fstring_index is not None:
                yield PythonToken(
-                   FSTRING_END,
+                   PythonTokenTypes.FSTRING_END,
                    fstring_stack[fstring_index].quote,
                    (lnum, pos),
                    prefix=additional_prefix,
@@ -443,7 +439,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                match = whitespace.match(line, pos)
                pos = match.end()
                yield PythonToken(
-                   ERRORTOKEN, line[pos:], (lnum, pos),
+                   PythonTokenTypes.ERRORTOKEN, line[pos:], (lnum, pos),
                    additional_prefix + match.group(0)
                )
                additional_prefix = ''
@@ -471,24 +467,24 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                    # TODO don't we need to change spos as well?
                    start -= 1
                if start > indents[-1]:
-                   yield PythonToken(INDENT, '', spos, '')
+                   yield PythonToken(PythonTokenTypes.INDENT, '', spos, '')
                    indents.append(start)
                while start < indents[-1]:
                    if start > indents[-2]:
-                       yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
+                       yield PythonToken(PythonTokenTypes.ERROR_DEDENT, '', (lnum, 0), '')
                        break
-                   yield PythonToken(DEDENT, '', spos, '')
+                   yield PythonToken(PythonTokenTypes.DEDENT, '', spos, '')
                    indents.pop()
            if fstring_stack:
                fstring_index, end = _check_fstring_ending(fstring_stack, token)
                if fstring_index is not None:
                    if end != 0:
-                       yield PythonToken(ERRORTOKEN, token[:end], spos, prefix)
+                       yield PythonToken(PythonTokenTypes.ERRORTOKEN, token[:end], spos, prefix)
                        prefix = ''
                    yield PythonToken(
-                       FSTRING_END,
+                       PythonTokenTypes.FSTRING_END,
                        fstring_stack[fstring_index].quote,
                        (lnum, spos[1] + 1),
                        prefix=prefix
@@ -499,7 +495,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
            if (initial in numchars or  # ordinary number
                    (initial == '.' and token != '.' and token != '...')):
-               yield PythonToken(NUMBER, token, spos, prefix)
+               yield PythonToken(PythonTokenTypes.NUMBER, token, spos, prefix)
            elif initial in '\r\n':
                if any(not f.allow_multiline() for f in fstring_stack):
                    # Would use fstring_stack.clear, but that's not available
@@ -507,7 +503,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                    fstring_stack[:] = []
                if not new_line and paren_level == 0 and not fstring_stack:
-                   yield PythonToken(NEWLINE, token, spos, prefix)
+                   yield PythonToken(PythonTokenTypes.NEWLINE, token, spos, prefix)
                else:
                    additional_prefix = prefix + token
                new_line = True
@@ -520,7 +516,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                if endmatch:  # all on one line
                    pos = endmatch.end(0)
                    token = line[start:pos]
-                   yield PythonToken(STRING, token, spos, prefix)
+                   yield PythonToken(PythonTokenTypes.STRING, token, spos, prefix)
                else:
                    contstr_start = (lnum, start)  # multiple lines
                    contstr = line[start:]
@@ -537,10 +533,10 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                    contline = line
                    break
                else:  # ordinary string
-                   yield PythonToken(STRING, token, spos, prefix)
+                   yield PythonToken(PythonTokenTypes.STRING, token, spos, prefix)
            elif token in fstring_pattern_map:  # The start of an fstring.
                fstring_stack.append(FStringNode(fstring_pattern_map[token]))
-               yield PythonToken(FSTRING_START, token, spos, prefix)
+               yield PythonToken(PythonTokenTypes.FSTRING_START, token, spos, prefix)
            elif is_identifier(initial):  # ordinary name
                if token in always_break_tokens:
                    fstring_stack[:] = []
@@ -548,11 +544,11 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                    while True:
                        indent = indents.pop()
                        if indent > start:
-                           yield PythonToken(DEDENT, '', spos, '')
+                           yield PythonToken(PythonTokenTypes.DEDENT, '', spos, '')
                        else:
                            indents.append(indent)
                            break
-               yield PythonToken(NAME, token, spos, prefix)
+               yield PythonToken(PythonTokenTypes.NAME, token, spos, prefix)
            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'):  # continued stmt
                additional_prefix += prefix + line[start:]
                break
@@ -575,13 +571,13 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                    # This check is needed in any case to check if it's a valid
                    # operator or just some random unicode character.
                    opmap[token]
-                   typ = OP
+                   typ = PythonTokenTypes.OP
                except KeyError:
-                   typ = ERRORTOKEN
+                   typ = PythonTokenTypes.ERRORTOKEN
                yield PythonToken(typ, token, spos, prefix)
     if contstr:
-        yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
+        yield PythonToken(PythonTokenTypes.ERRORTOKEN, contstr, contstr_start, prefix)
         if contstr.endswith('\n'):
            new_line = True
@@ -589,8 +585,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
     # As the last position we just take the maximally possible position. We
     # remove -1 for the last new line.
     for indent in indents[1:]:
-        yield PythonToken(DEDENT, '', end_pos, '')
-    yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)
+        yield PythonToken(PythonTokenTypes.DEDENT, '', end_pos, '')
+    yield PythonToken(PythonTokenTypes.ENDMARKER, '', end_pos, additional_prefix)
 if __name__ == "__main__":
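
Downstream, every `PythonToken` yielded by the tokenizer now carries one of these `TokenType` objects in its `type` field. A small consumer sketch against this commit's API (import paths as in the diff above; the exact token stream depends on the tokenizer internals):

    from parso.python.tokenize import tokenize
    from parso.python.token import PythonTokenTypes
    from parso.utils import parse_version_string

    for tok in tokenize('x = 1\n', version_info=parse_version_string('3.6')):
        # tok.type is a TokenType object, so compare against the singletons,
        # not against integer token codes.
        if tok.type is PythonTokenTypes.NAME:
            print('name  :', repr(tok.string))
        elif tok.type is PythonTokenTypes.NUMBER:
            print('number:', repr(tok.string))
        elif tok.type is PythonTokenTypes.OP:
            print('op    :', repr(tok.string))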

View File

@@ -124,7 +124,7 @@ class PythonLeaf(PythonMixin, Leaf):
         # indent error leafs somehow? No idea how, though.
         previous_leaf = self.get_previous_leaf()
         if previous_leaf is not None and previous_leaf.type == 'error_leaf' \
-                and previous_leaf.original_type in ('indent', 'error_dedent'):
+                and previous_leaf.token_type in ('INDENT', 'ERROR_DEDENT'):
             previous_leaf = previous_leaf.get_previous_leaf()
         if previous_leaf is None:

View File

@@ -229,6 +229,7 @@ class Leaf(NodeOrLeaf):
 class TypedLeaf(Leaf):
     __slots__ = ('type',)
     def __init__(self, type, value, start_pos, prefix=''):
         super(TypedLeaf, self).__init__(value, start_pos, prefix)
         self.type = type
@@ -351,13 +352,13 @@ class ErrorLeaf(Leaf):
     A leaf that is either completely invalid in a language (like `$` in Python)
     or is invalid at that position. Like the star in `1 +* 1`.
     """
-    __slots__ = ('original_type',)
+    __slots__ = ('token_type',)
     type = 'error_leaf'
-    def __init__(self, original_type, value, start_pos, prefix=''):
+    def __init__(self, token_type, value, start_pos, prefix=''):
         super(ErrorLeaf, self).__init__(value, start_pos, prefix)
-        self.original_type = original_type
+        self.token_type = token_type
     def __repr__(self):
         return "<%s: %s:%s, %s>" % \
-            (type(self).__name__, self.original_type, repr(self.value), self.start_pos)
+            (type(self).__name__, self.token_type, repr(self.value), self.start_pos)

View File

@@ -1,20 +1,29 @@
 # -*- coding: utf-8    # This file contains Unicode characters.
 from textwrap import dedent
-import tokenize as stdlib_tokenize
 import pytest
 from parso._compatibility import py_version
 from parso.utils import split_lines, parse_version_string
-from parso.python.token import (
-    NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT,
-    FSTRING_START)
+from parso.python.token import PythonTokenTypes
 from parso.python import tokenize
 from parso import parse
 from parso.python.tokenize import PythonToken
+# To make it easier to access some of the token types, just put them here.
+NAME = PythonTokenTypes.NAME
+NEWLINE = PythonTokenTypes.NEWLINE
+STRING = PythonTokenTypes.STRING
+INDENT = PythonTokenTypes.INDENT
+DEDENT = PythonTokenTypes.DEDENT
+ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
+ENDMARKER = PythonTokenTypes.ENDMARKER
+ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
+FSTRING_START = PythonTokenTypes.FSTRING_START
 def _get_token_list(string):
     # Load the current version.
     version_info = parse_version_string()
@@ -128,7 +137,7 @@ def test_identifier_contains_unicode():
     else:
         # Unicode tokens in Python 2 seem to be identified as operators.
         # They will be ignored in the parser, that's ok.
-        assert unicode_token[0] == tokenize.ERRORTOKEN
+        assert unicode_token[0] == ERRORTOKEN
 def test_quoted_strings():
@@ -188,17 +197,17 @@ def test_ur_literals():
 def test_error_literal():
     error_token, endmarker = _get_token_list('"\n')
-    assert error_token.type == tokenize.ERRORTOKEN
+    assert error_token.type == ERRORTOKEN
     assert endmarker.prefix == ''
     assert error_token.string == '"\n'
-    assert endmarker.type == tokenize.ENDMARKER
+    assert endmarker.type == ENDMARKER
     assert endmarker.prefix == ''
     bracket, error_token, endmarker = _get_token_list('( """')
-    assert error_token.type == tokenize.ERRORTOKEN
+    assert error_token.type == ERRORTOKEN
     assert error_token.prefix == ' '
     assert error_token.string == '"""'
-    assert endmarker.type == tokenize.ENDMARKER
+    assert endmarker.type == ENDMARKER
     assert endmarker.prefix == ''
@@ -236,14 +245,3 @@ def test_error_string():
     assert t1.prefix == ' '
     assert t1.string == '"\n'
     assert endmarker.string == ''
-def test_tok_name_copied():
-    # Make sure parso doesn't mutate the standard library
-    tok_len = len(stdlib_tokenize.tok_name)
-    correct_len = stdlib_tokenize.N_TOKENS
-    if 'N_TOKENS' in stdlib_tokenize.tok_name.values():  # Python 3.7
-        correct_len += 1
-    if 'NT_OFFSET' in stdlib_tokenize.tok_name.values():  # Not there in PyPy
-        correct_len += 1
-    assert tok_len == correct_len