From 03de9cebb8691c31775c45c003baf276da54b0c6 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 24 Jun 2018 16:24:09 +0200 Subject: [PATCH] Introduce TokenTypes --- parso/grammar.py | 4 +-- parso/pgen2/grammar.py | 6 +--- parso/pgen2/grammar_parser.py | 33 ++++++++++---------- parso/pgen2/parse.py | 11 +++---- parso/python/diff.py | 31 ++++++++++--------- parso/python/errors.py | 4 +-- parso/python/parser.py | 36 +++++++++++----------- parso/python/token.py | 58 +++++------------------------------ parso/python/tokenize.py | 54 +++++++++++++++----------------- parso/python/tree.py | 2 +- parso/tree.py | 9 +++--- test/test_tokenize.py | 38 +++++++++++------------ 12 files changed, 117 insertions(+), 169 deletions(-) diff --git a/parso/grammar.py b/parso/grammar.py index acdf286..981a0fc 100644 --- a/parso/grammar.py +++ b/parso/grammar.py @@ -6,7 +6,7 @@ from parso.pgen2.pgen import generate_grammar from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string from parso.python.diff import DiffParser from parso.python.tokenize import tokenize_lines, tokenize -from parso.python import token +from parso.python.token import PythonTokenTypes from parso.cache import parser_cache, load_module, save_module from parso.parser import BaseParser from parso.python.parser import Parser as PythonParser @@ -193,7 +193,7 @@ class Grammar(object): class PythonGrammar(Grammar): _error_normalizer_config = ErrorFinderConfig() - _token_namespace = token + _token_namespace = PythonTokenTypes _start_nonterminal = 'file_input' def __init__(self, version_info, bnf_text): diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index b941ba1..453298a 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -16,9 +16,6 @@ fallback token code OP, but the parser needs the actual token code. """ -from parso.python import token - - class DFAPlan(object): def __init__(self, next_dfa, dfa_pushes=[]): self.next_dfa = next_dfa @@ -111,7 +108,6 @@ class Grammar(object): # A named token (e.g. NAME, NUMBER, STRING) itoken = getattr(self._token_namespace, label, None) - assert isinstance(itoken, int), label if itoken in self.tokens: return self.tokens[itoken] else: @@ -126,7 +122,7 @@ class Grammar(object): if value in self.reserved_syntax_strings: return self.reserved_syntax_strings[value] else: - self.labels.append((token.NAME, value)) + self.labels.append(('XXX', value)) self.reserved_syntax_strings[value] = ilabel return self.reserved_syntax_strings[value] diff --git a/parso/pgen2/grammar_parser.py b/parso/pgen2/grammar_parser.py index 80b4e20..623a455 100644 --- a/parso/pgen2/grammar_parser.py +++ b/parso/pgen2/grammar_parser.py @@ -5,9 +5,9 @@ # Copyright David Halter and Contributors # Modifications are dual-licensed: MIT and PSF. 
-from parso.python import tokenize +from parso.python.tokenize import tokenize from parso.utils import parse_version_string -from parso.python import token +from parso.python.token import PythonTokenTypes class GrammarParser(): @@ -16,7 +16,7 @@ class GrammarParser(): """ def __init__(self, bnf_grammar): self._bnf_grammar = bnf_grammar - self.generator = tokenize.tokenize( + self.generator = tokenize( bnf_grammar, version_info=parse_version_string('3.6') ) @@ -24,16 +24,16 @@ class GrammarParser(): def parse(self): # grammar: (NEWLINE | rule)* ENDMARKER - while self.type != token.ENDMARKER: - while self.type == token.NEWLINE: + while self.type != PythonTokenTypes.ENDMARKER: + while self.type == PythonTokenTypes.NEWLINE: self._gettoken() # rule: NAME ':' rhs NEWLINE - self._current_rule_name = self._expect(token.NAME) - self._expect(token.OP, ':') + self._current_rule_name = self._expect(PythonTokenTypes.NAME) + self._expect(PythonTokenTypes.OP, ':') a, z = self._parse_rhs() - self._expect(token.NEWLINE) + self._expect(PythonTokenTypes.NEWLINE) yield a, z @@ -60,7 +60,8 @@ class GrammarParser(): def _parse_items(self): # items: item+ a, b = self._parse_item() - while self.type in (token.NAME, token.STRING) or self.value in ('(', '['): + while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \ + or self.value in ('(', '['): c, d = self._parse_item() # Need to end on the next item. b.add_arc(c) @@ -72,7 +73,7 @@ class GrammarParser(): if self.value == "[": self._gettoken() a, z = self._parse_rhs() - self._expect(token.OP, ']') + self._expect(PythonTokenTypes.OP, ']') # Make it also possible that there is no token and change the # state. a.add_arc(z) @@ -97,9 +98,9 @@ class GrammarParser(): if self.value == "(": self._gettoken() a, z = self._parse_rhs() - self._expect(token.OP, ')') + self._expect(PythonTokenTypes.OP, ')') return a, z - elif self.type in (token.NAME, token.STRING): + elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING): a = NFAState(self._current_rule_name) z = NFAState(self._current_rule_name) # Make it clear that the state transition requires that value. @@ -110,10 +111,10 @@ class GrammarParser(): self._raise_error("expected (...) or NAME or STRING, got %s/%s", self.type, self.value) - def _expect(self, type, value=None): - if self.type != type: - self._raise_error("expected %s(%s), got %s(%s)", - type, token.tok_name[type], self.type, self.value) + def _expect(self, type_, value=None): + if self.type != type_: + self._raise_error("expected %s, got %s [%s]", + type_, self.type, self.value) if value is not None and self.value != value: self._raise_error("expected %s, got %s", value, self.value) value = self.value diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index 43edc92..b22ffa1 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -14,8 +14,6 @@ See Parser/parser.c in the Python distribution for additional info on how this parsing engine works. """ -from parso.python import tokenize - class InternalParseError(Exception): """ @@ -24,9 +22,9 @@ class InternalParseError(Exception): wrong. 
""" - def __init__(self, msg, type, value, start_pos): + def __init__(self, msg, type_, value, start_pos): Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" % - (msg, tokenize.tok_name[type], value, start_pos)) + (msg, type_.name, value, start_pos)) self.msg = msg self.type = type self.value = value @@ -69,9 +67,7 @@ class StackNode(object): def token_to_ilabel(grammar, type_, value): # Map from token to label - # TODO this is not good, shouldn't use tokenize.NAME, but somehow use the - # grammar. - if type_ in (tokenize.NAME, tokenize.OP): + if type_.contains_syntax: # Check for reserved words (keywords) try: return grammar.reserved_syntax_strings[value] @@ -196,6 +192,7 @@ class PgenParser(object): # creating a new node. We still create expr_stmt and # file_input though, because a lot of Jedi depends on its # logic. + print(tos.nodes) if len(tos.nodes) == 1: new_node = tos.nodes[0] else: diff --git a/parso/python/diff.py b/parso/python/diff.py index 742e0eb..3b7eee5 100644 --- a/parso/python/diff.py +++ b/parso/python/diff.py @@ -13,8 +13,8 @@ import logging from parso.utils import split_lines from parso.python.parser import Parser from parso.python.tree import EndMarker -from parso.python.tokenize import (NEWLINE, PythonToken, ERROR_DEDENT, - ENDMARKER, INDENT, DEDENT) +from parso.python.tokenize import PythonToken +from parso.python.token import PythonTokenTypes LOG = logging.getLogger(__name__) @@ -29,7 +29,7 @@ def _get_last_line(node_or_leaf): def _ends_with_newline(leaf, suffix=''): if leaf.type == 'error_leaf': - typ = leaf.original_type + typ = leaf.token_type.lower() else: typ = leaf.type @@ -167,8 +167,7 @@ class DiffParser(object): def _enabled_debugging(self, old_lines, lines_new): if self._module.get_code() != ''.join(lines_new): - LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), - ''.join(lines_new)) + LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new)) def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new): copied_nodes = [None] @@ -272,7 +271,6 @@ class DiffParser(object): # memoryview? parsed_until_line = self._nodes_stack.parsed_until_line lines_after = self._parser_lines_new[parsed_until_line:] - #print('parse_content', parsed_until_line, lines_after, until_line) tokens = self._diff_tokenize( lines_after, until_line, @@ -292,7 +290,7 @@ class DiffParser(object): stack = self._active_parser.pgen_parser.stack for typ, string, start_pos, prefix in tokens: start_pos = start_pos[0] + line_offset, start_pos[1] - if typ == INDENT: + if typ == PythonTokenTypes.INDENT: indents.append(start_pos[1]) if is_first_token: omitted_first_indent = True @@ -305,8 +303,9 @@ class DiffParser(object): # In case of omitted_first_indent, it might not be dedented fully. # However this is a sign for us that a dedent happened. 
- if typ == DEDENT \ - or typ == ERROR_DEDENT and omitted_first_indent and len(indents) == 1: + if typ == PythonTokenTypes.DEDENT \ + or typ == PythonTokenTypes.ERROR_DEDENT \ + and omitted_first_indent and len(indents) == 1: indents.pop() if omitted_first_indent and not indents: # We are done here, only thing that can come now is an @@ -316,18 +315,22 @@ class DiffParser(object): prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix) else: prefix = '' - yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix) + yield PythonToken( + PythonTokenTypes.ENDMARKER, '', + (start_pos[0] + line_offset, 0), + prefix + ) break - elif typ == NEWLINE and start_pos[0] >= until_line: + elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line: yield PythonToken(typ, string, start_pos, prefix) # Check if the parser is actually in a valid suite state. if suite_or_file_input_is_valid(self._pgen_grammar, stack): start_pos = start_pos[0] + 1, 0 while len(indents) > int(omitted_first_indent): indents.pop() - yield PythonToken(DEDENT, '', start_pos, '') + yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '') - yield PythonToken(ENDMARKER, '', start_pos, '') + yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '') break else: continue @@ -536,7 +539,7 @@ class _NodesStack(object): line_offset_index = -2 elif (new_nodes[-1].type in ('error_leaf', 'error_node') or - _is_flow_node(new_nodes[-1])): + _is_flow_node(new_nodes[-1])): # Error leafs/nodes don't have a defined start/end. Error # nodes might not end with a newline (e.g. if there's an # open `(`). Therefore ignore all of them unless they are diff --git a/parso/python/errors.py b/parso/python/errors.py index cfb8380..92fdef1 100644 --- a/parso/python/errors.py +++ b/parso/python/errors.py @@ -306,12 +306,12 @@ class ErrorFinder(Normalizer): def visit_leaf(self, leaf): if leaf.type == 'error_leaf': - if leaf.original_type in ('indent', 'error_dedent'): + if leaf.token_type in ('INDENT', 'ERROR_DEDENT'): # Indents/Dedents itself never have a prefix. They are just # "pseudo" tokens that get removed by the syntax tree later. # Therefore in case of an error we also have to check for this. 
spacing = list(leaf.get_next_leaf()._split_prefix())[-1] - if leaf.original_type == 'indent': + if leaf.token_type == 'INDENT': message = 'unexpected indent' else: message = 'unindent does not match any outer indentation level' diff --git a/parso/python/parser.py b/parso/python/parser.py index 2ebd63d..d2ae0f9 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -1,7 +1,5 @@ from parso.python import tree -from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER, - STRING, tok_name, NAME, FSTRING_STRING, - FSTRING_START, FSTRING_END) +from parso.python.token import PythonTokenTypes from parso.parser import BaseParser from parso.pgen2.parse import token_to_ilabel @@ -53,17 +51,18 @@ class Parser(BaseParser): # Names/Keywords are handled separately _leaf_map = { - STRING: tree.String, - NUMBER: tree.Number, - NEWLINE: tree.Newline, - ENDMARKER: tree.EndMarker, - FSTRING_STRING: tree.FStringString, - FSTRING_START: tree.FStringStart, - FSTRING_END: tree.FStringEnd, + PythonTokenTypes.STRING: tree.String, + PythonTokenTypes.NUMBER: tree.Number, + PythonTokenTypes.NEWLINE: tree.Newline, + PythonTokenTypes.ENDMARKER: tree.EndMarker, + PythonTokenTypes.FSTRING_STRING: tree.FStringString, + PythonTokenTypes.FSTRING_START: tree.FStringStart, + PythonTokenTypes.FSTRING_END: tree.FStringEnd, } def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'): - super(Parser, self).__init__(pgen_grammar, start_nonterminal, error_recovery=error_recovery) + super(Parser, self).__init__(pgen_grammar, start_nonterminal, + error_recovery=error_recovery) self.syntax_errors = [] self._omit_dedent_list = [] @@ -126,7 +125,7 @@ class Parser(BaseParser): def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos): # print('leaf', repr(value), token.tok_name[type]) - if type == NAME: + if type == PythonTokenTypes.NAME: if value in pgen_grammar.reserved_syntax_strings: return tree.Keyword(value, start_pos, prefix) else: @@ -143,7 +142,8 @@ class Parser(BaseParser): last_leaf = None if self._start_nonterminal == 'file_input' and \ - (typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value): + (typ == PythonTokenTypes.ENDMARKER or + typ == PythonTokenTypes.DEDENT and '\n' not in last_leaf.value): def reduce_stack(states, newstate): # reduce state = newstate @@ -158,7 +158,7 @@ class Parser(BaseParser): # end of a file, we have to recover even if the user doesn't want # error recovery. if stack[-1].dfa.from_rule == 'simple_stmt': - ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value) + ilabel = token_to_ilabel(pgen_grammar, PythonTokenTypes.NEWLINE, value) try: plan = stack[-1].dfa.ilabel_to_plan[ilabel] except KeyError: @@ -199,12 +199,12 @@ class Parser(BaseParser): if self._stack_removal(stack, until_index + 1): add_token_callback(typ, value, start_pos, prefix) else: - if typ == INDENT: + if typ == PythonTokenTypes.INDENT: # For every deleted INDENT we have to delete a DEDENT as well. # Otherwise the parser will get into trouble and DEDENT too early. 
self._omit_dedent_list.append(self._indent_counter) - error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix) + error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix) stack[-1].nodes.append(error_leaf) tos = stack[-1] @@ -230,7 +230,7 @@ class Parser(BaseParser): def _recovery_tokenize(self, tokens): for typ, value, start_pos, prefix in tokens: # print(tok_name[typ], repr(value), start_pos, repr(prefix)) - if typ == DEDENT: + if typ == PythonTokenTypes.DEDENT: # We need to count indents, because if we just omit any DEDENT, # we might omit them in the wrong place. o = self._omit_dedent_list @@ -239,6 +239,6 @@ class Parser(BaseParser): continue self._indent_counter -= 1 - elif typ == INDENT: + elif typ == PythonTokenTypes.INDENT: self._indent_counter += 1 yield typ, value, start_pos, prefix diff --git a/parso/python/token.py b/parso/python/token.py index 9571364..3e4e17b 100644 --- a/parso/python/token.py +++ b/parso/python/token.py @@ -1,47 +1,4 @@ from __future__ import absolute_import -from itertools import count -from token import * - -from parso._compatibility import py_version - -# Don't mutate the standard library dict -tok_name = tok_name.copy() - -_counter = count(N_TOKENS) -# Never want to see this thing again. -del N_TOKENS - -COMMENT = next(_counter) -tok_name[COMMENT] = 'COMMENT' - -NL = next(_counter) -tok_name[NL] = 'NL' - -# Sets the attributes that don't exist in these tok_name versions. -if py_version >= 30: - BACKQUOTE = next(_counter) - tok_name[BACKQUOTE] = 'BACKQUOTE' -else: - RARROW = next(_counter) - tok_name[RARROW] = 'RARROW' - ELLIPSIS = next(_counter) - tok_name[ELLIPSIS] = 'ELLIPSIS' - -if py_version < 35: - ATEQUAL = next(_counter) - tok_name[ATEQUAL] = 'ATEQUAL' - -ERROR_DEDENT = next(_counter) -tok_name[ERROR_DEDENT] = 'ERROR_DEDENT' - -FSTRING_START = next(_counter) -tok_name[FSTRING_START] = 'FSTRING_START' -FSTRING_END = next(_counter) -tok_name[FSTRING_END] = 'FSTRING_END' -FSTRING_STRING = next(_counter) -tok_name[FSTRING_STRING] = 'FSTRING_STRING' -EXCLAMATION = next(_counter) -tok_name[EXCLAMATION] = 'EXCLAMATION' # Map from operator to number (since tokenize doesn't do this) @@ -100,7 +57,7 @@ opmap_raw = """\ opmap = {} for line in opmap_raw.splitlines(): op, name = line.split() - opmap[op] = globals()[name] + opmap[op] = name def generate_token_id(string): @@ -115,26 +72,25 @@ def generate_token_id(string): return globals()[string] -class Token(object): - def __init__(self, name): +class TokenType(object): + def __init__(self, name, contains_syntax=False): self.name = name + self.contains_syntax = contains_syntax def __repr__(self): return '%s(%s)' % (self.__class__.__name__, self.name) -class Tokens(object): +class TokenTypes(object): """ Basically an enum, but Python 2 doesn't have enums in the standard library. 
""" def __init__(self, names, contains_syntax): for name in names: - setattr(self, name, Token(name)) - - self.contains_syntax = [getattr(self, name) for name in contains_syntax] + setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax)) -PythonTokens = Tokens(( +PythonTokenTypes = TokenTypes(( 'STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT', 'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP', 'ENDMARKER'), diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py index 1d6e981..1061672 100644 --- a/parso/python/tokenize.py +++ b/parso/python/tokenize.py @@ -18,10 +18,7 @@ from collections import namedtuple import itertools as _itertools from codecs import BOM_UTF8 -from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap, - NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT, - ERROR_DEDENT, FSTRING_STRING, FSTRING_START, - FSTRING_END, OP) +from parso.python.token import PythonTokenTypes, opmap from parso._compatibility import py_version from parso.utils import split_lines @@ -242,12 +239,9 @@ class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])): class PythonToken(Token): - def _get_type_name(self, exact=True): - return tok_name[self.type] - def __repr__(self): return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' % - self._replace(type=self._get_type_name())) + self._replace(type=self.type.name)) class FStringNode(object): @@ -396,7 +390,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): endmatch = endprog.match(line) if endmatch: pos = endmatch.end(0) - yield PythonToken(STRING, contstr + line[:pos], contstr_start, prefix) + yield PythonToken( + PythonTokenTypes.STRING, contstr + line[:pos], + contstr_start, prefix) contstr = '' contline = None else: @@ -409,7 +405,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): string, pos = _find_fstring_string(fstring_stack, line, lnum, pos) if string: yield PythonToken( - FSTRING_STRING, string, + PythonTokenTypes.FSTRING_STRING, string, fstring_stack[-1].last_string_start_pos, # Never has a prefix because it can start anywhere and # include whitespace. @@ -426,7 +422,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): if fstring_index is not None: yield PythonToken( - FSTRING_END, + PythonTokenTypes.FSTRING_END, fstring_stack[fstring_index].quote, (lnum, pos), prefix=additional_prefix, @@ -443,7 +439,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): match = whitespace.match(line, pos) pos = match.end() yield PythonToken( - ERRORTOKEN, line[pos:], (lnum, pos), + PythonTokenTypes.ERRORTOKEN, line[pos:], (lnum, pos), additional_prefix + match.group(0) ) additional_prefix = '' @@ -471,24 +467,24 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): # TODO don't we need to change spos as well? 
start -= 1 if start > indents[-1]: - yield PythonToken(INDENT, '', spos, '') + yield PythonToken(PythonTokenTypes.INDENT, '', spos, '') indents.append(start) while start < indents[-1]: if start > indents[-2]: - yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '') + yield PythonToken(PythonTokenTypes.ERROR_DEDENT, '', (lnum, 0), '') break - yield PythonToken(DEDENT, '', spos, '') + yield PythonToken(PythonTokenTypes.DEDENT, '', spos, '') indents.pop() if fstring_stack: fstring_index, end = _check_fstring_ending(fstring_stack, token) if fstring_index is not None: if end != 0: - yield PythonToken(ERRORTOKEN, token[:end], spos, prefix) + yield PythonToken(PythonTokenTypes.ERRORTOKEN, token[:end], spos, prefix) prefix = '' yield PythonToken( - FSTRING_END, + PythonTokenTypes.FSTRING_END, fstring_stack[fstring_index].quote, (lnum, spos[1] + 1), prefix=prefix @@ -499,7 +495,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): if (initial in numchars or # ordinary number (initial == '.' and token != '.' and token != '...')): - yield PythonToken(NUMBER, token, spos, prefix) + yield PythonToken(PythonTokenTypes.NUMBER, token, spos, prefix) elif initial in '\r\n': if any(not f.allow_multiline() for f in fstring_stack): # Would use fstring_stack.clear, but that's not available @@ -507,7 +503,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): fstring_stack[:] = [] if not new_line and paren_level == 0 and not fstring_stack: - yield PythonToken(NEWLINE, token, spos, prefix) + yield PythonToken(PythonTokenTypes.NEWLINE, token, spos, prefix) else: additional_prefix = prefix + token new_line = True @@ -520,7 +516,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): if endmatch: # all on one line pos = endmatch.end(0) token = line[start:pos] - yield PythonToken(STRING, token, spos, prefix) + yield PythonToken(PythonTokenTypes.STRING, token, spos, prefix) else: contstr_start = (lnum, start) # multiple lines contstr = line[start:] @@ -537,10 +533,10 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): contline = line break else: # ordinary string - yield PythonToken(STRING, token, spos, prefix) + yield PythonToken(PythonTokenTypes.STRING, token, spos, prefix) elif token in fstring_pattern_map: # The start of an fstring. fstring_stack.append(FStringNode(fstring_pattern_map[token])) - yield PythonToken(FSTRING_START, token, spos, prefix) + yield PythonToken(PythonTokenTypes.FSTRING_START, token, spos, prefix) elif is_identifier(initial): # ordinary name if token in always_break_tokens: fstring_stack[:] = [] @@ -548,11 +544,11 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): while True: indent = indents.pop() if indent > start: - yield PythonToken(DEDENT, '', spos, '') + yield PythonToken(PythonTokenTypes.DEDENT, '', spos, '') else: indents.append(indent) break - yield PythonToken(NAME, token, spos, prefix) + yield PythonToken(PythonTokenTypes.NAME, token, spos, prefix) elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt additional_prefix += prefix + line[start:] break @@ -575,13 +571,13 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): # This check is needed in any case to check if it's a valid # operator or just some random unicode character. 
opmap[token] - typ = OP + typ = PythonTokenTypes.OP except KeyError: - typ = ERRORTOKEN + typ = PythonTokenTypes.ERRORTOKEN yield PythonToken(typ, token, spos, prefix) if contstr: - yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix) + yield PythonToken(PythonTokenTypes.ERRORTOKEN, contstr, contstr_start, prefix) if contstr.endswith('\n'): new_line = True @@ -589,8 +585,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): # As the last position we just take the maximally possible position. We # remove -1 for the last new line. for indent in indents[1:]: - yield PythonToken(DEDENT, '', end_pos, '') - yield PythonToken(ENDMARKER, '', end_pos, additional_prefix) + yield PythonToken(PythonTokenTypes.DEDENT, '', end_pos, '') + yield PythonToken(PythonTokenTypes.ENDMARKER, '', end_pos, additional_prefix) if __name__ == "__main__": diff --git a/parso/python/tree.py b/parso/python/tree.py index f6b4dd3..70de59e 100644 --- a/parso/python/tree.py +++ b/parso/python/tree.py @@ -124,7 +124,7 @@ class PythonLeaf(PythonMixin, Leaf): # indent error leafs somehow? No idea how, though. previous_leaf = self.get_previous_leaf() if previous_leaf is not None and previous_leaf.type == 'error_leaf' \ - and previous_leaf.original_type in ('indent', 'error_dedent'): + and previous_leaf.token_type in ('INDENT', 'ERROR_DEDENT'): previous_leaf = previous_leaf.get_previous_leaf() if previous_leaf is None: diff --git a/parso/tree.py b/parso/tree.py index 5316795..9e7ab2f 100644 --- a/parso/tree.py +++ b/parso/tree.py @@ -229,6 +229,7 @@ class Leaf(NodeOrLeaf): class TypedLeaf(Leaf): __slots__ = ('type',) + def __init__(self, type, value, start_pos, prefix=''): super(TypedLeaf, self).__init__(value, start_pos, prefix) self.type = type @@ -351,13 +352,13 @@ class ErrorLeaf(Leaf): A leaf that is either completely invalid in a language (like `$` in Python) or is invalid at that position. Like the star in `1 +* 1`. """ - __slots__ = ('original_type',) + __slots__ = ('token_type',) type = 'error_leaf' - def __init__(self, original_type, value, start_pos, prefix=''): + def __init__(self, token_type, value, start_pos, prefix=''): super(ErrorLeaf, self).__init__(value, start_pos, prefix) - self.original_type = original_type + self.token_type = token_type def __repr__(self): return "<%s: %s:%s, %s>" % \ - (type(self).__name__, self.original_type, repr(self.value), self.start_pos) + (type(self).__name__, self.token_type, repr(self.value), self.start_pos) diff --git a/test/test_tokenize.py b/test/test_tokenize.py index 08590a6..6593ff8 100644 --- a/test/test_tokenize.py +++ b/test/test_tokenize.py @@ -1,20 +1,29 @@ # -*- coding: utf-8 # This file contains Unicode characters. from textwrap import dedent -import tokenize as stdlib_tokenize import pytest from parso._compatibility import py_version from parso.utils import split_lines, parse_version_string -from parso.python.token import ( - NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT, - FSTRING_START) +from parso.python.token import PythonTokenTypes from parso.python import tokenize from parso import parse from parso.python.tokenize import PythonToken +# To make it easier to access some of the token types, just put them here. 
+NAME = PythonTokenTypes.NAME +NEWLINE = PythonTokenTypes.NEWLINE +STRING = PythonTokenTypes.STRING +INDENT = PythonTokenTypes.INDENT +DEDENT = PythonTokenTypes.DEDENT +ERRORTOKEN = PythonTokenTypes.ERRORTOKEN +ENDMARKER = PythonTokenTypes.ENDMARKER +ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT +FSTRING_START = PythonTokenTypes.FSTRING_START + + def _get_token_list(string): # Load the current version. version_info = parse_version_string() @@ -128,7 +137,7 @@ def test_identifier_contains_unicode(): else: # Unicode tokens in Python 2 seem to be identified as operators. # They will be ignored in the parser, that's ok. - assert unicode_token[0] == tokenize.ERRORTOKEN + assert unicode_token[0] == ERRORTOKEN def test_quoted_strings(): @@ -188,17 +197,17 @@ def test_ur_literals(): def test_error_literal(): error_token, endmarker = _get_token_list('"\n') - assert error_token.type == tokenize.ERRORTOKEN + assert error_token.type == ERRORTOKEN assert endmarker.prefix == '' assert error_token.string == '"\n' - assert endmarker.type == tokenize.ENDMARKER + assert endmarker.type == ENDMARKER assert endmarker.prefix == '' bracket, error_token, endmarker = _get_token_list('( """') - assert error_token.type == tokenize.ERRORTOKEN + assert error_token.type == ERRORTOKEN assert error_token.prefix == ' ' assert error_token.string == '"""' - assert endmarker.type == tokenize.ENDMARKER + assert endmarker.type == ENDMARKER assert endmarker.prefix == '' @@ -236,14 +245,3 @@ def test_error_string(): assert t1.prefix == ' ' assert t1.string == '"\n' assert endmarker.string == '' - -def test_tok_name_copied(): - # Make sure parso doesn't mutate the standard library - tok_len = len(stdlib_tokenize.tok_name) - correct_len = stdlib_tokenize.N_TOKENS - if 'N_TOKENS' in stdlib_tokenize.tok_name.values(): # Python 3.7 - correct_len += 1 - if 'NT_OFFSET' in stdlib_tokenize.tok_name.values(): # Not there in PyPy - correct_len += 1 - - assert tok_len == correct_len
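
The core of this change is the new TokenType / TokenTypes pair in parso/python/token.py: token kinds become objects carrying a `name` and a `contains_syntax` flag instead of stdlib integer codes, which is why token_to_ilabel() can now ask `type_.contains_syntax` rather than comparing against tokenize.NAME/tokenize.OP. Below is a minimal, self-contained sketch of that behaviour, assuming only the definitions shown in the patch; the is_reserved_syntax() helper and the ('NAME', 'OP') argument are illustrative inferences from the removed NAME/OP check, not code taken verbatim from the commit.

class TokenType(object):
    def __init__(self, name, contains_syntax=False):
        self.name = name
        self.contains_syntax = contains_syntax

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, self.name)


class TokenTypes(object):
    """A poor man's enum, mirroring the Python 2 compatible class in the patch."""
    def __init__(self, names, contains_syntax):
        for name in names:
            setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax))


# The concrete namespace used by the parser; the contains_syntax argument is
# assumed to be ('NAME', 'OP'), inferred from the removed
# `type_ in (tokenize.NAME, tokenize.OP)` check in token_to_ilabel().
PythonTokenTypes = TokenTypes(
    ('STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT',
     'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP',
     'ENDMARKER'),
    contains_syntax=('NAME', 'OP'),
)


def is_reserved_syntax(token_type, value, reserved_strings):
    # Hypothetical helper: only token types that can carry grammar-defined
    # syntax (names and operators) are looked up among the reserved strings,
    # which is what token_to_ilabel() does after this patch.
    return token_type.contains_syntax and value in reserved_strings


assert PythonTokenTypes.NAME.contains_syntax
assert not PythonTokenTypes.STRING.contains_syntax
assert PythonTokenTypes.NAME.name == 'NAME'
assert repr(PythonTokenTypes.OP) == 'TokenType(OP)'
assert is_reserved_syntax(PythonTokenTypes.NAME, 'if', {'if', 'else'})
assert not is_reserved_syntax(PythonTokenTypes.STRING, '"if"', {'if', 'else'})

Because ErrorLeaf now stores the TokenType name as `token_type` (instead of the lowercased `original_type`), consumers compare against uppercase strings such as 'INDENT' and 'ERROR_DEDENT', as the hunks for parso/python/errors.py and parso/python/tree.py above show.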