mirror of https://github.com/davidhalter/parso.git
synced 2025-12-06 21:04:29 +08:00
Introduce TokenTypes
@@ -6,7 +6,7 @@ from parso.pgen2.pgen import generate_grammar
 from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string
 from parso.python.diff import DiffParser
 from parso.python.tokenize import tokenize_lines, tokenize
-from parso.python import token
+from parso.python.token import PythonTokenTypes
 from parso.cache import parser_cache, load_module, save_module
 from parso.parser import BaseParser
 from parso.python.parser import Parser as PythonParser
@@ -193,7 +193,7 @@ class Grammar(object):

 class PythonGrammar(Grammar):
     _error_normalizer_config = ErrorFinderConfig()
-    _token_namespace = token
+    _token_namespace = PythonTokenTypes
     _start_nonterminal = 'file_input'

     def __init__(self, version_info, bnf_text):
@@ -16,9 +16,6 @@ fallback token code OP, but the parser needs the actual token code.

 """

-from parso.python import token
-
-
 class DFAPlan(object):
     def __init__(self, next_dfa, dfa_pushes=[]):
         self.next_dfa = next_dfa
@@ -111,7 +108,6 @@ class Grammar(object):

             # A named token (e.g. NAME, NUMBER, STRING)
             itoken = getattr(self._token_namespace, label, None)
-            assert isinstance(itoken, int), label
             if itoken in self.tokens:
                 return self.tokens[itoken]
             else:
@@ -126,7 +122,7 @@ class Grammar(object):
         if value in self.reserved_syntax_strings:
             return self.reserved_syntax_strings[value]
         else:
-            self.labels.append((token.NAME, value))
+            self.labels.append(('XXX', value))
             self.reserved_syntax_strings[value] = ilabel
             return self.reserved_syntax_strings[value]

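
Aside: the `assert isinstance(itoken, int), label` removed above only made sense while token types were stdlib-style integers. With the new token types, the same `getattr` lookup on `_token_namespace` yields a small object instead (and still `None` for nonterminal labels). A minimal, self-contained sketch of that lookup; the classes and names here are illustrative stand-ins, not parso's real API:

class TokenType(object):
    # Stand-in for parso's token-type objects: just a named marker object.
    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return 'TokenType(%s)' % self.name


class FakeNamespace(object):
    pass


namespace = FakeNamespace()
for _name in ('NAME', 'NUMBER', 'STRING', 'OP'):
    setattr(namespace, _name, TokenType(_name))

# This mirrors `getattr(self._token_namespace, label, None)` above:
print(getattr(namespace, 'NUMBER', None))     # TokenType(NUMBER) -- an object, not an int
print(getattr(namespace, 'expr_stmt', None))  # None -- nonterminal labels miss the lookup
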
@@ -5,9 +5,9 @@
 # Copyright David Halter and Contributors
 # Modifications are dual-licensed: MIT and PSF.

-from parso.python import tokenize
+from parso.python.tokenize import tokenize
 from parso.utils import parse_version_string
-from parso.python import token
+from parso.python.token import PythonTokenTypes


 class GrammarParser():
@@ -16,7 +16,7 @@ class GrammarParser():
     """
     def __init__(self, bnf_grammar):
         self._bnf_grammar = bnf_grammar
-        self.generator = tokenize.tokenize(
+        self.generator = tokenize(
             bnf_grammar,
             version_info=parse_version_string('3.6')
         )
@@ -24,16 +24,16 @@ class GrammarParser():

     def parse(self):
         # grammar: (NEWLINE | rule)* ENDMARKER
-        while self.type != token.ENDMARKER:
-            while self.type == token.NEWLINE:
+        while self.type != PythonTokenTypes.ENDMARKER:
+            while self.type == PythonTokenTypes.NEWLINE:
                 self._gettoken()

             # rule: NAME ':' rhs NEWLINE
-            self._current_rule_name = self._expect(token.NAME)
-            self._expect(token.OP, ':')
+            self._current_rule_name = self._expect(PythonTokenTypes.NAME)
+            self._expect(PythonTokenTypes.OP, ':')

             a, z = self._parse_rhs()
-            self._expect(token.NEWLINE)
+            self._expect(PythonTokenTypes.NEWLINE)

             yield a, z

@@ -60,7 +60,8 @@ class GrammarParser():
     def _parse_items(self):
         # items: item+
         a, b = self._parse_item()
-        while self.type in (token.NAME, token.STRING) or self.value in ('(', '['):
+        while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \
+                or self.value in ('(', '['):
             c, d = self._parse_item()
             # Need to end on the next item.
             b.add_arc(c)
@@ -72,7 +73,7 @@ class GrammarParser():
         if self.value == "[":
             self._gettoken()
             a, z = self._parse_rhs()
-            self._expect(token.OP, ']')
+            self._expect(PythonTokenTypes.OP, ']')
             # Make it also possible that there is no token and change the
             # state.
             a.add_arc(z)
@@ -97,9 +98,9 @@ class GrammarParser():
         if self.value == "(":
             self._gettoken()
             a, z = self._parse_rhs()
-            self._expect(token.OP, ')')
+            self._expect(PythonTokenTypes.OP, ')')
             return a, z
-        elif self.type in (token.NAME, token.STRING):
+        elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING):
             a = NFAState(self._current_rule_name)
             z = NFAState(self._current_rule_name)
             # Make it clear that the state transition requires that value.
@@ -110,10 +111,10 @@ class GrammarParser():
             self._raise_error("expected (...) or NAME or STRING, got %s/%s",
                               self.type, self.value)

-    def _expect(self, type, value=None):
-        if self.type != type:
-            self._raise_error("expected %s(%s), got %s(%s)",
-                              type, token.tok_name[type], self.type, self.value)
+    def _expect(self, type_, value=None):
+        if self.type != type_:
+            self._raise_error("expected %s, got %s [%s]",
+                              type_, self.type, self.value)
         if value is not None and self.value != value:
             self._raise_error("expected %s, got %s", value, self.value)
         value = self.value
@@ -14,8 +14,6 @@ See Parser/parser.c in the Python distribution for additional info on
 how this parsing engine works.
 """

-from parso.python import tokenize
-

 class InternalParseError(Exception):
     """
@@ -24,9 +22,9 @@ class InternalParseError(Exception):
     wrong.
     """

-    def __init__(self, msg, type, value, start_pos):
+    def __init__(self, msg, type_, value, start_pos):
         Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
-                           (msg, tokenize.tok_name[type], value, start_pos))
+                           (msg, type_.name, value, start_pos))
         self.msg = msg
         self.type = type
         self.value = value
@@ -69,9 +67,7 @@ class StackNode(object):

 def token_to_ilabel(grammar, type_, value):
     # Map from token to label
-    # TODO this is not good, shouldn't use tokenize.NAME, but somehow use the
-    # grammar.
-    if type_ in (tokenize.NAME, tokenize.OP):
+    if type_.contains_syntax:
         # Check for reserved words (keywords)
         try:
             return grammar.reserved_syntax_strings[value]
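
Aside: the old membership test against `tokenize.NAME`/`tokenize.OP` becomes a `contains_syntax` flag carried by the token type itself, so only NAME and OP values are ever checked against the grammar's reserved strings. A rough, self-contained illustration of that idea (stand-in classes and made-up ilabel numbers, not parso's actual objects):

class TokenType(object):
    def __init__(self, name, contains_syntax=False):
        self.name = name
        self.contains_syntax = contains_syntax


NAME = TokenType('NAME', contains_syntax=True)   # a NAME may be a keyword such as `if`
OP = TokenType('OP', contains_syntax=True)       # an OP may be reserved syntax such as `:`
NUMBER = TokenType('NUMBER')                     # a NUMBER is never reserved syntax

reserved_syntax_strings = {'if': 97, ':': 11}    # made-up ilabel numbers


def token_to_ilabel(type_, value, default_ilabel):
    # Only token types flagged with contains_syntax can collide with keywords.
    if type_.contains_syntax:
        try:
            return reserved_syntax_strings[value]
        except KeyError:
            pass
    return default_ilabel


print(token_to_ilabel(NAME, 'if', 5))    # 97 -- resolved as a keyword
print(token_to_ilabel(NAME, 'foo', 5))   # 5  -- a plain name
print(token_to_ilabel(NUMBER, '3', 7))   # 7  -- never checked against reserved strings
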
@@ -196,6 +192,7 @@ class PgenParser(object):
         # creating a new node. We still create expr_stmt and
         # file_input though, because a lot of Jedi depends on its
         # logic.
+        print(tos.nodes)
         if len(tos.nodes) == 1:
             new_node = tos.nodes[0]
         else:
@@ -13,8 +13,8 @@ import logging
 from parso.utils import split_lines
 from parso.python.parser import Parser
 from parso.python.tree import EndMarker
-from parso.python.tokenize import (NEWLINE, PythonToken, ERROR_DEDENT,
-                                   ENDMARKER, INDENT, DEDENT)
+from parso.python.tokenize import PythonToken
+from parso.python.token import PythonTokenTypes

 LOG = logging.getLogger(__name__)

@@ -29,7 +29,7 @@ def _get_last_line(node_or_leaf):

 def _ends_with_newline(leaf, suffix=''):
     if leaf.type == 'error_leaf':
-        typ = leaf.original_type
+        typ = leaf.token_type.lower()
     else:
         typ = leaf.type

@@ -167,8 +167,7 @@ class DiffParser(object):

     def _enabled_debugging(self, old_lines, lines_new):
         if self._module.get_code() != ''.join(lines_new):
-            LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines),
-                        ''.join(lines_new))
+            LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new))

     def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
         copied_nodes = [None]
@@ -272,7 +271,6 @@ class DiffParser(object):
         # memoryview?
         parsed_until_line = self._nodes_stack.parsed_until_line
         lines_after = self._parser_lines_new[parsed_until_line:]
-        #print('parse_content', parsed_until_line, lines_after, until_line)
         tokens = self._diff_tokenize(
             lines_after,
             until_line,
@@ -292,7 +290,7 @@ class DiffParser(object):
         stack = self._active_parser.pgen_parser.stack
         for typ, string, start_pos, prefix in tokens:
             start_pos = start_pos[0] + line_offset, start_pos[1]
-            if typ == INDENT:
+            if typ == PythonTokenTypes.INDENT:
                 indents.append(start_pos[1])
                 if is_first_token:
                     omitted_first_indent = True
@@ -305,8 +303,9 @@ class DiffParser(object):

             # In case of omitted_first_indent, it might not be dedented fully.
             # However this is a sign for us that a dedent happened.
-            if typ == DEDENT \
-                    or typ == ERROR_DEDENT and omitted_first_indent and len(indents) == 1:
+            if typ == PythonTokenTypes.DEDENT \
+                    or typ == PythonTokenTypes.ERROR_DEDENT \
+                    and omitted_first_indent and len(indents) == 1:
                 indents.pop()
                 if omitted_first_indent and not indents:
                     # We are done here, only thing that can come now is an
@@ -316,18 +315,22 @@ class DiffParser(object):
                         prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix)
                     else:
                         prefix = ''
-                    yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
+                    yield PythonToken(
+                        PythonTokenTypes.ENDMARKER, '',
+                        (start_pos[0] + line_offset, 0),
+                        prefix
+                    )
                     break
-            elif typ == NEWLINE and start_pos[0] >= until_line:
+            elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line:
                 yield PythonToken(typ, string, start_pos, prefix)
                 # Check if the parser is actually in a valid suite state.
                 if suite_or_file_input_is_valid(self._pgen_grammar, stack):
                     start_pos = start_pos[0] + 1, 0
                     while len(indents) > int(omitted_first_indent):
                         indents.pop()
-                        yield PythonToken(DEDENT, '', start_pos, '')
+                        yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '')

-                    yield PythonToken(ENDMARKER, '', start_pos, '')
+                    yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '')
                     break
                 else:
                     continue
@@ -306,12 +306,12 @@ class ErrorFinder(Normalizer):

     def visit_leaf(self, leaf):
         if leaf.type == 'error_leaf':
-            if leaf.original_type in ('indent', 'error_dedent'):
+            if leaf.token_type in ('INDENT', 'ERROR_DEDENT'):
                 # Indents/Dedents itself never have a prefix. They are just
                 # "pseudo" tokens that get removed by the syntax tree later.
                 # Therefore in case of an error we also have to check for this.
                 spacing = list(leaf.get_next_leaf()._split_prefix())[-1]
-                if leaf.original_type == 'indent':
+                if leaf.token_type == 'INDENT':
                     message = 'unexpected indent'
                 else:
                     message = 'unindent does not match any outer indentation level'
@@ -1,7 +1,5 @@
 from parso.python import tree
-from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
-                                STRING, tok_name, NAME, FSTRING_STRING,
-                                FSTRING_START, FSTRING_END)
+from parso.python.token import PythonTokenTypes
 from parso.parser import BaseParser
 from parso.pgen2.parse import token_to_ilabel

@@ -53,17 +51,18 @@ class Parser(BaseParser):

     # Names/Keywords are handled separately
     _leaf_map = {
-        STRING: tree.String,
-        NUMBER: tree.Number,
-        NEWLINE: tree.Newline,
-        ENDMARKER: tree.EndMarker,
-        FSTRING_STRING: tree.FStringString,
-        FSTRING_START: tree.FStringStart,
-        FSTRING_END: tree.FStringEnd,
+        PythonTokenTypes.STRING: tree.String,
+        PythonTokenTypes.NUMBER: tree.Number,
+        PythonTokenTypes.NEWLINE: tree.Newline,
+        PythonTokenTypes.ENDMARKER: tree.EndMarker,
+        PythonTokenTypes.FSTRING_STRING: tree.FStringString,
+        PythonTokenTypes.FSTRING_START: tree.FStringStart,
+        PythonTokenTypes.FSTRING_END: tree.FStringEnd,
     }

     def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
-        super(Parser, self).__init__(pgen_grammar, start_nonterminal, error_recovery=error_recovery)
+        super(Parser, self).__init__(pgen_grammar, start_nonterminal,
+                                     error_recovery=error_recovery)

         self.syntax_errors = []
         self._omit_dedent_list = []
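
Aside: because each token type is now a plain hashable object, `_leaf_map` keeps its shape and leaf construction stays a dict lookup keyed by `PythonTokenTypes.*` members. A small self-contained sketch of that dispatch pattern (the classes below are stand-ins for parso's tree and token classes, simplified from `convert_leaf`):

class Leaf(object):
    def __init__(self, value):
        self.value = value

    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self.value)


class Number(Leaf):
    pass


class Operator(Leaf):
    pass


class TokenType(object):
    def __init__(self, name):
        self.name = name


NUMBER = TokenType('NUMBER')
OP = TokenType('OP')

leaf_map = {NUMBER: Number}   # anything not listed falls back to Operator here


def convert_leaf(type_, value):
    # Mirrors the dict-based dispatch idea of Parser._leaf_map above (simplified).
    return leaf_map.get(type_, Operator)(value)


print(convert_leaf(NUMBER, '1.5'))  # Number('1.5')
print(convert_leaf(OP, '+'))        # Operator('+')
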
@@ -126,7 +125,7 @@ class Parser(BaseParser):

     def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
         # print('leaf', repr(value), token.tok_name[type])
-        if type == NAME:
+        if type == PythonTokenTypes.NAME:
             if value in pgen_grammar.reserved_syntax_strings:
                 return tree.Keyword(value, start_pos, prefix)
             else:
@@ -143,7 +142,8 @@ class Parser(BaseParser):
             last_leaf = None

         if self._start_nonterminal == 'file_input' and \
-                (typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value):
+                (typ == PythonTokenTypes.ENDMARKER or
+                 typ == PythonTokenTypes.DEDENT and '\n' not in last_leaf.value):
             def reduce_stack(states, newstate):
                 # reduce
                 state = newstate
@@ -158,7 +158,7 @@ class Parser(BaseParser):
             # end of a file, we have to recover even if the user doesn't want
             # error recovery.
             if stack[-1].dfa.from_rule == 'simple_stmt':
-                ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)
+                ilabel = token_to_ilabel(pgen_grammar, PythonTokenTypes.NEWLINE, value)
                 try:
                     plan = stack[-1].dfa.ilabel_to_plan[ilabel]
                 except KeyError:
@@ -199,12 +199,12 @@ class Parser(BaseParser):
         if self._stack_removal(stack, until_index + 1):
             add_token_callback(typ, value, start_pos, prefix)
         else:
-            if typ == INDENT:
+            if typ == PythonTokenTypes.INDENT:
                 # For every deleted INDENT we have to delete a DEDENT as well.
                 # Otherwise the parser will get into trouble and DEDENT too early.
                 self._omit_dedent_list.append(self._indent_counter)

-            error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
+            error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
             stack[-1].nodes.append(error_leaf)

         tos = stack[-1]
@@ -230,7 +230,7 @@ class Parser(BaseParser):
     def _recovery_tokenize(self, tokens):
         for typ, value, start_pos, prefix in tokens:
             # print(tok_name[typ], repr(value), start_pos, repr(prefix))
-            if typ == DEDENT:
+            if typ == PythonTokenTypes.DEDENT:
                 # We need to count indents, because if we just omit any DEDENT,
                 # we might omit them in the wrong place.
                 o = self._omit_dedent_list
@@ -239,6 +239,6 @@ class Parser(BaseParser):
                     continue

                 self._indent_counter -= 1
-            elif typ == INDENT:
+            elif typ == PythonTokenTypes.INDENT:
                 self._indent_counter += 1
             yield typ, value, start_pos, prefix
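
Aside: `_recovery_tokenize` only drops the DEDENT that matches an INDENT removed during error recovery; `_omit_dedent_list` records the indentation counter at the moment of removal so the right DEDENT is skipped and the rest pass through. A simplified, self-contained sketch of that bookkeeping (token types reduced to strings for brevity):

INDENT, DEDENT, NAME = 'INDENT', 'DEDENT', 'NAME'   # stand-ins for TokenType objects


def recovery_tokenize(tokens, omit_dedent_list):
    indent_counter = 0
    for typ, value in tokens:
        if typ == DEDENT:
            # Skip the DEDENT whose matching INDENT was dropped by error recovery.
            if omit_dedent_list and omit_dedent_list[-1] == indent_counter:
                omit_dedent_list.pop()
                continue
            indent_counter -= 1
        elif typ == INDENT:
            indent_counter += 1
        yield typ, value


stream = [(INDENT, '    '), (NAME, 'x'), (DEDENT, ''), (DEDENT, '')]
# Pretend error recovery deleted an INDENT while the counter was at 1:
print(list(recovery_tokenize(stream, omit_dedent_list=[1])))
# [('INDENT', '    '), ('NAME', 'x'), ('DEDENT', '')]
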
@@ -1,47 +1,4 @@
 from __future__ import absolute_import
-from itertools import count
-from token import *
-
-from parso._compatibility import py_version
-
-# Don't mutate the standard library dict
-tok_name = tok_name.copy()
-
-_counter = count(N_TOKENS)
-# Never want to see this thing again.
-del N_TOKENS
-
-COMMENT = next(_counter)
-tok_name[COMMENT] = 'COMMENT'
-
-NL = next(_counter)
-tok_name[NL] = 'NL'
-
-# Sets the attributes that don't exist in these tok_name versions.
-if py_version >= 30:
-    BACKQUOTE = next(_counter)
-    tok_name[BACKQUOTE] = 'BACKQUOTE'
-else:
-    RARROW = next(_counter)
-    tok_name[RARROW] = 'RARROW'
-    ELLIPSIS = next(_counter)
-    tok_name[ELLIPSIS] = 'ELLIPSIS'
-
-if py_version < 35:
-    ATEQUAL = next(_counter)
-    tok_name[ATEQUAL] = 'ATEQUAL'
-
-ERROR_DEDENT = next(_counter)
-tok_name[ERROR_DEDENT] = 'ERROR_DEDENT'
-
-FSTRING_START = next(_counter)
-tok_name[FSTRING_START] = 'FSTRING_START'
-FSTRING_END = next(_counter)
-tok_name[FSTRING_END] = 'FSTRING_END'
-FSTRING_STRING = next(_counter)
-tok_name[FSTRING_STRING] = 'FSTRING_STRING'
-EXCLAMATION = next(_counter)
-tok_name[EXCLAMATION] = 'EXCLAMATION'

 # Map from operator to number (since tokenize doesn't do this)

@@ -100,7 +57,7 @@ opmap_raw = """\
 opmap = {}
 for line in opmap_raw.splitlines():
     op, name = line.split()
-    opmap[op] = globals()[name]
+    opmap[op] = name


 def generate_token_id(string):
@@ -115,26 +72,25 @@ def generate_token_id(string):
     return globals()[string]


-class Token(object):
-    def __init__(self, name):
+class TokenType(object):
+    def __init__(self, name, contains_syntax=False):
         self.name = name
+        self.contains_syntax = contains_syntax

     def __repr__(self):
         return '%s(%s)' % (self.__class__.__name__, self.name)


-class Tokens(object):
+class TokenTypes(object):
     """
     Basically an enum, but Python 2 doesn't have enums in the standard library.
     """
     def __init__(self, names, contains_syntax):
         for name in names:
-            setattr(self, name, Token(name))
-
-        self.contains_syntax = [getattr(self, name) for name in contains_syntax]
+            setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax))


-PythonTokens = Tokens((
+PythonTokenTypes = TokenTypes((
     'STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT',
     'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP',
     'ENDMARKER'),
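
Aside: these classes replace the integer constants copied from the stdlib `token` module; each member is a distinct object compared by identity, and `contains_syntax` marks the types (NAME, OP) whose values may be reserved grammar strings. A self-contained restatement of the classes introduced above, plus a short usage sketch (illustrative only, not parso's public API):

class TokenType(object):
    def __init__(self, name, contains_syntax=False):
        self.name = name
        self.contains_syntax = contains_syntax

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, self.name)


class TokenTypes(object):
    """Basically an enum; Python 2 has no enum in the standard library."""
    def __init__(self, names, contains_syntax):
        for name in names:
            setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax))


types = TokenTypes(('STRING', 'NUMBER', 'NAME', 'OP', 'ENDMARKER'),
                   contains_syntax=('NAME', 'OP'))
print(types.NAME)                    # TokenType(NAME)
print(types.NAME is types.NAME)      # True: members are compared by identity, not by int value
print(types.OP.contains_syntax)      # True
print(types.NUMBER.contains_syntax)  # False
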
@@ -18,10 +18,7 @@ from collections import namedtuple
 import itertools as _itertools
 from codecs import BOM_UTF8

-from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
-                                NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
-                                ERROR_DEDENT, FSTRING_STRING, FSTRING_START,
-                                FSTRING_END, OP)
+from parso.python.token import PythonTokenTypes, opmap
 from parso._compatibility import py_version
 from parso.utils import split_lines

@@ -242,12 +239,9 @@ class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):


 class PythonToken(Token):
-    def _get_type_name(self, exact=True):
-        return tok_name[self.type]
-
     def __repr__(self):
         return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
-                self._replace(type=self._get_type_name()))
+                self._replace(type=self.type.name))


 class FStringNode(object):
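
Aside: `PythonToken.type` now holds a token-type object, so the repr can read `self.type.name` directly instead of going through `tok_name`. A small namedtuple-based sketch of that change (stand-in classes, not the exact parso definitions):

from collections import namedtuple


class TokenType(object):
    def __init__(self, name):
        self.name = name


NUMBER = TokenType('NUMBER')


class PythonToken(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
    def __repr__(self):
        # _replace swaps the type object for its name before formatting.
        return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
                self._replace(type=self.type.name))


print(PythonToken(NUMBER, '42', (1, 0), ''))
# TokenInfo(type=NUMBER, string='42', start_pos=(1, 0), prefix='')
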
@@ -396,7 +390,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
            endmatch = endprog.match(line)
            if endmatch:
                pos = endmatch.end(0)
-                yield PythonToken(STRING, contstr + line[:pos], contstr_start, prefix)
+                yield PythonToken(
+                    PythonTokenTypes.STRING, contstr + line[:pos],
+                    contstr_start, prefix)
                contstr = ''
                contline = None
            else:
@@ -409,7 +405,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
            string, pos = _find_fstring_string(fstring_stack, line, lnum, pos)
            if string:
                yield PythonToken(
-                    FSTRING_STRING, string,
+                    PythonTokenTypes.FSTRING_STRING, string,
                    fstring_stack[-1].last_string_start_pos,
                    # Never has a prefix because it can start anywhere and
                    # include whitespace.
@@ -426,7 +422,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):

            if fstring_index is not None:
                yield PythonToken(
-                    FSTRING_END,
+                    PythonTokenTypes.FSTRING_END,
                    fstring_stack[fstring_index].quote,
                    (lnum, pos),
                    prefix=additional_prefix,
@@ -443,7 +439,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                match = whitespace.match(line, pos)
                pos = match.end()
                yield PythonToken(
-                    ERRORTOKEN, line[pos:], (lnum, pos),
+                    PythonTokenTypes.ERRORTOKEN, line[pos:], (lnum, pos),
                    additional_prefix + match.group(0)
                )
                additional_prefix = ''
@@ -471,24 +467,24 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                        # TODO don't we need to change spos as well?
                        start -= 1
                    if start > indents[-1]:
-                        yield PythonToken(INDENT, '', spos, '')
+                        yield PythonToken(PythonTokenTypes.INDENT, '', spos, '')
                        indents.append(start)
                    while start < indents[-1]:
                        if start > indents[-2]:
-                            yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
+                            yield PythonToken(PythonTokenTypes.ERROR_DEDENT, '', (lnum, 0), '')
                            break
-                        yield PythonToken(DEDENT, '', spos, '')
+                        yield PythonToken(PythonTokenTypes.DEDENT, '', spos, '')
                        indents.pop()

            if fstring_stack:
                fstring_index, end = _check_fstring_ending(fstring_stack, token)
                if fstring_index is not None:
                    if end != 0:
-                        yield PythonToken(ERRORTOKEN, token[:end], spos, prefix)
+                        yield PythonToken(PythonTokenTypes.ERRORTOKEN, token[:end], spos, prefix)
                        prefix = ''

                    yield PythonToken(
-                        FSTRING_END,
+                        PythonTokenTypes.FSTRING_END,
                        fstring_stack[fstring_index].quote,
                        (lnum, spos[1] + 1),
                        prefix=prefix
@@ -499,7 +495,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):

            if (initial in numchars or  # ordinary number
                    (initial == '.' and token != '.' and token != '...')):
-                yield PythonToken(NUMBER, token, spos, prefix)
+                yield PythonToken(PythonTokenTypes.NUMBER, token, spos, prefix)
            elif initial in '\r\n':
                if any(not f.allow_multiline() for f in fstring_stack):
                    # Would use fstring_stack.clear, but that's not available
@@ -507,7 +503,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                    fstring_stack[:] = []

                if not new_line and paren_level == 0 and not fstring_stack:
-                    yield PythonToken(NEWLINE, token, spos, prefix)
+                    yield PythonToken(PythonTokenTypes.NEWLINE, token, spos, prefix)
                else:
                    additional_prefix = prefix + token
                new_line = True
@@ -520,7 +516,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                if endmatch:  # all on one line
                    pos = endmatch.end(0)
                    token = line[start:pos]
-                    yield PythonToken(STRING, token, spos, prefix)
+                    yield PythonToken(PythonTokenTypes.STRING, token, spos, prefix)
                else:
                    contstr_start = (lnum, start)  # multiple lines
                    contstr = line[start:]
@@ -537,10 +533,10 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                    contline = line
                    break
                else:  # ordinary string
-                    yield PythonToken(STRING, token, spos, prefix)
+                    yield PythonToken(PythonTokenTypes.STRING, token, spos, prefix)
            elif token in fstring_pattern_map:  # The start of an fstring.
                fstring_stack.append(FStringNode(fstring_pattern_map[token]))
-                yield PythonToken(FSTRING_START, token, spos, prefix)
+                yield PythonToken(PythonTokenTypes.FSTRING_START, token, spos, prefix)
            elif is_identifier(initial):  # ordinary name
                if token in always_break_tokens:
                    fstring_stack[:] = []
@@ -548,11 +544,11 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                    while True:
                        indent = indents.pop()
                        if indent > start:
-                            yield PythonToken(DEDENT, '', spos, '')
+                            yield PythonToken(PythonTokenTypes.DEDENT, '', spos, '')
                        else:
                            indents.append(indent)
                            break
-                yield PythonToken(NAME, token, spos, prefix)
+                yield PythonToken(PythonTokenTypes.NAME, token, spos, prefix)
            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'):  # continued stmt
                additional_prefix += prefix + line[start:]
                break
@@ -575,13 +571,13 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                    # This check is needed in any case to check if it's a valid
                    # operator or just some random unicode character.
                    opmap[token]
-                    typ = OP
+                    typ = PythonTokenTypes.OP
                except KeyError:
-                    typ = ERRORTOKEN
+                    typ = PythonTokenTypes.ERRORTOKEN
                yield PythonToken(typ, token, spos, prefix)

    if contstr:
-        yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
+        yield PythonToken(PythonTokenTypes.ERRORTOKEN, contstr, contstr_start, prefix)
        if contstr.endswith('\n'):
            new_line = True

@@ -589,8 +585,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
    # As the last position we just take the maximally possible position. We
    # remove -1 for the last new line.
    for indent in indents[1:]:
-        yield PythonToken(DEDENT, '', end_pos, '')
-    yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)
+        yield PythonToken(PythonTokenTypes.DEDENT, '', end_pos, '')
+    yield PythonToken(PythonTokenTypes.ENDMARKER, '', end_pos, additional_prefix)


 if __name__ == "__main__":
@@ -124,7 +124,7 @@ class PythonLeaf(PythonMixin, Leaf):
            # indent error leafs somehow? No idea how, though.
            previous_leaf = self.get_previous_leaf()
            if previous_leaf is not None and previous_leaf.type == 'error_leaf' \
-                    and previous_leaf.original_type in ('indent', 'error_dedent'):
+                    and previous_leaf.token_type in ('INDENT', 'ERROR_DEDENT'):
                previous_leaf = previous_leaf.get_previous_leaf()

            if previous_leaf is None:
@@ -229,6 +229,7 @@ class Leaf(NodeOrLeaf):

 class TypedLeaf(Leaf):
     __slots__ = ('type',)
+
     def __init__(self, type, value, start_pos, prefix=''):
         super(TypedLeaf, self).__init__(value, start_pos, prefix)
         self.type = type
@@ -351,13 +352,13 @@ class ErrorLeaf(Leaf):
    A leaf that is either completely invalid in a language (like `$` in Python)
    or is invalid at that position. Like the star in `1 +* 1`.
    """
-    __slots__ = ('original_type',)
+    __slots__ = ('token_type',)
    type = 'error_leaf'

-    def __init__(self, original_type, value, start_pos, prefix=''):
+    def __init__(self, token_type, value, start_pos, prefix=''):
        super(ErrorLeaf, self).__init__(value, start_pos, prefix)
-        self.original_type = original_type
+        self.token_type = token_type

    def __repr__(self):
        return "<%s: %s:%s, %s>" % \
-            (type(self).__name__, self.original_type, repr(self.value), self.start_pos)
+            (type(self).__name__, self.token_type, repr(self.value), self.start_pos)
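
Aside: error leaves now carry the token type's uppercase name as `token_type` (the old `original_type` held lowercase names), which is why the checks elsewhere in this commit compare against 'INDENT' / 'ERROR_DEDENT'. A quick stand-alone sketch under those assumptions (not the full parso ErrorLeaf):

class ErrorLeaf(object):
    type = 'error_leaf'

    def __init__(self, token_type, value, start_pos, prefix=''):
        self.token_type = token_type   # e.g. 'INDENT', taken from TokenType.name
        self.value = value
        self.start_pos = start_pos
        self.prefix = prefix

    def __repr__(self):
        return "<%s: %s:%s, %s>" % (
            type(self).__name__, self.token_type, repr(self.value), self.start_pos)


leaf = ErrorLeaf('INDENT', '    ', (3, 0))
print(leaf)                                           # <ErrorLeaf: INDENT:'    ', (3, 0)>
print(leaf.token_type in ('INDENT', 'ERROR_DEDENT'))  # True
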
@@ -1,20 +1,29 @@
 # -*- coding: utf-8 # This file contains Unicode characters.

 from textwrap import dedent
-import tokenize as stdlib_tokenize

 import pytest

 from parso._compatibility import py_version
 from parso.utils import split_lines, parse_version_string
-from parso.python.token import (
-    NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT,
-    FSTRING_START)
+from parso.python.token import PythonTokenTypes
 from parso.python import tokenize
 from parso import parse
 from parso.python.tokenize import PythonToken
+
+
+# To make it easier to access some of the token types, just put them here.
+NAME = PythonTokenTypes.NAME
+NEWLINE = PythonTokenTypes.NEWLINE
+STRING = PythonTokenTypes.STRING
+INDENT = PythonTokenTypes.INDENT
+DEDENT = PythonTokenTypes.DEDENT
+ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
+ENDMARKER = PythonTokenTypes.ENDMARKER
+ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
+FSTRING_START = PythonTokenTypes.FSTRING_START


 def _get_token_list(string):
     # Load the current version.
     version_info = parse_version_string()
@@ -128,7 +137,7 @@ def test_identifier_contains_unicode():
    else:
        # Unicode tokens in Python 2 seem to be identified as operators.
        # They will be ignored in the parser, that's ok.
-        assert unicode_token[0] == tokenize.ERRORTOKEN
+        assert unicode_token[0] == ERRORTOKEN


 def test_quoted_strings():
@@ -188,17 +197,17 @@ def test_ur_literals():

 def test_error_literal():
     error_token, endmarker = _get_token_list('"\n')
-    assert error_token.type == tokenize.ERRORTOKEN
+    assert error_token.type == ERRORTOKEN
     assert endmarker.prefix == ''
     assert error_token.string == '"\n'
-    assert endmarker.type == tokenize.ENDMARKER
+    assert endmarker.type == ENDMARKER
     assert endmarker.prefix == ''

     bracket, error_token, endmarker = _get_token_list('( """')
-    assert error_token.type == tokenize.ERRORTOKEN
+    assert error_token.type == ERRORTOKEN
     assert error_token.prefix == ' '
     assert error_token.string == '"""'
-    assert endmarker.type == tokenize.ENDMARKER
+    assert endmarker.type == ENDMARKER
     assert endmarker.prefix == ''

@@ -236,14 +245,3 @@ def test_error_string():
     assert t1.prefix == ' '
     assert t1.string == '"\n'
     assert endmarker.string == ''
-
-
-def test_tok_name_copied():
-    # Make sure parso doesn't mutate the standard library
-    tok_len = len(stdlib_tokenize.tok_name)
-    correct_len = stdlib_tokenize.N_TOKENS
-    if 'N_TOKENS' in stdlib_tokenize.tok_name.values():  # Python 3.7
-        correct_len += 1
-    if 'NT_OFFSET' in stdlib_tokenize.tok_name.values():  # Not there in PyPy
-        correct_len += 1
-
-    assert tok_len == correct_len