Mirror of https://github.com/davidhalter/parso.git, synced 2025-12-07 05:14:29 +08:00
Introduce TokenTypes
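The commit replaces the integer token codes that parso previously pulled from the stdlib-derived `token` module with small objects grouped on a `PythonTokenTypes` namespace. The sketch below condenses that idea into a few runnable lines: the class bodies mirror `TokenType`/`TokenTypes` from the diff, but the token-name list and the `contains_syntax` set (`NAME`, `OP`) are assumptions inferred from how `token_to_ilabel` switches to `type_.contains_syntax`, not a copy of the real module.

    # Minimal sketch of the idea behind this commit; names mirror the diff,
    # the surrounding code is illustrative and not the actual parso module.

    class TokenType(object):
        def __init__(self, name, contains_syntax=False):
            self.name = name
            # True for token types whose value can also be grammar syntax
            # (a NAME can be a keyword, an OP can be a reserved operator).
            self.contains_syntax = contains_syntax

        def __repr__(self):
            return '%s(%s)' % (self.__class__.__name__, self.name)


    class TokenTypes(object):
        """Basically an enum; Python 2 has no enum in the standard library."""
        def __init__(self, names, contains_syntax):
            for name in names:
                setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax))


    # Assumed subset of the real name list; contains_syntax=('NAME', 'OP') is
    # an inference from the token_to_ilabel hunk, not quoted from the diff.
    PythonTokenTypes = TokenTypes(
        ('STRING', 'NUMBER', 'NAME', 'OP', 'NEWLINE', 'INDENT', 'DEDENT', 'ENDMARKER'),
        contains_syntax=('NAME', 'OP'),
    )

    # Consumers now compare against attributes instead of integer constants:
    typ = PythonTokenTypes.NAME
    assert typ.name == 'NAME'                   # replaces tok_name[typ] lookups
    assert typ.contains_syntax                  # replaces `typ in (NAME, OP)` checks
    assert PythonTokenTypes.NEWLINE is not typ  # identity comparison, as before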
@@ -6,7 +6,7 @@ from parso.pgen2.pgen import generate_grammar
 from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string
 from parso.python.diff import DiffParser
 from parso.python.tokenize import tokenize_lines, tokenize
-from parso.python import token
+from parso.python.token import PythonTokenTypes
 from parso.cache import parser_cache, load_module, save_module
 from parso.parser import BaseParser
 from parso.python.parser import Parser as PythonParser
@@ -193,7 +193,7 @@ class Grammar(object):

 class PythonGrammar(Grammar):
     _error_normalizer_config = ErrorFinderConfig()
-    _token_namespace = token
+    _token_namespace = PythonTokenTypes
     _start_nonterminal = 'file_input'

     def __init__(self, version_info, bnf_text):
@@ -16,9 +16,6 @@ fallback token code OP, but the parser needs the actual token code.

 """

-from parso.python import token
-
-
 class DFAPlan(object):
     def __init__(self, next_dfa, dfa_pushes=[]):
         self.next_dfa = next_dfa
@@ -111,7 +108,6 @@ class Grammar(object):

         # A named token (e.g. NAME, NUMBER, STRING)
         itoken = getattr(self._token_namespace, label, None)
-        assert isinstance(itoken, int), label
         if itoken in self.tokens:
             return self.tokens[itoken]
         else:
@@ -126,7 +122,7 @@ class Grammar(object):
         if value in self.reserved_syntax_strings:
             return self.reserved_syntax_strings[value]
         else:
-            self.labels.append((token.NAME, value))
+            self.labels.append(('XXX', value))
             self.reserved_syntax_strings[value] = ilabel
             return self.reserved_syntax_strings[value]

@@ -5,9 +5,9 @@
 # Copyright David Halter and Contributors
 # Modifications are dual-licensed: MIT and PSF.

-from parso.python import tokenize
+from parso.python.tokenize import tokenize
 from parso.utils import parse_version_string
-from parso.python import token
+from parso.python.token import PythonTokenTypes


 class GrammarParser():
@@ -16,7 +16,7 @@ class GrammarParser():
     """
     def __init__(self, bnf_grammar):
         self._bnf_grammar = bnf_grammar
-        self.generator = tokenize.tokenize(
+        self.generator = tokenize(
             bnf_grammar,
             version_info=parse_version_string('3.6')
         )
@@ -24,16 +24,16 @@ class GrammarParser():

     def parse(self):
         # grammar: (NEWLINE | rule)* ENDMARKER
-        while self.type != token.ENDMARKER:
-            while self.type == token.NEWLINE:
+        while self.type != PythonTokenTypes.ENDMARKER:
+            while self.type == PythonTokenTypes.NEWLINE:
                 self._gettoken()

             # rule: NAME ':' rhs NEWLINE
-            self._current_rule_name = self._expect(token.NAME)
-            self._expect(token.OP, ':')
+            self._current_rule_name = self._expect(PythonTokenTypes.NAME)
+            self._expect(PythonTokenTypes.OP, ':')

             a, z = self._parse_rhs()
-            self._expect(token.NEWLINE)
+            self._expect(PythonTokenTypes.NEWLINE)

             yield a, z

@@ -60,7 +60,8 @@ class GrammarParser():
    def _parse_items(self):
        # items: item+
        a, b = self._parse_item()
-        while self.type in (token.NAME, token.STRING) or self.value in ('(', '['):
+        while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \
+                or self.value in ('(', '['):
            c, d = self._parse_item()
            # Need to end on the next item.
            b.add_arc(c)
@@ -72,7 +73,7 @@ class GrammarParser():
         if self.value == "[":
             self._gettoken()
             a, z = self._parse_rhs()
-            self._expect(token.OP, ']')
+            self._expect(PythonTokenTypes.OP, ']')
             # Make it also possible that there is no token and change the
             # state.
             a.add_arc(z)
@@ -97,9 +98,9 @@ class GrammarParser():
         if self.value == "(":
             self._gettoken()
             a, z = self._parse_rhs()
-            self._expect(token.OP, ')')
+            self._expect(PythonTokenTypes.OP, ')')
             return a, z
-        elif self.type in (token.NAME, token.STRING):
+        elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING):
             a = NFAState(self._current_rule_name)
             z = NFAState(self._current_rule_name)
             # Make it clear that the state transition requires that value.
@@ -110,10 +111,10 @@ class GrammarParser():
             self._raise_error("expected (...) or NAME or STRING, got %s/%s",
                               self.type, self.value)

-    def _expect(self, type, value=None):
-        if self.type != type:
-            self._raise_error("expected %s(%s), got %s(%s)",
-                              type, token.tok_name[type], self.type, self.value)
+    def _expect(self, type_, value=None):
+        if self.type != type_:
+            self._raise_error("expected %s, got %s [%s]",
+                              type_, self.type, self.value)
         if value is not None and self.value != value:
             self._raise_error("expected %s, got %s", value, self.value)
         value = self.value
@@ -14,8 +14,6 @@ See Parser/parser.c in the Python distribution for additional info on
 how this parsing engine works.
 """

-from parso.python import tokenize
-

 class InternalParseError(Exception):
     """
@@ -24,9 +22,9 @@ class InternalParseError(Exception):
     wrong.
     """

-    def __init__(self, msg, type, value, start_pos):
+    def __init__(self, msg, type_, value, start_pos):
         Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
-                           (msg, tokenize.tok_name[type], value, start_pos))
+                           (msg, type_.name, value, start_pos))
         self.msg = msg
         self.type = type
         self.value = value
@@ -69,9 +67,7 @@ class StackNode(object):

 def token_to_ilabel(grammar, type_, value):
     # Map from token to label
-    # TODO this is not good, shouldn't use tokenize.NAME, but somehow use the
-    # grammar.
-    if type_ in (tokenize.NAME, tokenize.OP):
+    if type_.contains_syntax:
         # Check for reserved words (keywords)
         try:
             return grammar.reserved_syntax_strings[value]
@@ -196,6 +192,7 @@ class PgenParser(object):
             # creating a new node. We still create expr_stmt and
             # file_input though, because a lot of Jedi depends on its
             # logic.
+            print(tos.nodes)
             if len(tos.nodes) == 1:
                 new_node = tos.nodes[0]
             else:
@@ -13,8 +13,8 @@ import logging
 from parso.utils import split_lines
 from parso.python.parser import Parser
 from parso.python.tree import EndMarker
-from parso.python.tokenize import (NEWLINE, PythonToken, ERROR_DEDENT,
-                                   ENDMARKER, INDENT, DEDENT)
+from parso.python.tokenize import PythonToken
+from parso.python.token import PythonTokenTypes

 LOG = logging.getLogger(__name__)

@@ -29,7 +29,7 @@ def _get_last_line(node_or_leaf):

 def _ends_with_newline(leaf, suffix=''):
     if leaf.type == 'error_leaf':
-        typ = leaf.original_type
+        typ = leaf.token_type.lower()
     else:
         typ = leaf.type

@@ -167,8 +167,7 @@ class DiffParser(object):

     def _enabled_debugging(self, old_lines, lines_new):
         if self._module.get_code() != ''.join(lines_new):
-            LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines),
-                        ''.join(lines_new))
+            LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new))

     def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
         copied_nodes = [None]
@@ -272,7 +271,6 @@ class DiffParser(object):
             # memoryview?
             parsed_until_line = self._nodes_stack.parsed_until_line
             lines_after = self._parser_lines_new[parsed_until_line:]
-            #print('parse_content', parsed_until_line, lines_after, until_line)
             tokens = self._diff_tokenize(
                 lines_after,
                 until_line,
@@ -292,7 +290,7 @@ class DiffParser(object):
         stack = self._active_parser.pgen_parser.stack
         for typ, string, start_pos, prefix in tokens:
             start_pos = start_pos[0] + line_offset, start_pos[1]
-            if typ == INDENT:
+            if typ == PythonTokenTypes.INDENT:
                 indents.append(start_pos[1])
                 if is_first_token:
                     omitted_first_indent = True
@@ -305,8 +303,9 @@ class DiffParser(object):

             # In case of omitted_first_indent, it might not be dedented fully.
             # However this is a sign for us that a dedent happened.
-            if typ == DEDENT \
-                    or typ == ERROR_DEDENT and omitted_first_indent and len(indents) == 1:
+            if typ == PythonTokenTypes.DEDENT \
+                    or typ == PythonTokenTypes.ERROR_DEDENT \
+                    and omitted_first_indent and len(indents) == 1:
                 indents.pop()
                 if omitted_first_indent and not indents:
                     # We are done here, only thing that can come now is an
@@ -316,18 +315,22 @@ class DiffParser(object):
                         prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix)
                     else:
                         prefix = ''
-                    yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
+                    yield PythonToken(
+                        PythonTokenTypes.ENDMARKER, '',
+                        (start_pos[0] + line_offset, 0),
+                        prefix
+                    )
                     break
-            elif typ == NEWLINE and start_pos[0] >= until_line:
+            elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line:
                 yield PythonToken(typ, string, start_pos, prefix)
                 # Check if the parser is actually in a valid suite state.
                 if suite_or_file_input_is_valid(self._pgen_grammar, stack):
                     start_pos = start_pos[0] + 1, 0
                     while len(indents) > int(omitted_first_indent):
                         indents.pop()
-                        yield PythonToken(DEDENT, '', start_pos, '')
+                        yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '')

-                    yield PythonToken(ENDMARKER, '', start_pos, '')
+                    yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '')
                     break
                 else:
                     continue
@@ -536,7 +539,7 @@ class _NodesStack(object):
             line_offset_index = -2

         elif (new_nodes[-1].type in ('error_leaf', 'error_node') or
               _is_flow_node(new_nodes[-1])):
             # Error leafs/nodes don't have a defined start/end. Error
             # nodes might not end with a newline (e.g. if there's an
             # open `(`). Therefore ignore all of them unless they are
@@ -306,12 +306,12 @@ class ErrorFinder(Normalizer):

     def visit_leaf(self, leaf):
         if leaf.type == 'error_leaf':
-            if leaf.original_type in ('indent', 'error_dedent'):
+            if leaf.token_type in ('INDENT', 'ERROR_DEDENT'):
                 # Indents/Dedents itself never have a prefix. They are just
                 # "pseudo" tokens that get removed by the syntax tree later.
                 # Therefore in case of an error we also have to check for this.
                 spacing = list(leaf.get_next_leaf()._split_prefix())[-1]
-                if leaf.original_type == 'indent':
+                if leaf.token_type == 'INDENT':
                     message = 'unexpected indent'
                 else:
                     message = 'unindent does not match any outer indentation level'
@@ -1,7 +1,5 @@
 from parso.python import tree
-from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
-                                STRING, tok_name, NAME, FSTRING_STRING,
-                                FSTRING_START, FSTRING_END)
+from parso.python.token import PythonTokenTypes
 from parso.parser import BaseParser
 from parso.pgen2.parse import token_to_ilabel

@@ -53,17 +51,18 @@ class Parser(BaseParser):

     # Names/Keywords are handled separately
     _leaf_map = {
-        STRING: tree.String,
-        NUMBER: tree.Number,
-        NEWLINE: tree.Newline,
-        ENDMARKER: tree.EndMarker,
-        FSTRING_STRING: tree.FStringString,
-        FSTRING_START: tree.FStringStart,
-        FSTRING_END: tree.FStringEnd,
+        PythonTokenTypes.STRING: tree.String,
+        PythonTokenTypes.NUMBER: tree.Number,
+        PythonTokenTypes.NEWLINE: tree.Newline,
+        PythonTokenTypes.ENDMARKER: tree.EndMarker,
+        PythonTokenTypes.FSTRING_STRING: tree.FStringString,
+        PythonTokenTypes.FSTRING_START: tree.FStringStart,
+        PythonTokenTypes.FSTRING_END: tree.FStringEnd,
     }

     def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
-        super(Parser, self).__init__(pgen_grammar, start_nonterminal, error_recovery=error_recovery)
+        super(Parser, self).__init__(pgen_grammar, start_nonterminal,
+                                     error_recovery=error_recovery)

         self.syntax_errors = []
         self._omit_dedent_list = []
@@ -126,7 +125,7 @@ class Parser(BaseParser):

     def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
         # print('leaf', repr(value), token.tok_name[type])
-        if type == NAME:
+        if type == PythonTokenTypes.NAME:
             if value in pgen_grammar.reserved_syntax_strings:
                 return tree.Keyword(value, start_pos, prefix)
             else:
@@ -143,7 +142,8 @@ class Parser(BaseParser):
             last_leaf = None

         if self._start_nonterminal == 'file_input' and \
-                (typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value):
+                (typ == PythonTokenTypes.ENDMARKER or
+                 typ == PythonTokenTypes.DEDENT and '\n' not in last_leaf.value):
             def reduce_stack(states, newstate):
                 # reduce
                 state = newstate
@@ -158,7 +158,7 @@ class Parser(BaseParser):
             # end of a file, we have to recover even if the user doesn't want
             # error recovery.
             if stack[-1].dfa.from_rule == 'simple_stmt':
-                ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)
+                ilabel = token_to_ilabel(pgen_grammar, PythonTokenTypes.NEWLINE, value)
                 try:
                     plan = stack[-1].dfa.ilabel_to_plan[ilabel]
                 except KeyError:
@@ -199,12 +199,12 @@ class Parser(BaseParser):
         if self._stack_removal(stack, until_index + 1):
             add_token_callback(typ, value, start_pos, prefix)
         else:
-            if typ == INDENT:
+            if typ == PythonTokenTypes.INDENT:
                 # For every deleted INDENT we have to delete a DEDENT as well.
                 # Otherwise the parser will get into trouble and DEDENT too early.
                 self._omit_dedent_list.append(self._indent_counter)

-            error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
+            error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
             stack[-1].nodes.append(error_leaf)

         tos = stack[-1]
@@ -230,7 +230,7 @@ class Parser(BaseParser):
     def _recovery_tokenize(self, tokens):
         for typ, value, start_pos, prefix in tokens:
             # print(tok_name[typ], repr(value), start_pos, repr(prefix))
-            if typ == DEDENT:
+            if typ == PythonTokenTypes.DEDENT:
                 # We need to count indents, because if we just omit any DEDENT,
                 # we might omit them in the wrong place.
                 o = self._omit_dedent_list
@@ -239,6 +239,6 @@ class Parser(BaseParser):
                     continue

                 self._indent_counter -= 1
-            elif typ == INDENT:
+            elif typ == PythonTokenTypes.INDENT:
                 self._indent_counter += 1
             yield typ, value, start_pos, prefix
@@ -1,47 +1,4 @@
 from __future__ import absolute_import
-from itertools import count
-from token import *
-
-from parso._compatibility import py_version
-
-# Don't mutate the standard library dict
-tok_name = tok_name.copy()
-
-_counter = count(N_TOKENS)
-# Never want to see this thing again.
-del N_TOKENS
-
-COMMENT = next(_counter)
-tok_name[COMMENT] = 'COMMENT'
-
-NL = next(_counter)
-tok_name[NL] = 'NL'
-
-# Sets the attributes that don't exist in these tok_name versions.
-if py_version >= 30:
-    BACKQUOTE = next(_counter)
-    tok_name[BACKQUOTE] = 'BACKQUOTE'
-else:
-    RARROW = next(_counter)
-    tok_name[RARROW] = 'RARROW'
-    ELLIPSIS = next(_counter)
-    tok_name[ELLIPSIS] = 'ELLIPSIS'
-
-if py_version < 35:
-    ATEQUAL = next(_counter)
-    tok_name[ATEQUAL] = 'ATEQUAL'
-
-ERROR_DEDENT = next(_counter)
-tok_name[ERROR_DEDENT] = 'ERROR_DEDENT'
-
-FSTRING_START = next(_counter)
-tok_name[FSTRING_START] = 'FSTRING_START'
-FSTRING_END = next(_counter)
-tok_name[FSTRING_END] = 'FSTRING_END'
-FSTRING_STRING = next(_counter)
-tok_name[FSTRING_STRING] = 'FSTRING_STRING'
-EXCLAMATION = next(_counter)
-tok_name[EXCLAMATION] = 'EXCLAMATION'

 # Map from operator to number (since tokenize doesn't do this)

@@ -100,7 +57,7 @@ opmap_raw = """\
 opmap = {}
 for line in opmap_raw.splitlines():
     op, name = line.split()
-    opmap[op] = globals()[name]
+    opmap[op] = name


 def generate_token_id(string):
@@ -115,26 +72,25 @@ def generate_token_id(string):
     return globals()[string]


-class Token(object):
-    def __init__(self, name):
+class TokenType(object):
+    def __init__(self, name, contains_syntax=False):
         self.name = name
+        self.contains_syntax = contains_syntax

     def __repr__(self):
         return '%s(%s)' % (self.__class__.__name__, self.name)


-class Tokens(object):
+class TokenTypes(object):
     """
     Basically an enum, but Python 2 doesn't have enums in the standard library.
     """
     def __init__(self, names, contains_syntax):
         for name in names:
-            setattr(self, name, Token(name))
-
-        self.contains_syntax = [getattr(self, name) for name in contains_syntax]
+            setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax))


-PythonTokens = Tokens((
+PythonTokenTypes = TokenTypes((
     'STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT',
     'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP',
     'ENDMARKER'),
@@ -18,10 +18,7 @@ from collections import namedtuple
 import itertools as _itertools
 from codecs import BOM_UTF8

-from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
-                                NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
-                                ERROR_DEDENT, FSTRING_STRING, FSTRING_START,
-                                FSTRING_END, OP)
+from parso.python.token import PythonTokenTypes, opmap
 from parso._compatibility import py_version
 from parso.utils import split_lines

@@ -242,12 +239,9 @@ class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):


 class PythonToken(Token):
-    def _get_type_name(self, exact=True):
-        return tok_name[self.type]
-
     def __repr__(self):
         return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
-                self._replace(type=self._get_type_name()))
+                self._replace(type=self.type.name))


 class FStringNode(object):
@@ -396,7 +390,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
             endmatch = endprog.match(line)
             if endmatch:
                 pos = endmatch.end(0)
-                yield PythonToken(STRING, contstr + line[:pos], contstr_start, prefix)
+                yield PythonToken(
+                    PythonTokenTypes.STRING, contstr + line[:pos],
+                    contstr_start, prefix)
                 contstr = ''
                 contline = None
             else:
@@ -409,7 +405,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
             string, pos = _find_fstring_string(fstring_stack, line, lnum, pos)
             if string:
                 yield PythonToken(
-                    FSTRING_STRING, string,
+                    PythonTokenTypes.FSTRING_STRING, string,
                     fstring_stack[-1].last_string_start_pos,
                     # Never has a prefix because it can start anywhere and
                     # include whitespace.
@@ -426,7 +422,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):

             if fstring_index is not None:
                 yield PythonToken(
-                    FSTRING_END,
+                    PythonTokenTypes.FSTRING_END,
                     fstring_stack[fstring_index].quote,
                     (lnum, pos),
                     prefix=additional_prefix,
@@ -443,7 +439,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                 match = whitespace.match(line, pos)
                 pos = match.end()
                 yield PythonToken(
-                    ERRORTOKEN, line[pos:], (lnum, pos),
+                    PythonTokenTypes.ERRORTOKEN, line[pos:], (lnum, pos),
                     additional_prefix + match.group(0)
                 )
                 additional_prefix = ''
@@ -471,24 +467,24 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                     # TODO don't we need to change spos as well?
                     start -= 1
                 if start > indents[-1]:
-                    yield PythonToken(INDENT, '', spos, '')
+                    yield PythonToken(PythonTokenTypes.INDENT, '', spos, '')
                     indents.append(start)
                 while start < indents[-1]:
                     if start > indents[-2]:
-                        yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
+                        yield PythonToken(PythonTokenTypes.ERROR_DEDENT, '', (lnum, 0), '')
                         break
-                    yield PythonToken(DEDENT, '', spos, '')
+                    yield PythonToken(PythonTokenTypes.DEDENT, '', spos, '')
                     indents.pop()

             if fstring_stack:
                 fstring_index, end = _check_fstring_ending(fstring_stack, token)
                 if fstring_index is not None:
                     if end != 0:
-                        yield PythonToken(ERRORTOKEN, token[:end], spos, prefix)
+                        yield PythonToken(PythonTokenTypes.ERRORTOKEN, token[:end], spos, prefix)
                         prefix = ''

                     yield PythonToken(
-                        FSTRING_END,
+                        PythonTokenTypes.FSTRING_END,
                         fstring_stack[fstring_index].quote,
                         (lnum, spos[1] + 1),
                         prefix=prefix
@@ -499,7 +495,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):

             if (initial in numchars or  # ordinary number
                     (initial == '.' and token != '.' and token != '...')):
-                yield PythonToken(NUMBER, token, spos, prefix)
+                yield PythonToken(PythonTokenTypes.NUMBER, token, spos, prefix)
             elif initial in '\r\n':
                 if any(not f.allow_multiline() for f in fstring_stack):
                     # Would use fstring_stack.clear, but that's not available
@@ -507,7 +503,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                     fstring_stack[:] = []

                 if not new_line and paren_level == 0 and not fstring_stack:
-                    yield PythonToken(NEWLINE, token, spos, prefix)
+                    yield PythonToken(PythonTokenTypes.NEWLINE, token, spos, prefix)
                 else:
                     additional_prefix = prefix + token
                 new_line = True
@@ -520,7 +516,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                 if endmatch:  # all on one line
                     pos = endmatch.end(0)
                     token = line[start:pos]
-                    yield PythonToken(STRING, token, spos, prefix)
+                    yield PythonToken(PythonTokenTypes.STRING, token, spos, prefix)
                 else:
                     contstr_start = (lnum, start)  # multiple lines
                     contstr = line[start:]
@@ -537,10 +533,10 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                     contline = line
                     break
                 else:  # ordinary string
-                    yield PythonToken(STRING, token, spos, prefix)
+                    yield PythonToken(PythonTokenTypes.STRING, token, spos, prefix)
             elif token in fstring_pattern_map:  # The start of an fstring.
                 fstring_stack.append(FStringNode(fstring_pattern_map[token]))
-                yield PythonToken(FSTRING_START, token, spos, prefix)
+                yield PythonToken(PythonTokenTypes.FSTRING_START, token, spos, prefix)
             elif is_identifier(initial):  # ordinary name
                 if token in always_break_tokens:
                     fstring_stack[:] = []
@@ -548,11 +544,11 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                     while True:
                         indent = indents.pop()
                         if indent > start:
-                            yield PythonToken(DEDENT, '', spos, '')
+                            yield PythonToken(PythonTokenTypes.DEDENT, '', spos, '')
                         else:
                             indents.append(indent)
                             break
-                yield PythonToken(NAME, token, spos, prefix)
+                yield PythonToken(PythonTokenTypes.NAME, token, spos, prefix)
             elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'):  # continued stmt
                 additional_prefix += prefix + line[start:]
                 break
@@ -575,13 +571,13 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                     # This check is needed in any case to check if it's a valid
                     # operator or just some random unicode character.
                     opmap[token]
-                    typ = OP
+                    typ = PythonTokenTypes.OP
                 except KeyError:
-                    typ = ERRORTOKEN
+                    typ = PythonTokenTypes.ERRORTOKEN
                 yield PythonToken(typ, token, spos, prefix)

     if contstr:
-        yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
+        yield PythonToken(PythonTokenTypes.ERRORTOKEN, contstr, contstr_start, prefix)
         if contstr.endswith('\n'):
             new_line = True

@@ -589,8 +585,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
     # As the last position we just take the maximally possible position. We
     # remove -1 for the last new line.
     for indent in indents[1:]:
-        yield PythonToken(DEDENT, '', end_pos, '')
-    yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)
+        yield PythonToken(PythonTokenTypes.DEDENT, '', end_pos, '')
+    yield PythonToken(PythonTokenTypes.ENDMARKER, '', end_pos, additional_prefix)


 if __name__ == "__main__":
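For consumers, the visible effect of the tokenizer hunks is that each token now carries a `TokenType` object instead of an integer. A minimal usage sketch, assuming the import paths and the `tokenize(code, version_info=...)` signature shown in the hunks above; it is illustrative rather than a documented public API:

    # Sketch: tokenize a snippet and compare token types by identity.
    from parso.utils import parse_version_string
    from parso.python.tokenize import tokenize
    from parso.python.token import PythonTokenTypes

    tokens = list(tokenize('x = 1\n', version_info=parse_version_string('3.6')))
    assert tokens[0].type is PythonTokenTypes.NAME       # the 'x'
    assert tokens[-1].type is PythonTokenTypes.ENDMARKER
    # Each token type knows its own name; no more tok_name[...] lookups.
    print([t.type.name for t in tokens])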
@@ -124,7 +124,7 @@ class PythonLeaf(PythonMixin, Leaf):
         # indent error leafs somehow? No idea how, though.
         previous_leaf = self.get_previous_leaf()
         if previous_leaf is not None and previous_leaf.type == 'error_leaf' \
-                and previous_leaf.original_type in ('indent', 'error_dedent'):
+                and previous_leaf.token_type in ('INDENT', 'ERROR_DEDENT'):
             previous_leaf = previous_leaf.get_previous_leaf()

         if previous_leaf is None:
@@ -229,6 +229,7 @@ class Leaf(NodeOrLeaf):

 class TypedLeaf(Leaf):
     __slots__ = ('type',)
+
     def __init__(self, type, value, start_pos, prefix=''):
         super(TypedLeaf, self).__init__(value, start_pos, prefix)
         self.type = type
@@ -351,13 +352,13 @@ class ErrorLeaf(Leaf):
     A leaf that is either completely invalid in a language (like `$` in Python)
     or is invalid at that position. Like the star in `1 +* 1`.
     """
-    __slots__ = ('original_type',)
+    __slots__ = ('token_type',)
     type = 'error_leaf'

-    def __init__(self, original_type, value, start_pos, prefix=''):
+    def __init__(self, token_type, value, start_pos, prefix=''):
         super(ErrorLeaf, self).__init__(value, start_pos, prefix)
-        self.original_type = original_type
+        self.token_type = token_type

     def __repr__(self):
         return "<%s: %s:%s, %s>" % \
-            (type(self).__name__, self.original_type, repr(self.value), self.start_pos)
+            (type(self).__name__, self.token_type, repr(self.value), self.start_pos)
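The tree hunks rename `ErrorLeaf.original_type` to `token_type`, which now holds the upper-case token-type name: the error normalizer compares it against 'INDENT'/'ERROR_DEDENT' and the diff parser lowercases it where the old lower-case strings were expected. A small sketch of what that looks like, assuming `ErrorLeaf` can be constructed directly with the signature shown above; real trees are built by the parser, not by hand:

    # Sketch of the renamed attribute on an error leaf.
    from parso.tree import ErrorLeaf

    leaf = ErrorLeaf('INDENT', '', (1, 0), prefix='')
    assert leaf.type == 'error_leaf'            # the node type is unchanged
    assert leaf.token_type == 'INDENT'          # formerly `original_type`
    assert leaf.token_type.lower() == 'indent'  # how the diff parser consumes it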
@@ -1,20 +1,29 @@
 # -*- coding: utf-8 # This file contains Unicode characters.

 from textwrap import dedent
-import tokenize as stdlib_tokenize

 import pytest

 from parso._compatibility import py_version
 from parso.utils import split_lines, parse_version_string
-from parso.python.token import (
-    NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT,
-    FSTRING_START)
+from parso.python.token import PythonTokenTypes
 from parso.python import tokenize
 from parso import parse
 from parso.python.tokenize import PythonToken


+# To make it easier to access some of the token types, just put them here.
+NAME = PythonTokenTypes.NAME
+NEWLINE = PythonTokenTypes.NEWLINE
+STRING = PythonTokenTypes.STRING
+INDENT = PythonTokenTypes.INDENT
+DEDENT = PythonTokenTypes.DEDENT
+ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
+ENDMARKER = PythonTokenTypes.ENDMARKER
+ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
+FSTRING_START = PythonTokenTypes.FSTRING_START
+
+
 def _get_token_list(string):
     # Load the current version.
     version_info = parse_version_string()
@@ -128,7 +137,7 @@ def test_identifier_contains_unicode():
     else:
         # Unicode tokens in Python 2 seem to be identified as operators.
         # They will be ignored in the parser, that's ok.
-        assert unicode_token[0] == tokenize.ERRORTOKEN
+        assert unicode_token[0] == ERRORTOKEN


 def test_quoted_strings():
@@ -188,17 +197,17 @@ def test_ur_literals():

 def test_error_literal():
     error_token, endmarker = _get_token_list('"\n')
-    assert error_token.type == tokenize.ERRORTOKEN
+    assert error_token.type == ERRORTOKEN
     assert endmarker.prefix == ''
     assert error_token.string == '"\n'
-    assert endmarker.type == tokenize.ENDMARKER
+    assert endmarker.type == ENDMARKER
     assert endmarker.prefix == ''

     bracket, error_token, endmarker = _get_token_list('( """')
-    assert error_token.type == tokenize.ERRORTOKEN
+    assert error_token.type == ERRORTOKEN
     assert error_token.prefix == ' '
     assert error_token.string == '"""'
-    assert endmarker.type == tokenize.ENDMARKER
+    assert endmarker.type == ENDMARKER
     assert endmarker.prefix == ''


@@ -236,14 +245,3 @@ def test_error_string():
     assert t1.prefix == ' '
     assert t1.string == '"\n'
     assert endmarker.string == ''
-
-def test_tok_name_copied():
-    # Make sure parso doesn't mutate the standard library
-    tok_len = len(stdlib_tokenize.tok_name)
-    correct_len = stdlib_tokenize.N_TOKENS
-    if 'N_TOKENS' in stdlib_tokenize.tok_name.values():  # Python 3.7
-        correct_len += 1
-    if 'NT_OFFSET' in stdlib_tokenize.tok_name.values():  # Not there in PyPy
-        correct_len += 1
-
-    assert tok_len == correct_len