Use some tokenize names directly

This commit is contained in:
Dave Halter
2018-06-24 16:39:48 +02:00
parent 34ab35558f
commit e958b241c7
+34 -20
View File
@@ -23,6 +23,20 @@ from parso._compatibility import py_version
from parso.utils import split_lines from parso.utils import split_lines
# Module-level aliases for the PythonTokenTypes members, so that the tokenizer
# code in this module can refer to them by their short names (e.g. ``STRING``
# instead of ``PythonTokenTypes.STRING``) when constructing PythonToken objects.
STRING = PythonTokenTypes.STRING
NAME = PythonTokenTypes.NAME
NUMBER = PythonTokenTypes.NUMBER
OP = PythonTokenTypes.OP
NEWLINE = PythonTokenTypes.NEWLINE
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
ENDMARKER = PythonTokenTypes.ENDMARKER
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
FSTRING_START = PythonTokenTypes.FSTRING_START
FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
FSTRING_END = PythonTokenTypes.FSTRING_END
TokenCollection = namedtuple( TokenCollection = namedtuple(
'TokenCollection', 'TokenCollection',
'pseudo_token single_quoted triple_quoted endpats whitespace ' 'pseudo_token single_quoted triple_quoted endpats whitespace '
@@ -391,7 +405,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
if endmatch: if endmatch:
pos = endmatch.end(0) pos = endmatch.end(0)
yield PythonToken( yield PythonToken(
PythonTokenTypes.STRING, contstr + line[:pos], STRING, contstr + line[:pos],
contstr_start, prefix) contstr_start, prefix)
contstr = '' contstr = ''
contline = None contline = None
@@ -405,7 +419,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
string, pos = _find_fstring_string(fstring_stack, line, lnum, pos) string, pos = _find_fstring_string(fstring_stack, line, lnum, pos)
if string: if string:
yield PythonToken( yield PythonToken(
PythonTokenTypes.FSTRING_STRING, string, FSTRING_STRING, string,
fstring_stack[-1].last_string_start_pos, fstring_stack[-1].last_string_start_pos,
# Never has a prefix because it can start anywhere and # Never has a prefix because it can start anywhere and
# include whitespace. # include whitespace.
@@ -422,7 +436,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
if fstring_index is not None: if fstring_index is not None:
yield PythonToken( yield PythonToken(
PythonTokenTypes.FSTRING_END, FSTRING_END,
fstring_stack[fstring_index].quote, fstring_stack[fstring_index].quote,
(lnum, pos), (lnum, pos),
prefix=additional_prefix, prefix=additional_prefix,
@@ -439,7 +453,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
match = whitespace.match(line, pos) match = whitespace.match(line, pos)
pos = match.end() pos = match.end()
yield PythonToken( yield PythonToken(
PythonTokenTypes.ERRORTOKEN, line[pos:], (lnum, pos), ERRORTOKEN, line[pos:], (lnum, pos),
additional_prefix + match.group(0) additional_prefix + match.group(0)
) )
additional_prefix = '' additional_prefix = ''
@@ -467,24 +481,24 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
# TODO don't we need to change spos as well? # TODO don't we need to change spos as well?
start -= 1 start -= 1
if start > indents[-1]: if start > indents[-1]:
yield PythonToken(PythonTokenTypes.INDENT, '', spos, '') yield PythonToken(INDENT, '', spos, '')
indents.append(start) indents.append(start)
while start < indents[-1]: while start < indents[-1]:
if start > indents[-2]: if start > indents[-2]:
yield PythonToken(PythonTokenTypes.ERROR_DEDENT, '', (lnum, 0), '') yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
break break
yield PythonToken(PythonTokenTypes.DEDENT, '', spos, '') yield PythonToken(DEDENT, '', spos, '')
indents.pop() indents.pop()
if fstring_stack: if fstring_stack:
fstring_index, end = _check_fstring_ending(fstring_stack, token) fstring_index, end = _check_fstring_ending(fstring_stack, token)
if fstring_index is not None: if fstring_index is not None:
if end != 0: if end != 0:
yield PythonToken(PythonTokenTypes.ERRORTOKEN, token[:end], spos, prefix) yield PythonToken(ERRORTOKEN, token[:end], spos, prefix)
prefix = '' prefix = ''
yield PythonToken( yield PythonToken(
PythonTokenTypes.FSTRING_END, FSTRING_END,
fstring_stack[fstring_index].quote, fstring_stack[fstring_index].quote,
(lnum, spos[1] + 1), (lnum, spos[1] + 1),
prefix=prefix prefix=prefix
@@ -495,7 +509,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
if (initial in numchars or # ordinary number if (initial in numchars or # ordinary number
(initial == '.' and token != '.' and token != '...')): (initial == '.' and token != '.' and token != '...')):
yield PythonToken(PythonTokenTypes.NUMBER, token, spos, prefix) yield PythonToken(NUMBER, token, spos, prefix)
elif initial in '\r\n': elif initial in '\r\n':
if any(not f.allow_multiline() for f in fstring_stack): if any(not f.allow_multiline() for f in fstring_stack):
# Would use fstring_stack.clear, but that's not available # Would use fstring_stack.clear, but that's not available
@@ -503,7 +517,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
fstring_stack[:] = [] fstring_stack[:] = []
if not new_line and paren_level == 0 and not fstring_stack: if not new_line and paren_level == 0 and not fstring_stack:
yield PythonToken(PythonTokenTypes.NEWLINE, token, spos, prefix) yield PythonToken(NEWLINE, token, spos, prefix)
else: else:
additional_prefix = prefix + token additional_prefix = prefix + token
new_line = True new_line = True
@@ -516,7 +530,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
if endmatch: # all on one line if endmatch: # all on one line
pos = endmatch.end(0) pos = endmatch.end(0)
token = line[start:pos] token = line[start:pos]
yield PythonToken(PythonTokenTypes.STRING, token, spos, prefix) yield PythonToken(STRING, token, spos, prefix)
else: else:
contstr_start = (lnum, start) # multiple lines contstr_start = (lnum, start) # multiple lines
contstr = line[start:] contstr = line[start:]
@@ -533,10 +547,10 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
contline = line contline = line
break break
else: # ordinary string else: # ordinary string
yield PythonToken(PythonTokenTypes.STRING, token, spos, prefix) yield PythonToken(STRING, token, spos, prefix)
elif token in fstring_pattern_map: # The start of an fstring. elif token in fstring_pattern_map: # The start of an fstring.
fstring_stack.append(FStringNode(fstring_pattern_map[token])) fstring_stack.append(FStringNode(fstring_pattern_map[token]))
yield PythonToken(PythonTokenTypes.FSTRING_START, token, spos, prefix) yield PythonToken(FSTRING_START, token, spos, prefix)
elif is_identifier(initial): # ordinary name elif is_identifier(initial): # ordinary name
if token in always_break_tokens: if token in always_break_tokens:
fstring_stack[:] = [] fstring_stack[:] = []
@@ -544,11 +558,11 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
while True: while True:
indent = indents.pop() indent = indents.pop()
if indent > start: if indent > start:
yield PythonToken(PythonTokenTypes.DEDENT, '', spos, '') yield PythonToken(DEDENT, '', spos, '')
else: else:
indents.append(indent) indents.append(indent)
break break
yield PythonToken(PythonTokenTypes.NAME, token, spos, prefix) yield PythonToken(NAME, token, spos, prefix)
elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt
additional_prefix += prefix + line[start:] additional_prefix += prefix + line[start:]
break break
@@ -567,10 +581,10 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
and fstring_stack[-1].parentheses_count == 1: and fstring_stack[-1].parentheses_count == 1:
fstring_stack[-1].format_spec_count += 1 fstring_stack[-1].format_spec_count += 1
yield PythonToken(PythonTokenTypes.OP, token, spos, prefix) yield PythonToken(OP, token, spos, prefix)
if contstr: if contstr:
yield PythonToken(PythonTokenTypes.ERRORTOKEN, contstr, contstr_start, prefix) yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
if contstr.endswith('\n'): if contstr.endswith('\n'):
new_line = True new_line = True
@@ -578,8 +592,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
# As the last position we just take the maximally possible position. We # As the last position we just take the maximally possible position. We
# remove -1 for the last new line. # remove -1 for the last new line.
for indent in indents[1:]: for indent in indents[1:]:
yield PythonToken(PythonTokenTypes.DEDENT, '', end_pos, '') yield PythonToken(DEDENT, '', end_pos, '')
yield PythonToken(PythonTokenTypes.ENDMARKER, '', end_pos, additional_prefix) yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)
if __name__ == "__main__": if __name__ == "__main__":