Rename TokenInfo to PythonToken.

Dave Halter
2017-08-21 09:33:41 +02:00
parent 1912551296
commit 88c3c0567a
3 changed files with 39 additions and 37 deletions
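In practice, the rename means downstream code now imports and constructs PythonToken where it previously used TokenInfo; the namedtuple fields (type, string, start_pos, prefix) and the end_pos property are unchanged. A minimal sketch of the new spelling, mirroring the updated tests at the bottom of this commit (not itself part of the diff):

    from parso.python.token import ERRORTOKEN
    from parso.python.tokenize import PythonToken

    # PythonToken is still a namedtuple, so construction and value comparison
    # behave exactly as they did for TokenInfo.
    tok = PythonToken(ERRORTOKEN, '""""\n', (1, 0), '')
    assert tok.start_pos == (1, 0)
    assert tok.end_pos == (2, 0)  # the string contains a newline, so the token ends on line 2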

File 1 of 3

@@ -13,7 +13,7 @@ import logging
 from parso.utils import split_lines
 from parso.python.parser import Parser
 from parso.python.tree import EndMarker
-from parso.python.tokenize import (NEWLINE, TokenInfo, ERROR_DEDENT,
+from parso.python.tokenize import (NEWLINE, PythonToken, ERROR_DEDENT,
                                    ENDMARKER, INDENT, DEDENT)
@@ -315,23 +315,23 @@ class DiffParser(object):
                         prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix)
                     else:
                         prefix = ''
-                    yield TokenInfo(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
+                    yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
                     break
             elif typ == NEWLINE and start_pos[0] >= until_line:
-                yield TokenInfo(typ, string, start_pos, prefix)
+                yield PythonToken(typ, string, start_pos, prefix)
                 # Check if the parser is actually in a valid suite state.
                 if suite_or_file_input_is_valid(self._pgen_grammar, stack):
                     start_pos = start_pos[0] + 1, 0
                     while len(indents) > int(omitted_first_indent):
                         indents.pop()
-                        yield TokenInfo(DEDENT, '', start_pos, '')
+                        yield PythonToken(DEDENT, '', start_pos, '')

-                    yield TokenInfo(ENDMARKER, '', start_pos, '')
+                    yield PythonToken(ENDMARKER, '', start_pos, '')
                     break
                 else:
                     continue

-            yield TokenInfo(typ, string, start_pos, prefix)
+            yield PythonToken(typ, string, start_pos, prefix)


 class _NodesStackNode(object):

File 2 of 3

@@ -203,11 +203,21 @@ def _create_token_collection(version_info):
     )


-class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
+class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
+    @property
+    def end_pos(self):
+        lines = split_lines(self.string)
+        if len(lines) > 1:
+            return self.start_pos[0] + len(lines) - 1, 0
+        else:
+            return self.start_pos[0], self.start_pos[1] + len(self.string)
+
     def __repr__(self):
         return ('TokenInfo(type=%s, string=%r, start=%r, prefix=%r)' %
                 self._replace(type=self.get_type_name()))

+
+class PythonToken(Token):
     def get_type_name(self, exact=True):
         if exact:
             typ = self.exact_type
@@ -222,14 +232,6 @@ class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
         else:
             return self.type

-    @property
-    def end_pos(self):
-        lines = split_lines(self.string)
-        if len(lines) > 1:
-            return self.start_pos[0] + len(lines) - 1, 0
-        else:
-            return self.start_pos[0], self.start_pos[1] + len(self.string)
-

 def tokenize(code, version_info):
     """Generate tokens from a the source code (string)."""
@@ -273,7 +275,7 @@ def tokenize_lines(lines, version_info):
             endmatch = endprog.match(line)
             if endmatch:
                 pos = endmatch.end(0)
-                yield TokenInfo(STRING, contstr + line[:pos], contstr_start, prefix)
+                yield PythonToken(STRING, contstr + line[:pos], contstr_start, prefix)
                 contstr = ''
                 contline = None
             else:
@@ -287,7 +289,7 @@ def tokenize_lines(lines, version_info):
                 txt = line[pos:]
                 if txt.endswith('\n'):
                     new_line = True
-                yield TokenInfo(ERRORTOKEN, txt, (lnum, pos), additional_prefix)
+                yield PythonToken(ERRORTOKEN, txt, (lnum, pos), additional_prefix)
                 additional_prefix = ''
                 break
@@ -312,21 +314,21 @@ def tokenize_lines(lines, version_info):
                         i += 1
                         start -= 1
                     if start > indents[-1]:
-                        yield TokenInfo(INDENT, '', spos, '')
+                        yield PythonToken(INDENT, '', spos, '')
                         indents.append(start)
                     while start < indents[-1]:
                         if start > indents[-2]:
-                            yield TokenInfo(ERROR_DEDENT, '', (lnum, 0), '')
+                            yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
                             break
-                        yield TokenInfo(DEDENT, '', spos, '')
+                        yield PythonToken(DEDENT, '', spos, '')
                         indents.pop()

             if (initial in numchars or  # ordinary number
                     (initial == '.' and token != '.' and token != '...')):
-                yield TokenInfo(NUMBER, token, spos, prefix)
+                yield PythonToken(NUMBER, token, spos, prefix)
             elif initial in '\r\n':
                 if not new_line and paren_level == 0:
-                    yield TokenInfo(NEWLINE, token, spos, prefix)
+                    yield PythonToken(NEWLINE, token, spos, prefix)
                 else:
                     additional_prefix = prefix + token
                 new_line = True
@@ -339,7 +341,7 @@ def tokenize_lines(lines, version_info):
                 if endmatch:  # all on one line
                     pos = endmatch.end(0)
                     token = line[start:pos]
-                    yield TokenInfo(STRING, token, spos, prefix)
+                    yield PythonToken(STRING, token, spos, prefix)
                 else:
                     contstr_start = (lnum, start)  # multiple lines
                     contstr = line[start:]
@@ -356,18 +358,18 @@ def tokenize_lines(lines, version_info):
                     contline = line
                     break
                 else:  # ordinary string
-                    yield TokenInfo(STRING, token, spos, prefix)
+                    yield PythonToken(STRING, token, spos, prefix)
             elif is_identifier(initial):  # ordinary name
                 if token in always_break_tokens:
                     paren_level = 0
                     while True:
                         indent = indents.pop()
                         if indent > start:
-                            yield TokenInfo(DEDENT, '', spos, '')
+                            yield PythonToken(DEDENT, '', spos, '')
                         else:
                             indents.append(indent)
                             break
-                yield TokenInfo(NAME, token, spos, prefix)
+                yield PythonToken(NAME, token, spos, prefix)
             elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'):  # continued stmt
                 additional_prefix += prefix + line[start:]
                 break
@@ -383,10 +385,10 @@ def tokenize_lines(lines, version_info):
                     typ = opmap[token]
                 except KeyError:
                     typ = ERRORTOKEN
-                yield TokenInfo(typ, token, spos, prefix)
+                yield PythonToken(typ, token, spos, prefix)

     if contstr:
-        yield TokenInfo(ERRORTOKEN, contstr, contstr_start, prefix)
+        yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
         if contstr.endswith('\n'):
             new_line = True
@@ -394,8 +396,8 @@ def tokenize_lines(lines, version_info):
     # As the last position we just take the maximally possible position. We
     # remove -1 for the last new line.
     for indent in indents[1:]:
-        yield TokenInfo(DEDENT, '', end_pos, '')
-    yield TokenInfo(ENDMARKER, '', end_pos, additional_prefix)
+        yield PythonToken(DEDENT, '', end_pos, '')
+    yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)


 if __name__ == "__main__":

File 3 of 3

@@ -10,7 +10,7 @@ from parso.python.token import (
     NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT)
 from parso.python import tokenize
 from parso import parse
-from parso.python.tokenize import TokenInfo
+from parso.python.tokenize import PythonToken


 def _get_token_list(string):
@@ -92,8 +92,8 @@ def test_tokenize_multiline_I():
     # next line
     fundef = '''""""\n'''
     token_list = _get_token_list(fundef)
-    assert token_list == [TokenInfo(ERRORTOKEN, '""""\n', (1, 0), ''),
-                          TokenInfo(ENDMARKER , '', (2, 0), '')]
+    assert token_list == [PythonToken(ERRORTOKEN, '""""\n', (1, 0), ''),
+                          PythonToken(ENDMARKER , '', (2, 0), '')]


 def test_tokenize_multiline_II():
@@ -101,8 +101,8 @@ def test_tokenize_multiline_II():
     # same line
     fundef = '''""""'''
     token_list = _get_token_list(fundef)
-    assert token_list == [TokenInfo(ERRORTOKEN, '""""', (1, 0), ''),
-                          TokenInfo(ENDMARKER, '', (1, 4), '')]
+    assert token_list == [PythonToken(ERRORTOKEN, '""""', (1, 0), ''),
+                          PythonToken(ENDMARKER, '', (1, 4), '')]


 def test_tokenize_multiline_III():
@@ -110,8 +110,8 @@ def test_tokenize_multiline_III():
     # next line even if several newline
     fundef = '''""""\n\n'''
     token_list = _get_token_list(fundef)
-    assert token_list == [TokenInfo(ERRORTOKEN, '""""\n\n', (1, 0), ''),
-                          TokenInfo(ENDMARKER, '', (3, 0), '')]
+    assert token_list == [PythonToken(ERRORTOKEN, '""""\n\n', (1, 0), ''),
+                          PythonToken(ENDMARKER, '', (3, 0), '')]


 def test_identifier_contains_unicode():
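For completeness, a short usage sketch of the renamed tokens coming out of the tokenizer. The tokenize(code, version_info) signature is the one shown in the hunks above; parse_version_string is assumed here to be the parso.utils helper that builds a suitable version_info value:

    from parso.python import tokenize
    from parso.utils import parse_version_string  # assumed helper, adjust to your parso version

    version_info = parse_version_string('3.6')
    for token in tokenize.tokenize('a = 1\n', version_info):
        # Every yielded item is now a PythonToken.
        print(token.get_type_name(), repr(token.string), token.start_pos, token.prefix)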