diff --git a/parso/python/diff.py b/parso/python/diff.py
index dd37a18..86b7114 100644
--- a/parso/python/diff.py
+++ b/parso/python/diff.py
@@ -13,7 +13,7 @@ import logging
 from parso.utils import split_lines
 from parso.python.parser import Parser
 from parso.python.tree import EndMarker
-from parso.python.tokenize import (NEWLINE, TokenInfo, ERROR_DEDENT,
+from parso.python.tokenize import (NEWLINE, PythonToken, ERROR_DEDENT,
                                    ENDMARKER, INDENT, DEDENT)
 
 
@@ -315,23 +315,23 @@ class DiffParser(object):
                         prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix)
                     else:
                         prefix = ''
-                    yield TokenInfo(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
+                    yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
                     break
             elif typ == NEWLINE and start_pos[0] >= until_line:
-                yield TokenInfo(typ, string, start_pos, prefix)
+                yield PythonToken(typ, string, start_pos, prefix)
                 # Check if the parser is actually in a valid suite state.
                 if suite_or_file_input_is_valid(self._pgen_grammar, stack):
                     start_pos = start_pos[0] + 1, 0
                     while len(indents) > int(omitted_first_indent):
                         indents.pop()
-                        yield TokenInfo(DEDENT, '', start_pos, '')
+                        yield PythonToken(DEDENT, '', start_pos, '')
 
-                    yield TokenInfo(ENDMARKER, '', start_pos, '')
+                    yield PythonToken(ENDMARKER, '', start_pos, '')
                     break
                 else:
                     continue
 
-            yield TokenInfo(typ, string, start_pos, prefix)
+            yield PythonToken(typ, string, start_pos, prefix)
 
 
 class _NodesStackNode(object):
diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py
index 02ceabf..9c65c61 100644
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -203,11 +203,21 @@ def _create_token_collection(version_info):
     )
 
 
-class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
+class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
+    @property
+    def end_pos(self):
+        lines = split_lines(self.string)
+        if len(lines) > 1:
+            return self.start_pos[0] + len(lines) - 1, 0
+        else:
+            return self.start_pos[0], self.start_pos[1] + len(self.string)
+
     def __repr__(self):
         return ('TokenInfo(type=%s, string=%r, start=%r, prefix=%r)' %
                 self._replace(type=self.get_type_name()))
 
+
+class PythonToken(Token):
     def get_type_name(self, exact=True):
         if exact:
             typ = self.exact_type
@@ -222,14 +232,6 @@ class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
         else:
             return self.type
 
-    @property
-    def end_pos(self):
-        lines = split_lines(self.string)
-        if len(lines) > 1:
-            return self.start_pos[0] + len(lines) - 1, 0
-        else:
-            return self.start_pos[0], self.start_pos[1] + len(self.string)
-
 
 def tokenize(code, version_info):
     """Generate tokens from a the source code (string)."""
@@ -273,7 +275,7 @@ def tokenize_lines(lines, version_info):
             endmatch = endprog.match(line)
             if endmatch:
                 pos = endmatch.end(0)
-                yield TokenInfo(STRING, contstr + line[:pos], contstr_start, prefix)
+                yield PythonToken(STRING, contstr + line[:pos], contstr_start, prefix)
                 contstr = ''
                 contline = None
             else:
@@ -287,7 +289,7 @@ def tokenize_lines(lines, version_info):
                 txt = line[pos:]
                 if txt.endswith('\n'):
                     new_line = True
-                yield TokenInfo(ERRORTOKEN, txt, (lnum, pos), additional_prefix)
+                yield PythonToken(ERRORTOKEN, txt, (lnum, pos), additional_prefix)
                 additional_prefix = ''
                 break
 
@@ -312,21 +314,21 @@ def tokenize_lines(lines, version_info):
                         i += 1
                         start -= 1
                     if start > indents[-1]:
-                        yield TokenInfo(INDENT, '', spos, '')
+                        yield PythonToken(INDENT, '', spos, '')
                         indents.append(start)
                     while start < indents[-1]:
                         if start > indents[-2]:
-                            yield TokenInfo(ERROR_DEDENT, '', (lnum, 0), '')
+                            yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
                             break
-                        yield TokenInfo(DEDENT, '', spos, '')
+                        yield PythonToken(DEDENT, '', spos, '')
                         indents.pop()
 
             if (initial in numchars or  # ordinary number
                     (initial == '.' and token != '.' and token != '...')):
-                yield TokenInfo(NUMBER, token, spos, prefix)
+                yield PythonToken(NUMBER, token, spos, prefix)
             elif initial in '\r\n':
                 if not new_line and paren_level == 0:
-                    yield TokenInfo(NEWLINE, token, spos, prefix)
+                    yield PythonToken(NEWLINE, token, spos, prefix)
                 else:
                     additional_prefix = prefix + token
                 new_line = True
@@ -339,7 +341,7 @@ def tokenize_lines(lines, version_info):
                 if endmatch:  # all on one line
                     pos = endmatch.end(0)
                     token = line[start:pos]
-                    yield TokenInfo(STRING, token, spos, prefix)
+                    yield PythonToken(STRING, token, spos, prefix)
                 else:
                     contstr_start = (lnum, start)  # multiple lines
                     contstr = line[start:]
@@ -356,18 +358,18 @@ def tokenize_lines(lines, version_info):
                         contline = line
                         break
                 else:  # ordinary string
-                    yield TokenInfo(STRING, token, spos, prefix)
+                    yield PythonToken(STRING, token, spos, prefix)
             elif is_identifier(initial):  # ordinary name
                 if token in always_break_tokens:
                     paren_level = 0
                     while True:
                         indent = indents.pop()
                         if indent > start:
-                            yield TokenInfo(DEDENT, '', spos, '')
+                            yield PythonToken(DEDENT, '', spos, '')
                         else:
                             indents.append(indent)
                             break
-                yield TokenInfo(NAME, token, spos, prefix)
+                yield PythonToken(NAME, token, spos, prefix)
             elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'):  # continued stmt
                 additional_prefix += prefix + line[start:]
                 break
@@ -383,10 +385,10 @@ def tokenize_lines(lines, version_info):
                     typ = opmap[token]
                 except KeyError:
                     typ = ERRORTOKEN
-                yield TokenInfo(typ, token, spos, prefix)
+                yield PythonToken(typ, token, spos, prefix)
 
     if contstr:
-        yield TokenInfo(ERRORTOKEN, contstr, contstr_start, prefix)
+        yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
         if contstr.endswith('\n'):
             new_line = True
 
@@ -394,8 +396,8 @@
     # As the last position we just take the maximally possible position. We
    # remove -1 for the last new line.
     for indent in indents[1:]:
-        yield TokenInfo(DEDENT, '', end_pos, '')
-    yield TokenInfo(ENDMARKER, '', end_pos, additional_prefix)
+        yield PythonToken(DEDENT, '', end_pos, '')
+    yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)
 
 
 if __name__ == "__main__":
diff --git a/test/test_tokenize.py b/test/test_tokenize.py
index e2c184a..efef732 100644
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -10,7 +10,7 @@ from parso.python.token import (
     NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT)
 from parso.python import tokenize
 from parso import parse
-from parso.python.tokenize import TokenInfo
+from parso.python.tokenize import PythonToken
 
 
 def _get_token_list(string):
@@ -92,8 +92,8 @@ def test_tokenize_multiline_I():
     # next line
     fundef = '''""""\n'''
     token_list = _get_token_list(fundef)
-    assert token_list == [TokenInfo(ERRORTOKEN, '""""\n', (1, 0), ''),
-                          TokenInfo(ENDMARKER , '', (2, 0), '')]
+    assert token_list == [PythonToken(ERRORTOKEN, '""""\n', (1, 0), ''),
+                          PythonToken(ENDMARKER , '', (2, 0), '')]
 
 
 def test_tokenize_multiline_II():
@@ -101,8 +101,8 @@ def test_tokenize_multiline_II():
     # same line
     fundef = '''""""'''
     token_list = _get_token_list(fundef)
-    assert token_list == [TokenInfo(ERRORTOKEN, '""""', (1, 0), ''),
-                          TokenInfo(ENDMARKER, '', (1, 4), '')]
+    assert token_list == [PythonToken(ERRORTOKEN, '""""', (1, 0), ''),
+                          PythonToken(ENDMARKER, '', (1, 4), '')]
 
 
 def test_tokenize_multiline_III():
@@ -110,8 +110,8 @@ def test_tokenize_multiline_III():
     # next line even if several newline
     fundef = '''""""\n\n'''
     token_list = _get_token_list(fundef)
-    assert token_list == [TokenInfo(ERRORTOKEN, '""""\n\n', (1, 0), ''),
-                          TokenInfo(ENDMARKER, '', (3, 0), '')]
+    assert token_list == [PythonToken(ERRORTOKEN, '""""\n\n', (1, 0), ''),
+                          PythonToken(ENDMARKER, '', (3, 0), '')]
 
 
 def test_identifier_contains_unicode():
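
For orientation, a minimal sketch (not part of the diff) of how the renamed classes behave after this change. The parse_version_string helper and the sample source string are assumptions based on parso's public utilities and the test helper above, not something the hunks themselves introduce:

# Rough usage sketch; assumes parso.utils.parse_version_string is available,
# as used by the test suite's helpers. The input string 'x = 1\n' is arbitrary.
from parso.utils import parse_version_string
from parso.python.tokenize import tokenize, PythonToken

version_info = parse_version_string('3.6')
for tok in tokenize('x = 1\n', version_info):
    # tokenize() now yields PythonToken instances; end_pos comes from the new
    # Token base class, get_type_name() from the PythonToken subclass.
    assert isinstance(tok, PythonToken)
    print(tok.get_type_name(), repr(tok.string), tok.start_pos, tok.end_pos)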