From eaace104ddbfaaec37940a228797cf57363455db Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 16 Dec 2014 00:09:31 +0100 Subject: [PATCH] Replace the tokenizer's output with a tuple (switching back from a Token class). --- jedi/parser/__init__.py | 7 +- jedi/parser/fast.py | 5 +- jedi/parser/tokenize.py | 107 ++++-------------------------- jedi/parser/user_context.py | 20 +++--- test/test_parser/test_tokenize.py | 47 +++++++------ 5 files changed, 50 insertions(+), 136 deletions(-) diff --git a/jedi/parser/__init__.py b/jedi/parser/__init__.py index 6123b4f8..0079cef3 100644 --- a/jedi/parser/__init__.py +++ b/jedi/parser/__init__.py @@ -291,13 +291,10 @@ class Parser(object): self._scope = self.module """ - new_scope = False - for token in tokenizer: - typ = token.type - value = token.value + for typ, value, start_pos, prefix in tokenizer: if typ == tokenize.OP: typ = grammar.opmap[value] - yield typ, value, token.prefix, token.start_pos + yield typ, value, prefix, start_pos def __repr__(self): return "<%s: %s>" % (type(self).__name__, self.module) diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py index 7c5877fb..69b02e97 100644 --- a/jedi/parser/fast.py +++ b/jedi/parser/fast.py @@ -12,8 +12,7 @@ from jedi.parser import Parser from jedi.parser import tree as pr from jedi.parser import tokenize from jedi import cache -from jedi.parser.tokenize import (source_tokens, Token, FLOWS, NEWLINE, - COMMENT, ENDMARKER) +from jedi.parser.tokenize import source_tokens, FLOWS, NEWLINE, COMMENT, ENDMARKER class Module(pr.Module, pr.Simple): @@ -387,7 +386,7 @@ class FastTokenizer(object): self.closed = False # fast parser options - self.current = self.previous = Token(None, '', (0, 0)) + self.current = self.previous = None, '', (0, 0) self.in_flow = False self.new_indent = False self.parser_indent = self.old_parser_indent = 0 diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py index 010eb890..d5e48df7 100644 --- a/jedi/parser/tokenize.py +++ b/jedi/parser/tokenize.py @@ -17,8 +17,6 @@ from io import StringIO from token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT) -from jedi._compatibility import u - cookie_re = re.compile("coding[:=]\s*([-\w.]+)") @@ -34,82 +32,6 @@ COMMENT = N_TOKENS tok_name[COMMENT] = 'COMMENT' -class Token(object): - """ - The token object is an efficient representation of the structure - (type, token, (start_pos_line, start_pos_col, prefix)). It has indexer - methods that maintain compatibility to existing code that expects the above - structure. 
- - >>> repr(Token(1, "test", (1, 1, ''))) - "" - >>> Token(1, 'bar', (3, 4, '')).__getstate__() - (1, 'bar', 3, 4, '') - >>> a = Token(0, 'baz', (0, 0, '')) - >>> a.__setstate__((1, 'foo', 3, 4, '')) - >>> a - - >>> a.start_pos - (3, 4) - >>> a.value - 'foo' - >>> a._start_pos_col - 4 - >>> Token(1, u("😷"), (1 ,1, '')).value + "p" == u("😷p") - True - """ - __slots__ = ("type", "value", "_start_pos_line", "_start_pos_col", - "prefix") - - def __init__(self, type, value, start_pos, prefix=''): - self.type = type - self.value = value - self._start_pos_line = start_pos[0] - self._start_pos_col = start_pos[1] - self.prefix = prefix - - def __repr__(self): - typ = tok_name[self.type] - content = typ, self.value,\ - (self._start_pos_line, self._start_pos_col, self.prefix) - return "<%s: %s>" % (type(self).__name__, content) - - @property - def start_pos(self): - return self._start_pos_line, self._start_pos_col - - @property - def end_pos(self): - """Returns end position respecting multiline tokens.""" - end_pos_line = self._start_pos_line - lines = self.value.split('\n') - if self.value.endswith('\n'): - lines = lines[:-1] - lines[-1] += '\n' - end_pos_line += len(lines) - 1 - end_pos_col = self._start_pos_col - # Check for multiline token - if self._start_pos_line == end_pos_line: - end_pos_col += len(lines[-1]) - else: - end_pos_col = len(lines[-1]) - return (end_pos_line, end_pos_col) - - # Make cache footprint smaller for faster unpickling - def __getstate__(self): - return (self.type, self.value, - self._start_pos_line, self._start_pos_col, - self.prefix) - - # TODO DELETE this is not needed anymore, I guess. It should not get pickled. - def __setstate__(self, state): - self.type = state[0] - self.value = state[1] - self._start_pos_line = state[2] - self._start_pos_col = state[3] - self.prefix = state[4] - - def group(*choices): return '(' + '|'.join(choices) + ')' @@ -239,7 +161,7 @@ def generate_tokens(readline, line_offset=0): line = readline() # readline returns empty when finished. See StringIO if not line: if contstr: - yield Token(ERRORTOKEN, contstr, contstr_start, prefix) + yield ERRORTOKEN, contstr, contstr_start, prefix break lnum += 1 @@ -249,8 +171,7 @@ def generate_tokens(readline, line_offset=0): endmatch = endprog.match(line) if endmatch: pos = endmatch.end(0) - yield Token(STRING, contstr + line[:pos], - contstr_start, prefix) + yield STRING, contstr + line[:pos], contstr_start, prefix contstr = '' contline = None else: @@ -266,7 +187,7 @@ def generate_tokens(readline, line_offset=0): # If a literal starts but doesn't end the whole rest of the # line is an error token. txt = line[pos:] - yield Token(ERRORTOKEN, txt, (lnum, pos)) + yield ERRORTOKEN, txt, (lnum, pos), prefix pos += 1 continue @@ -279,18 +200,18 @@ def generate_tokens(readline, line_offset=0): new_line = False if paren_level == 0: if start > indents[-1]: - yield Token(INDENT, '', spos, '') + yield INDENT, '', spos, '' indents.append(start) while start < indents[-1]: - yield Token(DEDENT, '', spos, '') + yield DEDENT, '', spos, '' indents.pop() if (initial in numchars or # ordinary number (initial == '.' and token != '.' 
and token != '...')): - yield Token(NUMBER, token, spos, prefix) + yield NUMBER, token, spos, prefix elif initial in '\r\n': if not new_line and paren_level == 0: - yield Token(NEWLINE, token, spos, prefix) + yield NEWLINE, token, spos, prefix new_line = True elif initial == '#': assert not token.endswith("\n") @@ -301,7 +222,7 @@ def generate_tokens(readline, line_offset=0): if endmatch: # all on one line pos = endmatch.end(0) token = line[start:pos] - yield Token(STRING, token, spos, prefix) + yield STRING, token, spos, prefix else: contstr_start = (lnum, start) # multiple lines contstr = line[start:] @@ -318,18 +239,18 @@ def generate_tokens(readline, line_offset=0): contline = line break else: # ordinary string - yield Token(STRING, token, spos, prefix) + yield STRING, token, spos, prefix elif initial in namechars: # ordinary name if token in ALWAYS_BREAK_TOKEN: paren_level = 0 while True: indent = indents.pop() if indent > start: - yield Token(DEDENT, '', (lnum, 0), '') + yield DEDENT, '', (lnum, 0), '' else: indents.append(indent) break - yield Token(NAME, token, spos, prefix) + yield NAME, token, spos, prefix elif initial == '\\' and line[start:] == '\\\n': # continued stmt continue else: @@ -337,8 +258,8 @@ def generate_tokens(readline, line_offset=0): paren_level += 1 elif token in ')]}': paren_level -= 1 - yield Token(OP, token, spos, prefix) + yield OP, token, spos, prefix for indent in indents[1:]: - yield Token(DEDENT, '', (lnum, 0), '') - yield Token(ENDMARKER, '', (lnum, 0), prefix) + yield DEDENT, '', (lnum, 0), '' + yield ENDMARKER, '', (lnum, 0), prefix diff --git a/jedi/parser/user_context.py b/jedi/parser/user_context.py index 5900606a..3a2ab85b 100644 --- a/jedi/parser/user_context.py +++ b/jedi/parser/user_context.py @@ -74,13 +74,12 @@ class UserContext(object): force_point = False last_type = None is_first = True - for tok in gen: - tok_type = tok.type - tok_str = tok.value - end = tok.end_pos + for tok_type, tok_str, tok_start_pos, prefix in gen: + # TODO end is not correct, doesn't take new lines in consideration. 
+ end = tok_start_pos[0], tok_start_pos[-1] + len(tok_str) self._column_temp = self._line_length - end[1] if is_first: - if tok.start_pos != (1, 0): # whitespace is not a path + if tok_start_pos != (1, 0): # whitespace is not a path return u(''), start_cursor is_first = False @@ -118,7 +117,7 @@ class UserContext(object): else: if tok_str == '-': next_tok = next(gen) - if next_tok.value == 'e': + if next_tok[1] == 'e': gen.push_back(next_tok) else: break @@ -166,16 +165,15 @@ class UserContext(object): next_must_be_name = False next_is_key = False key_name = None - for token in self._get_backwards_tokenizer(self.position): - tok_str = token.value + for tok_type, tok_str, start_pos, prefix in self._get_backwards_tokenizer(self.position): if next_must_be_name: - if token.type == tokenize.NAME: + if tok_type == tokenize.NAME: call, _ = self._calc_path_until_cursor(start_pos=pos) return call, index, key_name index = 0 next_must_be_name = False elif next_is_key: - if token.type == tokenize.NAME: + if tok_type == tokenize.NAME: key_name = tok_str[::-1] next_is_key = False @@ -184,7 +182,7 @@ class UserContext(object): if level == 1: next_must_be_name = True level = 0 - end = token.end_pos + end = start_pos[0], start_pos[1] + 1 self._column_temp = self._line_length - end[1] pos = self._line_temp + 1, self._column_temp elif tok_str == ')': diff --git a/test/test_parser/test_tokenize.py b/test/test_parser/test_tokenize.py index 3acb203a..bff7c21c 100644 --- a/test/test_parser/test_tokenize.py +++ b/test/test_parser/test_tokenize.py @@ -31,9 +31,9 @@ asdfasdf""" + "h" simple_docstring_io = StringIO(simple_docstring) tokens = parser.tokenize.generate_tokens(simple_docstring_io.readline) token_list = list(tokens) - string_token = token_list[0] - self.assertEqual(string_token.prefix, '') - self.assertEqual(string_token.value, '"""simple one line docstring"""') + _, value, _, prefix = token_list[0] + self.assertEqual(prefix, '') + self.assertEqual(value, '"""simple one line docstring"""') def test_simple_with_whitespace(self): # Test a simple one line string with preceding whitespace and newline @@ -41,13 +41,13 @@ asdfasdf""" + "h" simple_docstring_io = StringIO(simple_docstring) tokens = parser.tokenize.generate_tokens(simple_docstring_io.readline) token_list = list(tokens) - string_token = token_list[0] - self.assertEqual(string_token.prefix, ' ') - self.assertEqual(string_token.value, '"""simple one line docstring"""') - self.assertEqual(string_token.type, STRING) - newline_token = token_list[1] - self.assertEqual(newline_token.prefix, ' ') - self.assertEqual(newline_token.type, NEWLINE) + typ, value, start_pos, prefix = token_list[0] + self.assertEqual(prefix, ' ') + self.assertEqual(value, '"""simple one line docstring"""') + self.assertEqual(typ, STRING) + typ, value, start_pos, prefix = token_list[1] + self.assertEqual(prefix, ' ') + self.assertEqual(typ, NEWLINE) def test_function_whitespace(self): # Test function definition whitespace identification @@ -59,20 +59,19 @@ asdfasdf""" + "h" fundef_io = StringIO(fundef) tokens = parser.tokenize.generate_tokens(fundef_io.readline) token_list = list(tokens) - print(token_list) - for t in token_list: - if t.value == 'test_whitespace': - self.assertEqual(t.prefix, ' ') - if t.value == '(': - self.assertEqual(t.prefix, '') - if t.value == '*': - self.assertEqual(t.prefix, '') - if t.value == '**': - self.assertEqual(t.prefix, ' ') - if t.value == 'print': - self.assertEqual(t.prefix, ' ') - if t.value == 'if': - self.assertEqual(t.prefix, ' ') + for _, 
value, _, prefix in token_list: + if value == 'test_whitespace': + self.assertEqual(prefix, ' ') + if value == '(': + self.assertEqual(prefix, '') + if value == '*': + self.assertEqual(prefix, '') + if value == '**': + self.assertEqual(prefix, ' ') + if value == 'print': + self.assertEqual(prefix, ' ') + if value == 'if': + self.assertEqual(prefix, ' ') def test_tokenizer_with_string_literal_backslash():
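
After this change every token the tokenizer yields is a plain 4-tuple (type, value, start_pos, prefix) instead of a Token instance, so call sites move from attribute access (token.type, token.value, token.start_pos, token.prefix) to tuple unpacking or indexing, as in the `next_tok[1]` check above. A minimal consumption sketch, not part of the patch, assuming this patched jedi is importable; the source string and the printing are illustrative only:

    from io import StringIO
    from jedi.parser import tokenize

    source = "def foo():\n    return 1\n"
    tokens = tokenize.generate_tokens(StringIO(source).readline)
    for typ, value, start_pos, prefix in tokens:
        # typ       -> numeric token type            (was token.type)
        # value     -> the token string              (was token.value)
        # start_pos -> (line, column) tuple          (was token.start_pos)
        # prefix    -> whitespace/comments in front  (was token.prefix)
        print(tokenize.tok_name[typ], repr(value), start_pos, repr(prefix))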
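
The first hunk (jedi/parser/__init__.py) also shows how those tuples are adapted before reaching the parser: OP tokens are narrowed to the grammar's own operator types and the fields are reordered to (type, value, prefix, start_pos). A simplified standalone sketch of that step, assuming opmap behaves like the pgen2-style mapping from operator strings to grammar token numbers:

    from token import OP

    def adapt_tokens(tokenizer, opmap):
        """Re-yield tokenizer tuples in the order the parser expects."""
        for typ, value, start_pos, prefix in tokenizer:
            if typ == OP:
                # assumption: opmap maps operator strings to grammar token
                # numbers, e.g. opmap['('] -> LPAR
                typ = opmap[value]
            yield typ, value, prefix, start_pos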
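
The removed Token.end_pos property was the only non-trivial logic on the class: it handled tokens that span several lines. Its replacement in user_context.py, `end = tok_start_pos[0], tok_start_pos[-1] + len(tok_str)`, is only correct for single-line tokens, which is what the new TODO notes. A hypothetical helper, not part of the patch, reproducing the removed behaviour for a plain (value, start_pos) pair:

    def token_end_pos(value, start_pos):
        """End position of a token, respecting embedded newlines."""
        line, column = start_pos
        lines = value.split('\n')
        if value.endswith('\n'):
            lines = lines[:-1]
            lines[-1] += '\n'
        end_line = line + len(lines) - 1
        if line == end_line:
            # single-line token: the column just advances
            return end_line, column + len(lines[-1])
        # multiline token: the column restarts on the last line
        return end_line, len(lines[-1])

    # token_end_pos('name', (1, 0)) == (1, 4)
    # token_end_pos('"""x\ny"""', (3, 4)) == (4, 4)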