diff --git a/jedi/api/classes.py b/jedi/api/classes.py index 1a6bc1c6..ceb0d58e 100644 --- a/jedi/api/classes.py +++ b/jedi/api/classes.py @@ -5,6 +5,7 @@ the interesting information about completion and goto operations. """ import warnings from itertools import chain +import re from jedi._compatibility import next, unicode, use_metaclass from jedi import settings @@ -579,7 +580,10 @@ class Definition(use_metaclass(CachedMetaClass, BaseDefinition)): d = d.get_code() finally: first_leaf.prefix = old - return d.replace('\n', '').replace('\r', '') + # Delete comments: + d = re.sub('#[^\n]+\n', ' ', d) + # Delete multi spaces/newlines + return re.sub('\s+', ' ', d).strip() @property def desc_with_module(self): diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py index 8a0f6bc8..6765f0b0 100644 --- a/jedi/parser/tokenize.py +++ b/jedi/parser/tokenize.py @@ -14,8 +14,8 @@ from __future__ import absolute_import import string import re from io import StringIO -from token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, NAME, OP, - ERRORTOKEN, NEWLINE, INDENT, DEDENT) +from jedi.parser.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, + NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT) cookie_re = re.compile("coding[:=]\s*([-\w.]+)") @@ -147,8 +147,11 @@ def source_tokens(source, line_offset=0): def generate_tokens(readline, line_offset=0): """ - The original stdlib Python version with minor modifications. - Modified to not care about dedents. + A heavily modified Python standard library tokenizer. + + Additionally to the default information, yields also the prefix of each + token. This idea comes from lib2to3. The prefix contains all information + that is irrelevant for the parser like newlines in parentheses or comments. """ paren_level = 0 # count parentheses indents = [0] @@ -158,6 +161,7 @@ def generate_tokens(readline, line_offset=0): contline = None new_line = False prefix = '' # Should never be required, but here for safety + additional_prefix = '' while True: # loop over lines in stream line = readline() # readline returns empty when finished. See StringIO if not line: @@ -192,7 +196,8 @@ def generate_tokens(readline, line_offset=0): pos += 1 continue - prefix = pseudomatch.group(1) + prefix = pseudomatch.group(1) + additional_prefix + additional_prefix = '' start, pos = pseudomatch.span(2) spos = (lnum, start) token, initial = line[start:pos], line[start] @@ -213,10 +218,12 @@ def generate_tokens(readline, line_offset=0): elif initial in '\r\n': if not new_line and paren_level == 0: yield NEWLINE, token, spos, prefix + else: + additional_prefix = prefix + token new_line = True - elif initial == '#': + elif initial == '#': # Comments assert not token.endswith("\n") - #yield Token(COMMENT, token, spos, prefix) + additional_prefix = prefix + token elif token in triple_quoted: endprog = endprogs[token] endmatch = endprog.match(line, pos) diff --git a/jedi/parser/user_context.py b/jedi/parser/user_context.py index b34d82a9..5562c783 100644 --- a/jedi/parser/user_context.py +++ b/jedi/parser/user_context.py @@ -59,7 +59,7 @@ class UserContext(object): first_line = (tok_str.splitlines() or [''])[0] column -= len(first_line) # Reverse the token again, so that it is in normal order again. - yield typ, tok_str[::-1], (self._line_temp, column), prefix + yield typ, tok_str[::-1], (self._line_temp, column), prefix[::-1] def _calc_path_until_cursor(self, start_pos): """ @@ -122,7 +122,7 @@ class UserContext(object): break start_cursor = tok_start_pos - string = tok_str + string + string = tok_str + prefix + string last_type = tok_type # Don't need whitespace around a statement. diff --git a/test/completion/goto.py b/test/completion/goto.py index ad438309..18cb8502 100644 --- a/test/completion/goto.py +++ b/test/completion/goto.py @@ -181,13 +181,13 @@ ab1(ClassDef);ab2(ClassDef);ab3(ClassDef) # ----------------- for i in range(1): - #! ['for i in range(1): i'] + #! ['for i in range(1): i'] i for key, value in [(1,2)]: - #! ['for key, value in [(1,2)]: key'] + #! ['for key, value in [(1,2)]: key'] key for i in []: - #! ['for i in []: i'] + #! ['for i in []: i'] i