Simplify string literals before backwards tokenizing call signatures.

Each source line handed to the backwards tokenizer first has the contents of
its string literals blanked out (REPLACE_STR / simplify_str), so that strings
without end marks and brackets inside strings do not disturb call signature
detection. The token loop now also stops at tokenize.ALWAYS_BREAK_TOKENS.

diff --git a/jedi/parser/user_context.py b/jedi/parser/user_context.py
index 3001e0af..3b11116d 100644
--- a/jedi/parser/user_context.py
+++ b/jedi/parser/user_context.py
@@ -12,6 +12,12 @@ from jedi import debug
 from jedi.common import PushBackIterator
 
 
+REPLACE_STR = r"[bBuU]?[rR]?" + (r"(?:(')[^\n'\\]*(?:\\.[^\n'\\]*)*(?:'|$)" +
+                                 '|' +
+                                 r'(")[^\n"\\]*(?:\\.[^\n"\\]*)*(?:"|$))')
+REPLACE_STR = re.compile(REPLACE_STR)
+
+
 class UserContext(object):
     """
     :param source: The source code of the file.
@@ -44,8 +50,9 @@ class UserContext(object):
             self._line_length = len(line)
             yield line[::-1] + '\n'
 
-    def _get_backwards_tokenizer(self, start_pos):
-        line_gen = self._backwards_line_generator(start_pos)
+    def _get_backwards_tokenizer(self, start_pos, line_gen=None):
+        if line_gen is None:
+            line_gen = self._backwards_line_generator(start_pos)
         token_gen = tokenize.generate_tokens(lambda: next(line_gen))
         for typ, tok_str, tok_start_pos, prefix in token_gen:
             line = self.get_line(self._line_temp)
@@ -150,16 +157,34 @@ class UserContext(object):
         """
         :return: Tuple of string of the call and the index of the cursor.
         """
+        def get_line(pos):
+            def simplify_str(match):
+                """
+                To avoid having strings without end marks (error tokens) and
+                strings that just screw up all the call signatures, just
+                simplify everything.
+                """
+                mark = match.group(1) or match.group(2)
+                return mark + ' ' * (len(match.group(0)) - 2) + mark
+
+            line_gen = self._backwards_line_generator(pos)
+            for line in line_gen:
+                # We have to switch the already backwards lines twice, because
+                # we scan them from start.
+                line = line[::-1]
+                modified = re.sub(REPLACE_STR, simplify_str, line)
+                yield modified[::-1]
+
         index = 0
         level = 0
         next_must_be_name = False
         next_is_key = False
         key_name = None
-        generator = self._get_backwards_tokenizer(self.position)
+        generator = self._get_backwards_tokenizer(self.position, get_line(self.position))
         for tok_type, tok_str, start_pos, prefix in generator:
-            # TODO improve the speed by not tokenizing everything.
-            # def/class/import stops the process.
-            if next_must_be_name:
+            if tok_str in tokenize.ALWAYS_BREAK_TOKENS:
+                break
+            elif next_must_be_name:
                 if tok_type == tokenize.NAME:
                     end_pos = start_pos[0], start_pos[1] + len(tok_str)
                     call, _ = self._calc_path_until_cursor(start_pos=end_pos)
diff --git a/test/test_api/test_call_signatures.py b/test/test_api/test_call_signatures.py
index 28546ecb..47675f07 100644
--- a/test/test_api/test_call_signatures.py
+++ b/test/test_api/test_call_signatures.py
@@ -177,12 +177,21 @@ class TestCallSignatures(TestCase):
     def test_whitespace_before_bracket(self):
         self._run('str (', 'str', 0)
         self._run('str (";', 'str', 0)
+        # TODO this is not actually valid Python, the newline token should be
+        # ignored.
         self._run('str\n(', 'str', 0)
 
     def test_brackets_in_string_literals(self):
         self._run('str (" (', 'str', 0)
         self._run('str (" )', 'str', 0)
 
+    def test_function_definitions_should_break(self):
+        """
+        Function definitions (and other tokens that cannot exist within call
+        signatures) should break and not be able to return a call signature.
+        """
+        assert not Script('str(\ndef x').call_signatures()
+
 
 class TestParams(TestCase):
     def params(self, source, line=None, column=None):