diff --git a/jedi/parser/__init__.py b/jedi/parser/__init__.py
index 92f47455..dd675dec 100644
--- a/jedi/parser/__init__.py
+++ b/jedi/parser/__init__.py
@@ -252,7 +252,7 @@ class Parser(object):
             #print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
             self._stack_removal(grammar, stack, index + 1, value, start_pos)
             if typ == token.INDENT:
-                # For every deleted INDENT we got to delete a DEDENT as well.
+                # For every deleted INDENT we have to delete a DEDENT as well.
                 # Otherwise the parser will get into trouble and DEDENT too early.
                 self._omit_dedent += 1
 
@@ -307,7 +307,7 @@ class Parser(object):
 
     def _tokenize(self, tokenizer):
         for typ, value, start_pos, prefix in tokenizer:
-            # print(token.tok_name[typ], repr(value), start_pos, repr(prefix))
+            print(token.tok_name[typ], repr(value), start_pos, repr(prefix))
             if self._omit_dedent and typ == token.DEDENT:
                 self._omit_dedent -= 1
                 continue
diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py
index 3ab1bce8..64b3cd8a 100644
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -340,7 +340,10 @@ class FastParser(use_metaclass(CachedFastParser)):
             text = '\n'.join(current_lines)
             del current_lines[:]
             self.number_of_splits += 1
-            return text
+            if i == len(self._lines) - 1:
+                return text
+            else:
+                return text + '\n'
 
         def just_newlines(current_lines):
             for line in current_lines:
@@ -359,7 +362,7 @@ class FastParser(use_metaclass(CachedFastParser)):
         new_indent = False
         in_flow = False
         # All things within flows are simply being ignored.
-        for l in self._lines:
+        for i, l in enumerate(self._lines):
             # check for dedents
             s = l.lstrip('\t ')
             indent = len(l) - len(s)
@@ -385,6 +388,7 @@ class FastParser(use_metaclass(CachedFastParser)):
                 in_flow = m.group(1) in FLOWS
                 if not is_decorator and not in_flow:
                     if not just_newlines(current_lines):
+                        print('GEN', current_lines)
                         yield gen_part()
                 is_decorator = '@' == m.group(1)
                 if not is_decorator:
@@ -420,9 +424,9 @@ class FastParser(use_metaclass(CachedFastParser)):
 
         for code_part in self._split_parts(source):
             if not is_first:
-                #print('OFF', line_offset + 1, self.current_node.parser.module.end_pos)
+                print('OFF', line_offset, self.current_node.parser.module.end_pos)
                 #import pdb; pdb.set_trace()
-                pass
+                pass  # TODO remove
             if is_first or line_offset + 1 == self.current_node.parser.module.end_pos[0]:
                 indent = len(code_part) - len(code_part.lstrip('\t '))
                 self.current_node = self.current_node.parent_until_indent(indent)
@@ -462,10 +466,11 @@ class FastParser(use_metaclass(CachedFastParser)):
 
             #else:
                 #print '#'*45, line_offset, p.module.end_pos, 'theheck\n', repr(code_part)
-            line_offset += code_part.count('\n') + 1
-            start += len(code_part) + 1  # +1 for newline
+            line_offset += code_part.count('\n')
+            start += len(code_part)
 
         if added_newline:
+            print('REMOVE NL', self.current_node)
             self.current_node.remove_last_newline()
 
         # Now that the for loop is finished, we still want to close all nodes.
@@ -493,9 +498,10 @@ class FastParser(use_metaclass(CachedFastParser)):
         """
        Side effect: Alters the list of nodes.
        """
+        print('r', repr(source))
        h = hash(source)
        for index, node in enumerate(nodes):
-            #print('EQ', node, repr(node.source), repr(source))
+            print('EQ', node, repr(node.source), repr(source))
            if node.hash == h and node.source == source:
                node.reset_node()
                nodes.remove(node)
@@ -503,9 +509,8 @@ class FastParser(use_metaclass(CachedFastParser)):
        else:
            tokenizer = FastTokenizer(parser_code, 0)
            self.number_parsers_used += 1
+            print('CODE', repr(source))
            p = Parser(self._grammar, parser_code, self.module_path, tokenizer=tokenizer)
-            #p.module.parent = self.module  # With the new parser this is not
-            # necessary anymore?
            node = ParserNode(self.module)
 
            end = line_offset + p.module.end_pos[0]
@@ -514,6 +519,7 @@ class FastParser(use_metaclass(CachedFastParser)):
                # ends on the next line, which is part of the next parser. But
                # the last parser includes the last new line.
                end -= 1
+            print(line_offset, end)
            used_lines = self._lines[line_offset:end]
            code_part_actually_used = '\n'.join(used_lines)
            node.set_parser(p, code_part_actually_used)
@@ -563,7 +569,6 @@ class FastTokenizer(object):
         self.previous = self.current
         self.current = current
-        print(self.current, self._expect_indent, self.previous)
 
         if typ == INDENT:
             self._indent_counter += 1
             if not self._expect_indent and not self._first_stmt:
@@ -628,17 +633,20 @@ class FastTokenizer(object):
     def _finish_dedents(self):
         if self._indent_counter:
             self._indent_counter -= 1
-            return tokenize.DEDENT, '', self.current[2], ''
+            return DEDENT, '', self.current[2], ''
         elif not self._returned_endmarker:
             self._returned_endmarker = True
             # We're using the current prefix for the endmarker to not loose any
             # information. However we care about "lost" lines. The prefix of
             # the current line (indent) will always be included in the current
             # line.
-            t, _, start_pos, prefix = next(self._gen)
+            if self.current[0] == DEDENT:
+                prefix = next(self._gen)[3]
+            else:
+                prefix = self.current[3]
             # \Z for the end of the string. $ is bugged, because it has the
             # same behavior with or without re.MULTILINE.
             prefix = re.sub(r'[^\n]+\Z', '', prefix)
-            return ENDMARKER, '', start_pos, prefix
+            return ENDMARKER, '', self.current[2], prefix
         else:
             raise StopIteration
diff --git a/test/test_parser/test_fast_parser.py b/test/test_parser/test_fast_parser.py
index 84036900..4f8fcb6a 100644
--- a/test/test_parser/test_fast_parser.py
+++ b/test/test_parser/test_fast_parser.py
@@ -59,6 +59,21 @@ def test_carriage_return_splitting():
     assert [n.value for lst in p.module.names_dict.values() for n in lst] == ['Foo']
 
 
+def test_split_parts():
+    def splits(source):
+        class Obj(object):
+            _keyword_re = FastParser._keyword_re
+            number_of_splits = True
+
+        return tuple(FastParser._split_parts(Obj(), source))
+
+    def test(*parts):
+        assert splits(''.join(parts)) == parts
+
+    test('a\n\n', 'def b(): pass\n', 'c\n')
+    test('a\n', 'def b():\n pass\n', 'c\n')
+
+
 def check_fp(src, number_parsers_used, number_of_splits=None):
     if number_of_splits is None:
         number_of_splits = number_parsers_used
@@ -206,11 +221,14 @@ def test_func_with_for_and_comment():
     def func():
         pass
 
+
     for a in [1]:
         # COMMENT
         a""")
     check_fp(src, 2)
-    check_fp('a\n' + src, 1, 3)
+    # We don't need to parse the for loop, but we need to parse the other two,
+    # because the split is in a different place.
+    check_fp('a\n' + src, 2, 3)
 
 
 def test_one_statement_func():