diff --git a/jedi/parser/__init__.py b/jedi/parser/__init__.py
index 1863ae2c..3332ea13 100644
--- a/jedi/parser/__init__.py
+++ b/jedi/parser/__init__.py
@@ -131,7 +131,7 @@ class Parser(object):
         p = pgen2.parse.Parser(grammar, self.convert_node, self.convert_leaf,
                                self.error_recovery)
         tokenizer = tokenizer or tokenize.source_tokens(source)
-        self.module = p.parse(self._tokenize(tokenizer))
+        self.module = p.parse(p.tokenize(self._tokenize(tokenizer)))
 
         self.module.used_names = self.used_names
         self.module.path = module_path
@@ -247,6 +247,18 @@ class Parser(object):
             stack[start_index:] = []
 
     def _tokenize(self, tokenizer):
+        """
+        while first_pos[1] <= self._scope.start_pos[1] \
+                and (token_type == tokenize.NAME or tok_str in ('(', '['))\
+                and self._scope != self.module:
+            self._scope.end_pos = first_pos
+            self._scope = self._scope.parent
+            if isinstance(self._scope, pr.Module) \
+                    and not isinstance(self._scope, pr.SubModule):
+                self._scope = self.module
+        """
+
+        new_scope = False
         for token in tokenizer:
             typ = token.type
             value = token.value
diff --git a/jedi/parser/pgen2/parse.py b/jedi/parser/pgen2/parse.py
index aa95d16c..4e5e511f 100644
--- a/jedi/parser/pgen2/parse.py
+++ b/jedi/parser/pgen2/parse.py
@@ -96,9 +96,32 @@ class Parser(object):
         self.stack = [stackentry]
         self.rootnode = None
         self.error_recovery = error_recovery
+        indent_errors = []  # TODO generate those.
 
-    def parse(self, tokens):
-        for type, value, prefix, start_pos in tokens:
+    def tokenize(self, tokenizer):
+        """
+        This is not a real tokenizer, but it adds indents. You could hand the
+        parse function a normal tokenizer (e.g. the lib2to3 one). But if we use
+        the parser stack we are able to do error recovery from wrong indents.
+        """
+        indents = [0]
+        new_line = False
+        for type, value, prefix, start_pos in tokenizer:
+            if type == token.NEWLINE:
+                new_line = True
+            elif new_line:
+                indent = start_pos[1]
+                if indent > indents[-1]:
+                    yield token.INDENT, '', '', start_pos
+                    indents.append(indent)
+                while indent < indents[-1]:
+                    yield token.DEDENT, '', '', start_pos
+                    indents.pop()
+                new_line = False
+            yield type, value, prefix, start_pos
+
+    def parse(self, tokenizer):
+        for type, value, prefix, start_pos in tokenizer:
             if self.addtoken(type, value, prefix, start_pos):
                 break
         else:
diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py
index 50cdd209..efe835ca 100644
--- a/jedi/parser/tokenize.py
+++ b/jedi/parser/tokenize.py
@@ -66,7 +66,7 @@ class Token(object):
         self.value = value
         self._start_pos_line = start_pos[0]
         self._start_pos_col = start_pos[1]
-        self.prefix = whitespace
+        self.prefix = prefix
 
     def __repr__(self):
         typ = tok_name[self.type]
@@ -228,6 +228,7 @@ def generate_tokens(readline, line_offset=0):
     numchars = '0123456789'
     contstr = ''
     contline = None
+    new_line = False
     prefix = ''  # Should never be required, but here for safety
     while True:            # loop over lines in stream
         line = readline()  # readline returns empty when finished. See StringIO
@@ -269,14 +270,19 @@ def generate_tokens(readline, line_offset=0):
             spos = (lnum, start)
             token, initial = line[start:pos], line[start]
 
+            if new_line and initial not in '\r\n#':
+                new_line = False
+
             if (initial in numchars or  # ordinary number
                     (initial == '.' and token != '.' and token != '...')):
                 yield Token(NUMBER, token, spos, prefix)
             elif initial in '\r\n':
-                yield Token(NEWLINE, token, spos, prefix)
+                if not new_line:
+                    yield Token(NEWLINE, token, spos, prefix)
+                new_line = True
             elif initial == '#':
                 assert not token.endswith("\n")
-                yield Token(COMMENT, token, spos, prefix)
+                #yield Token(COMMENT, token, spos, prefix)
             elif token in triple_quoted:
                 endprog = endprogs[token]
                 endmatch = endprog.match(line, pos)
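
The following standalone sketch (not part of the patch) models what the new pgen2.parse.Parser.tokenize() above does: it watches the column of the first token after each NEWLINE and inserts synthetic INDENT/DEDENT tokens, so the grammar-driven parser can consume a tokenizer that does not track indentation itself. The token constants and the add_indents name are simplified stand-ins for illustration, not jedi's actual API.

# Illustrative sketch only; mirrors the indent-tracking logic of the patch.
NEWLINE, INDENT, DEDENT, NAME, OP = 'NEWLINE', 'INDENT', 'DEDENT', 'NAME', 'OP'


def add_indents(tokens):
    """Yield (type, value, prefix, start_pos) tuples, inserting INDENT/DEDENT."""
    indents = [0]
    new_line = False
    for typ, value, prefix, start_pos in tokens:
        if typ == NEWLINE:
            new_line = True
        elif new_line:
            indent = start_pos[1]           # column of the first token on the new line
            if indent > indents[-1]:        # deeper than before -> one INDENT
                yield INDENT, '', '', start_pos
                indents.append(indent)
            while indent < indents[-1]:     # shallower -> one DEDENT per closed level
                yield DEDENT, '', '', start_pos
                indents.pop()
            new_line = False
        yield typ, value, prefix, start_pos


# Token stream roughly corresponding to "if x:\n    y\nz\n":
stream = [
    (NAME, 'if', '', (1, 0)), (NAME, 'x', ' ', (1, 3)), (OP, ':', '', (1, 4)),
    (NEWLINE, '\n', '', (1, 5)),
    (NAME, 'y', '    ', (2, 4)), (NEWLINE, '\n', '', (2, 5)),
    (NAME, 'z', '', (3, 0)), (NEWLINE, '\n', '', (3, 1)),
]
print([t[0] for t in add_indents(stream)])
# ['NAME', 'NAME', 'OP', 'NEWLINE', 'INDENT', 'NAME', 'NEWLINE', 'DEDENT', 'NAME', 'NEWLINE']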