forked from VimPlug/jedi
The new tokenizer is more or less working now. Indents are calculated as they should be.
This commit is contained in:
@@ -131,7 +131,7 @@ class Parser(object):
|
||||
p = pgen2.parse.Parser(grammar, self.convert_node, self.convert_leaf,
|
||||
self.error_recovery)
|
||||
tokenizer = tokenizer or tokenize.source_tokens(source)
|
||||
self.module = p.parse(self._tokenize(tokenizer))
|
||||
self.module = p.parse(p.tokenize(self._tokenize(tokenizer)))
|
||||
|
||||
self.module.used_names = self.used_names
|
||||
self.module.path = module_path
|
||||
@@ -247,6 +247,18 @@ class Parser(object):
|
||||
stack[start_index:] = []
|
||||
|
||||
def _tokenize(self, tokenizer):
|
||||
"""
|
||||
while first_pos[1] <= self._scope.start_pos[1] \
|
||||
and (token_type == tokenize.NAME or tok_str in ('(', '['))\
|
||||
and self._scope != self.module:
|
||||
self._scope.end_pos = first_pos
|
||||
self._scope = self._scope.parent
|
||||
if isinstance(self._scope, pr.Module) \
|
||||
and not isinstance(self._scope, pr.SubModule):
|
||||
self._scope = self.module
|
||||
"""
|
||||
|
||||
new_scope = False
|
||||
for token in tokenizer:
|
||||
typ = token.type
|
||||
value = token.value
|
||||
|
||||
@@ -96,9 +96,32 @@ class Parser(object):
|
||||
self.stack = [stackentry]
|
||||
self.rootnode = None
|
||||
self.error_recovery = error_recovery
|
||||
indent_errors = [] # TODO generate those.
|
||||
|
||||
def parse(self, tokens):
|
||||
for type, value, prefix, start_pos in tokens:
|
||||
def tokenize(self, tokenizer):
    """
    Add INDENT/DEDENT tokens to a token stream that does not contain them.

    This is not a real tokenizer, it only adds indents. You could hand the
    parse function a normal tokenizer instead (e.g. the lib2to3 one), but
    if the indentation is tracked here, next to the parser stack, error
    recovery from wrong indents becomes possible.

    :param tokenizer: Iterable of items that unpack into
        ``(type, value, prefix, start_pos)``. ``start_pos[1]`` is read as
        the column of the token.
    :return: Generator of ``(type, value, prefix, start_pos)`` tuples: every
        input token in its original order, preceded where necessary by
        ``token.INDENT`` / ``token.DEDENT`` tuples that carry an empty value
        and prefix and the position of the token that follows them.
    """
    # Columns of the currently open indentation levels; column 0 is never
    # popped for non-negative columns.
    indents = [0]
    # True between a NEWLINE token and the first token that follows it.
    new_line = False
    # The type is named ``typ`` so that the builtin ``type`` is not shadowed.
    for typ, value, prefix, start_pos in tokenizer:
        if typ == token.NEWLINE:
            new_line = True
        elif new_line:
            # First token after a NEWLINE: its column is the indentation.
            indent = start_pos[1]
            if indent > indents[-1]:
                yield token.INDENT, '', '', start_pos
                indents.append(indent)
            # One DEDENT per open level that is deeper than this column.
            # NOTE: a column that matches no open level is not reported as
            # an error here; the levels above it are simply closed.
            while indent < indents[-1]:
                yield token.DEDENT, '', '', start_pos
                indents.pop()
            new_line = False

        yield typ, value, prefix, start_pos
|
||||
|
||||
def parse(self, tokenizer):
|
||||
for type, value, prefix, start_pos in tokenizer:
|
||||
if self.addtoken(type, value, prefix, start_pos):
|
||||
break
|
||||
else:
|
||||
|
||||
@@ -66,7 +66,7 @@ class Token(object):
|
||||
self.value = value
|
||||
self._start_pos_line = start_pos[0]
|
||||
self._start_pos_col = start_pos[1]
|
||||
self.prefix = whitespace
|
||||
self.prefix = prefix
|
||||
|
||||
def __repr__(self):
|
||||
typ = tok_name[self.type]
|
||||
@@ -228,6 +228,7 @@ def generate_tokens(readline, line_offset=0):
|
||||
numchars = '0123456789'
|
||||
contstr = ''
|
||||
contline = None
|
||||
new_line = False
|
||||
prefix = '' # Should never be required, but here for safety
|
||||
while True: # loop over lines in stream
|
||||
line = readline() # readline returns empty when finished. See StringIO
|
||||
@@ -269,14 +270,19 @@ def generate_tokens(readline, line_offset=0):
|
||||
spos = (lnum, start)
|
||||
token, initial = line[start:pos], line[start]
|
||||
|
||||
if new_line and initial not in '\r\n#':
|
||||
new_line = False
|
||||
|
||||
if (initial in numchars or # ordinary number
|
||||
(initial == '.' and token != '.' and token != '...')):
|
||||
yield Token(NUMBER, token, spos, prefix)
|
||||
elif initial in '\r\n':
|
||||
yield Token(NEWLINE, token, spos, prefix)
|
||||
if not new_line:
|
||||
yield Token(NEWLINE, token, spos, prefix)
|
||||
new_line = True
|
||||
elif initial == '#':
|
||||
assert not token.endswith("\n")
|
||||
yield Token(COMMENT, token, spos, prefix)
|
||||
#yield Token(COMMENT, token, spos, prefix)
|
||||
elif token in triple_quoted:
|
||||
endprog = endprogs[token]
|
||||
endmatch = endprog.match(line, pos)
|
||||
|
||||
Reference in New Issue
Block a user