Mirror of https://github.com/davidhalter/jedi.git
The new tokenizer is more or less working now. Indents are calculated as they should be.
@@ -131,7 +131,7 @@ class Parser(object):
         p = pgen2.parse.Parser(grammar, self.convert_node, self.convert_leaf,
                                self.error_recovery)
         tokenizer = tokenizer or tokenize.source_tokens(source)
-        self.module = p.parse(self._tokenize(tokenizer))
+        self.module = p.parse(p.tokenize(self._tokenize(tokenizer)))

         self.module.used_names = self.used_names
         self.module.path = module_path
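The notable change in this hunk is the extra p.tokenize(...) stage: the raw token stream now runs through the pgen2 parser's indent-adding generator before it reaches parse(). Because every stage is a generator, the stages compose lazily. A minimal sketch of that composition pattern is below; the stage names are hypothetical stand-ins, not jedi's API.

    # A minimal sketch of the lazy generator pipeline used in
    # p.parse(p.tokenize(self._tokenize(tokenizer))).  Stage names below are
    # illustrative assumptions; only the composition pattern mirrors the commit.
    def produce(values):
        for v in values:
            yield v

    def augment(stream):
        # Comparable to Parser.tokenize(): passes tokens through and may
        # insert extra ones (here: a marker after every value).
        for v in stream:
            yield v
            yield 'MARK'

    def consume(stream):
        # Comparable to Parser.parse(): the only stage that actually iterates.
        return list(stream)

    print(consume(augment(produce([1, 2, 3]))))   # [1, 'MARK', 2, 'MARK', 3, 'MARK']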
@@ -247,6 +247,18 @@ class Parser(object):
         stack[start_index:] = []

     def _tokenize(self, tokenizer):
+        """
+        while first_pos[1] <= self._scope.start_pos[1] \
+                and (token_type == tokenize.NAME or tok_str in ('(', '['))\
+                and self._scope != self.module:
+            self._scope.end_pos = first_pos
+            self._scope = self._scope.parent
+            if isinstance(self._scope, pr.Module) \
+                    and not isinstance(self._scope, pr.SubModule):
+                self._scope = self.module
+        """
+
+        new_scope = False
         for token in tokenizer:
             typ = token.type
             value = token.value
@@ -96,9 +96,32 @@ class Parser(object):
         self.stack = [stackentry]
         self.rootnode = None
         self.error_recovery = error_recovery
+        indent_errors = []  # TODO generate those.

-    def parse(self, tokens):
-        for type, value, prefix, start_pos in tokens:
+    def tokenize(self, tokenizer):
+        """
+        This is not a real tokenizer, but it adds indents. You could hand the
+        parse function a normal tokenizer (e.g. the lib2to3 one). But if we use
+        the parser stack we are able to do error recovery from wrong indents.
+        """
+        indents = [0]
+        new_line = False
+        for type, value, prefix, start_pos in tokenizer:
+            if type == token.NEWLINE:
+                new_line = True
+            elif new_line:
+                indent = start_pos[1]
+                if indent > indents[-1]:
+                    yield token.INDENT, '', '', start_pos
+                    indents.append(indent)
+                while indent < indents[-1]:
+                    yield token.DEDENT, '', '', start_pos
+                    indents.pop()
+                new_line = False
+            yield type, value, prefix, start_pos
+
+    def parse(self, tokenizer):
+        for type, value, prefix, start_pos in tokenizer:
             if self.addtoken(type, value, prefix, start_pos):
                 break
         else:
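The tokenize() generator added above (in the pgen2 Parser) keeps a stack of active indentation columns and emits INDENT/DEDENT pseudo-tokens whenever the column of the first token on a new logical line grows or shrinks. A standalone sketch of that idea, on simplified tuples rather than jedi's token objects (shapes and names here are illustrative only):

    # Sketch of the indent-tracking logic from Parser.tokenize() above,
    # applied to toy (type, value, start_pos) tuples.
    NEWLINE, NAME, INDENT, DEDENT = 'NEWLINE', 'NAME', 'INDENT', 'DEDENT'

    def add_indents(tokens):
        indents = [0]          # stack of active indentation columns
        new_line = False
        for typ, value, start_pos in tokens:
            if typ == NEWLINE:
                new_line = True
            elif new_line:
                indent = start_pos[1]      # column of the first token on the line
                if indent > indents[-1]:
                    yield (INDENT, '', start_pos)
                    indents.append(indent)
                while indent < indents[-1]:
                    yield (DEDENT, '', start_pos)
                    indents.pop()
                new_line = False
            yield (typ, value, start_pos)

    # A toy stream: `if x`, then an indented `y`, then `z` back at column 0.
    toy = [(NAME, 'if', (1, 0)), (NAME, 'x', (1, 3)), (NEWLINE, '\n', (1, 4)),
           (NAME, 'y', (2, 4)), (NEWLINE, '\n', (2, 5)),
           (NAME, 'z', (3, 0))]
    for tok in add_indents(toy):
        print(tok)   # INDENT is emitted before y, DEDENT before z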
@@ -66,7 +66,7 @@ class Token(object):
         self.value = value
         self._start_pos_line = start_pos[0]
         self._start_pos_col = start_pos[1]
-        self.prefix = whitespace
+        self.prefix = prefix

     def __repr__(self):
         typ = tok_name[self.type]
@@ -228,6 +228,7 @@ def generate_tokens(readline, line_offset=0):
     numchars = '0123456789'
     contstr = ''
     contline = None
+    new_line = False
     prefix = ''  # Should never be required, but here for safety
     while True:  # loop over lines in stream
         line = readline()  # readline returns empty when finished. See StringIO
@@ -269,14 +270,19 @@ def generate_tokens(readline, line_offset=0):
             spos = (lnum, start)
             token, initial = line[start:pos], line[start]

+            if new_line and initial not in '\r\n#':
+                new_line = False
+
             if (initial in numchars or   # ordinary number
                     (initial == '.' and token != '.' and token != '...')):
                 yield Token(NUMBER, token, spos, prefix)
             elif initial in '\r\n':
-                yield Token(NEWLINE, token, spos, prefix)
+                if not new_line:
+                    yield Token(NEWLINE, token, spos, prefix)
+                new_line = True
             elif initial == '#':
                 assert not token.endswith("\n")
-                yield Token(COMMENT, token, spos, prefix)
+                #yield Token(COMMENT, token, spos, prefix)
             elif token in triple_quoted:
                 endprog = endprogs[token]
                 endmatch = endprog.match(line, pos)
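With the new_line flag in generate_tokens, a run of blank lines collapses to a single NEWLINE token, and comment-only lines neither emit a NEWLINE nor end the suppression; this is what the indent pass in Parser.tokenize relies on. A short sketch of the collapsing rule, on plain strings rather than jedi's Token objects (an illustration of the rule, not the real tokenizer):

    # Sketch of the NEWLINE-collapsing behaviour added above.
    def collapse_newlines(items):
        new_line = False
        for item in items:
            initial = item[0] if item else '\n'
            if new_line and initial not in '\r\n#':
                new_line = False            # real code resumes: NEWLINEs allowed again
            if initial in '\r\n':
                if not new_line:
                    yield 'NEWLINE'         # only the first newline of a run is emitted
                new_line = True
            elif initial == '#':
                pass                        # comments neither emit nor reset anything
            else:
                yield 'CODE'

    print(list(collapse_newlines(['a = 1', '\n', '\n', '# note', '\n', 'b = 2'])))
    # -> ['CODE', 'NEWLINE', 'CODE']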