1
0
forked from VimPlug/jedi

First time a test partially passes of the new fast parser.

This commit is contained in:
Dave Halter
2016-08-30 23:12:24 +02:00
parent 42e5777620
commit d505c764de

View File

@@ -10,12 +10,12 @@ from jedi._compatibility import use_metaclass
from jedi import settings from jedi import settings
from jedi.common import splitlines from jedi.common import splitlines
from jedi.parser import ParserWithRecovery from jedi.parser import ParserWithRecovery
from jedi.parser.tree import Module, search_ancestor from jedi.parser.tree import Module, search_ancestor, EndMarker
from jedi.parser.utils import parser_cache from jedi.parser.utils import parser_cache
from jedi.parser import tokenize from jedi.parser import tokenize
from jedi import debug from jedi import debug
from jedi.parser.tokenize import (generate_tokens, NEWLINE, from jedi.parser.tokenize import (generate_tokens, NEWLINE,
ENDMARKER, INDENT, DEDENT) ENDMARKER, INDENT, DEDENT, tok_name)
class CachedFastParser(type): class CachedFastParser(type):
@@ -40,6 +40,17 @@ def _merge_names_dicts(base_dict, other_dict):
base_dict.setdefault(key, []).extend(names) base_dict.setdefault(key, []).extend(names)
def suite_or_file_input_is_valid(parser):
stack = parser.pgen_parser.stack
for dfa, newstate, (symbol_number, nodes) in reversed(stack):
if symbol_number == parser._grammar.symbol2number['suite']:
# If we don't have ondes already, the suite is not valid.
return bool(nodes)
# Not reaching a suite means that we're dealing with file_input levels
# where there's no need for a valid statement in it. It can also be empty.
return True
class DiffParser(): class DiffParser():
endmarker_type = 'endmarker' endmarker_type = 'endmarker'
@@ -77,6 +88,13 @@ class DiffParser():
- Set parsed_until_line - Set parsed_until_line
''' '''
self._lines_new = lines_new self._lines_new = lines_new
self._added_newline = False
# The Python grammar needs a newline at the end of a file.
if lines_new[-1] != '':
lines_new[-1] += '\n'
lines_new.append('')
self._added_newline = True
self._reset() self._reset()
self._old_children = self._module.children self._old_children = self._module.children
@@ -90,7 +108,11 @@ class DiffParser():
sm = difflib.SequenceMatcher(None, lines_old, lines_new) sm = difflib.SequenceMatcher(None, lines_old, lines_new)
print(len(lines_old), len(lines_new), lines_old, lines_new) print(len(lines_old), len(lines_new), lines_old, lines_new)
for operation, i1, i2, j1, j2 in sm.get_opcodes(): for operation, i1, i2, j1, j2 in sm.get_opcodes():
print(operation, i1, i2, j1, j2) print('\t\t', operation, i1, i2, j1, j2)
if j2 == len(lines_new):
# The empty part after the last newline is not relevant.
j2 -= 1
if operation == 'equal': if operation == 'equal':
line_offset = j1 - i1 line_offset = j1 - i1
self._copy_from_old_parser(line_offset, i2 + 1, j2) self._copy_from_old_parser(line_offset, i2 + 1, j2)
@@ -108,6 +130,9 @@ class DiffParser():
self._module.children = self._new_children self._module.children = self._new_children
# TODO insert endmarker # TODO insert endmarker
print(self._module.get_code()) print(self._module.get_code())
if self._added_newline:
self._parser.remove_last_newline()
self._parser.source = ''.join(lines_new)
def _insert(self, until_line_new): def _insert(self, until_line_new):
self._insert_count += 1 self._insert_count += 1
@@ -128,7 +153,7 @@ class DiffParser():
nodes = [] nodes = []
for node in p_children[index:]: for node in p_children[index:]:
if until_line_old < node.end_pos[0]: if until_line_old < node.end_pos[0]:
divided_node = self._divide_node(node) divided_node = self._divide_node(node, until_line_new)
if divided_node is not None: if divided_node is not None:
nodes.append(divided_node) nodes.append(divided_node)
break break
@@ -183,7 +208,6 @@ class DiffParser():
# endmarker. # endmarker.
pass pass
print(last_non_endmarker)
if last_non_endmarker.type in ('newline', 'dedent'): if last_non_endmarker.type in ('newline', 'dedent'):
# Newlines end on the next line, which means that they would cover # Newlines end on the next line, which means that they would cover
# the next line. That line is not fully parsed at this point. # the next line. That line is not fully parsed at this point.
@@ -200,7 +224,7 @@ class DiffParser():
nodes = nodes[:-1] nodes = nodes[:-1]
if not nodes: if not nodes:
return self._module return self._module
print("X", nodes) print("insert_nodes", nodes)
# Now the preparations are done. We are inserting the nodes. # Now the preparations are done. We are inserting the nodes.
if before_node is None: # Everything is empty. if before_node is None: # Everything is empty.
@@ -344,7 +368,7 @@ class DiffParser():
return nodes return nodes
def _parse_scope_node(self, until_line): def _parse_scope_node(self, until_line):
print('PARSE', until_line, self._parsed_until_line) print('PARSE', self._parsed_until_line, until_line)
# TODO speed up, shouldn't copy the whole list all the time. # TODO speed up, shouldn't copy the whole list all the time.
# memoryview? # memoryview?
lines_after = self._lines_new[self._parsed_until_line:] lines_after = self._lines_new[self._parsed_until_line:]
@@ -354,12 +378,12 @@ class DiffParser():
until_line, until_line,
line_offset=self._parsed_until_line line_offset=self._parsed_until_line
) )
self._parser = ParserWithRecovery( self._active_parser = ParserWithRecovery(
self._parser._grammar, self._parser._grammar,
source='\n', source='\n',
start_parsing=False start_parsing=False
) )
return self._parser.parse(tokenizer=tokenizer) return self._active_parser.parse(tokenizer=tokenizer)
def _post_parse(self): def _post_parse(self):
# Add the used names from the old parser to the new one. # Add the used names from the old parser to the new one.
@@ -373,41 +397,51 @@ class DiffParser():
if name.start_pos[0] in copied_line_numbers: if name.start_pos[0] in copied_line_numbers:
new_used_names.setdefault(key, []).add(name) new_used_names.setdefault(key, []).add(name)
# Add an endmarker.
last_leaf = self._temp_module.last_leaf()
while last_leaf.type == 'dedent':
last_leaf = last_leaf.get_previous_leaf()
endmarker = EndMarker(self._parser.position_modifier, '', last_leaf.end_pos, self._prefix)
endmarker.parent = self._module
self._new_children.append(endmarker)
def _diff_tokenize(self, lines, until_line, line_offset=0): def _diff_tokenize(self, lines, until_line, line_offset=0):
is_first_token = True is_first_token = True
omited_first_indent = False omitted_first_indent = False
indent_count = 0 indents = []
l = iter(lines) l = iter(lines)
tokens = generate_tokens(lambda: next(l, '')) tokens = generate_tokens(lambda: next(l, ''), use_exact_op_types=True)
for typ, string, start_pos, prefix in tokens: for typ, string, start_pos, prefix in tokens:
start_pos = start_pos[0] + line_offset, start_pos[1] start_pos = start_pos[0] + line_offset, start_pos[1]
if typ == tokenize.INDENT: if typ == tokenize.INDENT:
indent_count += 1 indents.append(start_pos[1])
if is_first_token: if is_first_token:
omited_first_indent = True omitted_first_indent = True
# We want to get rid of indents that are only here because # We want to get rid of indents that are only here because
# we only parse part of the file. These indents would only # we only parse part of the file. These indents would only
# get parsed as error leafs, which doesn't make any sense. # get parsed as error leafs, which doesn't make any sense.
is_first_token = False
continue continue
elif typ == tokenize.DEDENT: is_first_token = False
indent_count -= 1
if omited_first_indent and indent_count == 0: if typ == tokenize.DEDENT:
indents.pop()
if omitted_first_indent and not indents:
# We are done here, only thing that can come now is an # We are done here, only thing that can come now is an
# endmarker or another dedented code block. # endmarker or another dedented code block.
break break
elif typ == tokenize.NEWLINE and start_pos[0] >= until_line: elif typ == tokenize.NEWLINE and start_pos[0] >= until_line:
yield tokenize.TokenInfo(typ, string, start_pos, prefix) yield tokenize.TokenInfo(typ, string, start_pos, prefix)
# Check if the parser is actually in a valid suite state. # Check if the parser is actually in a valid suite state.
if 1: if suite_or_file_input_is_valid(self._active_parser):
x = self._parser.pgen_parser.stack while len(indents) > int(omitted_first_indent):
# TODO check if the parser is in a flow, and let it pass if indent_pos = start_pos[0] + 1, indents.pop()
# so. yield tokenize.TokenInfo(tokenize.DEDENT, '', indent_pos, '')
import pdb; pdb.set_trace()
break break
else:
continue
is_first_token = False print('tok', tok_name[typ], repr(string), start_pos)
print('tok', typ, string, start_pos)
yield tokenize.TokenInfo(typ, string, start_pos, prefix) yield tokenize.TokenInfo(typ, string, start_pos, prefix)
typ, string, start_pos, prefix = next(tokens) typ, string, start_pos, prefix = next(tokens)