
First time a test of the new fast parser partially passes.

Dave Halter
2016-08-30 23:12:24 +02:00
parent 42e5777620
commit d505c764de


@@ -10,12 +10,12 @@ from jedi._compatibility import use_metaclass
 from jedi import settings
 from jedi.common import splitlines
 from jedi.parser import ParserWithRecovery
-from jedi.parser.tree import Module, search_ancestor
+from jedi.parser.tree import Module, search_ancestor, EndMarker
 from jedi.parser.utils import parser_cache
 from jedi.parser import tokenize
 from jedi import debug
 from jedi.parser.tokenize import (generate_tokens, NEWLINE,
-                                  ENDMARKER, INDENT, DEDENT)
+                                  ENDMARKER, INDENT, DEDENT, tok_name)


 class CachedFastParser(type):
@@ -40,6 +40,17 @@ def _merge_names_dicts(base_dict, other_dict):
         base_dict.setdefault(key, []).extend(names)


+def suite_or_file_input_is_valid(parser):
+    stack = parser.pgen_parser.stack
+    for dfa, newstate, (symbol_number, nodes) in reversed(stack):
+        if symbol_number == parser._grammar.symbol2number['suite']:
+            # If we don't have nodes already, the suite is not valid.
+            return bool(nodes)
+    # Not reaching a suite means that we're at the file_input level, where no
+    # valid statement is required; file_input may even be empty.
+    return True
+
+
 class DiffParser():
     endmarker_type = 'endmarker'
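
The stack walk in the new function is the crux of the validity check: a suite that has not collected any child nodes yet must not be cut off mid-parse. A minimal, self-contained sketch of the same idea, using a mocked-up stack in place of jedi's real pgen structures (the symbol numbers are placeholders, not jedi's actual grammar table):

    # Each entry mirrors pgen's (dfa, state, (symbol_number, nodes)) shape.
    SUITE = 300  # hypothetical grammar number for 'suite'

    def suite_is_valid(stack, suite_number=SUITE):
        # Walk from the innermost grammar node outwards.
        for dfa, state, (symbol_number, nodes) in reversed(stack):
            if symbol_number == suite_number:
                # A suite without children is still incomplete.
                return bool(nodes)
        # Only file_input levels reached: always a safe stopping point.
        return True

    stack = [(None, 0, (256, ['<file_input child>'])),
             (None, 2, (SUITE, ['<stmt>']))]
    print(suite_is_valid(stack))  # True: the suite already holds a statement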
@@ -77,6 +88,13 @@ class DiffParser():
         - Set parsed_until_line
         '''
         self._lines_new = lines_new
+        self._added_newline = False
+        # The Python grammar needs a newline at the end of a file.
+        if lines_new[-1] != '':
+            lines_new[-1] += '\n'
+            lines_new.append('')
+            self._added_newline = True
+
         self._reset()

         self._old_children = self._module.children
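
The newline bookkeeping added here exists because Python's grammar only accepts files that end in a newline. A short sketch of the normalization, assuming the line list keeps trailing '\n' characters the way jedi's splitlines output does:

    def ensure_trailing_newline(lines):
        added = False
        if lines[-1] != '':
            lines[-1] += '\n'
            lines.append('')  # the empty line after the final newline
            added = True
        return added

    lines = ['def f():\n', '    pass']   # no newline at end of file
    print(ensure_trailing_newline(lines), lines)
    # True ['def f():\n', '    pass\n', '']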
@@ -90,7 +108,11 @@ class DiffParser():
         sm = difflib.SequenceMatcher(None, lines_old, lines_new)
         print(len(lines_old), len(lines_new), lines_old, lines_new)
         for operation, i1, i2, j1, j2 in sm.get_opcodes():
-            print(operation, i1, i2, j1, j2)
+            print('\t\t', operation, i1, i2, j1, j2)
+            if j2 == len(lines_new):
+                # The empty part after the last newline is not relevant.
+                j2 -= 1
+
             if operation == 'equal':
                 line_offset = j1 - i1
                 self._copy_from_old_parser(line_offset, i2 + 1, j2)
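
This loop is driven entirely by difflib from the standard library; each opcode maps a slice of the old lines onto a slice of the new ones. A small standalone demonstration:

    import difflib

    lines_old = ['a = 1\n', 'b = 2\n', 'c = 3\n', '']
    lines_new = ['a = 1\n', 'b = 20\n', 'c = 3\n', '']
    sm = difflib.SequenceMatcher(None, lines_old, lines_new)
    for operation, i1, i2, j1, j2 in sm.get_opcodes():
        print(operation, i1, i2, j1, j2)
    # equal   0 1 0 1   -> copy nodes from the old tree
    # replace 1 2 1 2   -> reparse only these lines
    # equal   2 4 2 4   -> copy again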
@@ -108,6 +130,9 @@ class DiffParser():
         self._module.children = self._new_children
         # TODO insert endmarker
         print(self._module.get_code())
+        if self._added_newline:
+            self._parser.remove_last_newline()
+        self._parser.source = ''.join(lines_new)

     def _insert(self, until_line_new):
         self._insert_count += 1
@@ -128,7 +153,7 @@ class DiffParser():
         nodes = []
         for node in p_children[index:]:
             if until_line_old < node.end_pos[0]:
-                divided_node = self._divide_node(node)
+                divided_node = self._divide_node(node, until_line_new)
                 if divided_node is not None:
                     nodes.append(divided_node)
                 break
@@ -183,7 +208,6 @@ class DiffParser():
             # endmarker.
             pass

-        print(last_non_endmarker)
         if last_non_endmarker.type in ('newline', 'dedent'):
             # Newlines end on the next line, which means that they would cover
             # the next line. That line is not fully parsed at this point.
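
The comment about newlines covering the next line follows from how end positions are computed for leaves: a value that ends in '\n' has its end position at column 0 of the following line. A simplified end_pos computation makes this concrete (not jedi's exact implementation):

    def end_pos(start_line, start_col, value):
        lines = value.split('\n')
        if len(lines) > 1:
            return start_line + len(lines) - 1, len(lines[-1])
        return start_line, start_col + len(value)

    print(end_pos(5, 10, '\n'))   # (6, 0): the leaf already touches line 6
    print(end_pos(5, 10, 'abc'))  # (5, 13): stays on its own line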
@@ -200,7 +224,7 @@ class DiffParser():
             nodes = nodes[:-1]
         if not nodes:
             return self._module
-        print("X", nodes)
+        print("insert_nodes", nodes)

         # Now the preparations are done. We are inserting the nodes.
         if before_node is None:  # Everything is empty.
@@ -344,7 +368,7 @@ class DiffParser():
         return nodes

     def _parse_scope_node(self, until_line):
-        print('PARSE', until_line, self._parsed_until_line)
+        print('PARSE', self._parsed_until_line, until_line)
         # TODO speed up, shouldn't copy the whole list all the time.
         # memoryview?
         lines_after = self._lines_new[self._parsed_until_line:]
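
One way the TODO about copying could go, sketched with itertools.islice: an iterator view walks the tail of the list without copying it on every call. This is a possible direction, not what the commit does:

    from itertools import islice

    lines = ['a\n', 'b\n', 'c\n', 'd\n']
    parsed_until_line = 2

    # lines[parsed_until_line:] copies the tail on every call; islice
    # yields the same elements without building a new list.
    lines_after = islice(lines, parsed_until_line, None)
    print(list(lines_after))  # ['c\n', 'd\n']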
@@ -354,12 +378,12 @@ class DiffParser():
             until_line,
             line_offset=self._parsed_until_line
         )
-        self._parser = ParserWithRecovery(
+        self._active_parser = ParserWithRecovery(
             self._parser._grammar,
             source='\n',
             start_parsing=False
         )
-        return self._parser.parse(tokenizer=tokenizer)
+        return self._active_parser.parse(tokenizer=tokenizer)

     def _post_parse(self):
         # Add the used names from the old parser to the new one.
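
The used-names merge in the next hunk follows the same setdefault idiom as _merge_names_dicts earlier in the file. A standalone sketch of that merge pattern, with placeholder strings standing in for jedi's Name objects:

    def merge_names_dicts(base_dict, other_dict):
        for key, names in other_dict.items():
            base_dict.setdefault(key, []).extend(names)

    new_used_names = {'foo': ['<Name foo@1>']}
    old_used_names = {'foo': ['<Name foo@7>'], 'bar': ['<Name bar@3>']}
    merge_names_dicts(new_used_names, old_used_names)
    print(sorted(new_used_names))  # ['bar', 'foo']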
@@ -373,41 +397,51 @@ class DiffParser():
                 if name.start_pos[0] in copied_line_numbers:
                     new_used_names.setdefault(key, []).add(name)

+        # Add an endmarker.
+        last_leaf = self._temp_module.last_leaf()
+        while last_leaf.type == 'dedent':
+            last_leaf = last_leaf.get_previous_leaf()
+        endmarker = EndMarker(self._parser.position_modifier, '', last_leaf.end_pos, self._prefix)
+        endmarker.parent = self._module
+        self._new_children.append(endmarker)

     def _diff_tokenize(self, lines, until_line, line_offset=0):
         is_first_token = True
-        omited_first_indent = False
-        indent_count = 0
+        omitted_first_indent = False
+        indents = []
         l = iter(lines)
-        tokens = generate_tokens(lambda: next(l, ''))
+        tokens = generate_tokens(lambda: next(l, ''), use_exact_op_types=True)
         for typ, string, start_pos, prefix in tokens:
             start_pos = start_pos[0] + line_offset, start_pos[1]
             if typ == tokenize.INDENT:
-                indent_count += 1
+                indents.append(start_pos[1])
                 if is_first_token:
-                    omited_first_indent = True
+                    omitted_first_indent = True
                     # We want to get rid of indents that are only here because
                     # we only parse part of the file. These indents would only
                     # get parsed as error leafs, which doesn't make any sense.
                     is_first_token = False
                     continue
-            elif typ == tokenize.DEDENT:
-                indent_count -= 1
-                if omited_first_indent and indent_count == 0:
+            is_first_token = False
+            if typ == tokenize.DEDENT:
+                indents.pop()
+                if omitted_first_indent and not indents:
+                    # We are done here, only thing that can come now is an
+                    # endmarker or another dedented code block.
                     break
             elif typ == tokenize.NEWLINE and start_pos[0] >= until_line:
                 yield tokenize.TokenInfo(typ, string, start_pos, prefix)
                 # Check if the parser is actually in a valid suite state.
-                if 1:
-                    x = self._parser.pgen_parser.stack
-                    # TODO check if the parser is in a flow, and let it pass if
-                    # so.
-                    import pdb; pdb.set_trace()
+                if suite_or_file_input_is_valid(self._active_parser):
+                    while len(indents) > int(omitted_first_indent):
+                        indent_pos = start_pos[0] + 1, indents.pop()
+                        yield tokenize.TokenInfo(tokenize.DEDENT, '', indent_pos, '')
                     break
+                else:
+                    continue
-            is_first_token = False
-            print('tok', typ, string, start_pos)
+            print('tok', tok_name[typ], repr(string), start_pos)
             yield tokenize.TokenInfo(typ, string, start_pos, prefix)
         typ, string, start_pos, prefix = next(tokens)
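
The dedent synthesis at the end of _diff_tokenize is what lets a partial parse stop cleanly at a newline: one DEDENT per still-open indentation level is emitted before breaking out of the loop. A toy version with simplified token tuples (jedi's TokenInfo also carries a prefix):

    def close_open_indents(indents, omitted_first_indent, line):
        # Pop every indent that was actually handed to the parser; the
        # omitted first indent never reached it, so it stays unpopped.
        while len(indents) > int(omitted_first_indent):
            col = indents.pop()
            yield ('DEDENT', '', (line + 1, col), '')

    indents = [4, 8, 12]   # columns of the INDENTs seen, first one omitted
    for tok in close_open_indents(indents, omitted_first_indent=True, line=12):
        print(tok)
    # ('DEDENT', '', (13, 12), '')
    # ('DEDENT', '', (13, 8), '')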