forked from VimPlug/jedi
First time a test partially passes of the new fast parser.
@@ -10,12 +10,12 @@ from jedi._compatibility import use_metaclass
 from jedi import settings
 from jedi.common import splitlines
 from jedi.parser import ParserWithRecovery
-from jedi.parser.tree import Module, search_ancestor
+from jedi.parser.tree import Module, search_ancestor, EndMarker
 from jedi.parser.utils import parser_cache
 from jedi.parser import tokenize
 from jedi import debug
 from jedi.parser.tokenize import (generate_tokens, NEWLINE,
-                                  ENDMARKER, INDENT, DEDENT)
+                                  ENDMARKER, INDENT, DEDENT, tok_name)
 
 
 class CachedFastParser(type):
@@ -40,6 +40,17 @@ def _merge_names_dicts(base_dict, other_dict):
         base_dict.setdefault(key, []).extend(names)
 
 
+def suite_or_file_input_is_valid(parser):
+    stack = parser.pgen_parser.stack
+    for dfa, newstate, (symbol_number, nodes) in reversed(stack):
+        if symbol_number == parser._grammar.symbol2number['suite']:
+            # If we don't have nodes already, the suite is not valid.
+            return bool(nodes)
+    # Not reaching a suite means that we're dealing with file_input levels
+    # where there's no need for a valid statement in it. It can also be empty.
+    return True
+
+
 class DiffParser():
     endmarker_type = 'endmarker'
 
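The new suite_or_file_input_is_valid() helper walks the pgen parser stack from the innermost grammar symbol outwards and only accepts a break-off point if the current suite already contains nodes. A rough, self-contained sketch of that walk (the stack tuples and the symbol2number mapping are mocked here; the real objects come from jedi's pgen parser):

    # Hypothetical stand-ins for parser.pgen_parser.stack entries, which
    # have the shape (dfa, newstate, (symbol_number, nodes)).
    symbol2number = {'file_input': 256, 'suite': 300}
    stack = [
        (None, 0, (symbol2number['file_input'], ['<node>'])),
        (None, 0, (symbol2number['suite'], [])),  # suite opened, still empty
    ]

    for dfa, newstate, (symbol_number, nodes) in reversed(stack):
        if symbol_number == symbol2number['suite']:
            print(bool(nodes))  # False: stopping here leaves an invalid suite
            break
    else:
        print(True)  # only file_input levels; stopping is always fine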
@@ -77,6 +88,13 @@ class DiffParser():
         - Set parsed_until_line
         '''
         self._lines_new = lines_new
+        self._added_newline = False
+        # The Python grammar needs a newline at the end of a file.
+        if lines_new[-1] != '':
+            lines_new[-1] += '\n'
+            lines_new.append('')
+            self._added_newline = True
+
         self._reset()
 
         self._old_children = self._module.children
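The newline bookkeeping added above can be tried in isolation. A minimal sketch, assuming keepends-style lines as produced by splitlines:

    lines_new = ['def f():\n', '    return 1']
    added_newline = False
    # The Python grammar needs a newline at the end of a file.
    if lines_new[-1] != '':
        lines_new[-1] += '\n'
        lines_new.append('')
        added_newline = True
    print(lines_new)      # ['def f():\n', '    return 1\n', '']
    print(added_newline)  # True

The flag matters because a later hunk strips the artificial newline again via remove_last_newline() once parsing is done.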
@@ -90,7 +108,11 @@ class DiffParser():
         sm = difflib.SequenceMatcher(None, lines_old, lines_new)
         print(len(lines_old), len(lines_new), lines_old, lines_new)
         for operation, i1, i2, j1, j2 in sm.get_opcodes():
-            print(operation, i1, i2, j1, j2)
+            print('\t\t', operation, i1, i2, j1, j2)
+            if j2 == len(lines_new):
+                # The empty part after the last newline is not relevant.
+                j2 -= 1
+
             if operation == 'equal':
                 line_offset = j1 - i1
                 self._copy_from_old_parser(line_offset, i2 + 1, j2)
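difflib.SequenceMatcher is standard library, so the opcodes the loop consumes are easy to inspect; this prints exactly the (operation, i1, i2, j1, j2) tuples seen above:

    import difflib

    lines_old = ['a\n', 'b\n', 'c\n', '']
    lines_new = ['a\n', 'x\n', 'c\n', 'd\n', '']
    sm = difflib.SequenceMatcher(None, lines_old, lines_new)
    for operation, i1, i2, j1, j2 in sm.get_opcodes():
        print(operation, i1, i2, j1, j2)
    # equal 0 1 0 1
    # replace 1 2 1 2
    # equal 2 3 2 3
    # insert 3 3 3 4
    # equal 3 4 4 5

The new j2 == len(lines_new) check clips the artificial empty last line so it never counts as changed content.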
@@ -108,6 +130,9 @@ class DiffParser():
         self._module.children = self._new_children
         # TODO insert endmarker
         print(self._module.get_code())
+        if self._added_newline:
+            self._parser.remove_last_newline()
+        self._parser.source = ''.join(lines_new)
 
     def _insert(self, until_line_new):
         self._insert_count += 1
@@ -128,7 +153,7 @@ class DiffParser():
         nodes = []
         for node in p_children[index:]:
             if until_line_old < node.end_pos[0]:
-                divided_node = self._divide_node(node)
+                divided_node = self._divide_node(node, until_line_new)
                 if divided_node is not None:
                     nodes.append(divided_node)
                 break
@@ -183,7 +208,6 @@ class DiffParser():
             # endmarker.
             pass
 
-        print(last_non_endmarker)
         if last_non_endmarker.type in ('newline', 'dedent'):
             # Newlines end on the next line, which means that they would cover
             # the next line. That line is not fully parsed at this point.
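The "newlines end on the next line" remark is about end positions: a leaf whose value contains '\n' has an end_pos whose row is already the following line, and that line is not fully parsed yet at this point. A toy end-position calculation (hypothetical helper; jedi's leaves implement the equivalent):

    def end_pos(start_pos, value):
        # Rows are 1-based, columns 0-based, matching jedi's positions.
        lines = value.split('\n')
        if len(lines) > 1:
            return start_pos[0] + len(lines) - 1, len(lines[-1])
        return start_pos[0], start_pos[1] + len(value)

    print(end_pos((5, 10), '\n'))    # (6, 0): the newline "covers" line 6
    print(end_pos((5, 10), 'name'))  # (5, 14): an ordinary same-line token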
@@ -200,7 +224,7 @@ class DiffParser():
             nodes = nodes[:-1]
             if not nodes:
                 return self._module
-        print("X", nodes)
+        print("insert_nodes", nodes)
 
         # Now the preparations are done. We are inserting the nodes.
         if before_node is None:  # Everything is empty.
@@ -344,7 +368,7 @@ class DiffParser():
         return nodes
 
     def _parse_scope_node(self, until_line):
-        print('PARSE', until_line, self._parsed_until_line)
+        print('PARSE', self._parsed_until_line, until_line)
         # TODO speed up, shouldn't copy the whole list all the time.
         # memoryview?
         lines_after = self._lines_new[self._parsed_until_line:]
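The slice works because line numbers are 1-based while list indices are 0-based, so indexing with _parsed_until_line skips exactly the lines that are already parsed:

    lines_new = ['line 1\n', 'line 2\n', 'line 3\n', '']
    parsed_until_line = 2  # lines 1 and 2 are done
    print(lines_new[parsed_until_line:])  # ['line 3\n', '']

This copy is also what the TODO about memoryview wants to avoid.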
@@ -354,12 +378,12 @@ class DiffParser():
             until_line,
             line_offset=self._parsed_until_line
         )
-        self._parser = ParserWithRecovery(
+        self._active_parser = ParserWithRecovery(
             self._parser._grammar,
             source='\n',
             start_parsing=False
         )
-        return self._parser.parse(tokenizer=tokenizer)
+        return self._active_parser.parse(tokenizer=tokenizer)
 
     def _post_parse(self):
         # Add the used names from the old parser to the new one.
@@ -373,41 +397,51 @@ class DiffParser():
             if name.start_pos[0] in copied_line_numbers:
                 new_used_names.setdefault(key, []).add(name)
 
+        # Add an endmarker.
+        last_leaf = self._temp_module.last_leaf()
+        while last_leaf.type == 'dedent':
+            last_leaf = last_leaf.get_previous_leaf()
+        endmarker = EndMarker(self._parser.position_modifier, '', last_leaf.end_pos, self._prefix)
+        endmarker.parent = self._module
+        self._new_children.append(endmarker)
+
     def _diff_tokenize(self, lines, until_line, line_offset=0):
         is_first_token = True
-        omited_first_indent = False
-        indent_count = 0
+        omitted_first_indent = False
+        indents = []
         l = iter(lines)
-        tokens = generate_tokens(lambda: next(l, ''))
+        tokens = generate_tokens(lambda: next(l, ''), use_exact_op_types=True)
        for typ, string, start_pos, prefix in tokens:
             start_pos = start_pos[0] + line_offset, start_pos[1]
             if typ == tokenize.INDENT:
-                indent_count += 1
+                indents.append(start_pos[1])
                 if is_first_token:
-                    omited_first_indent = True
+                    omitted_first_indent = True
                     # We want to get rid of indents that are only here because
                     # we only parse part of the file. These indents would only
                     # get parsed as error leafs, which doesn't make any sense.
+                    is_first_token = False
                     continue
-            elif typ == tokenize.DEDENT:
-                indent_count -= 1
-                if omited_first_indent and indent_count == 0:
+            is_first_token = False
+            if typ == tokenize.DEDENT:
+                indents.pop()
+                if omitted_first_indent and not indents:
                     # We are done here, only thing that can come now is an
                     # endmarker or another dedented code block.
                     break
             elif typ == tokenize.NEWLINE and start_pos[0] >= until_line:
                 yield tokenize.TokenInfo(typ, string, start_pos, prefix)
                 # Check if the parser is actually in a valid suite state.
-                if 1:
-                    x = self._parser.pgen_parser.stack
-                    # TODO check if the parser is in a flow, and let it pass if
-                    # so.
-                    import pdb; pdb.set_trace()
+                if suite_or_file_input_is_valid(self._active_parser):
+                    while len(indents) > int(omitted_first_indent):
+                        indent_pos = start_pos[0] + 1, indents.pop()
+                        yield tokenize.TokenInfo(tokenize.DEDENT, '', indent_pos, '')
                     break
+                else:
+                    continue
 
-            is_first_token = False
+            print('tok', tok_name[typ], repr(string), start_pos)
 
-            print('tok', typ, string, start_pos)
             yield tokenize.TokenInfo(typ, string, start_pos, prefix)
 
         typ, string, start_pos, prefix = next(tokens)
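_diff_tokenize now tracks indentation columns in indents so that, when it breaks off at a valid newline, it can synthesize the DEDENT tokens it still owes the parser (the while len(indents) > int(omitted_first_indent) loop; the int() accounts for the first indent that was deliberately swallowed). jedi ships its own tokenizer, but the INDENT/DEDENT structure being tracked is the same as in the standard library:

    import io
    import tokenize as std_tokenize

    source = 'class C:\n    def f(self):\n        pass\n'
    for tok in std_tokenize.generate_tokens(io.StringIO(source).readline):
        if tok.type in (std_tokenize.INDENT, std_tokenize.DEDENT):
            print(std_tokenize.tok_name[tok.type], tok.start)
    # INDENT (2, 0)
    # INDENT (3, 0)
    # DEDENT (4, 0)
    # DEDENT (4, 0)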