forked from VimPlug/jedi
Merge pull request #430 from hattya/dev
improve fast parser for incremental parsing
@@ -183,6 +183,9 @@ class ParserNode(object):
 
 
 class FastParser(use_metaclass(CachedFastParser)):
+
+    _keyword_re = re.compile('^[ \t]*(def|class|@|%s)' % '|'.join(tokenize.FLOWS))
+
     def __init__(self, code, module_path=None):
         # set values like `pr.Module`.
         self.module_path = module_path
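The hunk above hoists the keyword pattern into a class-level `_keyword_re`, compiled once at class-definition time instead of being handed to `re.match()` as a string for every line (the `_split_parts` hunks below use it). A minimal sketch of the idea, with a stand-in `FLOWS` tuple in place of the real `tokenize.FLOWS`:

import re

FLOWS = ('if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally')

class Splitter(object):
    # Compiled once when the class body executes; re.match(pattern, line)
    # would go through re's pattern cache on every single call.
    _keyword_re = re.compile('^[ \t]*(def|class|@|%s)' % '|'.join(FLOWS))

    def starts_definition(self, line):
        return self._keyword_re.match(line) is not None

s = Splitter()
print(s.starts_definition('def foo():'))  # True
print(s.starts_definition('    x = 1'))   # False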
@@ -196,7 +199,7 @@ class FastParser(use_metaclass(CachedFastParser)):
             self._parse(code)
         except:
             # FastParser is cached, be careful with exceptions
-            self.parsers[:] = []
+            del self.parsers[:]
             raise
 
     def update(self, code):
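`del self.parsers[:]` empties the list in place, exactly like the old `self.parsers[:] = []`, but without allocating a throwaway empty list on the right-hand side. In-place clearing matters here because other objects may still hold references to the same list. A quick illustration:

parsers = [1, 2, 3]
alias = parsers            # second name for the same list object

del parsers[:]             # clears in place, equivalent to parsers[:] = []
print(alias)               # [] -- every reference sees the cleared list

# Rebinding with `parsers = []` instead would leave `alias` still
# pointing at the old, populated list.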
@@ -206,7 +209,7 @@ class FastParser(use_metaclass(CachedFastParser)):
             self._parse(code)
         except:
             # FastParser is cached, be careful with exceptions
-            self.parsers[:] = []
+            del self.parsers[:]
             raise
 
     def _split_parts(self, code):
@@ -215,34 +218,26 @@ class FastParser(use_metaclass(CachedFastParser)):
         each part seperately and therefore cache parts of the file and not
         everything.
         """
-        def add_part():
-            txt = '\n'.join(current_lines)
-            if txt:
-                if add_to_last and parts:
-                    parts[-1] += '\n' + txt
-                else:
-                    parts.append(txt)
-                current_lines[:] = []
-
-        r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(tokenize.FLOWS)
+        def gen_part():
+            text = '\n'.join(current_lines)
+            del current_lines[:]
+            return text
 
         # Split only new lines. Distinction between \r\n is the tokenizer's
         # job.
         self._lines = code.split('\n')
         current_lines = []
-        parts = []
         is_decorator = False
         current_indent = 0
         old_indent = 0
         new_indent = False
         in_flow = False
-        add_to_last = False
         # All things within flows are simply being ignored.
-        for i, l in enumerate(self._lines):
+        for l in self._lines:
             # check for dedents
-            m = re.match('^([\t ]*)(.?)', l)
-            indent = len(m.group(1))
-            if m.group(2) in ['', '#']:
+            s = l.lstrip('\t ')
+            indent = len(l) - len(s)
+            if not s or s[0] in ('#', '\r'):
                 current_lines.append(l)  # just ignore comments and blank lines
                 continue
 
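With the changes above, `_split_parts` stops accumulating a `parts` list (and the `add_part`/`add_to_last` bookkeeping that went with it) and becomes a generator that yields each part as soon as it is complete; indentation is now measured with `lstrip` instead of a regex. A self-contained sketch of both ideas, using a hypothetical `split_top_level` function in place of the real method:

def split_top_level(code):
    # Yield top-level chunks, splitting before unindented def/class lines.
    current_lines = []

    def gen_part():
        text = '\n'.join(current_lines)
        del current_lines[:]
        return text

    for line in code.split('\n'):
        stripped = line.lstrip('\t ')
        indent = len(line) - len(stripped)  # cheaper than re.match('^[ \t]*')
        is_def = stripped.startswith(('def ', 'class '))
        if indent == 0 and is_def and current_lines:
            yield gen_part()                # emit the finished part lazily
        current_lines.append(line)
    if current_lines:
        yield gen_part()

code = "x = 1\n\ndef f():\n    return x\n\nclass C:\n    pass"
for part in split_top_level(code):
    print(repr(part))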
@@ -250,8 +245,8 @@ class FastParser(use_metaclass(CachedFastParser)):
                 current_indent = indent
                 new_indent = False
                 if not in_flow or indent < old_indent:
-                    add_part()
-                    add_to_last = False
+                    if current_lines:
+                        yield gen_part()
                     in_flow = False
             elif new_indent:
                 current_indent = indent
@@ -259,12 +254,12 @@ class FastParser(use_metaclass(CachedFastParser)):
 
             # Check lines for functions/classes and split the code there.
             if not in_flow:
-                m = re.match(r_keyword, l)
+                m = self._keyword_re.match(l)
                 if m:
                     in_flow = m.group(1) in tokenize.FLOWS
                     if not is_decorator and not in_flow:
-                        add_part()
-                        add_to_last = False
+                        if current_lines:
+                            yield gen_part()
                     is_decorator = '@' == m.group(1)
                     if not is_decorator:
                         old_indent = current_indent
@@ -272,12 +267,15 @@ class FastParser(use_metaclass(CachedFastParser)):
                         new_indent = True
             elif is_decorator:
                 is_decorator = False
-                add_to_last = True
 
             current_lines.append(l)
-        add_part()
-
-        return parts
+        if current_lines:
+            # skip newline at end of code,
+            # since it is not counted by Parser
+            if not current_lines[-1]:
+                del current_lines[-1]
+            if current_lines:
+                yield gen_part()
 
     def _parse(self, code):
         """ :type code: str """
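The new tail of `_split_parts` also handles the final empty entry that `str.split('\n')` produces when the code ends in a newline; per the added comment, the Parser does not count that line, so it is dropped before the last yield. For example:

lines = 'def f():\n    pass\n'.split('\n')
print(lines)             # ['def f():', '    pass', ''] -- trailing empty entry

if not lines[-1]:        # same guard the hunk adds before the final yield
    del lines[-1]
print('\n'.join(lines))  # 'def f():\n    pass'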
@@ -285,24 +283,20 @@ class FastParser(use_metaclass(CachedFastParser)):
             new, temp = self._get_parser(unicode(''), unicode(''), 0, [], False)
             return new
 
-        parts = self._split_parts(code)
-        self.parsers[:] = []
+        del self.parsers[:]
 
         line_offset = 0
         start = 0
         p = None
         is_first = True
-
-        for code_part in parts:
-            lines = code_part.count('\n') + 1
+        for code_part in self._split_parts(code):
             if is_first or line_offset >= p.module.end_pos[0]:
-                indent = len(re.match(r'[ \t]*', code_part).group(0))
+                indent = len(code_part) - len(code_part.lstrip('\t '))
                 if is_first and self.current_node is not None:
                     nodes = [self.current_node]
                 else:
                     nodes = []
                 if self.current_node is not None:
-
                     self.current_node = \
                         self.current_node.parent_until_indent(indent)
                     nodes += self.current_node.old_children
@@ -347,7 +341,7 @@ class FastParser(use_metaclass(CachedFastParser)):
             #else:
                 #print '#'*45, line_offset, p.module.end_pos, 'theheck\n', repr(code_part)
 
-            line_offset += lines
+            line_offset += code_part.count('\n') + 1
             start += len(code_part) + 1  # +1 for newline
 
         if self.parsers:
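Taken together, the last two hunks let `_parse` consume `self._split_parts(code)` lazily instead of materializing a `parts` list up front, and they drop the `lines` temporary in favour of counting newlines at the point of use. A small sketch of the consumption pattern, with a dummy generator standing in for the real `_split_parts`:

def split_parts(code):
    # stand-in for the real generator (see the _split_parts hunks above)
    yield 'def f():\n    return 1'
    yield 'def g():\n    return 2'

line_offset = 0
start = 0
for code_part in split_parts('...'):
    lines_in_part = code_part.count('\n') + 1
    print('part at line', line_offset, 'spans', lines_in_part, 'lines')
    line_offset += lines_in_part
    start += len(code_part) + 1    # +1 for the newline between parts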
@@ -358,29 +352,26 @@ class FastParser(use_metaclass(CachedFastParser)):
         self.module.end_pos = self.parsers[-1].module.end_pos
 
         # print(self.parsers[0].module.get_code())
-        del code
 
     def _get_parser(self, code, parser_code, line_offset, nodes, no_docstr):
         h = hash(code)
-        hashes = [n.hash for n in nodes]
-        node = None
-        try:
-            index = hashes.index(h)
-            if nodes[index].code != code:
-                raise ValueError()
-        except ValueError:
+        for index, node in enumerate(nodes):
+            if node.hash != h or node.code != code:
+                continue
+
+            if node != self.current_node:
+                offset = int(nodes[0] == self.current_node)
+                self.current_node.old_children.pop(index - offset)
+            p = node.parser
+            m = p.module
+            m.line_offset += line_offset + 1 - m.start_pos[0]
+            break
+        else:
             tokenizer = FastTokenizer(parser_code, line_offset)
             p = Parser(parser_code, self.module_path, tokenizer=tokenizer,
                        top_module=self.module, no_docstr=no_docstr)
             p.module.parent = self.module
-        else:
-            if nodes[index] != self.current_node:
-                offset = int(nodes[0] == self.current_node)
-                self.current_node.old_children.pop(index - offset)
-            node = nodes.pop(index)
-            p = node.parser
-            m = p.module
-            m.line_offset += line_offset + 1 - m.start_pos[0]
+            node = None
 
         return p, node
 
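The `_get_parser` rewrite replaces the `hashes.index(h)` plus `try/except ValueError` lookup with a single `for ... else` loop: `break` fires on a cache hit (a node whose hash and code both match, the code comparison guarding against hash collisions), while the `else` suite, which runs only when the loop finishes without breaking, builds a fresh parser. A minimal sketch of the idiom, with plain dicts standing in for parser nodes:

def get_cached(nodes, code):
    h = hash(code)
    for node in nodes:
        if node['hash'] != h or node['code'] != code:
            continue                       # not this entry; keep scanning
        result = node                      # cache hit: reuse the entry
        break
    else:
        # reached only if the loop never hit `break` -> cache miss
        result = {'hash': h, 'code': code}
        nodes.append(result)
    return result

nodes = []
a = get_cached(nodes, 'x = 1')   # miss: builds and stores an entry
b = get_cached(nodes, 'x = 1')   # hit: returns the same entry
print(a is b)                    # True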