Merge pull request #208 from davidhalter/fast

New improved Fast Parser
This commit is contained in:
David Halter
2013-05-02 14:11:05 -07:00
9 changed files with 445 additions and 217 deletions
+2
View File
@@ -177,5 +177,7 @@ except ImportError:
try: try:
encoding = sys.stdout.encoding encoding = sys.stdout.encoding
if encoding is None:
encoding = 'utf-8'
except AttributeError: except AttributeError:
encoding = 'ascii' encoding = 'ascii'
+72 -16
View File
@@ -7,6 +7,8 @@ import tokenizer as tokenize
from jedi._compatibility import next, reraise from jedi._compatibility import next, reraise
from jedi import settings from jedi import settings
FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
class MultiLevelStopIteration(Exception): class MultiLevelStopIteration(Exception):
""" """
@@ -81,17 +83,25 @@ class PushBackIterator(object):
class NoErrorTokenizer(object): class NoErrorTokenizer(object):
def __init__(self, readline, offset=(0, 0), stop_on_scope=False): def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
self.readline = readline self.readline = readline
self.gen = PushBackIterator(tokenize.generate_tokens(readline)) self.gen = tokenize.generate_tokens(readline)
self.offset = offset self.offset = offset
self.stop_on_scope = stop_on_scope
self.first_scope = False
self.closed = False self.closed = False
self.first = True self.is_first = True
self.push_backs = []
# fast parser options
self.is_fast_parser = is_fast_parser
self.current = self.previous = [None, None, (0, 0), (0, 0), '']
self.in_flow = False
self.new_indent = False
self.parser_indent = self.old_parser_indent = 0
self.is_decorator = False
self.first_stmt = True
def push_last_back(self): def push_last_back(self):
self.gen.push_back(self.current) self.push_backs.append(self.current)
def next(self): def next(self):
""" Python 2 Compatibility """ """ Python 2 Compatibility """
@@ -100,25 +110,71 @@ class NoErrorTokenizer(object):
def __next__(self): def __next__(self):
if self.closed: if self.closed:
raise MultiLevelStopIteration() raise MultiLevelStopIteration()
if self.push_backs:
return self.push_backs.pop(0)
self.last_previous = self.previous
self.previous = self.current
self.current = next(self.gen) self.current = next(self.gen)
c = list(self.current) c = list(self.current)
# stop if a new class or definition is started at position zero. if c[0] == tokenize.ENDMARKER:
breaks = ['def', 'class', '@'] self.current = self.previous
if self.stop_on_scope and c[1] in breaks and c[2][1] == 0: self.previous = self.last_previous
if self.first_scope: raise MultiLevelStopIteration()
self.closed = True
raise MultiLevelStopIteration()
elif c[1] != '@':
self.first_scope = True
if self.first: # this is exactly the same check as in fast_parser, but this time with
# tokenize and therefore precise.
breaks = ['def', 'class', '@']
if self.is_first:
c[2] = self.offset[0] + c[2][0], self.offset[1] + c[2][1] c[2] = self.offset[0] + c[2][0], self.offset[1] + c[2][1]
c[3] = self.offset[0] + c[3][0], self.offset[1] + c[3][1] c[3] = self.offset[0] + c[3][0], self.offset[1] + c[3][1]
self.first = False self.is_first = False
else: else:
c[2] = self.offset[0] + c[2][0], c[2][1] c[2] = self.offset[0] + c[2][0], c[2][1]
c[3] = self.offset[0] + c[3][0], c[3][1] c[3] = self.offset[0] + c[3][0], c[3][1]
self.current = c
def close():
if not self.first_stmt:
self.closed = True
raise MultiLevelStopIteration()
# ignore indents/comments
if self.is_fast_parser \
and self.previous[0] in (tokenize.INDENT, tokenize.NL, None,
tokenize.NEWLINE, tokenize.DEDENT) \
and c[0] not in (tokenize.COMMENT, tokenize.INDENT,
tokenize.NL, tokenize.NEWLINE, tokenize.DEDENT):
#print c, tokenize.tok_name[c[0]]
tok = c[1]
indent = c[2][1]
if indent < self.parser_indent: # -> dedent
self.parser_indent = indent
self.new_indent = False
if not self.in_flow or indent < self.old_parser_indent:
close()
self.in_flow = False
elif self.new_indent:
self.parser_indent = indent
self.new_indent = False
if not self.in_flow:
if tok in FLOWS or tok in breaks:
self.in_flow = tok in FLOWS
if not self.is_decorator and not self.in_flow:
close()
self.is_decorator = '@' == tok
if not self.is_decorator:
self.old_parser_indent = self.parser_indent
self.parser_indent += 1 # new scope: must be higher
self.new_indent = True
if tok != '@':
if self.first_stmt and not self.new_indent:
self.parser_indent = indent
self.first_stmt = False
return c return c
+8 -4
View File
@@ -131,22 +131,22 @@ def get_names_of_scope(scope, position=None, star_search=True,
... def func(): ... def func():
... y = None ... y = None
... ''') ... ''')
>>> scope = parser.scope.subscopes[0] >>> scope = parser.module.subscopes[0]
>>> scope >>> scope
<Function: func@3-6> <Function: func@3-5>
`get_names_of_scope` is a generator. First it yields names from `get_names_of_scope` is a generator. First it yields names from
most inner scope. most inner scope.
>>> pairs = list(get_names_of_scope(scope)) >>> pairs = list(get_names_of_scope(scope))
>>> pairs[0] >>> pairs[0]
(<Function: func@3-6>, [<Name: y@4,4>]) (<Function: func@3-5>, [<Name: y@4,4>])
Then it yield the names from one level outer scope. For this Then it yield the names from one level outer scope. For this
example, this is the most outer scope. example, this is the most outer scope.
>>> pairs[1] >>> pairs[1]
(<SubModule: None@1-6>, [<Name: x@2,0>, <Name: func@3,4>]) (<SubModule: None@1-5>, [<Name: x@2,0>, <Name: func@3,4>])
Finally, it yields names from builtin, if `include_builtin` is Finally, it yields names from builtin, if `include_builtin` is
true (default). true (default).
@@ -160,6 +160,10 @@ def get_names_of_scope(scope, position=None, star_search=True,
in_func_scope = scope in_func_scope = scope
non_flow = scope.get_parent_until(pr.Flow, reverse=True) non_flow = scope.get_parent_until(pr.Flow, reverse=True)
while scope: while scope:
if isinstance(scope, pr.SubModule) and scope.parent:
# we don't want submodules to report if we have modules.
scope = scope.parent
continue
# `pr.Class` is used, because the parent is never `Class`. # `pr.Class` is used, because the parent is never `Class`.
# Ignore the Flows, because the classes and functions care for that. # Ignore the Flows, because the classes and functions care for that.
# InstanceElement of Class is ignored, if it is not the start scope. # InstanceElement of Class is ignored, if it is not the start scope.
+302 -156
View File
@@ -4,84 +4,44 @@ anything changes, it only reparses the changed parts. But because it's not
finished (and still not working as I want), I won't document it any further. finished (and still not working as I want), I won't document it any further.
""" """
import re import re
import operator
from jedi._compatibility import use_metaclass, reduce from jedi._compatibility import use_metaclass
from jedi import settings from jedi import settings
from jedi import parsing from jedi import parsing
from jedi import parsing_representation as pr from jedi import parsing_representation as pr
from jedi import cache from jedi import cache
from jedi import common
SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns']
class Module(pr.Simple, pr.Module): class Module(pr.Simple, pr.Module):
def __init__(self, parsers): def __init__(self, parsers):
self._end_pos = None, None
super(Module, self).__init__(self, (1, 0)) super(Module, self).__init__(self, (1, 0))
self.parsers = parsers self.parsers = parsers
self.reset_caches() self.reset_caches()
self.line_offset = 0
self.start_pos = 1, 0
self.end_pos = None, None
def reset_caches(self): def reset_caches(self):
""" This module does a whole lot of caching, because it uses different """ This module does a whole lot of caching, because it uses different
parsers. """ parsers. """
self.cache = {} self._used_names = None
for p in self.parsers: for p in self.parsers:
p.user_scope = None p.user_scope = None
p.user_stmt = None p.user_stmt = None
def _get(self, name, operation, execute=False, *args, **kwargs):
key = (name, args, frozenset(kwargs.items()))
if key not in self.cache:
if execute:
objs = (getattr(p.module, name)(*args, **kwargs)
for p in self.parsers)
else:
objs = (getattr(p.module, name) for p in self.parsers)
self.cache[key] = reduce(operation, objs)
return self.cache[key]
def __getattr__(self, name): def __getattr__(self, name):
operators = { if name.startswith('__'):
'get_imports': operator.add, raise AttributeError('Not available!')
'get_code': operator.add,
'get_set_vars': operator.add,
'get_defined_names': operator.add,
'is_empty': operator.and_
}
properties = {
'subscopes': operator.add,
'imports': operator.add,
'statements': operator.add,
'imports': operator.add,
'asserts': operator.add,
'global_vars': operator.add
}
if name in operators:
return lambda *args, **kwargs: self._get(name, operators[name],
True, *args, **kwargs)
elif name in properties:
return self._get(name, properties[name])
else: else:
raise AttributeError("__getattr__ doesn't offer %s" % name) return getattr(self.parsers[0].module, name)
def get_statement_for_position(self, pos):
key = 'get_statement_for_position', pos
if key not in self.cache:
for p in self.parsers:
s = p.module.get_statement_for_position(pos)
if s:
self.cache[key] = s
break
else:
self.cache[key] = None
return self.cache[key]
@property @property
def used_names(self): def used_names(self):
if not self.parsers: if self._used_names is None:
raise NotImplementedError("Parser doesn't exist.")
key = 'used_names'
if key not in self.cache:
dct = {} dct = {}
for p in self.parsers: for p in self.parsers:
for k, statement_set in p.module.used_names.items(): for k, statement_set in p.module.used_names.items():
@@ -90,52 +50,8 @@ class Module(pr.Simple, pr.Module):
else: else:
dct[k] = set(statement_set) dct[k] = set(statement_set)
self.cache[key] = dct self._used_names = dct
return self.cache[key] return self._used_names
@property
def docstr(self):
if not self.parsers:
raise NotImplementedError("Parser doesn't exist.")
return self.parsers[0].module.docstr
@property
def name(self):
if not self.parsers:
raise NotImplementedError("Parser doesn't exist.")
return self.parsers[0].module.name
@property
def path(self):
if not self.parsers:
raise NotImplementedError("Parser doesn't exist.")
return self.parsers[0].module.path
@property
def is_builtin(self):
if not self.parsers:
raise NotImplementedError("Parser doesn't exist.")
return self.parsers[0].module.is_builtin
@property
def start_pos(self):
""" overwrite start_pos of Simple """
return 1, 0
@start_pos.setter
def start_pos(self):
""" ignore """
pass
@property
def end_pos(self):
return self._end_pos
@end_pos.setter
def end_pos(self, value):
if None in self._end_pos \
or None not in value and self._end_pos < value:
self._end_pos = value
def __repr__(self): def __repr__(self):
return "<%s: %s@%s-%s>" % (type(self).__name__, self.name, return "<%s: %s@%s-%s>" % (type(self).__name__, self.name,
@@ -158,12 +74,120 @@ class CachedFastParser(type):
return p return p
class ParserNode(object):
def __init__(self, parser, code, parent=None):
self.parent = parent
self.code = code
self.hash = hash(code)
self.children = []
# must be created before new things are added to it.
self.save_contents(parser)
def save_contents(self, parser):
self.parser = parser
try:
# with fast_parser we have either 1 subscope or only statements.
self.content_scope = parser.module.subscopes[0]
except IndexError:
self.content_scope = parser.module
scope = self.content_scope
self._contents = {}
for c in SCOPE_CONTENTS:
self._contents[c] = list(getattr(scope, c))
self._is_generator = scope.is_generator
self.old_children = self.children
self.children = []
def reset_contents(self):
scope = self.content_scope
for key, c in self._contents.items():
setattr(scope, key, list(c))
scope.is_generator = self._is_generator
self.parser.user_scope = self.parser.module
if self.parent is None:
# Global vars of the first one can be deleted, in the global scope
# they make no sense.
self.parser.module.global_vars = []
for c in self.children:
c.reset_contents()
def parent_until_indent(self, indent=None):
if indent is None or self.indent >= indent and self.parent:
self.old_children = []
if self.parent is not None:
return self.parent.parent_until_indent(indent)
return self
@property
def indent(self):
if not self.parent:
return 0
module = self.parser.module
try:
el = module.subscopes[0]
except IndexError:
try:
el = module.statements[0]
except IndexError:
try:
el = module.imports[0]
except IndexError:
try:
el = [r for r in module.returns if r is not None][0]
except IndexError:
return self.parent.indent + 1
return el.start_pos[1]
def _set_items(self, parser, set_parent=False):
# insert parser objects into current structure
scope = self.content_scope
for c in SCOPE_CONTENTS:
content = getattr(scope, c)
items = getattr(parser.module, c)
if set_parent:
for i in items:
if i is None:
continue # happens with empty returns
i.parent = scope.use_as_parent
if isinstance(i, (pr.Function, pr.Class)):
for d in i.decorators:
d.parent = scope.use_as_parent
content += items
# global_vars
cur = self
while cur.parent is not None:
cur = cur.parent
cur.parser.module.global_vars += parser.module.global_vars
scope.is_generator |= parser.module.is_generator
def add_node(self, node, set_parent=False):
"""Adding a node means adding a node that was already added earlier"""
self.children.append(node)
self._set_items(node.parser, set_parent=set_parent)
node.old_children = node.children
node.children = []
return node
def add_parser(self, parser, code):
return self.add_node(ParserNode(parser, code, self), True)
class FastParser(use_metaclass(CachedFastParser)): class FastParser(use_metaclass(CachedFastParser)):
def __init__(self, code, module_path=None, user_position=None): def __init__(self, code, module_path=None, user_position=None):
# set values like `pr.Module`. # set values like `pr.Module`.
self.module_path = module_path self.module_path = module_path
self.user_position = user_position self.user_position = user_position
self._user_scope = None
self.current_node = None
self.parsers = [] self.parsers = []
self.module = Module(self.parsers) self.module = Module(self.parsers)
self.reset_caches() self.reset_caches()
@@ -175,12 +199,12 @@ class FastParser(use_metaclass(CachedFastParser)):
if self._user_scope is None: if self._user_scope is None:
for p in self.parsers: for p in self.parsers:
if p.user_scope: if p.user_scope:
if self._user_scope is not None and not \ if isinstance(p.user_scope, pr.SubModule):
isinstance(self._user_scope, pr.SubModule):
continue continue
self._user_scope = p.user_scope self._user_scope = p.user_scope
if isinstance(self._user_scope, pr.SubModule): if isinstance(self._user_scope, pr.SubModule) \
or self._user_scope is None:
self._user_scope = self.module self._user_scope = self.module
return self._user_scope return self._user_scope
@@ -199,79 +223,201 @@ class FastParser(use_metaclass(CachedFastParser)):
self._parse(code) self._parse(code)
def scan_user_scope(self, sub_module): def _scan_user_scope(self, sub_module):
""" Scan with self.user_position. """ Scan with self.user_position. """
:type sub_module: pr.SubModule
"""
for scope in sub_module.statements + sub_module.subscopes: for scope in sub_module.statements + sub_module.subscopes:
if isinstance(scope, pr.Scope): if isinstance(scope, pr.Scope):
if scope.start_pos <= self.user_position <= scope.end_pos: if scope.start_pos <= self.user_position <= scope.end_pos:
return self.scan_user_scope(scope) or scope return self._scan_user_scope(scope) or scope
return None return None
def _split_parts(self, code):
"""
Split the code into different parts. This makes it possible to parse
each part seperately and therefore cache parts of the file and not
everything.
"""
def add_part():
txt = '\n'.join(current_lines)
if txt:
if add_to_last and parts:
parts[-1] += '\n' + txt
else:
parts.append(txt)
current_lines[:] = []
r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(common.FLOWS)
lines = code.splitlines()
current_lines = []
parts = []
is_decorator = False
current_indent = 0
old_indent = 0
new_indent = False
in_flow = False
add_to_last = False
# All things within flows are simply being ignored.
for i, l in enumerate(lines):
# check for dedents
m = re.match('^([\t ]*)(.?)', l)
indent = len(m.group(1))
if m.group(2) in ['', '#']:
current_lines.append(l) # just ignore comments and blank lines
continue
if indent < current_indent: # -> dedent
current_indent = indent
new_indent = False
if not in_flow or indent < old_indent:
add_part()
add_to_last = False
in_flow = False
elif new_indent:
current_indent = indent
new_indent = False
# Check lines for functions/classes and split the code there.
if not in_flow:
m = re.match(r_keyword, l)
if m:
in_flow = m.group(1) in common.FLOWS
if not is_decorator and not in_flow:
add_part()
add_to_last = False
is_decorator = '@' == m.group(1)
if not is_decorator:
old_indent = current_indent
current_indent += 1 # it must be higher
new_indent = True
elif is_decorator:
is_decorator = False
add_to_last = True
current_lines.append(l)
add_part()
return parts
def _parse(self, code): def _parse(self, code):
""" :type code: str """ """ :type code: str """
r = r'(?:\n(?:def|class|@.*?\n(?:def|class))|^).*?' \ def empty_parser():
r'(?=\n(?:def|class|@)|$)' new, temp = self._get_parser('', '', 0, [])
parts = re.findall(r, code, re.DOTALL) return new
if len(parts) > 1 and not re.match('def|class|@', parts[0]): parts = self._split_parts(code)
# Merge the first two because `common.NoErrorTokenizer` is not able self.parsers[:] = []
# to know if there's a class/func or not.
# Therefore every part has it's own class/func. Exactly one.
parts[0] += parts[1]
parts.pop(1)
if settings.fast_parser_always_reparse:
self.parsers[:] = []
# dict comprehensions are not available in 2.6 :-(
hashes = dict((p.hash, p) for p in self.parsers)
line_offset = 0 line_offset = 0
start = 0 start = 0
p = None p = None
parser_order = 0 is_first = True
for code_part in parts: for code_part in parts:
lines = code_part.count('\n') lines = code_part.count('\n') + 1
# the parser is using additional newlines, therefore substract if is_first or line_offset >= p.end_pos[0]:
if p is None or line_offset >= p.end_pos[0] - 2: indent = len(re.match(r'[ \t]*', code_part).group(0))
# check if code_part has already been parsed if is_first and self.current_node is not None:
h = hash(code_part) nodes = [self.current_node]
if h in hashes and hashes[h].code == code_part:
p = hashes[h]
del hashes[h]
m = p.module
m.line_offset += line_offset + 1 - m.start_pos[0]
if self.user_position is not None and \
m.start_pos <= self.user_position <= m.end_pos:
# It's important to take care of the whole user
# positioning stuff, if no reparsing is being done.
p.user_stmt = m.get_statement_for_position(
self.user_position, include_imports=True)
if p.user_stmt:
p.user_scope = p.user_stmt.parent
else:
p.user_scope = self.scan_user_scope(m) \
or self.module
else: else:
p = parsing.Parser(code[start:], nodes = []
self.module_path, self.user_position, if self.current_node is not None:
offset=(line_offset, 0), stop_on_scope=True,
top_module=self.module)
p.hash = h self.current_node = \
p.code = code_part self.current_node.parent_until_indent(indent)
p.module.parent = self.module nodes += self.current_node.old_children
self.parsers.insert(parser_order, p)
# check if code_part has already been parsed
#print '#'*45,line_offset, p and p.end_pos, '\n', code_part
p, node = self._get_parser(code_part, code[start:],
line_offset, nodes)
if is_first and p.module.subscopes:
# special case, we cannot use a function subscope as a
# base scope, subscopes would save all the other contents
new = empty_parser()
if self.current_node is None:
self.current_node = ParserNode(new, '')
else:
self.current_node.save_contents(new)
self.parsers.append(new)
is_first = False
if is_first:
if self.current_node is None:
self.current_node = ParserNode(p, code_part)
else:
self.current_node.save_contents(p)
else:
if node is None:
self.current_node = \
self.current_node.add_parser(p, code_part)
else:
self.current_node = self.current_node.add_node(node)
if self.current_node.parent and (isinstance(p.user_scope,
pr.SubModule) or p.user_scope is None) \
and self.user_position \
and p.start_pos <= self.user_position < p.end_pos:
p.user_scope = self.current_node.parent.content_scope
self.parsers.append(p)
is_first = False
else:
#print '#'*45, line_offset, p.end_pos, 'theheck\n', code_part
pass
parser_order += 1
line_offset += lines line_offset += lines
start += len(code_part) start += len(code_part) + 1 # +1 for newline
self.parsers[parser_order + 1:] = []
if self.parsers:
self.current_node = self.current_node.parent_until_indent()
else:
self.parsers.append(empty_parser())
self.module.end_pos = self.parsers[-1].end_pos
#print(self.parsers[0].module.get_code())
del code
def _get_parser(self, code, parser_code, line_offset, nodes):
h = hash(code)
hashes = [n.hash for n in nodes]
node = None
try:
index = hashes.index(h)
if nodes[index].code != code:
raise ValueError()
except ValueError:
p = parsing.Parser(parser_code, self.module_path,
self.user_position, offset=(line_offset, 0),
is_fast_parser=True, top_module=self.module)
p.module.parent = self.module
else:
if nodes[index] != self.current_node:
offset = int(nodes[0] == self.current_node)
self.current_node.old_children.pop(index - offset)
node = nodes.pop(index)
p = node.parser
m = p.module
m.line_offset += line_offset + 1 - m.start_pos[0]
if self.user_position is not None and \
m.start_pos[0] <= self.user_position[0] <= m.end_pos[0]:
# It's important to take care of the whole user
# positioning stuff, if no reparsing is being done.
p.user_stmt = m.get_statement_for_position(
self.user_position, include_imports=True)
if p.user_stmt:
p.user_scope = p.user_stmt.parent
else:
p.user_scope = self._scan_user_scope(m) or m
return p, node
def reset_caches(self): def reset_caches(self):
self._user_scope = None self._user_scope = None
self._user_stmt = None self._user_stmt = None
self.module.reset_caches() self.module.reset_caches()
if self.current_node is not None:
self.current_node.reset_contents()
+20 -13
View File
@@ -42,29 +42,27 @@ class Parser(object):
:param user_position: The line/column, the user is currently on. :param user_position: The line/column, the user is currently on.
:type user_position: tuple(int, int) :type user_position: tuple(int, int)
:param no_docstr: If True, a string at the beginning is not a docstr. :param no_docstr: If True, a string at the beginning is not a docstr.
:param stop_on_scope: Stop if a scope appears -> for fast_parser :param is_fast_parser: -> for fast_parser
:param top_module: Use this module as a parent instead of `self.module`. :param top_module: Use this module as a parent instead of `self.module`.
""" """
def __init__(self, source, module_path=None, user_position=None, def __init__(self, source, module_path=None, user_position=None,
no_docstr=False, offset=(0, 0), stop_on_scope=None, no_docstr=False, offset=(0, 0), is_fast_parser=None,
top_module=None): top_module=None):
self.user_position = user_position self.user_position = user_position
self.user_scope = None self.user_scope = None
self.user_stmt = None self.user_stmt = None
self.no_docstr = no_docstr self.no_docstr = no_docstr
self.start_pos = self.end_pos = 1 + offset[0], offset[1]
# initialize global Scope # initialize global Scope
self.module = pr.SubModule(module_path, (offset[0] + 1, offset[1]), self.module = pr.SubModule(module_path, self.start_pos, top_module)
top_module)
self.scope = self.module self.scope = self.module
self.current = (None, None) self.current = (None, None)
self.start_pos = 1, 0
self.end_pos = 1, 0
source = source + '\n' # end with \n, because the parser needs it source = source + '\n' # end with \n, because the parser needs it
buf = StringIO(source) buf = StringIO(source)
self._gen = common.NoErrorTokenizer(buf.readline, offset, self._gen = common.NoErrorTokenizer(buf.readline, offset,
stop_on_scope) is_fast_parser)
self.top_module = top_module or self.module self.top_module = top_module or self.module
try: try:
self._parse() self._parse()
@@ -80,6 +78,12 @@ class Parser(object):
# because of `self.module.used_names`. # because of `self.module.used_names`.
d.parent = self.module d.parent = self.module
if self.current[0] in (tokenize.NL, tokenize.NEWLINE):
# we added a newline before, so we need to "remove" it again.
self.end_pos = self._gen.previous[2]
if self.current[0] == tokenize.INDENT:
self.end_pos = self._gen.last_previous[2]
self.start_pos = self.module.start_pos self.start_pos = self.module.start_pos
self.module.end_pos = self.end_pos self.module.end_pos = self.end_pos
del self._gen del self._gen
@@ -171,8 +175,6 @@ class Parser(object):
while True: while True:
defunct = False defunct = False
token_type, tok = self.next() token_type, tok = self.next()
if token_type == tokenize.ENDMARKER:
break
if brackets and tok == '\n': if brackets and tok == '\n':
self.next() self.next()
if tok == '(': # python allows only one `(` in the statement. if tok == '(': # python allows only one `(` in the statement.
@@ -421,12 +423,18 @@ class Parser(object):
def __next__(self): def __next__(self):
""" Generate the next tokenize pattern. """ """ Generate the next tokenize pattern. """
try: try:
typ, tok, self.start_pos, self.end_pos, \ typ, tok, start_pos, end_pos, self.parserline = next(self._gen)
self.parserline = next(self._gen) # dedents shouldn't change positions
if typ != tokenize.DEDENT:
self.start_pos, self.end_pos = start_pos, end_pos
except (StopIteration, common.MultiLevelStopIteration): except (StopIteration, common.MultiLevelStopIteration):
# on finish, set end_pos correctly # on finish, set end_pos correctly
s = self.scope s = self.scope
while s is not None: while s is not None:
if isinstance(s, pr.Module) \
and not isinstance(s, pr.SubModule):
self.module.end_pos = self.end_pos
break
s.end_pos = self.end_pos s.end_pos = self.end_pos
s = s.parent s = s.parent
raise raise
@@ -662,7 +670,6 @@ class Parser(object):
self.freshscope = False self.freshscope = False
else: else:
if token_type not in [tokenize.COMMENT, tokenize.INDENT, if token_type not in [tokenize.COMMENT, tokenize.INDENT,
tokenize.NEWLINE, tokenize.NL, tokenize.NEWLINE, tokenize.NL]:
tokenize.ENDMARKER]:
debug.warning('token not classified', tok, token_type, debug.warning('token not classified', tok, token_type,
self.start_pos[0]) self.start_pos[0])
+19 -15
View File
@@ -22,7 +22,7 @@ The easiest way to play with this module is to use :class:`parsing.Parser`.
>>> parser = Parser('import os', 'example.py') >>> parser = Parser('import os', 'example.py')
>>> submodule = parser.scope >>> submodule = parser.scope
>>> submodule >>> submodule
<SubModule: example.py@1-2> <SubModule: example.py@1-1>
Any subclasses of :class:`Scope`, including :class:`SubModule` has Any subclasses of :class:`Scope`, including :class:`SubModule` has
attribute :attr:`imports <Scope.imports>`. This attribute has import attribute :attr:`imports <Scope.imports>`. This attribute has import
@@ -32,7 +32,6 @@ statements in this scope. Check this out:
[<Import: import os @1,0>] [<Import: import os @1,0>]
See also :attr:`Scope.subscopes` and :attr:`Scope.statements`. See also :attr:`Scope.subscopes` and :attr:`Scope.statements`.
""" """
from __future__ import with_statement from __future__ import with_statement
@@ -152,6 +151,10 @@ class Scope(Simple, IsScope):
self.statements = [] self.statements = []
self.docstr = '' self.docstr = ''
self.asserts = [] self.asserts = []
# Needed here for fast_parser, because the fast_parser splits and
# returns will be in "normal" modules.
self.returns = []
self.is_generator = False
def add_scope(self, sub, decorators): def add_scope(self, sub, decorators):
sub.parent = self.use_as_parent sub.parent = self.use_as_parent
@@ -196,15 +199,15 @@ class Scope(Simple, IsScope):
string = "" string = ""
if len(self.docstr) > 0: if len(self.docstr) > 0:
string += '"""' + self.docstr + '"""\n' string += '"""' + self.docstr + '"""\n'
for i in self.imports:
string += i.get_code()
for sub in self.subscopes:
string += sub.get_code(first_indent=True, indention=indention)
returns = self.returns if hasattr(self, 'returns') else [] objs = self.subscopes + self.imports + self.statements + self.returns
ret_str = '' if isinstance(self, Lambda) else 'return ' for obj in sorted(objs, key=lambda x: x.start_pos):
for stmt in self.statements + returns: if isinstance(obj, Scope):
string += (ret_str if stmt in returns else '') + stmt.get_code() string += obj.get_code(first_indent=True, indention=indention)
else:
if obj in self.returns and not isinstance(self, Lambda):
string += 'yield ' if self.is_generator else 'return '
string += obj.get_code()
if first_indent: if first_indent:
string = common.indent_block(string, indention=indention) string = common.indent_block(string, indention=indention)
@@ -399,7 +402,7 @@ class Class(Scope):
string = "\n".join('@' + stmt.get_code() for stmt in self.decorators) string = "\n".join('@' + stmt.get_code() for stmt in self.decorators)
string += 'class %s' % (self.name) string += 'class %s' % (self.name)
if len(self.supers) > 0: if len(self.supers) > 0:
sup = ','.join(stmt.get_code() for stmt in self.supers) sup = ', '.join(stmt.get_code(False) for stmt in self.supers)
string += '(%s)' % sup string += '(%s)' % sup
string += ':\n' string += ':\n'
string += super(Class, self).get_code(True, indention) string += super(Class, self).get_code(True, indention)
@@ -441,8 +444,6 @@ class Function(Scope):
p.parent = self.use_as_parent p.parent = self.use_as_parent
p.parent_function = self.use_as_parent p.parent_function = self.use_as_parent
self.decorators = [] self.decorators = []
self.returns = []
self.is_generator = False
self.listeners = set() # not used here, but in evaluation. self.listeners = set() # not used here, but in evaluation.
if annotation is not None: if annotation is not None:
@@ -451,13 +452,16 @@ class Function(Scope):
def get_code(self, first_indent=False, indention=' '): def get_code(self, first_indent=False, indention=' '):
string = "\n".join('@' + stmt.get_code() for stmt in self.decorators) string = "\n".join('@' + stmt.get_code() for stmt in self.decorators)
params = ','.join([stmt.get_code() for stmt in self.params]) params = ', '.join([stmt.get_code(False) for stmt in self.params])
string += "def %s(%s):\n" % (self.name, params) string += "def %s(%s):\n" % (self.name, params)
string += super(Function, self).get_code(True, indention) string += super(Function, self).get_code(True, indention)
if self.is_empty(): if self.is_empty():
string += "pass\n" string += 'pass\n'
return string return string
def is_empty(self):
return super(Function, self).is_empty() and not self.returns
def get_set_vars(self): def get_set_vars(self):
n = super(Function, self).get_set_vars() n = super(Function, self).get_set_vars()
for p in self.params: for p in self.params:
-7
View File
@@ -33,7 +33,6 @@ Parser
~~~~~~ ~~~~~~
.. autodata:: fast_parser .. autodata:: fast_parser
.. autodata:: fast_parser_always_reparse
.. autodata:: use_function_definition_cache .. autodata:: use_function_definition_cache
@@ -150,12 +149,6 @@ something has been changed e.g. to a function. If this happens, only the
function is being reparsed. function is being reparsed.
""" """
fast_parser_always_reparse = False
"""
This is just a debugging option. Always reparsing means that the fast parser
is basically useless. So don't use it.
"""
use_function_definition_cache = True use_function_definition_cache = True
""" """
Use the cache (full cache) to generate function_definition's. This may fail Use the cache (full cache) to generate function_definition's. This may fail
+16 -4
View File
@@ -126,6 +126,10 @@ class IntegrationTestCase(object):
self.path = path self.path = path
self.skip = None self.skip = None
@property
def module_name(self):
return re.sub('.*/|\.py', '', self.path)
def __repr__(self): def __repr__(self):
name = os.path.basename(self.path) if self.path else None name = os.path.basename(self.path) if self.path else None
return '<%s: %s:%s:%s>' % (self.__class__.__name__, return '<%s: %s:%s:%s>' % (self.__class__.__name__,
@@ -203,7 +207,7 @@ class IntegrationTestCase(object):
# this means that there is a module specified # this means that there is a module specified
wanted.append(pos_tup) wanted.append(pos_tup)
else: else:
wanted.append(('renaming', self.line_nr + pos_tup[0], wanted.append((self.module_name, self.line_nr + pos_tup[0],
pos_tup[1])) pos_tup[1]))
return compare_cb(self, compare, sorted(wanted)) return compare_cb(self, compare, sorted(wanted))
@@ -327,7 +331,8 @@ if __name__ == '__main__':
cases += collect_dir_tests(completion_test_dir, test_files, True) cases += collect_dir_tests(completion_test_dir, test_files, True)
def file_change(current, tests, fails): def file_change(current, tests, fails):
current = os.path.basename(current) if current is not None:
current = os.path.basename(current)
print('%s \t\t %s tests and %s fails.' % (current, tests, fails)) print('%s \t\t %s tests and %s fails.' % (current, tests, fails))
def report(case, actual, desired): def report(case, actual, desired):
@@ -335,13 +340,20 @@ if __name__ == '__main__':
return 0 return 0
else: else:
print("\ttest fail @%d, actual = %s, desired = %s" print("\ttest fail @%d, actual = %s, desired = %s"
% (case.line_nr, actual, desired)) % (case.line_nr - 1, actual, desired))
return 1 return 1
import traceback
current = cases[0].path if cases else None current = cases[0].path if cases else None
count = fails = 0 count = fails = 0
for c in cases: for c in cases:
if c.run(report): try:
if c.run(report):
tests_fail += 1
fails += 1
except Exception:
traceback.print_exc()
print("\ttest fail @%d" % (c.line_nr - 1))
tests_fail += 1 tests_fail += 1
fails += 1 fails += 1
count += 1 count += 1
+6 -2
View File
@@ -18,6 +18,8 @@ from jedi._compatibility import utf8, unicode
from jedi import api from jedi import api
api_classes = api.api_classes api_classes = api.api_classes
import pytest
#jedi.set_debug_function(jedi.debug.print_to_stdout) #jedi.set_debug_function(jedi.debug.print_to_stdout)
@@ -296,10 +298,12 @@ class TestRegression(TestBase):
def test_unicode_attribute(self): def test_unicode_attribute(self):
""" github jedi-vim issue #94 """ """ github jedi-vim issue #94 """
s1 = utf8('#-*- coding: utf-8 -*-\nclass Person():\n name = "e"\n\nPerson().name.') s1 = utf8('#-*- coding: utf-8 -*-\nclass Person():\n'
' name = "e"\n\nPerson().name.')
completions1 = self.complete(s1) completions1 = self.complete(s1)
assert 'strip' in [c.word for c in completions1] assert 'strip' in [c.word for c in completions1]
s2 = utf8('#-*- coding: utf-8 -*-\nclass Person():\n name = "é"\n\nPerson().name.') s2 = utf8('#-*- coding: utf-8 -*-\nclass Person():\n'
' name = "é"\n\nPerson().name.')
completions2 = self.complete(s2) completions2 = self.complete(s2)
assert 'strip' in [c.word for c in completions2] assert 'strip' in [c.word for c in completions2]