forked from VimPlug/jedi

Merge branch 'dev' into get_code_fidelity

Conflicts:
	jedi/evaluate/docstrings.py
	jedi/parser/representation.py

Resolving merge problems:
* Introducing docstring for compiled.fake
* Partly fixing poor decision in TokenDocstring __init__
Jean-Louis Fuchs committed on 2014-01-28 00:37:06 +01:00
74 changed files with 4318 additions and 4322 deletions


@@ -15,9 +15,6 @@ within the statement. This lowers memory usage and cpu time and reduces the
complexity of the ``Parser`` (there's another parser sitting inside
``Statement``, which produces ``Array`` and ``Call``).
"""
from __future__ import with_statement
import tokenizer as tokenize
import keyword
from jedi._compatibility import next, StringIO
@@ -25,6 +22,7 @@ from jedi import debug
from jedi import common
from jedi.parser import representation as pr
from jedi.parser import token as token_pr
from jedi.parser import tokenize
class Parser(object):
@@ -36,18 +34,12 @@ class Parser(object):
:type source: str
:param module_path: The path of the module in the file system, may be None.
:type module_path: str
:param user_position: The line/column, the user is currently on.
:type user_position: tuple(int, int)
:param no_docstr: If True, a string at the beginning is not a docstr.
:param is_fast_parser: -> for fast_parser
:param top_module: Use this module as a parent instead of `self.module`.
"""
def __init__(self, source, module_path=None, user_position=None,
no_docstr=False, offset=(0, 0), is_fast_parser=None,
top_module=None):
self.user_position = user_position
self.user_scope = None
self.user_stmt = None
def __init__(self, source, module_path=None, no_docstr=False,
offset=(0, 0), is_fast_parser=None, top_module=None):
self.no_docstr = no_docstr
self.start_pos = self.end_pos = 1 + offset[0], offset[1]
@@ -58,8 +50,7 @@ class Parser(object):
source = source + '\n' # end with \n, because the parser needs it
buf = StringIO(source)
self._gen = common.NoErrorTokenizer(buf.readline, offset,
is_fast_parser)
self._gen = tokenize.NoErrorTokenizer(buf.readline, offset, is_fast_parser)
self.top_module = top_module or self.module
try:
self._parse()
@@ -97,19 +88,6 @@ class Parser(object):
self.module.used_names[tok_name] = set([simple])
self.module.temp_used_names = []
if not self.user_position:
return
# the position is right
if simple.start_pos <= self.user_position <= simple.end_pos:
if self.user_stmt is not None:
# if there is already a user position (another import, because
# imports are splitted) the names are checked.
for n in simple.get_set_vars():
if n.start_pos < self.user_position <= n.end_pos:
self.user_stmt = simple
else:
self.user_stmt = simple
def _parse_dot_name(self, pre_used_token=None):
"""
The dot name parser parses a name, variable or function and returns
@@ -255,11 +233,7 @@ class Parser(object):
return None
# because of 2 line func param definitions
scope = pr.Function(self.module, fname, params, first_pos, annotation)
if self.user_scope and scope != self.user_scope \
and self.user_position > first_pos:
self.user_scope = scope
return scope
return pr.Function(self.module, fname, params, first_pos, annotation)
def _parse_class(self):
"""
@@ -272,11 +246,8 @@ class Parser(object):
first_pos = self.start_pos
token_type, cname = self.next()
if token_type != tokenize.NAME:
debug.warning(
"class: syntax err, token is not a name@%s (%s: %s)" % (
self.start_pos[0], tokenize.tok_name[token_type], cname
)
)
debug.warning("class: syntax err, token is not a name@%s (%s: %s)",
self.start_pos[0], tokenize.tok_name[token_type], cname)
return None
cname = pr.Name(self.module, [(cname, self.start_pos)], self.start_pos,
@@ -289,15 +260,10 @@ class Parser(object):
token_type, _next = self.next()
if _next != ':':
debug.warning("class syntax: %s@%s" % (cname, self.start_pos[0]))
debug.warning("class syntax: %s@%s", cname, self.start_pos[0])
return None
# because of 2 line class initializations
scope = pr.Class(self.module, cname, super, first_pos)
if self.user_scope and scope != self.user_scope \
and self.user_position > first_pos:
self.user_scope = scope
return scope
return pr.Class(self.module, cname, super, first_pos)
def _parse_statement(self, pre_used_token=None, added_breaks=None,
stmt_class=pr.Statement, names_are_set_vars=False):
@@ -448,15 +414,6 @@ class Parser(object):
s = s.parent
raise
if self.user_position and (
self.start_pos[0] == self.user_position[0]
or self.user_scope is None
and self.start_pos[0] >= self.user_position[0]
):
debug.dbg('user scope found [%s] = %s' %
(self.parserline.replace('\n', ''), repr(self._scope)))
self.user_scope = self._scope
self._current = typ, tok
return self._current
@@ -480,8 +437,8 @@ class Parser(object):
# This iterator stuff is not intentional. It grew historically.
for token_type, tok in self.iterator:
self.module.temp_used_names = []
# debug.dbg('main: tok=[%s] type=[%s] indent=[%s]'\
# % (tok, tokenize.tok_name[token_type], start_position[0]))
# debug.dbg('main: tok=[%s] type=[%s] indent=[%s]', \
# tok, tokenize.tok_name[token_type], start_position[0])
while token_type == tokenize.DEDENT and self._scope != self.module:
token_type, tok = self.next()
@@ -511,8 +468,7 @@ class Parser(object):
if tok == 'def':
func = self._parse_function()
if func is None:
debug.warning("function: syntax error@%s" %
self.start_pos[0])
debug.warning("function: syntax error@%s", self.start_pos[0])
continue
self.freshscope = True
self._scope = self._scope.add_scope(func, self._decorators)
@@ -556,7 +512,7 @@ class Parser(object):
tok = 'import'
mod = None
if not mod and not relative_count or tok != "import":
debug.warning("from: syntax error@%s" % self.start_pos[0])
debug.warning("from: syntax error@%s", self.start_pos[0])
defunct = True
if tok != 'import':
self._gen.push_last_back()
@@ -577,25 +533,18 @@ class Parser(object):
elif tok == 'for':
set_stmt, tok = self._parse_statement(added_breaks=['in'],
names_are_set_vars=True)
if tok == 'in':
statement, tok = self._parse_statement()
if tok == ':':
s = [] if statement is None else [statement]
f = pr.ForFlow(self.module, s, first_pos, set_stmt)
self._scope = self._scope.add_statement(f)
else:
debug.warning('syntax err, for flow started @%s',
self.start_pos[0])
if statement is not None:
statement.parent = use_as_parent_scope
if set_stmt is not None:
set_stmt.parent = use_as_parent_scope
else:
debug.warning('syntax err, for flow incomplete @%s',
self.start_pos[0])
if set_stmt is not None:
set_stmt.parent = use_as_parent_scope
if tok != 'in':
debug.warning('syntax err, for flow incomplete @%s', self.start_pos[0])
try:
statement, tok = self._parse_statement()
except StopIteration:
statement, tok = None, None
s = [] if statement is None else [statement]
f = pr.ForFlow(self.module, s, first_pos, set_stmt)
self._scope = self._scope.add_statement(f)
if tok != ':':
debug.warning('syntax err, for flow started @%s', self.start_pos[0])
elif tok in ['if', 'while', 'try', 'with'] + extended_flow:
added_breaks = []
command = tok
@@ -604,8 +553,7 @@ class Parser(object):
# multiple inputs because of with
inputs = []
first = True
while first or command == 'with' \
and tok not in [':', '\n']:
while first or command == 'with' and tok not in [':', '\n']:
statement, tok = \
self._parse_statement(added_breaks=added_breaks)
if command == 'except' and tok == ',':
@@ -619,25 +567,21 @@ class Parser(object):
inputs.append(statement)
first = False
if tok == ':':
f = pr.Flow(self.module, command, inputs, first_pos)
if command in extended_flow:
# the last statement has to be another part of
# the flow statement, because a dedent releases the
# main scope, so just take the last statement.
try:
s = self._scope.statements[-1].set_next(f)
except (AttributeError, IndexError):
# If set_next doesn't exist, just add it.
s = self._scope.add_statement(f)
else:
f = pr.Flow(self.module, command, inputs, first_pos)
if command in extended_flow:
# the last statement has to be another part of
# the flow statement, because a dedent releases the
# main scope, so just take the last statement.
try:
s = self._scope.statements[-1].set_next(f)
except (AttributeError, IndexError):
# If set_next doesn't exist, just add it.
s = self._scope.add_statement(f)
self._scope = s
else:
for i in inputs:
i.parent = use_as_parent_scope
debug.warning('syntax err, flow started @%s',
self.start_pos[0])
s = self._scope.add_statement(f)
self._scope = s
if tok != ':':
debug.warning('syntax err, flow started @%s', self.start_pos[0])
# returns
elif tok in ['return', 'yield']:
s = self.start_pos
@@ -692,7 +636,7 @@ class Parser(object):
else:
if token_type not in [tokenize.COMMENT, tokenize.INDENT,
tokenize.NEWLINE, tokenize.NL]:
debug.warning('token not classified', tok, token_type,
self.start_pos[0])
debug.warning('token not classified %s %s %s', tok,
token_type, self.start_pos[0])
continue
self.no_docstr = False
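A pattern worth noting across this file: every debug.warning call moves from eager %-interpolation to passing the format arguments separately (e.g. debug.warning("from: syntax error@%s", self.start_pos[0])), so the message string is only built if the warning is actually emitted. A minimal standalone sketch of that lazy-formatting idea; the helper below is illustrative, not jedi's actual debug module:

    enable_warning = True

    def warning(message, *args):
        # Interpolate only when the warning is really printed, so disabled
        # debugging costs no string formatting.
        if enable_warning:
            print('warning: ' + (message % args if args else message))

    warning("function: syntax error@%s", 42)  # -> warning: function: syntax error@42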


@@ -7,13 +7,11 @@ import re
from jedi._compatibility import use_metaclass
from jedi import settings
from jedi import common
from jedi.parser import Parser
from jedi.parser import representation as pr
from jedi.parser import tokenize
from jedi import cache
from jedi import common
SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns']
class Module(pr.Simple, pr.Module):
@@ -28,10 +26,8 @@ class Module(pr.Simple, pr.Module):
def reset_caches(self):
""" This module does a whole lot of caching, because it uses different
parsers. """
self._used_names = None
for p in self.parsers:
p.user_scope = None
p.user_stmt = None
with common.ignored(AttributeError):
del self._used_names
def __getattr__(self, name):
if name.startswith('__'):
@@ -40,18 +36,16 @@ class Module(pr.Simple, pr.Module):
return getattr(self.parsers[0].module, name)
@property
@cache.underscore_memoization
def used_names(self):
if self._used_names is None:
dct = {}
for p in self.parsers:
for k, statement_set in p.module.used_names.items():
if k in dct:
dct[k] |= statement_set
else:
dct[k] = set(statement_set)
self._used_names = dct
return self._used_names
used_names = {}
for p in self.parsers:
for k, statement_set in p.module.used_names.items():
if k in used_names:
used_names[k] |= statement_set
else:
used_names[k] = set(statement_set)
return used_names
def __repr__(self):
return "<%s: %s@%s-%s>" % (type(self).__name__, self.name,
@@ -60,17 +54,16 @@ class Module(pr.Simple, pr.Module):
class CachedFastParser(type):
""" This is a metaclass for caching `FastParser`. """
def __call__(self, source, module_path=None, user_position=None):
def __call__(self, source, module_path=None):
if not settings.fast_parser:
return Parser(source, module_path, user_position)
return Parser(source, module_path)
pi = cache.parser_cache.get(module_path, None)
if pi is None or isinstance(pi.parser, Parser):
p = super(CachedFastParser, self).__call__(source, module_path,
user_position)
p = super(CachedFastParser, self).__call__(source, module_path)
else:
p = pi.parser # pi is a `cache.ParserCacheItem`
p.update(source, user_position)
p.update(source)
return p
@@ -95,7 +88,7 @@ class ParserNode(object):
scope = self.content_scope
self._contents = {}
for c in SCOPE_CONTENTS:
for c in pr.SCOPE_CONTENTS:
self._contents[c] = list(getattr(scope, c))
self._is_generator = scope.is_generator
@@ -107,7 +100,6 @@ class ParserNode(object):
for key, c in self._contents.items():
setattr(scope, key, list(c))
scope.is_generator = self._is_generator
self.parser.user_scope = self.parser.module
if self.parent is None:
# Global vars of the first one can be deleted, in the global scope
@@ -147,7 +139,7 @@ class ParserNode(object):
def _set_items(self, parser, set_parent=False):
# insert parser objects into current structure
scope = self.content_scope
for c in SCOPE_CONTENTS:
for c in pr.SCOPE_CONTENTS:
content = getattr(scope, c)
items = getattr(parser.module, c)
if set_parent:
@@ -174,6 +166,11 @@ class ParserNode(object):
self._set_items(node.parser, set_parent=set_parent)
node.old_children = node.children
node.children = []
scope = self.content_scope
while scope is not None:
scope.end_pos = node.content_scope.end_pos
scope = scope.parent
return node
def add_parser(self, parser, code):
@@ -181,11 +178,9 @@ class ParserNode(object):
class FastParser(use_metaclass(CachedFastParser)):
def __init__(self, code, module_path=None, user_position=None):
def __init__(self, code, module_path=None):
# set values like `pr.Module`.
self.module_path = module_path
self.user_position = user_position
self._user_scope = None
self.current_node = None
self.parsers = []
@@ -199,34 +194,9 @@ class FastParser(use_metaclass(CachedFastParser)):
self.parsers[:] = []
raise
@property
def user_scope(self):
if self._user_scope is None:
for p in self.parsers:
if p.user_scope:
if isinstance(p.user_scope, pr.SubModule):
continue
self._user_scope = p.user_scope
if isinstance(self._user_scope, pr.SubModule) \
or self._user_scope is None:
self._user_scope = self.module
return self._user_scope
@property
def user_stmt(self):
if self._user_stmt is None:
for p in self.parsers:
if p.user_stmt:
self._user_stmt = p.user_stmt
break
return self._user_stmt
def update(self, code, user_position=None):
self.user_position = user_position
def update(self, code):
self.reset_caches()
try:
self._parse(code)
except:
@@ -234,14 +204,6 @@ class FastParser(use_metaclass(CachedFastParser)):
self.parsers[:] = []
raise
def _scan_user_scope(self, sub_module):
""" Scan with self.user_position. """
for scope in sub_module.statements + sub_module.subscopes:
if isinstance(scope, pr.Scope):
if scope.start_pos <= self.user_position <= scope.end_pos:
return self._scan_user_scope(scope) or scope
return None
def _split_parts(self, code):
"""
Split the code into different parts. This makes it possible to parse
@@ -257,7 +219,7 @@ class FastParser(use_metaclass(CachedFastParser)):
parts.append(txt)
current_lines[:] = []
r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(common.FLOWS)
r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(tokenize.FLOWS)
self._lines = code.splitlines()
current_lines = []
@@ -292,7 +254,7 @@ class FastParser(use_metaclass(CachedFastParser)):
if not in_flow:
m = re.match(r_keyword, l)
if m:
in_flow = m.group(1) in common.FLOWS
in_flow = m.group(1) in tokenize.FLOWS
if not is_decorator and not in_flow:
add_part()
add_to_last = False
@@ -372,12 +334,6 @@ class FastParser(use_metaclass(CachedFastParser)):
else:
self.current_node = self.current_node.add_node(node)
if self.current_node.parent and (isinstance(p.user_scope,
pr.SubModule) or p.user_scope is None) \
and self.user_position \
and p.start_pos <= self.user_position < p.end_pos:
p.user_scope = self.current_node.parent.content_scope
self.parsers.append(p)
is_first = False
@@ -406,10 +362,9 @@ class FastParser(use_metaclass(CachedFastParser)):
if nodes[index].code != code:
raise ValueError()
except ValueError:
p = Parser(parser_code, self.module_path,
self.user_position, offset=(line_offset, 0),
is_fast_parser=True, top_module=self.module,
no_docstr=no_docstr)
p = Parser(parser_code, self.module_path, offset=(line_offset, 0),
is_fast_parser=True, top_module=self.module,
no_docstr=no_docstr)
p.module.parent = self.module
else:
if nodes[index] != self.current_node:
@@ -419,22 +374,10 @@ class FastParser(use_metaclass(CachedFastParser)):
p = node.parser
m = p.module
m.line_offset += line_offset + 1 - m.start_pos[0]
if self.user_position is not None and \
m.start_pos[0] <= self.user_position[0] <= m.end_pos[0]:
# It's important to take care of the whole user
# positioning stuff, if no reparsing is being done.
p.user_stmt = m.get_statement_for_position(
self.user_position, include_imports=True)
if p.user_stmt:
p.user_scope = p.user_stmt.parent
else:
p.user_scope = self._scan_user_scope(m) or m
return p, node
def reset_caches(self):
self._user_scope = None
self._user_stmt = None
self.module.reset_caches()
if self.current_node is not None:
self.current_node.reset_contents()
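The manual _used_names cache and its reset bookkeeping are replaced by @cache.underscore_memoization plus a del self._used_names inside common.ignored(AttributeError). A sketch of the core pattern such a decorator implements; jedi's real version lives in jedi/cache.py and covers more cases, so treat this as an illustration only:

    def underscore_memoization(func):
        # Cache the result under self._<name>; deleting that attribute
        # (as reset_caches now does) invalidates the cache.
        name = '_' + func.__name__

        def wrapper(self):
            try:
                return getattr(self, name)
            except AttributeError:
                result = func(self)
                setattr(self, name, result)
                return result
        return wrapper

    class Demo(object):
        @property
        @underscore_memoization
        def used_names(self):
            print('computed once')
            return {'x'}

    d = Demo()
    d.used_names; d.used_names  # 'computed once' prints a single time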


@@ -33,20 +33,23 @@ statements in this scope. Check this out:
See also :attr:`Scope.subscopes` and :attr:`Scope.statements`.
"""
from __future__ import with_statement
import os
import re
import tokenizer as tokenize
from inspect import cleandoc
from ast import literal_eval
from jedi._compatibility import next, Python3Method, encoding, unicode, is_py3k
from jedi._compatibility import next, Python3Method, encoding, unicode, is_py3
from jedi import common
from jedi import debug
from jedi import cache
from jedi.parser import tokenize
from jedi.parser import token as token_pr
SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns']
class GetCodeState(object):
"""A helper class for passing the state of get_code in a thread-safe
manner"""
@@ -55,7 +58,6 @@ class GetCodeState(object):
def __init__(self):
self.last_pos = (0, 0)
class Base(object):
"""
This is just here to have an isinstance check, which is also used on
@@ -156,7 +158,7 @@ class Simple(Base):
def __repr__(self):
code = self.get_code().replace('\n', ' ')
if not is_py3k:
if not is_py3:
code = code.encode(encoding, 'replace')
return "<%s: %s@%s,%s>" % \
(type(self).__name__, code, self.start_pos[0], self.start_pos[1])
@@ -313,6 +315,10 @@ class Scope(Simple, IsScope):
if self.isinstance(Function):
checks += self.params + self.decorators
checks += [r for r in self.returns if r is not None]
if self.isinstance(Flow):
checks += self.inputs
if self.isinstance(ForFlow) and self.set_stmt is not None:
checks.append(self.set_stmt)
for s in checks:
if isinstance(s, Flow):
@@ -368,7 +374,6 @@ class SubModule(Scope, Module):
super(SubModule, self).__init__(self, start_pos)
self.path = path
self.global_vars = []
self._name = None
self.used_names = {}
self.temp_used_names = []
# this may be changed depending on fast_parser
@@ -394,25 +399,19 @@ class SubModule(Scope, Module):
return n
@property
@cache.underscore_memoization
def name(self):
""" This is used for the goto functions. """
if self._name is not None:
return self._name
if self.path is None:
string = '' # no path -> empty name
else:
sep = (re.escape(os.path.sep),) * 2
r = re.search(r'([^%s]*?)(%s__init__)?(\.py|\.so)?$' % sep,
self.path)
r = re.search(r'([^%s]*?)(%s__init__)?(\.py|\.so)?$' % sep, self.path)
# remove PEP 3149 names
string = re.sub('\.[a-z]+-\d{2}[mud]{0,3}$', '', r.group(1))
# positions are not real therefore choose (0, 0)
names = [(string, (0, 0))]
self._name = Name(self, names, (0, 0), (0, 0), self.use_as_parent)
return self._name
def is_builtin(self):
return not (self.path is None or self.path.endswith('.py'))
return Name(self, names, (0, 0), (0, 0), self.use_as_parent)
@property
def has_explicit_absolute_import(self):
@@ -525,7 +524,7 @@ class Function(Scope):
try:
n.append(p.get_name())
except IndexError:
debug.warning("multiple names in param %s" % n)
debug.warning("multiple names in param %s", n)
return n
def get_call_signature(self, width=72, funcname=None):
@@ -795,7 +794,7 @@ class Statement(Simple):
:type start_pos: 2-tuple of int
:param start_pos: Position (line, column) of the Statement.
"""
__slots__ = ('token_list', '_set_vars', 'as_names', '_commands',
__slots__ = ('token_list', '_set_vars', 'as_names', '_expression_list',
'_assignment_details', 'docstr', '_names_are_set_vars')
def __init__(self, module, token_list, start_pos, end_pos, parent=None,
@@ -817,7 +816,6 @@ class Statement(Simple):
self.as_names = list(as_names)
# cache
self._commands = None
self._assignment_details = []
# this is important for other scripts
@@ -834,7 +832,7 @@ class Statement(Simple):
return '%s %s ' % (''.join(pieces), assignment)
code = ''.join(assemble(*a) for a in self.assignment_details)
code += assemble(self.get_commands())
code += assemble(self.expression_list())
if self.docstr:
code += '\n"""%s"""' % self.docstr.as_string()
@@ -846,12 +844,12 @@ class Statement(Simple):
def get_set_vars(self):
""" Get the names for the statement. """
if self._set_vars is None:
self._set_vars = []
def search_calls(calls):
for call in calls:
if isinstance(call, Array):
for stmt in call:
search_calls(stmt.get_commands())
search_calls(stmt.expression_list())
elif isinstance(call, Call):
c = call
# Check if there's an execution in it, if so this is
@@ -865,12 +863,13 @@ class Statement(Simple):
continue
self._set_vars.append(call.name)
self._set_vars = []
for calls, operation in self.assignment_details:
search_calls(calls)
if not self.assignment_details and self._names_are_set_vars:
# In the case of Param, it's also a defining name without ``=``
search_calls(self.get_commands())
search_calls(self.expression_list())
return self._set_vars + self.as_names
def is_global(self):
@@ -889,17 +888,14 @@ class Statement(Simple):
would result in ``[(Name(x), '='), (Array([Name(y), Name(z)]), '=')]``.
"""
# parse statement which creates the assignment details.
self.get_commands()
self.expression_list()
return self._assignment_details
def get_commands(self):
if self._commands is None:
self._commands = ['time neeeeed'] # avoid recursions
self._commands = self._parse_statement()
return self._commands
def _parse_statement(self):
@cache.underscore_memoization
def expression_list(self):
"""
Parse a statement.
This is not done in the main parser, because it might be slow and
most of the statements won't need this data anyway. This is something
'like' a lazy execution.
@@ -966,10 +962,10 @@ class Statement(Simple):
# it's not possible to set it earlier
tok.parent = self
else:
tok = tok_temp.token
tok = tok_temp.token
start_tok_pos = tok_temp.start_pos
last_end_pos = end_pos
end_pos = tok_temp.end_pos
last_end_pos = end_pos
end_pos = tok_temp.end_pos
if first:
first = False
start_pos = start_tok_pos
@@ -1062,7 +1058,7 @@ class Statement(Simple):
stmt = Statement(self._sub_module, token_list,
start_pos, arr.end_pos)
arr.parent = stmt
stmt.token_list = stmt._commands = [arr]
stmt.token_list = stmt._expression_list = [arr]
else:
for t in stmt.token_list:
if isinstance(t, Name):
@@ -1075,12 +1071,12 @@ class Statement(Simple):
middle, tok = parse_stmt_or_arr(token_iterator, ['in'], True)
if tok != 'in' or middle is None:
debug.warning('list comprehension middle @%s' % str(start_pos))
debug.warning('list comprehension middle @%s', start_pos)
return None, tok
in_clause, tok = parse_stmt_or_arr(token_iterator)
if in_clause is None:
debug.warning('list comprehension in @%s' % str(start_pos))
debug.warning('list comprehension in @%s', start_pos)
return None, tok
return ListComprehension(st, middle, in_clause, self), tok
@@ -1101,9 +1097,9 @@ class Statement(Simple):
end_pos = tok.end_pos
else:
token_type = tok_temp.token_type
tok = tok_temp.token
start_pos = tok_temp.start_pos
end_pos = tok_temp.end_pos
tok = tok_temp.token
start_pos = tok_temp.start_pos
end_pos = tok_temp.end_pos
if is_assignment(tok):
# This means, there is an assignment here.
# Add assignments, which can be more than one
@@ -1126,9 +1122,7 @@ class Statement(Simple):
is_literal = token_type in [tokenize.STRING, tokenize.NUMBER]
if isinstance(tok, Name) or is_literal:
cls = Call
if is_literal:
cls = String if token_type == tokenize.STRING else Number
cls = Literal if is_literal else Call
call = cls(self._sub_module, tok, start_pos, end_pos, self)
if is_chain:
@@ -1149,7 +1143,7 @@ class Statement(Simple):
if result and isinstance(result[-1], StatementElement):
is_chain = True
elif tok == ',': # implies a tuple
# commands is now an array not a statement anymore
# expression is now an array not a statement anymore
t = result[0]
start_pos = t[2] if isinstance(t, tuple) else t.start_pos
@@ -1172,7 +1166,7 @@ class Statement(Simple):
self.parent,
set_name_parents=False
)
stmt._commands = result
stmt._expression_list = result
arr, break_tok = parse_array(token_iterator, Array.TUPLE,
stmt.start_pos, stmt)
result = [arr]
@@ -1213,7 +1207,7 @@ class Param(Statement):
""" get the name of the param """
n = self.get_set_vars()
if len(n) > 1:
debug.warning("Multiple param names (%s)." % n)
debug.warning("Multiple param names (%s).", n)
return n[0]
@@ -1292,25 +1286,14 @@ class Literal(StatementElement):
def get_code(self):
return self.literal + super(Literal, self).get_code()
def type_as_string(self):
return type(self.value).__name__
def __repr__(self):
if is_py3k:
if is_py3:
s = self.literal
else:
s = self.literal.encode('ascii', 'replace')
return "<%s: %s>" % (type(self).__name__, s)
class String(Literal):
pass
class Number(Literal):
pass
class Array(StatementElement):
"""
Describes the different python types for an array, but also empty
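With the String and Number subclasses gone, a single Literal class (plus the new type_as_string) covers all literal tokens. A simplified standalone sketch of that idea, using the ast.literal_eval import this file gains above; attribute handling in the real class differs:

    from ast import literal_eval

    class Literal(object):
        def __init__(self, literal):
            self.literal = literal               # the raw token text
            self.value = literal_eval(literal)   # the evaluated Python object

        def type_as_string(self):
            return type(self.value).__name__

    assert Literal('42').type_as_string() == 'int'
    assert Literal('"abc"').type_as_string() == 'str'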


@@ -151,8 +151,16 @@ class TokenDocstring(TokenNoCompat):
as_string() will clean the token representing the docstring.
"""
def __init__(self, token):
self.__setstate__(token.__getstate__())
def __init__(self, token=None, state=None):
if token:
self.__setstate__(token.__getstate__())
else:
self.__setstate__(state)
@classmethod
def fake_docstring(cls, docstr):
# TODO: fixme when tests are up again
return TokenDocstring(state=(0, '"""\n%s\n"""' % docstr, 0, 0))
def as_string(self):
"""Returns a literal cleaned version of the token"""

jedi/parser/tokenize.py (new file, 387 lines)

@@ -0,0 +1,387 @@
"""
This tokenizer has been copied from the ``tokenize.py`` standard library
tokenizer. The reason was simple: the standard library tokenizer fails
if the indentation is not right. The fast parser of jedi, however, requires
"wrong" indentation.
Basically this is a stripped down version of the standard library module, so
you can read the documentation there.
"""
from __future__ import absolute_import
import string
import re
from token import *
import collections
cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
from jedi import common
namechars = string.ascii_letters + '_'
COMMENT = N_TOKENS
tok_name[COMMENT] = 'COMMENT'
NL = N_TOKENS + 1
tok_name[NL] = 'NL'
ENCODING = N_TOKENS + 2
tok_name[ENCODING] = 'ENCODING'
N_TOKENS += 3
class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
def __repr__(self):
annotated_type = '%d (%s)' % (self.type, tok_name[self.type])
return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' %
self._replace(type=annotated_type))
def group(*choices):
return '(' + '|'.join(choices) + ')'
def any(*choices):
return group(*choices) + '*'
def maybe(*choices):
return group(*choices) + '?'
# Note: we use unicode matching for names ("\w") but ascii matching for
# number literals.
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'\w+'
Hexnumber = r'0[xX][0-9a-fA-F]+'
Binnumber = r'0[bB][01]+'
Octnumber = r'0[oO][0-7]+'
Decnumber = r'(?:0+|[1-9][0-9]*)'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?[0-9]+'
Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
Expfloat = r'[0-9]+' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)
# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group("[bB]?[rR]?'''", '[bB]?[rR]?"""')
# Single-line ' or " string.
String = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
r"//=?", r"->",
r"[+\-*/%&|^=<>]=?",
r"~")
Bracket = '[][(){}]'
Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
Funny = group(Operator, Bracket, Special)
PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken
# First (or only) line of ' or " string.
ContStr = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
group("'", r'\\\r?\n'),
r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
def _compile(expr):
return re.compile(expr, re.UNICODE)
tokenprog, pseudoprog, single3prog, double3prog = map(
_compile, (Token, PseudoToken, Single3, Double3))
endprogs = {"'": _compile(Single), '"': _compile(Double),
"'''": single3prog, '"""': double3prog,
"r'''": single3prog, 'r"""': double3prog,
"b'''": single3prog, 'b"""': double3prog,
"br'''": single3prog, 'br"""': double3prog,
"R'''": single3prog, 'R"""': double3prog,
"B'''": single3prog, 'B"""': double3prog,
"bR'''": single3prog, 'bR"""': double3prog,
"Br'''": single3prog, 'Br"""': double3prog,
"BR'''": single3prog, 'BR"""': double3prog,
'r': None, 'R': None, 'b': None, 'B': None}
triple_quoted = {}
for t in ("'''", '"""',
"r'''", 'r"""', "R'''", 'R"""',
"b'''", 'b"""', "B'''", 'B"""',
"br'''", 'br"""', "Br'''", 'Br"""',
"bR'''", 'bR"""', "BR'''", 'BR"""'):
triple_quoted[t] = t
single_quoted = {}
for t in ("'", '"',
"r'", 'r"', "R'", 'R"',
"b'", 'b"', "B'", 'B"',
"br'", 'br"', "Br'", 'Br"',
"bR'", 'bR"', "BR'", 'BR"'):
single_quoted[t] = t
del _compile
tabsize = 8
class TokenError(Exception):
pass
def generate_tokens(readline):
lnum = parenlev = continued = 0
numchars = '0123456789'
contstr, needcont = '', 0
contline = None
indents = [0]
while True: # loop over lines in stream
try:
line = readline()
except StopIteration:
line = b''
lnum += 1
pos, max = 0, len(line)
if contstr: # continued string
if not line:
# multiline string has not been finished
break
endmatch = endprog.match(line)
if endmatch:
pos = end = endmatch.end(0)
yield TokenInfo(STRING, contstr + line[:end],
strstart, (lnum, end), contline + line)
contstr, needcont = '', 0
contline = None
elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
yield TokenInfo(ERRORTOKEN, contstr + line,
strstart, (lnum, len(line)), contline)
contstr = ''
contline = None
continue
else:
contstr = contstr + line
contline = contline + line
continue
elif parenlev == 0 and not continued: # new statement
if not line:
break
column = 0
while pos < max: # measure leading whitespace
if line[pos] == ' ':
column += 1
elif line[pos] == '\t':
column = (column // tabsize + 1) * tabsize
elif line[pos] == '\f':
column = 0
else:
break
pos += 1
if pos == max:
break
if line[pos] in '#\r\n': # skip comments or blank lines
if line[pos] == '#':
comment_token = line[pos:].rstrip('\r\n')
nl_pos = pos + len(comment_token)
yield TokenInfo(COMMENT, comment_token,
(lnum, pos), (lnum, pos + len(comment_token)), line)
yield TokenInfo(NL, line[nl_pos:],
(lnum, nl_pos), (lnum, len(line)), line)
else:
yield TokenInfo(
(NL, COMMENT)[line[pos] == '#'], line[pos:],
(lnum, pos), (lnum, len(line)), line)
continue
if column > indents[-1]: # count indents or dedents
indents.append(column)
yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
while column < indents[-1]:
indents = indents[:-1]
yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
else: # continued statement
if not line:
# basically a statement has not been finished here.
break
continued = 0
while pos < max:
pseudomatch = pseudoprog.match(line, pos)
if pseudomatch: # scan for tokens
start, end = pseudomatch.span(1)
spos, epos, pos = (lnum, start), (lnum, end), end
token, initial = line[start:end], line[start]
if (initial in numchars or # ordinary number
(initial == '.' and token != '.' and token != '...')):
yield TokenInfo(NUMBER, token, spos, epos, line)
elif initial in '\r\n':
yield TokenInfo(NL if parenlev > 0 else NEWLINE,
token, spos, epos, line)
elif initial == '#':
assert not token.endswith("\n")
yield TokenInfo(COMMENT, token, spos, epos, line)
elif token in triple_quoted:
endprog = endprogs[token]
endmatch = endprog.match(line, pos)
if endmatch: # all on one line
pos = endmatch.end(0)
token = line[start:pos]
yield TokenInfo(STRING, token, spos, (lnum, pos), line)
else:
strstart = (lnum, start) # multiple lines
contstr = line[start:]
contline = line
break
elif initial in single_quoted or \
token[:2] in single_quoted or \
token[:3] in single_quoted:
if token[-1] == '\n': # continued string
strstart = (lnum, start)
endprog = (endprogs[initial] or endprogs[token[1]] or
endprogs[token[2]])
contstr, needcont = line[start:], 1
contline = line
break
else: # ordinary string
yield TokenInfo(STRING, token, spos, epos, line)
elif initial in namechars: # ordinary name
yield TokenInfo(NAME, token, spos, epos, line)
elif initial == '\\': # continued stmt
continued = 1
else:
if initial in '([{':
parenlev += 1
elif initial in ')]}':
parenlev -= 1
yield TokenInfo(OP, token, spos, epos, line)
else:
yield TokenInfo(ERRORTOKEN, line[pos],
(lnum, pos), (lnum, pos + 1), line)
pos += 1
for indent in indents[1:]: # pop remaining indent levels
yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
# From here on we have custom stuff (everything before was originally Python
# internal code).
FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
class NoErrorTokenizer(object):
def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
self.readline = readline
self.gen = generate_tokens(readline)
self.offset = offset
self.closed = False
self.is_first = True
self.push_backs = []
# fast parser options
self.is_fast_parser = is_fast_parser
self.current = self.previous = [None, None, (0, 0), (0, 0), '']
self.in_flow = False
self.new_indent = False
self.parser_indent = self.old_parser_indent = 0
self.is_decorator = False
self.first_stmt = True
def push_last_back(self):
self.push_backs.append(self.current)
def next(self):
""" Python 2 Compatibility """
return self.__next__()
def __next__(self):
if self.closed:
raise common.MultiLevelStopIteration()
if self.push_backs:
return self.push_backs.pop(0)
self.last_previous = self.previous
self.previous = self.current
self.current = next(self.gen)
c = list(self.current)
if c[0] == ENDMARKER:
self.current = self.previous
self.previous = self.last_previous
raise common.MultiLevelStopIteration()
# this is exactly the same check as in fast_parser, but this time with
# tokenize and therefore precise.
breaks = ['def', 'class', '@']
if self.is_first:
c[2] = self.offset[0] + c[2][0], self.offset[1] + c[2][1]
c[3] = self.offset[0] + c[3][0], self.offset[1] + c[3][1]
self.is_first = False
else:
c[2] = self.offset[0] + c[2][0], c[2][1]
c[3] = self.offset[0] + c[3][0], c[3][1]
self.current = c
def close():
if not self.first_stmt:
self.closed = True
raise common.MultiLevelStopIteration()
# ignore indents/comments
if self.is_fast_parser \
and self.previous[0] in (INDENT, NL, None, NEWLINE, DEDENT) \
and c[0] not in (COMMENT, INDENT, NL, NEWLINE, DEDENT):
# print c, tok_name[c[0]]
tok = c[1]
indent = c[2][1]
if indent < self.parser_indent: # -> dedent
self.parser_indent = indent
self.new_indent = False
if not self.in_flow or indent < self.old_parser_indent:
close()
self.in_flow = False
elif self.new_indent:
self.parser_indent = indent
self.new_indent = False
if not self.in_flow:
if tok in FLOWS or tok in breaks:
self.in_flow = tok in FLOWS
if not self.is_decorator and not self.in_flow:
close()
self.is_decorator = '@' == tok
if not self.is_decorator:
self.old_parser_indent = self.parser_indent
self.parser_indent += 1 # new scope: must be higher
self.new_indent = True
if tok != '@':
if self.first_stmt and not self.new_indent:
self.parser_indent = indent
self.first_stmt = False
return c
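To see the tolerance this file exists for: the stdlib tokenizer raises IndentationError when a dedent lands on a column that matches no enclosing indent, while generate_tokens above just pops what it can and keeps yielding. A small demonstration, with io.StringIO standing in for the compatibility StringIO used elsewhere in this commit:

    from io import StringIO

    source = u"def f():\n        pass\n  pass\n"  # dedent to a bad column
    for token_type, token, start, end, line in generate_tokens(StringIO(source).readline):
        print(tok_name[token_type], repr(token))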

jedi/parser/user_context.py (new file, 232 lines)

@@ -0,0 +1,232 @@
import re
import os
import sys
from jedi import cache
from jedi.parser import tokenize
from jedi.parser.fast import FastParser
from jedi.parser import representation
from jedi import debug
class UserContext(object):
"""
:param source: The source code of the file.
:param position: The position the user is currently in. Only important \
for the main file.
"""
def __init__(self, source, position):
self.source = source
self.position = position
self._line_cache = None
# these two are only used because there is no nonlocal in Python 2
self._line_temp = None
self._relevant_temp = None
@cache.underscore_memoization
def get_path_until_cursor(self):
""" Get the path under the cursor. """
path, self._start_cursor_pos = self._calc_path_until_cursor(self.position)
return path
def _calc_path_until_cursor(self, start_pos=None):
def fetch_line():
if self._is_first:
self._is_first = False
self._line_length = self._column_temp
line = self._first_line
else:
line = self.get_line(self._line_temp)
self._line_length = len(line)
line = line + '\n'
# add lines with a backslash at the end
while True:
self._line_temp -= 1
last_line = self.get_line(self._line_temp)
#print self._line_temp, repr(last_line)
if last_line and last_line[-1] == '\\':
line = last_line[:-1] + ' ' + line
self._line_length = len(last_line)
else:
break
return line[::-1]
self._is_first = True
self._line_temp, self._column_temp = start_cursor = start_pos
self._first_line = self.get_line(self._line_temp)[:self._column_temp]
open_brackets = ['(', '[', '{']
close_brackets = [')', ']', '}']
gen = tokenize.generate_tokens(fetch_line)
string = ''
level = 0
force_point = False
last_type = None
try:
for token_type, tok, start, end, line in gen:
# print 'tok', token_type, tok, force_point
if last_type == token_type == tokenize.NAME:
string += ' '
if level > 0:
if tok in close_brackets:
level += 1
if tok in open_brackets:
level -= 1
elif tok == '.':
force_point = False
elif force_point:
# it is reversed, therefore a number is getting recognized
# as a floating point number
if token_type == tokenize.NUMBER and tok[0] == '.':
force_point = False
else:
break
elif tok in close_brackets:
level += 1
elif token_type in [tokenize.NAME, tokenize.STRING]:
force_point = True
elif token_type == tokenize.NUMBER:
pass
else:
self._column_temp = self._line_length - end[1]
break
x = start_pos[0] - end[0] + 1
l = self.get_line(x)
l = self._first_line if x == start_pos[0] else l
start_cursor = x, len(l) - end[1]
self._column_temp = self._line_length - end[1]
string += tok
last_type = token_type
except tokenize.TokenError:
debug.warning("Tokenize couldn't finish: %s", sys.exc_info)
# string can still contain spaces at the end
return string[::-1].strip(), start_cursor
def get_path_under_cursor(self):
"""
Return the path under the cursor. If there is a rest of the path left,
it will be added to the stuff before it.
"""
return self.get_path_until_cursor() + self.get_path_after_cursor()
def get_path_after_cursor(self):
line = self.get_line(self.position[0])
return re.search("[\w\d]*", line[self.position[1]:]).group(0)
def get_operator_under_cursor(self):
line = self.get_line(self.position[0])
after = re.match("[^\w\s]+", line[self.position[1]:])
before = re.match("[^\w\s]+", line[:self.position[1]][::-1])
return (before.group(0) if before is not None else '') \
+ (after.group(0) if after is not None else '')
def get_context(self, yield_positions=False):
pos = self._start_cursor_pos
while True:
# remove unimportant whitespace
line = self.get_line(pos[0])
while True:
if pos[1] == 0:
line = self.get_line(pos[0] - 1)
if line and line[-1] == '\\':
pos = pos[0] - 1, len(line) - 1
continue
else:
break
if line[pos[1] - 1].isspace():
pos = pos[0], pos[1] - 1
else:
break
try:
result, pos = self._calc_path_until_cursor(start_pos=pos)
if yield_positions:
yield pos
else:
yield result
except StopIteration:
if yield_positions:
yield None
else:
yield ''
def get_line(self, line_nr):
if not self._line_cache:
self._line_cache = self.source.splitlines()
if self.source:
if self.source[-1] == '\n':
self._line_cache.append('')
else: # ''.splitlines() == []
self._line_cache = ['']
if line_nr == 0:
# This is a fix for the zeroth line. We need a newline there, for
# the backwards parser.
return ''
if line_nr < 0:
raise StopIteration()
try:
return self._line_cache[line_nr - 1]
except IndexError:
raise StopIteration()
def get_position_line(self):
return self.get_line(self.position[0])[:self.position[1]]
class UserContextParser(object):
def __init__(self, source, path, position, user_context):
self._source = source
self._path = path and os.path.abspath(path)
self._position = position
self._user_context = user_context
@cache.underscore_memoization
def _parser(self):
cache.invalidate_star_import_cache(self._path)
parser = FastParser(self._source, self._path)
# Don't pickle that module, because the main module is changing quickly
cache.save_parser(self._path, None, parser, pickling=False)
return parser
@cache.underscore_memoization
def _get_user_stmt(self):
return self.module().get_statement_for_position(self._position,
include_imports=True)
def user_stmt(self, is_completion=False):
user_stmt = self._get_user_stmt()
debug.speed('parsed')
if is_completion and not user_stmt:
# for statements like `from x import ` (cursor not in statement)
pos = next(self._user_context.get_context(yield_positions=True))
last_stmt = pos and \
self.module().get_statement_for_position(pos, include_imports=True)
if isinstance(last_stmt, representation.Import):
user_stmt = last_stmt
return user_stmt
@cache.underscore_memoization
def user_scope(self):
user_stmt = self._get_user_stmt()
if user_stmt is None:
def scan(scope):
for s in scope.statements + scope.subscopes:
if isinstance(s, representation.Scope):
if s.start_pos <= self._position <= s.end_pos:
return scan(s) or s
return scan(self.module()) or self.module()
else:
return user_stmt.parent
def module(self):
return self._parser().module
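This new module is where the user-position logic removed from Parser and FastParser above now lives: UserContext reverses the text before the cursor and tokenizes it to recover the dotted path, and UserContextParser resolves user_stmt and user_scope on demand instead of during parsing. A hedged usage sketch, assuming 1-based (line, column) positions as get_line's indexing implies:

    # Cursor sits right after 'json.loads' in the first line.
    uc = UserContext('x = json.loads(s)\n', (1, 14))
    print(uc.get_path_until_cursor())      # expected: json.loads
    print(uc.get_operator_under_cursor())  # expected: (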