"""
|
|
Don't confuse these classes with :mod:`parsing_representation` modules, the
|
|
modules here can access these representation with ``module.parser.module``.
|
|
``Module`` exists mainly for caching purposes.
|
|
|
|
Basically :mod:`modules` offers the classes:
|
|
|
|
- ``CachedModule``, a base class for Cachedmodule.
|
|
- ``Module`` the class for all normal Python modules (not builtins, they are at
|
|
home at :mod:`builtin`).
|
|
- ``ModuleWithCursor``, holds the module information for :class:`api.Script`.
|
|
|
|
Apart from those classes there's a ``sys.path`` fetching function, as well as
|
|
`Virtual Env` and `Django` detection.
|
|
"""
from __future__ import with_statement

import re
import tokenizer as tokenize
import sys
import os

from jedi._compatibility import exec_function, unicode, is_py25, literal_eval
from jedi import cache
from jedi import parsing
from jedi import parsing_representation as pr
from jedi import fast_parser
from jedi import debug
from jedi import settings
from jedi import common


class CachedModule(object):
    """
    The base type for all modules, which is not to be confused with
    `parsing_representation.Module`. Caching happens here.
    """

    def __init__(self, path=None, name=None):
        self.path = path and os.path.abspath(path)
        self.name = name
        self._parser = None

    @property
    def parser(self):
        """ Get the parser lazily. """
        if self._parser is None:
            self._parser = cache.load_module(self.path, self.name) \
                or self._load_module()
        return self._parser

    def _get_source(self):
        raise NotImplementedError()

    def _load_module(self):
        source = self._get_source()
        p = self.path or self.name
        p = fast_parser.FastParser(source, p)
        cache.save_module(self.path, self.name, p)
        return p


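# Illustrative note (not part of the original module, paths are hypothetical):
# ``CachedModule.parser`` above is lazy, so the source is parsed at most once
# per instance, and ``cache.load_module``/``cache.save_module`` let later
# instances reuse a previously pickled parser:
#
#     m = Module('/tmp/example.py', 'x = 1\n')
#     m.parser is m.parser    # True, the second access hits self._parser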
class Module(CachedModule):
    """
    Manages all files that are parsed and caches them.

    :param path: The module path of the file.
    :param source: The source code of the file.
    """
    def __init__(self, path, source=None):
        super(Module, self).__init__(path=path)
        if source is None:
            with open(path) as f:
                source = f.read()
        self.source = source_to_unicode(source)
        self._line_cache = None

    def _get_source(self):
        """ Just one time """
        s = self.source
        del self.source  # memory efficiency
        return s


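# Minimal usage sketch (assumed, not part of the original module): when no
# source is passed, ``Module`` reads the file itself and normalizes it with
# ``source_to_unicode`` (defined at the bottom of this file):
#
#     m = Module('/tmp/example.py')    # hypothetical path
#     tree = m.parser.module           # a parsing_representation.Module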
class ModuleWithCursor(Module):
    """
    Manages all files that are parsed and caches them.
    The params source and path are important; at least one of them has to
    be given.

    :param source: The source code of the file.
    :param path: The module path of the file or None.
    :param position: The position the user is currently at. Only important \
        for the main file.
    """
    def __init__(self, path, source, position):
        super(ModuleWithCursor, self).__init__(path, source)
        self.position = position

        # These two are only used because there is no nonlocal in Python 2.
        self._line_temp = None
        self._relevant_temp = None

        self.source = source
        self._part_parser = None

    @property
    def parser(self):
        """ Get the parser lazily. """
        if not self._parser:
            with common.ignored(KeyError):
                parser = cache.parser_cache[self.path].parser
                cache.invalidate_star_import_cache(parser.module)
            # Call the parser already here, because it will be used anyway.
            # Also, the position is important here (it will not be used by
            # default), therefore fill the cache here.
            self._parser = fast_parser.FastParser(self.source, self.path,
                                                  self.position)
            # Don't pickle that module, because it's changing fast.
            cache.save_module(self.path, self.name, self._parser,
                              pickling=False)
        return self._parser

    def get_path_until_cursor(self):
        """ Get the path until the cursor. """
        result = self._get_path_until_cursor()
        self._start_cursor_pos = self._line_temp + 1, self._column_temp
        return result

    def _get_path_until_cursor(self, start_pos=None):
        def fetch_line():
            line = self.get_line(self._line_temp)
            if self._is_first:
                self._is_first = False
                self._line_length = self._column_temp
                line = line[:self._column_temp]
            else:
                self._line_length = len(line)
                line = line + '\n'
            # add lines with a backslash at the end
            while 1:
                self._line_temp -= 1
                last_line = self.get_line(self._line_temp)
                if last_line and last_line[-1] == '\\':
                    line = last_line[:-1] + ' ' + line
                else:
                    break
            return line[::-1]

        self._is_first = True
        if start_pos is None:
            self._line_temp = self.position[0]
            self._column_temp = self.position[1]
        else:
            self._line_temp, self._column_temp = start_pos

        open_brackets = ['(', '[', '{']
        close_brackets = [')', ']', '}']

        gen = tokenize.generate_tokens(fetch_line)
        string = ''
        level = 0
        force_point = False
        last_type = None
        try:
            for token_type, tok, start, end, line in gen:
                # print 'tok', token_type, tok, force_point
                if last_type == token_type == tokenize.NAME:
                    string += ' '

                if level > 0:
                    if tok in close_brackets:
                        level += 1
                    if tok in open_brackets:
                        level -= 1
                elif tok == '.':
                    force_point = False
                elif force_point:
                    # The line is reversed, therefore a number would be
                    # recognized as a floating point number here.
                    if token_type == tokenize.NUMBER and tok[0] == '.':
                        force_point = False
                    else:
                        break
                elif tok in close_brackets:
                    level += 1
                elif token_type in [tokenize.NAME, tokenize.STRING]:
                    force_point = True
                elif token_type == tokenize.NUMBER:
                    pass
                else:
                    break

                self._column_temp = self._line_length - end[1]
                string += tok
                last_type = token_type
        except tokenize.TokenError:
            debug.warning("Tokenize couldn't finish", sys.exc_info())

        # string can still contain spaces at the end
        return string[::-1].strip()

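    # Illustrative example (assumed, not from the original source): for
    #
    #     source = 'x = json.loads(s).ge'
    #     position = (1, len(source))
    #
    # ``fetch_line`` hands the reversed line to the tokenizer, brackets are
    # skipped via the ``level`` counter, and the method returns
    # 'json.loads(s).ge' -- the dotted path left of the cursor.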
    def get_path_under_cursor(self):
        """
        Return the path under the cursor. If there is a rest of the path left,
        it will be added to the stuff before it.
        """
        line = self.get_line(self.position[0])
        after = re.search(r"[\w\d]*", line[self.position[1]:]).group(0)
        return self.get_path_until_cursor() + after

    def get_operator_under_cursor(self):
        line = self.get_line(self.position[0])
        after = re.match(r"[^\w\s]+", line[self.position[1]:])
        before = re.match(r"[^\w\s]+", line[:self.position[1]][::-1])
        return (before.group(0) if before is not None else '') \
            + (after.group(0) if after is not None else '')

    def get_context(self):
        pos = self._start_cursor_pos
        while pos > (1, 0):
            # Remove unimportant whitespace.
            line = self.get_line(pos[0])
            while pos[1] > 0 and line[pos[1] - 1].isspace():
                pos = pos[0], pos[1] - 1

            try:
                yield self._get_path_until_cursor(start_pos=pos)
            except StopIteration:
                yield ''
            pos = self._line_temp, self._column_temp

        while True:
            yield ''

    def get_line(self, line_nr):
        if not self._line_cache:
            self._line_cache = self.source.splitlines()
            if not self.source:  # ''.splitlines() == []
                self._line_cache = [self.source]

        if line_nr == 0:
            # This is a fix for the zeroth line. We need a newline there, for
            # the backwards parser.
            return ''
        if line_nr < 0:
            raise StopIteration()
        try:
            return self._line_cache[line_nr - 1]
        except IndexError:
            raise StopIteration()

    def get_part_parser(self):
        """ Returns a parser that contains only part of the source code. This
        exists only for performance reasons.
        """
        if self._part_parser:
            return self._part_parser

        # TODO check for docstrings
        length = settings.part_line_length
        offset = max(self.position[0] - length, 0)
        s = '\n'.join(self.source.splitlines()[offset:offset + length])
        self._part_parser = parsing.Parser(s, self.path, self.position,
                                           offset=(offset, 0))
        return self._part_parser


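# Worked example for ``get_part_parser`` (assumed values, not part of the
# original module): with ``settings.part_line_length == 20`` and the cursor on
# line 50, ``offset = max(50 - 20, 0) = 30``, so only lines 31..50 are joined
# and handed to ``parsing.Parser`` -- a cheap window instead of the whole file.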
def get_sys_path():
    def check_virtual_env(sys_path):
        """ Add virtualenv's site-packages to the `sys.path`. """
        venv = os.getenv('VIRTUAL_ENV')
        if not venv:
            return
        venv = os.path.abspath(venv)
        p = os.path.join(
            venv, 'lib', 'python%d.%d' % sys.version_info[:2], 'site-packages')
        sys_path.insert(0, p)

    check_virtual_env(sys.path)
    return [p for p in sys.path if p != ""]


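# Hedged example of the path ``check_virtual_env`` prepends (the exact value
# depends on the interpreter and the environment):
#
#     VIRTUAL_ENV=/home/user/venv
#     -> /home/user/venv/lib/python2.7/site-packages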
@cache.memoize_default([])
def sys_path_with_modifications(module):
    def execute_code(code):
        c = "import os; from os.path import *; result=%s"
        variables = {'__file__': module.path}
        try:
            exec_function(c % code, variables)
        except Exception:
            debug.warning('sys path detected, but failed to evaluate')
            return None
        try:
            res = variables['result']
            if isinstance(res, str):
                return os.path.abspath(res)
            else:
                return None
        except KeyError:
            return None

    def check_module(module):
        try:
            possible_stmts = module.used_names['path']
        except KeyError:
            return get_sys_path()

        sys_path = list(get_sys_path())  # copy
        for p in possible_stmts:
            if not isinstance(p, pr.Statement):
                continue
            commands = p.get_commands()
            if len(commands) != 1:  # sys.path command is just one thing.
                continue
            call = commands[0]
            n = call.name
            if not isinstance(n, pr.Name) or len(n.names) != 3:
                continue
            if n.names[:2] != ('sys', 'path'):
                continue
            array_cmd = n.names[2]
            if call.execution is None:
                continue
            exe = call.execution
            if not (array_cmd == 'insert' and len(exe) == 2
                    or array_cmd == 'append' and len(exe) == 1):
                continue

            if array_cmd == 'insert':
                exe_type, exe.type = exe.type, pr.Array.NOARRAY
                exe_pop = exe.values.pop(0)
                res = execute_code(exe.get_code())
                if res is not None:
                    sys_path.insert(0, res)
                    debug.dbg('sys path inserted: %s' % res)
                exe.type = exe_type
                exe.values.insert(0, exe_pop)
            elif array_cmd == 'append':
                res = execute_code(exe.get_code())
                if res is not None:
                    sys_path.append(res)
                    debug.dbg('sys path added: %s' % res)
        return sys_path

    if module.path is None:
        # Support for modules without a path is intentionally bad.
        return []

    curdir = os.path.abspath(os.curdir)
    with common.ignored(OSError):
        os.chdir(os.path.dirname(module.path))

    result = check_module(module)
    result += detect_django_path(module.path)

    # cleanup, back to old directory
    os.chdir(curdir)
    return result


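# The kind of statement ``check_module`` above recognizes (illustrative,
# hypothetical project layout):
#
#     import os
#     import sys
#     sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'lib'))
#
# The inserted/appended expression is evaluated by ``execute_code`` with
# ``__file__`` bound to the analyzed module's path, and the resulting
# directory is added to the returned copy of sys.path.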
def detect_django_path(module_path):
    """ Detects the path of the very well known Django library (if used). """
    result = []
    while True:
        new = os.path.dirname(module_path)
        # If the module_path doesn't change anymore, we're finished -> /
        if new == module_path:
            break
        else:
            module_path = new

        with common.ignored(IOError):
            with open(module_path + os.path.sep + 'manage.py'):
                debug.dbg('Found django path: %s' % module_path)
                result.append(module_path)
    return result


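# Hedged example: for a module at /srv/mysite/app/models.py where
# /srv/mysite/manage.py exists, ``detect_django_path`` walks up the parent
# directories and returns ['/srv/mysite'] (hypothetical paths).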
def source_to_unicode(source, encoding=None):
    def detect_encoding():
        """ For the implementation of encoding definitions in Python, look at:
        http://www.python.org/dev/peps/pep-0263/
        http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
        """
        byte_mark = '\xef\xbb\xbf' if is_py25 else \
            literal_eval(r"b'\xef\xbb\xbf'")
        if source.startswith(byte_mark):
            # UTF-8 byte-order mark
            return 'utf-8'

        first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', str(source)).group(0)
        possible_encoding = re.search(r"coding[=:]\s*([-\w.]+)",
                                      first_two_lines)
        if possible_encoding:
            return possible_encoding.group(1)
        else:
            # the default if nothing else has been set -> PEP 263
            return encoding if encoding is not None else 'iso-8859-1'

    if isinstance(source, unicode):
        # only cast str/bytes
        return source

    # cast to unicode by default
    return unicode(source, detect_encoding(), 'replace')


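# Hedged examples of declarations ``detect_encoding`` picks up (PEP 263 style,
# the file contents are illustrative):
#
#     # -*- coding: utf-8 -*-
#     # vim: set fileencoding=latin-1 :
#
# Without a BOM or such a declaration in the first two lines, the ``encoding``
# argument (or 'iso-8859-1') is used as a fallback.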