1
0
forked from VimPlug/jedi
Files
jedi-fork/jedi/modules.py
2014-01-05 13:07:37 +01:00

255 lines
9.1 KiB
Python

"""
Don't confuse these classes with :mod:`parsing_representation` modules, the
modules here can access these representation with ``module.parser.module``.
``Module`` exists mainly for caching purposes.
Basically :mod:`modules` offers the classes:
- ``CachedModule``, a base class for Cachedmodule.
- ``Module`` the class for all normal Python modules (not builtins, they are at
home at :mod:`builtin`).
- ``ModuleWithCursor``, holds the module information for :class:`api.Script`.
Apart from those classes there's a ``sys.path`` fetching function, as well as
`Virtual Env` and `Django` detection.
"""
from __future__ import with_statement
import re
import sys
import os
from ast import literal_eval
from jedi._compatibility import unicode
from jedi import cache
from jedi.parser import tokenizer as tokenize
from jedi.parser import fast
from jedi import debug
def load_module(path=None, source=None, name=None):
def load(source):
if path is not None and path.endswith('.py'):
if source is None:
with open(path) as f:
source = f.read()
else:
# TODO refactoring remove
from jedi.evaluate import builtin
return builtin.BuiltinModule(path, name).parser.module
p = path or name
p = fast.FastParser(source_to_unicode(source), p)
cache.save_parser(path, name, p)
return p.module
cached = cache.load_parser(path, name)
return load(source) if cached is None else cached.module
class ModuleWithCursor(object):
"""
Manages all files, that are parsed and caches them.
Important are the params source and path, one of them has to
be there.
:param source: The source code of the file.
:param path: The module path of the file or None.
:param position: The position, the user is currently in. Only important \
for the main file.
"""
def __init__(self, path, source, position):
super(ModuleWithCursor, self).__init__()
self.path = path and os.path.abspath(path)
self.name = None
self.source = source
self.position = position
self._path_until_cursor = None
self._line_cache = None
# this two are only used, because there is no nonlocal in Python 2
self._line_temp = None
self._relevant_temp = None
def get_path_until_cursor(self):
""" Get the path under the cursor. """
if self._path_until_cursor is None: # small caching
self._path_until_cursor, self._start_cursor_pos = \
self._get_path_until_cursor(self.position)
return self._path_until_cursor
def _get_path_until_cursor(self, start_pos=None):
def fetch_line():
if self._is_first:
self._is_first = False
self._line_length = self._column_temp
line = self._first_line
else:
line = self.get_line(self._line_temp)
self._line_length = len(line)
line = line + '\n'
# add lines with a backslash at the end
while True:
self._line_temp -= 1
last_line = self.get_line(self._line_temp)
#print self._line_temp, repr(last_line)
if last_line and last_line[-1] == '\\':
line = last_line[:-1] + ' ' + line
self._line_length = len(last_line)
else:
break
return line[::-1]
self._is_first = True
self._line_temp, self._column_temp = start_cursor = start_pos
self._first_line = self.get_line(self._line_temp)[:self._column_temp]
open_brackets = ['(', '[', '{']
close_brackets = [')', ']', '}']
gen = tokenize.generate_tokens(fetch_line)
string = ''
level = 0
force_point = False
last_type = None
try:
for token_type, tok, start, end, line in gen:
# print 'tok', token_type, tok, force_point
if last_type == token_type == tokenize.NAME:
string += ' '
if level > 0:
if tok in close_brackets:
level += 1
if tok in open_brackets:
level -= 1
elif tok == '.':
force_point = False
elif force_point:
# it is reversed, therefore a number is getting recognized
# as a floating point number
if token_type == tokenize.NUMBER and tok[0] == '.':
force_point = False
else:
break
elif tok in close_brackets:
level += 1
elif token_type in [tokenize.NAME, tokenize.STRING]:
force_point = True
elif token_type == tokenize.NUMBER:
pass
else:
self._column_temp = self._line_length - end[1]
break
x = start_pos[0] - end[0] + 1
l = self.get_line(x)
l = self._first_line if x == start_pos[0] else l
start_cursor = x, len(l) - end[1]
self._column_temp = self._line_length - end[1]
string += tok
last_type = token_type
except tokenize.TokenError:
debug.warning("Tokenize couldn't finish", sys.exc_info)
# string can still contain spaces at the end
return string[::-1].strip(), start_cursor
def get_path_under_cursor(self):
"""
Return the path under the cursor. If there is a rest of the path left,
it will be added to the stuff before it.
"""
return self.get_path_until_cursor() + self.get_path_after_cursor()
def get_path_after_cursor(self):
line = self.get_line(self.position[0])
return re.search("[\w\d]*", line[self.position[1]:]).group(0)
def get_operator_under_cursor(self):
line = self.get_line(self.position[0])
after = re.match("[^\w\s]+", line[self.position[1]:])
before = re.match("[^\w\s]+", line[:self.position[1]][::-1])
return (before.group(0) if before is not None else '') \
+ (after.group(0) if after is not None else '')
def get_context(self, yield_positions=False):
pos = self._start_cursor_pos
while True:
# remove non important white space
line = self.get_line(pos[0])
while True:
if pos[1] == 0:
line = self.get_line(pos[0] - 1)
if line and line[-1] == '\\':
pos = pos[0] - 1, len(line) - 1
continue
else:
break
if line[pos[1] - 1].isspace():
pos = pos[0], pos[1] - 1
else:
break
try:
result, pos = self._get_path_until_cursor(start_pos=pos)
if yield_positions:
yield pos
else:
yield result
except StopIteration:
if yield_positions:
yield None
else:
yield ''
def get_line(self, line_nr):
if not self._line_cache:
self._line_cache = self.source.splitlines()
if self.source:
if self.source[-1] == '\n':
self._line_cache.append('')
else: # ''.splitlines() == []
self._line_cache = ['']
if line_nr == 0:
# This is a fix for the zeroth line. We need a newline there, for
# the backwards parser.
return ''
if line_nr < 0:
raise StopIteration()
try:
return self._line_cache[line_nr - 1]
except IndexError:
raise StopIteration()
def get_position_line(self):
return self.get_line(self.position[0])[:self.position[1]]
def source_to_unicode(source, encoding=None):
def detect_encoding():
""" For the implementation of encoding definitions in Python, look at:
http://www.python.org/dev/peps/pep-0263/
http://docs.python.org/2/reference/lexical_analysis.html#encoding-\
declarations
"""
byte_mark = literal_eval(r"b'\xef\xbb\xbf'")
if source.startswith(byte_mark):
# UTF-8 byte-order mark
return 'utf-8'
first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', str(source)).group(0)
possible_encoding = re.search(r"coding[=:]\s*([-\w.]+)",
first_two_lines)
if possible_encoding:
return possible_encoding.group(1)
else:
# the default if nothing else has been set -> PEP 263
return encoding if encoding is not None else 'iso-8859-1'
if isinstance(source, unicode):
# only cast str/bytes
return source
# cast to unicode by default
return unicode(source, detect_encoding(), 'replace')