1
0
forked from VimPlug/jedi
Files
jedi-fork/jedi/evaluate/builtin.py
2014-01-05 10:41:41 +01:00

457 lines
15 KiB
Python

"""
A big part of the Python standard libraries are unfortunately not only written
in Python. The process works like this:
- ``BuiltinModule`` imports the builtin module (e.g. ``sys``)
- then ``BuiltinModule`` generates code with the docstrings of builtin
functions.
- The :mod:`parsing` parser processes the generated code.
This is possible, because many builtin functions supply docstrings, for example
the method ``list.index`` has the following attribute ``__doc__``:
L.index(value, [start, [stop]]) -> integer -- return first index of value.
Raises ValueError if the value is not present.
`PEP 257 <http://www.python.org/dev/peps/pep-0257/#one-line-docstrings>`_
teaches how docstrings should look like for C functions.
Additionally there's a ``Builtin`` instance in this module, to make it
possible to access functions like ``list`` and ``int`` directly, the same way
|jedi| access other functions.
"""
from __future__ import with_statement
from jedi._compatibility import exec_function, is_py3k
import re
import sys
import os
if is_py3k:
import io
import types
import inspect
from jedi import common
from jedi import debug
from jedi.parser import Parser
from jedi.parser import fast
from jedi import modules
from jedi import cache
class BuiltinModule(object):
"""
This module is a parser for all builtin modules, which are programmed in
C/C++. It should also work on third party modules.
It can be instantiated with either a path or a name of the module. The path
is important for third party modules.
:param name: The name of the module.
:param path: The path of the module.
:param sys_path: The sys.path, which is can be customizable.
"""
map_types = {
'floating point number': '0.0',
'string': '""',
'str': '""',
'character': '"a"',
'integer': '0',
'int': '0',
'dictionary': '{}',
'list': '[]',
'file object': 'file("")',
# TODO things like dbg: ('not working', 'tuple of integers')
}
if is_py3k:
map_types['file object'] = 'import io; return io.TextIOWrapper()'
def __init__(self, path=None, name=None, sys_path=None):
if sys_path is None:
sys_path = modules.get_sys_path()
self.sys_path = list(sys_path)
if not name:
name = os.path.basename(path)
name = name.rpartition('.')[0] # cut file type (normally .so)
self.name = name
self.path = path and os.path.abspath(path)
@property
@cache.underscore_memoization
def parser(self):
""" get the parser lazy """
return cache.load_parser(self.path, self.name) or self._load_module()
def _load_module(self):
source = _generate_code(self.module, self._load_mixins())
p = self.path or self.name
p = fast.FastParser(source, p)
cache.save_parser(self.path, self.name, p)
return p
@property
@cache.underscore_memoization
def module(self):
"""get module also lazy"""
def load_module(name, path):
if path:
self.sys_path.insert(0, path)
temp, sys.path = sys.path, self.sys_path
content = {}
try:
exec_function('import %s as module' % name, content)
module = content['module']
except AttributeError:
# use sys.modules, because you cannot access some modules
# directly. -> #59
module = sys.modules[name]
sys.path = temp
if path:
self.sys_path.pop(0)
return module
# module might already be defined
path = self.path
name = self.name
if self.path:
dot_path = []
p = self.path
# search for the builtin with the correct path
while p and p not in sys.path:
p, sep, mod = p.rpartition(os.path.sep)
dot_path.append(mod.partition('.')[0])
if p:
name = ".".join(reversed(dot_path))
path = p
else:
path = os.path.dirname(self.path)
return load_module(name, path)
def _load_mixins(self):
"""
Load functions that are mixed in to the standard library.
E.g. builtins are written in C (binaries), but my autocompletion only
understands Python code. By mixing in Python code, the autocompletion
should work much better for builtins.
"""
regex = r'^(def|class)\s+([\w\d]+)'
def process_code(code, depth=0):
funcs = {}
matches = list(re.finditer(regex, code, re.MULTILINE))
positions = [m.start() for m in matches]
for i, pos in enumerate(positions):
try:
code_block = code[pos:positions[i + 1]]
except IndexError:
code_block = code[pos:len(code)]
structure_name = matches[i].group(1)
name = matches[i].group(2)
if structure_name == 'def':
funcs[name] = code_block
elif structure_name == 'class':
if depth > 0:
raise NotImplementedError()
# remove class line
c = re.sub(r'^[^\n]+', '', code_block)
# remove whitespace
c = re.compile(r'^[ ]{4}', re.MULTILINE).sub('', c)
funcs[name] = process_code(c)
else:
raise NotImplementedError()
return funcs
name = self.name
# sometimes there are stupid endings like `_sqlite3.cpython-32mu`
name = re.sub(r'\..*', '', name)
if name == '__builtin__' and not is_py3k:
name = 'builtins'
path = os.path.dirname(os.path.abspath(__file__))
try:
with open(os.path.join(path, 'mixin', name) + '.pym') as f:
s = f.read()
except IOError:
return {}
else:
mixin_dct = process_code(s)
if is_py3k and self.name == Builtin.name:
# in the case of Py3k xrange is now range
mixin_dct['range'] = mixin_dct['xrange']
return mixin_dct
def _generate_code(scope, mixin_funcs={}, depth=0):
"""
Generate a string, which uses python syntax as an input to the Parser.
"""
def get_doc(obj, indent=False):
doc = inspect.getdoc(obj)
if doc:
doc = ('r"""\n%s\n"""\n' % doc)
if indent:
doc = common.indent_block(doc)
return doc
return ''
def is_in_base_classes(cls, name, comparison):
""" Base classes may contain the exact same object """
if name in mixin_funcs:
return False
try:
mro = cls.mro()
except TypeError:
# this happens, if cls == type
return False
for base in mro[1:]:
try:
attr = getattr(base, name)
except AttributeError:
continue
if attr == comparison:
return True
return False
def get_scope_objects(names):
"""
Looks for the names defined with dir() in an objects and divides
them into different object types.
"""
classes = {}
funcs = {}
stmts = {}
members = {}
for n in names:
try:
# this has a builtin_function_or_method
exe = getattr(scope, n)
except AttributeError:
# happens e.g. in properties of
# PyQt4.QtGui.QStyleOptionComboBox.currentText
# -> just set it to None
members[n] = None
else:
if inspect.isclass(scope):
if is_in_base_classes(scope, n, exe):
continue
if inspect.isbuiltin(exe) or inspect.ismethod(exe) \
or inspect.ismethoddescriptor(exe):
funcs[n] = exe
elif inspect.isclass(exe) or inspect.ismodule(exe):
classes[n] = exe
elif inspect.ismemberdescriptor(exe):
members[n] = exe
else:
stmts[n] = exe
return classes, funcs, stmts, members
code = ''
if inspect.ismodule(scope): # generate comment where the code's from.
try:
path = scope.__file__
except AttributeError:
path = '?'
code += '# Generated module %s from %s\n' % (scope.__name__, path)
code += get_doc(scope)
# Remove some magic vars, (TODO why?)
names = set(dir(scope)) - set(['__file__', '__name__', '__doc__',
'__path__', '__package__'])
classes, funcs, stmts, members = get_scope_objects(names)
# classes
for name, cl in classes.items():
bases = (c.__name__ for c in cl.__bases__) if inspect.isclass(cl) \
else []
code += 'class %s(%s):\n' % (name, ','.join(bases))
if depth == 0:
try:
mixin = mixin_funcs[name]
except KeyError:
mixin = {}
cl_code = _generate_code(cl, mixin, depth + 1)
code += common.indent_block(cl_code)
code += '\n'
# functions
for name, func in funcs.items():
params, ret = _parse_function_doc(func)
if depth > 0:
params = 'self, ' + params
doc_str = get_doc(func, indent=True)
try:
mixin = mixin_funcs[name]
except KeyError:
# normal code generation
code += 'def %s(%s):\n' % (name, params)
code += doc_str
code += common.indent_block('%s\n\n' % ret)
else:
# generation of code with mixins
# the parser only supports basic functions with a newline after
# the double dots
# find doc_str place
try:
pos = re.search(r'\):\s*\n', mixin).end()
except TypeError:
# pypy uses a different reversed builtin
if name == 'reversed':
mixin = 'def reversed(sequence):\n' \
' for i in self.__sequence: yield i'
pos = 24
else:
debug.warning('mixin trouble in pypy: %s', name)
raise
if pos is None:
raise Exception("Builtin function not parsed correctly")
code += mixin[:pos] + doc_str + mixin[pos:]
# class members (functions) properties?
for name, func in members.items():
# recursion problem in properties TODO remove
if name in ['fget', 'fset', 'fdel']:
continue
ret = 'pass'
code += '@property\ndef %s(self):\n' % (name)
code += common.indent_block(get_doc(func) + '%s\n\n' % ret)
# variables
for name, value in stmts.items():
if is_py3k:
file_type = io.TextIOWrapper
else:
file_type = types.FileType
if isinstance(value, file_type):
value = 'open()'
elif name == 'None':
value = ''
elif type(value).__name__ in ['int', 'bool', 'float',
'dict', 'list', 'tuple']:
value = repr(value)
else:
# get the type, if the type is not simple.
mod = type(value).__module__
value = type(value).__name__ + '()'
if mod != '__builtin__':
value = '%s.%s' % (mod, value)
code += '%s = %s\n' % (name, value)
return code
def _parse_function_doc(func):
"""
Takes a function and returns the params and return value as a tuple.
This is nothing more than a docstring parser.
"""
# TODO: things like utime(path, (atime, mtime)) and a(b [, b]) -> None
doc = inspect.getdoc(func)
if doc is None:
return '', 'pass'
# get full string, parse round parentheses: def func(a, (b,c))
try:
# unbound methods such as pyqtSignals have no __name__
if not hasattr(func, "__name__"):
return '', 'pass'
count = 0
debug.dbg(func, func.__name__, doc)
start = doc.index('(')
for i, s in enumerate(doc[start:]):
if s == '(':
count += 1
elif s == ')':
count -= 1
if count == 0:
end = start + i
break
param_str = doc[start + 1:end]
except (ValueError, UnboundLocalError):
# ValueError for doc.index
# UnboundLocalError for undefined end in last line
debug.dbg('no brackets found - no param')
end = 0
param_str = ''
else:
# remove square brackets, that show an optional param ( = None)
def change_options(m):
args = m.group(1).split(',')
for i, a in enumerate(args):
if a and '=' not in a:
args[i] += '=None'
return ','.join(args)
while True:
param_str, changes = re.subn(r' ?\[([^\[\]]+)\]',
change_options, param_str)
if changes == 0:
break
param_str = param_str.replace('-', '_') # see: isinstance.__doc__
# parse return value
r = re.search('-[>-]* ', doc[end:end + 7])
if r is None:
ret = ''
else:
index = end + r.end()
# get result type, which can contain newlines
pattern = re.compile(r'(,\n|[^\n-])+')
ret_str = pattern.match(doc, index).group(0).strip()
# New object -> object()
ret_str = re.sub(r'[nN]ew (.*)', r'\1()', ret_str)
ret = BuiltinModule.map_types.get(ret_str, ret_str)
if ret == ret_str and ret not in ['None', 'object', 'tuple', 'set']:
debug.dbg('not working', ret_str)
ret = ('return ' if 'return' not in ret else '') + ret
return param_str, ret
class Builtin(object):
""" The builtin scope / module """
# Python 3 compatibility
if is_py3k:
name = 'builtins'
else:
name = '__builtin__'
@property
@cache.underscore_memoization
def builtin(self):
return BuiltinModule(name=self.name)
@property
def scope(self):
return self.builtin.parser.module
def magic_function_scope(self, evaluator):
try:
return self._magic_function_scope
except AttributeError:
# depth = 1 because this is not a module
class Container(object):
FunctionType = types.FunctionType
source = _generate_code(Container, depth=0)
parser = Parser(source, None)
module = parser.module
module.parent = self.scope
typ = evaluator.follow_path(iter(['FunctionType']), [module], module)
s = self._magic_function_scope = typ.pop()
return s
Builtin = Builtin()