""" A big part of the Python standard libraries are unfortunately not only written in Python. The process works like this: - ``BuiltinModule`` imports the builtin module (e.g. ``sys``) - then ``BuiltinModule`` generates code with the docstrings of builtin functions. - The :mod:`parsing` parser processes the generated code. This is possible, because many builtin functions supply docstrings, for example the method ``list.index`` has the following attribute ``__doc__``: L.index(value, [start, [stop]]) -> integer -- return first index of value. Raises ValueError if the value is not present. `PEP 257 `_ teaches how docstrings should look like for C functions. Additionally there's a ``Builtin`` instance in this module, to make it possible to access functions like ``list`` and ``int`` directly, the same way |jedi| access other functions. """ from __future__ import with_statement from jedi._compatibility import exec_function, is_py3k import re import sys import os if is_py3k: import io import types import inspect from jedi import common from jedi import debug from jedi.parser import Parser from jedi.parser import fast from jedi import modules from jedi import cache class BuiltinModule(object): """ This module is a parser for all builtin modules, which are programmed in C/C++. It should also work on third party modules. It can be instantiated with either a path or a name of the module. The path is important for third party modules. :param name: The name of the module. :param path: The path of the module. :param sys_path: The sys.path, which is can be customizable. """ map_types = { 'floating point number': '0.0', 'string': '""', 'str': '""', 'character': '"a"', 'integer': '0', 'int': '0', 'dictionary': '{}', 'list': '[]', 'file object': 'file("")', # TODO things like dbg: ('not working', 'tuple of integers') } if is_py3k: map_types['file object'] = 'import io; return io.TextIOWrapper()' def __init__(self, path=None, name=None, sys_path=None): if sys_path is None: sys_path = modules.get_sys_path() self.sys_path = list(sys_path) if not name: name = os.path.basename(path) name = name.rpartition('.')[0] # cut file type (normally .so) self.name = name self.path = path and os.path.abspath(path) @property @cache.underscore_memoization def parser(self): """ get the parser lazy """ return cache.load_parser(self.path, self.name) or self._load_module() def _load_module(self): source = _generate_code(self.module, self._load_mixins()) p = self.path or self.name p = fast.FastParser(source, p) cache.save_parser(self.path, self.name, p) return p @property @cache.underscore_memoization def module(self): """get module also lazy""" def load_module(name, path): if path: self.sys_path.insert(0, path) temp, sys.path = sys.path, self.sys_path content = {} try: exec_function('import %s as module' % name, content) module = content['module'] except AttributeError: # use sys.modules, because you cannot access some modules # directly. -> #59 module = sys.modules[name] sys.path = temp if path: self.sys_path.pop(0) return module # module might already be defined path = self.path name = self.name if self.path: dot_path = [] p = self.path # search for the builtin with the correct path while p and p not in sys.path: p, sep, mod = p.rpartition(os.path.sep) dot_path.append(mod.partition('.')[0]) if p: name = ".".join(reversed(dot_path)) path = p else: path = os.path.dirname(self.path) return load_module(name, path) def _load_mixins(self): """ Load functions that are mixed in to the standard library. E.g. builtins are written in C (binaries), but my autocompletion only understands Python code. By mixing in Python code, the autocompletion should work much better for builtins. """ regex = r'^(def|class)\s+([\w\d]+)' def process_code(code, depth=0): funcs = {} matches = list(re.finditer(regex, code, re.MULTILINE)) positions = [m.start() for m in matches] for i, pos in enumerate(positions): try: code_block = code[pos:positions[i + 1]] except IndexError: code_block = code[pos:len(code)] structure_name = matches[i].group(1) name = matches[i].group(2) if structure_name == 'def': funcs[name] = code_block elif structure_name == 'class': if depth > 0: raise NotImplementedError() # remove class line c = re.sub(r'^[^\n]+', '', code_block) # remove whitespace c = re.compile(r'^[ ]{4}', re.MULTILINE).sub('', c) funcs[name] = process_code(c) else: raise NotImplementedError() return funcs name = self.name # sometimes there are stupid endings like `_sqlite3.cpython-32mu` name = re.sub(r'\..*', '', name) if name == '__builtin__' and not is_py3k: name = 'builtins' path = os.path.dirname(os.path.abspath(__file__)) try: with open(os.path.join(path, 'mixin', name) + '.pym') as f: s = f.read() except IOError: return {} else: mixin_dct = process_code(s) if is_py3k and self.name == Builtin.name: # in the case of Py3k xrange is now range mixin_dct['range'] = mixin_dct['xrange'] return mixin_dct def _generate_code(scope, mixin_funcs={}, depth=0): """ Generate a string, which uses python syntax as an input to the Parser. """ def get_doc(obj, indent=False): doc = inspect.getdoc(obj) if doc: doc = ('r"""\n%s\n"""\n' % doc) if indent: doc = common.indent_block(doc) return doc return '' def is_in_base_classes(cls, name, comparison): """ Base classes may contain the exact same object """ if name in mixin_funcs: return False try: mro = cls.mro() except TypeError: # this happens, if cls == type return False for base in mro[1:]: try: attr = getattr(base, name) except AttributeError: continue if attr == comparison: return True return False def get_scope_objects(names): """ Looks for the names defined with dir() in an objects and divides them into different object types. """ classes = {} funcs = {} stmts = {} members = {} for n in names: try: # this has a builtin_function_or_method exe = getattr(scope, n) except AttributeError: # happens e.g. in properties of # PyQt4.QtGui.QStyleOptionComboBox.currentText # -> just set it to None members[n] = None else: if inspect.isclass(scope): if is_in_base_classes(scope, n, exe): continue if inspect.isbuiltin(exe) or inspect.ismethod(exe) \ or inspect.ismethoddescriptor(exe): funcs[n] = exe elif inspect.isclass(exe) or inspect.ismodule(exe): classes[n] = exe elif inspect.ismemberdescriptor(exe): members[n] = exe else: stmts[n] = exe return classes, funcs, stmts, members code = '' if inspect.ismodule(scope): # generate comment where the code's from. try: path = scope.__file__ except AttributeError: path = '?' code += '# Generated module %s from %s\n' % (scope.__name__, path) code += get_doc(scope) # Remove some magic vars, (TODO why?) names = set(dir(scope)) - set(['__file__', '__name__', '__doc__', '__path__', '__package__']) classes, funcs, stmts, members = get_scope_objects(names) # classes for name, cl in classes.items(): bases = (c.__name__ for c in cl.__bases__) if inspect.isclass(cl) \ else [] code += 'class %s(%s):\n' % (name, ','.join(bases)) if depth == 0: try: mixin = mixin_funcs[name] except KeyError: mixin = {} cl_code = _generate_code(cl, mixin, depth + 1) code += common.indent_block(cl_code) code += '\n' # functions for name, func in funcs.items(): params, ret = _parse_function_doc(func) if depth > 0: params = 'self, ' + params doc_str = get_doc(func, indent=True) try: mixin = mixin_funcs[name] except KeyError: # normal code generation code += 'def %s(%s):\n' % (name, params) code += doc_str code += common.indent_block('%s\n\n' % ret) else: # generation of code with mixins # the parser only supports basic functions with a newline after # the double dots # find doc_str place try: pos = re.search(r'\):\s*\n', mixin).end() except TypeError: # pypy uses a different reversed builtin if name == 'reversed': mixin = 'def reversed(sequence):\n' \ ' for i in self.__sequence: yield i' pos = 24 else: debug.warning('mixin trouble in pypy: %s', name) raise if pos is None: raise Exception("Builtin function not parsed correctly") code += mixin[:pos] + doc_str + mixin[pos:] # class members (functions) properties? for name, func in members.items(): # recursion problem in properties TODO remove if name in ['fget', 'fset', 'fdel']: continue ret = 'pass' code += '@property\ndef %s(self):\n' % (name) code += common.indent_block(get_doc(func) + '%s\n\n' % ret) # variables for name, value in stmts.items(): if is_py3k: file_type = io.TextIOWrapper else: file_type = types.FileType if isinstance(value, file_type): value = 'open()' elif name == 'None': value = '' elif type(value).__name__ in ['int', 'bool', 'float', 'dict', 'list', 'tuple']: value = repr(value) else: # get the type, if the type is not simple. mod = type(value).__module__ value = type(value).__name__ + '()' if mod != '__builtin__': value = '%s.%s' % (mod, value) code += '%s = %s\n' % (name, value) return code def _parse_function_doc(func): """ Takes a function and returns the params and return value as a tuple. This is nothing more than a docstring parser. """ # TODO: things like utime(path, (atime, mtime)) and a(b [, b]) -> None doc = inspect.getdoc(func) if doc is None: return '', 'pass' # get full string, parse round parentheses: def func(a, (b,c)) try: # unbound methods such as pyqtSignals have no __name__ if not hasattr(func, "__name__"): return '', 'pass' count = 0 debug.dbg(func, func.__name__, doc) start = doc.index('(') for i, s in enumerate(doc[start:]): if s == '(': count += 1 elif s == ')': count -= 1 if count == 0: end = start + i break param_str = doc[start + 1:end] except (ValueError, UnboundLocalError): # ValueError for doc.index # UnboundLocalError for undefined end in last line debug.dbg('no brackets found - no param') end = 0 param_str = '' else: # remove square brackets, that show an optional param ( = None) def change_options(m): args = m.group(1).split(',') for i, a in enumerate(args): if a and '=' not in a: args[i] += '=None' return ','.join(args) while True: param_str, changes = re.subn(r' ?\[([^\[\]]+)\]', change_options, param_str) if changes == 0: break param_str = param_str.replace('-', '_') # see: isinstance.__doc__ # parse return value r = re.search('-[>-]* ', doc[end:end + 7]) if r is None: ret = '' else: index = end + r.end() # get result type, which can contain newlines pattern = re.compile(r'(,\n|[^\n-])+') ret_str = pattern.match(doc, index).group(0).strip() # New object -> object() ret_str = re.sub(r'[nN]ew (.*)', r'\1()', ret_str) ret = BuiltinModule.map_types.get(ret_str, ret_str) if ret == ret_str and ret not in ['None', 'object', 'tuple', 'set']: debug.dbg('not working', ret_str) ret = ('return ' if 'return' not in ret else '') + ret return param_str, ret class Builtin(object): """ The builtin scope / module """ # Python 3 compatibility if is_py3k: name = 'builtins' else: name = '__builtin__' @property @cache.underscore_memoization def builtin(self): return BuiltinModule(name=self.name) @property def scope(self): return self.builtin.parser.module def magic_function_scope(self, evaluator): try: return self._magic_function_scope except AttributeError: # depth = 1 because this is not a module class Container(object): FunctionType = types.FunctionType source = _generate_code(Container, depth=0) parser = Parser(source, None) module = parser.module module.parent = self.scope typ = evaluator.follow_path(iter(['FunctionType']), [module], module) s = self._magic_function_scope = typ.pop() return s Builtin = Builtin()