diff --git a/jedi/evaluate/builtin.py b/jedi/evaluate/builtin.py deleted file mode 100644 index 19e0147c..00000000 --- a/jedi/evaluate/builtin.py +++ /dev/null @@ -1,455 +0,0 @@ -""" -A big part of the Python standard libraries are unfortunately not only written -in Python. The process works like this: - -- ``BuiltinModule`` imports the builtin module (e.g. ``sys``) -- then ``BuiltinModule`` generates code with the docstrings of builtin - functions. -- The :mod:`parsing` parser processes the generated code. - -This is possible, because many builtin functions supply docstrings, for example -the method ``list.index`` has the following attribute ``__doc__``: - - L.index(value, [start, [stop]]) -> integer -- return first index of value. - Raises ValueError if the value is not present. - -`PEP 257 `_ -teaches how docstrings should look like for C functions. - -Additionally there's a ``Builtin`` instance in this module, to make it -possible to access functions like ``list`` and ``int`` directly, the same way -|jedi| access other functions. -""" - -from jedi._compatibility import exec_function, is_py3k - -import re -import sys -import os -if is_py3k: - import io -import types -import inspect - -from jedi import common -from jedi import debug -from jedi.parser import Parser -from jedi.parser import fast -from jedi.evaluate.sys_path import get_sys_path -from jedi import cache - - -class BuiltinModule(object): - """ - This module is a parser for all builtin modules, which are programmed in - C/C++. It should also work on third party modules. - It can be instantiated with either a path or a name of the module. The path - is important for third party modules. - - :param name: The name of the module. - :param path: The path of the module. - :param sys_path: The sys.path, which is can be customizable. - """ - - map_types = { - 'floating point number': '0.0', - 'string': '""', - 'str': '""', - 'character': '"a"', - 'integer': '0', - 'int': '0', - 'dictionary': '{}', - 'list': '[]', - 'file object': 'file("")', - # TODO things like dbg: ('not working', 'tuple of integers') - } - - if is_py3k: - map_types['file object'] = 'import io; return io.TextIOWrapper()' - - def __init__(self, path=None, name=None, sys_path=None): - if sys_path is None: - sys_path = get_sys_path() - self.sys_path = list(sys_path) - - if not name: - name = os.path.basename(path) - name = name.rpartition('.')[0] # cut file type (normally .so) - self.name = name - - self.path = path and os.path.abspath(path) - - @property - @cache.underscore_memoization - def parser(self): - """ get the parser lazy """ - return cache.load_parser(self.path, self.name) or self._load_module() - - def _load_module(self): - source = _generate_code(self.module, self._load_mixins()) - p = self.path or self.name - p = fast.FastParser(source, p) - cache.save_parser(self.path, self.name, p) - return p - - @property - @cache.underscore_memoization - def module(self): - """get module also lazy""" - def load_module(name, path): - if path: - self.sys_path.insert(0, path) - - temp, sys.path = sys.path, self.sys_path - content = {} - try: - exec_function('import %s as module' % name, content) - module = content['module'] - except AttributeError: - # use sys.modules, because you cannot access some modules - # directly. -> #59 - module = sys.modules[name] - sys.path = temp - - if path: - self.sys_path.pop(0) - return module - - # module might already be defined - path = self.path - name = self.name - if self.path: - dot_path = [] - p = self.path - # search for the builtin with the correct path - while p and p not in sys.path: - p, sep, mod = p.rpartition(os.path.sep) - dot_path.append(mod.partition('.')[0]) - if p: - name = ".".join(reversed(dot_path)) - path = p - else: - path = os.path.dirname(self.path) - return load_module(name, path) - - def _load_mixins(self): - """ - Load functions that are mixed in to the standard library. - E.g. builtins are written in C (binaries), but my autocompletion only - understands Python code. By mixing in Python code, the autocompletion - should work much better for builtins. - """ - regex = r'^(def|class)\s+([\w\d]+)' - - def process_code(code, depth=0): - funcs = {} - matches = list(re.finditer(regex, code, re.MULTILINE)) - positions = [m.start() for m in matches] - for i, pos in enumerate(positions): - try: - code_block = code[pos:positions[i + 1]] - except IndexError: - code_block = code[pos:len(code)] - structure_name = matches[i].group(1) - name = matches[i].group(2) - if structure_name == 'def': - funcs[name] = code_block - elif structure_name == 'class': - if depth > 0: - raise NotImplementedError() - - # remove class line - c = re.sub(r'^[^\n]+', '', code_block) - # remove whitespace - c = re.compile(r'^[ ]{4}', re.MULTILINE).sub('', c) - - funcs[name] = process_code(c) - else: - raise NotImplementedError() - return funcs - - name = self.name - # sometimes there are stupid endings like `_sqlite3.cpython-32mu` - name = re.sub(r'\..*', '', name) - - if name == '__builtin__' and not is_py3k: - name = 'builtins' - path = os.path.dirname(os.path.abspath(__file__)) - try: - with open(os.path.join(path, 'mixin', name) + '.pym') as f: - s = f.read() - except IOError: - return {} - else: - mixin_dct = process_code(s) - if is_py3k and self.name == Builtin.name: - # in the case of Py3k xrange is now range - mixin_dct['range'] = mixin_dct['xrange'] - return mixin_dct - - -def _generate_code(scope, mixin_funcs={}, depth=0): - """ - Generate a string, which uses python syntax as an input to the Parser. - """ - def get_doc(obj, indent=False): - doc = inspect.getdoc(obj) - if doc: - doc = ('r"""\n%s\n"""\n' % doc) - if indent: - doc = common.indent_block(doc) - return doc - return '' - - def is_in_base_classes(cls, name, comparison): - """ Base classes may contain the exact same object """ - if name in mixin_funcs: - return False - try: - mro = cls.mro() - except TypeError: - # this happens, if cls == type - return False - for base in mro[1:]: - try: - attr = getattr(base, name) - except AttributeError: - continue - if attr == comparison: - return True - return False - - def get_scope_objects(names): - """ - Looks for the names defined with dir() in an objects and divides - them into different object types. - """ - classes = {} - funcs = {} - stmts = {} - members = {} - for n in names: - try: - # this has a builtin_function_or_method - exe = getattr(scope, n) - except AttributeError: - # happens e.g. in properties of - # PyQt4.QtGui.QStyleOptionComboBox.currentText - # -> just set it to None - members[n] = None - else: - if inspect.isclass(scope): - if is_in_base_classes(scope, n, exe): - continue - if inspect.isbuiltin(exe) or inspect.ismethod(exe) \ - or inspect.ismethoddescriptor(exe): - funcs[n] = exe - elif inspect.isclass(exe) or inspect.ismodule(exe): - classes[n] = exe - elif inspect.ismemberdescriptor(exe): - members[n] = exe - else: - stmts[n] = exe - return classes, funcs, stmts, members - - code = '' - if inspect.ismodule(scope): # generate comment where the code's from. - try: - path = scope.__file__ - except AttributeError: - path = '?' - code += '# Generated module %s from %s\n' % (scope.__name__, path) - - code += get_doc(scope) - - # Remove some magic vars, (TODO why?) - names = set(dir(scope)) - set(['__file__', '__name__', '__doc__', - '__path__', '__package__']) - - classes, funcs, stmts, members = get_scope_objects(names) - - # classes - for name, cl in classes.items(): - bases = (c.__name__ for c in cl.__bases__) if inspect.isclass(cl) \ - else [] - code += 'class %s(%s):\n' % (name, ','.join(bases)) - if depth == 0: - try: - mixin = mixin_funcs[name] - except KeyError: - mixin = {} - cl_code = _generate_code(cl, mixin, depth + 1) - code += common.indent_block(cl_code) - code += '\n' - - # functions - for name, func in funcs.items(): - params, ret = _parse_function_doc(func) - if depth > 0: - params = 'self, ' + params - doc_str = get_doc(func, indent=True) - try: - mixin = mixin_funcs[name] - except KeyError: - # normal code generation - code += 'def %s(%s):\n' % (name, params) - code += doc_str - code += common.indent_block('%s\n\n' % ret) - else: - # generation of code with mixins - # the parser only supports basic functions with a newline after - # the double dots - # find doc_str place - try: - pos = re.search(r'\):\s*\n', mixin).end() - except TypeError: - # pypy uses a different reversed builtin - if name == 'reversed': - mixin = 'def reversed(sequence):\n' \ - ' for i in self.__sequence: yield i' - pos = 24 - else: - debug.warning('mixin trouble in pypy: %s', name) - raise - if pos is None: - raise Exception("Builtin function not parsed correctly") - code += mixin[:pos] + doc_str + mixin[pos:] - - # class members (functions) properties? - for name, func in members.items(): - # recursion problem in properties TODO remove - if name in ['fget', 'fset', 'fdel']: - continue - ret = 'pass' - code += '@property\ndef %s(self):\n' % (name) - code += common.indent_block(get_doc(func) + '%s\n\n' % ret) - - # variables - for name, value in stmts.items(): - if is_py3k: - file_type = io.TextIOWrapper - else: - file_type = types.FileType - if isinstance(value, file_type): - value = 'open()' - elif name == 'None': - value = '' - elif type(value).__name__ in ['int', 'bool', 'float', - 'dict', 'list', 'tuple']: - value = repr(value) - else: - # get the type, if the type is not simple. - mod = type(value).__module__ - value = type(value).__name__ + '()' - if mod != '__builtin__': - value = '%s.%s' % (mod, value) - code += '%s = %s\n' % (name, value) - - return code - - -def _parse_function_doc(func): - """ - Takes a function and returns the params and return value as a tuple. - This is nothing more than a docstring parser. - """ - # TODO: things like utime(path, (atime, mtime)) and a(b [, b]) -> None - doc = inspect.getdoc(func) - - if doc is None: - return '', 'pass' - - # get full string, parse round parentheses: def func(a, (b,c)) - try: - # unbound methods such as pyqtSignals have no __name__ - if not hasattr(func, "__name__"): - return '', 'pass' - count = 0 - debug.dbg(func, func.__name__, doc) - start = doc.index('(') - for i, s in enumerate(doc[start:]): - if s == '(': - count += 1 - elif s == ')': - count -= 1 - if count == 0: - end = start + i - break - param_str = doc[start + 1:end] - except (ValueError, UnboundLocalError): - # ValueError for doc.index - # UnboundLocalError for undefined end in last line - debug.dbg('no brackets found - no param') - end = 0 - param_str = '' - else: - # remove square brackets, that show an optional param ( = None) - def change_options(m): - args = m.group(1).split(',') - for i, a in enumerate(args): - if a and '=' not in a: - args[i] += '=None' - return ','.join(args) - - while True: - param_str, changes = re.subn(r' ?\[([^\[\]]+)\]', - change_options, param_str) - if changes == 0: - break - param_str = param_str.replace('-', '_') # see: isinstance.__doc__ - - # parse return value - r = re.search('-[>-]* ', doc[end:end + 7]) - if r is None: - ret = '' - else: - index = end + r.end() - # get result type, which can contain newlines - pattern = re.compile(r'(,\n|[^\n-])+') - ret_str = pattern.match(doc, index).group(0).strip() - # New object -> object() - ret_str = re.sub(r'[nN]ew (.*)', r'\1()', ret_str) - - ret = BuiltinModule.map_types.get(ret_str, ret_str) - if ret == ret_str and ret not in ['None', 'object', 'tuple', 'set']: - debug.dbg('not working', ret_str) - - ret = ('return ' if 'return' not in ret else '') + ret - return param_str, ret - - -class Builtin(object): - """ The builtin scope / module """ - # Python 3 compatibility - if is_py3k: - name = 'builtins' - else: - name = '__builtin__' - - @property - @cache.underscore_memoization - def builtin(self): - return BuiltinModule(name=self.name) - - @property - def scope(self): - return self.builtin.parser.module - - def magic_function_scope(self, evaluator): - try: - return self._magic_function_scope - except AttributeError: - # depth = 1 because this is not a module - class Container(object): - FunctionType = types.FunctionType - source = _generate_code(Container, depth=0) - parser = Parser(source, None) - module = parser.module - module.parent = self.scope - typ = evaluator.follow_path(iter(['FunctionType']), [module], module) - - s = self._magic_function_scope = typ.pop() - return s - - -Builtin = Builtin() diff --git a/test/test_builtin.py b/test/test_builtin.py deleted file mode 100644 index b9513c30..00000000 --- a/test/test_builtin.py +++ /dev/null @@ -1,13 +0,0 @@ -from jedi.evaluate import builtin - - -def test_parse_function_doc_illegal_docstr(): - - def test_func(a): - """ - test_func(o - - doesn't have a closing bracket. - """ - - assert ('', '') == builtin._parse_function_doc(test_func) diff --git a/test/test_compiled.py b/test/test_compiled.py index 88eb8ba9..bb5aa1c2 100644 --- a/test/test_compiled.py +++ b/test/test_compiled.py @@ -21,3 +21,12 @@ def test_fake_loading(): string = compiled.builtin.get_subscope_by_name('str') from_name = compiled._create_from_name(compiled.builtin, string, '__init__') assert isinstance(from_name, Function) + + +def test_parse_function_doc_illegal_docstr(): + docstr = """ + test_func(o + + doesn't have a closing bracket. + """ + assert ('', '') == compiled._parse_function_doc(docstr)