jedi-fork/jedi/modules.py

"""
Don't confuse these classes with :mod:`parsing_representation` modules, the
modules here can access these representation with ``module.parser.module``.
``Module`` exists mainly for caching purposes.

Basically :mod:`modules` offers the classes:

- ``CachedModule``, a base class for Cachedmodule.
- ``Module`` the class for all normal Python modules (not builtins, they are at
  home at :mod:`builtin`).
- ``ModuleWithCursor``, holds the module information for :class:`api.Script`.

Apart from those classes there's a ``sys.path`` fetching function, as well as
`Virtual Env` and `Django` detection.
"""
from __future__ import with_statement

import re
import sys
import os
from ast import literal_eval

from jedi._compatibility import unicode
from jedi import cache
from jedi.parser import tokenizer as tokenize
from jedi.parser import fast
from jedi import debug


def load_module(path=None, source=None, name=None):
    def load(source):
        if path is not None and path.endswith('.py'):
            if source is None:
                with open(path) as f:
                    source = f.read()
        else:
            # TODO refactoring remove
            from jedi.evaluate import builtin
            return builtin.BuiltinModule(path, name).parser.module
        p = path or name
        p = fast.FastParser(source_to_unicode(source), p)
        cache.save_parser(path, name, p)
        return p.module

    cached = cache.load_parser(path, name)
    return load(source) if cached is None else cached.module


class ModuleWithCursor(object):
    """
    Manages all files, that are parsed and caches them.
    Important are the params source and path, one of them has to
    be there.

    :param source: The source code of the file.
    :param path: The module path of the file or None.
    :param position: The position, the user is currently in. Only important \
    for the main file.
    """
    def __init__(self, path, source, position):
        super(ModuleWithCursor, self).__init__()
        self.path = path and os.path.abspath(path)
        self.name = None
        self.source = source
        self.position = position
        self._path_until_cursor = None
        self._line_cache = None

        # this two are only used, because there is no nonlocal in Python 2
        self._line_temp = None
        self._relevant_temp = None

    def get_path_until_cursor(self):
        """ Get the path under the cursor. """
        if self._path_until_cursor is None:  # small caching
            self._path_until_cursor, self._start_cursor_pos = \
                self._get_path_until_cursor(self.position)
        return self._path_until_cursor

    def _get_path_until_cursor(self, start_pos=None):
        def fetch_line():
            if self._is_first:
                self._is_first = False
                self._line_length = self._column_temp
                line = self._first_line
            else:
                line = self.get_line(self._line_temp)
                self._line_length = len(line)
                line = line + '\n'
            # add lines with a backslash at the end
            while True:
                self._line_temp -= 1
                last_line = self.get_line(self._line_temp)
                #print self._line_temp, repr(last_line)
                if last_line and last_line[-1] == '\\':
                    line = last_line[:-1] + ' ' + line
                    self._line_length = len(last_line)
                else:
                    break
            return line[::-1]

        self._is_first = True
        self._line_temp, self._column_temp = start_cursor = start_pos
        self._first_line = self.get_line(self._line_temp)[:self._column_temp]

        open_brackets = ['(', '[', '{']
        close_brackets = [')', ']', '}']

        gen = tokenize.generate_tokens(fetch_line)
        string = ''
        level = 0
        force_point = False
        last_type = None
        try:
            for token_type, tok, start, end, line in gen:
                # print 'tok', token_type, tok, force_point
                if last_type == token_type == tokenize.NAME:
                    string += ' '

                if level > 0:
                    if tok in close_brackets:
                        level += 1
                    if tok in open_brackets:
                        level -= 1
                elif tok == '.':
                    force_point = False
                elif force_point:
                    # it is reversed, therefore a number is getting recognized
                    # as a floating point number
                    if token_type == tokenize.NUMBER and tok[0] == '.':
                        force_point = False
                    else:
                        break
                elif tok in close_brackets:
                    level += 1
                elif token_type in [tokenize.NAME, tokenize.STRING]:
                    force_point = True
                elif token_type == tokenize.NUMBER:
                    pass
                else:
                    self._column_temp = self._line_length - end[1]
                    break

                x = start_pos[0] - end[0] + 1
                l = self.get_line(x)
                l = self._first_line if x == start_pos[0] else l
                start_cursor = x, len(l) - end[1]
                self._column_temp = self._line_length - end[1]
                string += tok
                last_type = token_type
        except tokenize.TokenError:
            debug.warning("Tokenize couldn't finish", sys.exc_info)

        # string can still contain spaces at the end
        return string[::-1].strip(), start_cursor

    def get_path_under_cursor(self):
        """
        Return the path under the cursor. If there is a rest of the path left,
        it will be added to the stuff before it.
        """
        return self.get_path_until_cursor() + self.get_path_after_cursor()

    def get_path_after_cursor(self):
        line = self.get_line(self.position[0])
        return re.search("[\w\d]*", line[self.position[1]:]).group(0)

    def get_operator_under_cursor(self):
        line = self.get_line(self.position[0])
        after = re.match("[^\w\s]+", line[self.position[1]:])
        before = re.match("[^\w\s]+", line[:self.position[1]][::-1])
        return (before.group(0) if before is not None else '') \
            + (after.group(0) if after is not None else '')

    def get_context(self, yield_positions=False):
        pos = self._start_cursor_pos
        while True:
            # remove non important white space
            line = self.get_line(pos[0])
            while True:
                if pos[1] == 0:
                    line = self.get_line(pos[0] - 1)
                    if line and line[-1] == '\\':
                        pos = pos[0] - 1, len(line) - 1
                        continue
                    else:
                        break

                if line[pos[1] - 1].isspace():
                    pos = pos[0], pos[1] - 1
                else:
                    break

            try:
                result, pos = self._get_path_until_cursor(start_pos=pos)
                if yield_positions:
                    yield pos
                else:
                    yield result
            except StopIteration:
                if yield_positions:
                    yield None
                else:
                    yield ''

    def get_line(self, line_nr):
        if not self._line_cache:
            self._line_cache = self.source.splitlines()
            if self.source:
                if self.source[-1] == '\n':
                    self._line_cache.append('')
            else:  # ''.splitlines() == []
                self._line_cache = ['']

        if line_nr == 0:
            # This is a fix for the zeroth line. We need a newline there, for
            # the backwards parser.
            return ''
        if line_nr < 0:
            raise StopIteration()
        try:
            return self._line_cache[line_nr - 1]
        except IndexError:
            raise StopIteration()

    def get_position_line(self):
        return self.get_line(self.position[0])[:self.position[1]]
def source_to_unicode(source, encoding=None):
    def detect_encoding():
        """ For the implementation of encoding definitions in Python, look at:
        http://www.python.org/dev/peps/pep-0263/
        http://docs.python.org/2/reference/lexical_analysis.html#encoding-\
                                                                declarations
        """
        byte_mark = literal_eval(r"b'\xef\xbb\xbf'")
        if source.startswith(byte_mark):
            # UTF-8 byte-order mark
            return 'utf-8'

        first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', str(source)).group(0)
        possible_encoding = re.search(r"coding[=:]\s*([-\w.]+)",
                                      first_two_lines)
        if possible_encoding:
            return possible_encoding.group(1)
        else:
            # the default if nothing else has been set -> PEP 263
            return encoding if encoding is not None else 'iso-8859-1'

    if isinstance(source, unicode):
        # only cast str/bytes
        return source

    # cast to unicode by default
    return unicode(source, detect_encoding(), 'replace')