
splitlines and source_to_unicode are utils of parso.

Author: Dave Halter
Date:   2017-05-20 09:55:16 -04:00
parent 0f4b7db56a
commit 50c7137437
9 changed files with 27 additions and 118 deletions
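
In short: jedi no longer ships its own splitlines() and source_to_unicode(); call sites now import both helpers directly from parso.utils. A minimal before/after sketch (the source bytes and variable names here are illustrative, not taken from the diff):

    # Before this commit the helpers lived in jedi/common.py:
    #     from jedi import common
    #     code_lines = common.splitlines(common.source_to_unicode(source))
    # After this commit the same helpers come from parso:
    from parso.utils import source_to_unicode, splitlines

    source = b"# -*- coding: utf-8 -*-\nprint('hi')\n"   # any source bytes
    code_lines = splitlines(source_to_unicode(source))
    line = max(len(code_lines), 1)   # mirrors the default-line handling in Script.__init__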


@@ -16,6 +16,8 @@ import sys
 from parso.python import load_grammar
 from parso.python import tree
 from parso.python import parse
+from parso.utils import source_to_unicode, splitlines
 from jedi.parser_utils import get_executable_nodes, get_statement_of_position
 from jedi import debug
 from jedi import settings
@@ -108,8 +110,8 @@ class Script(object):
             with open(path, 'rb') as f:
                 source = f.read()
-        self._source = common.source_to_unicode(source, encoding)
-        self._code_lines = common.splitlines(self._source)
+        self._source = source_to_unicode(source, encoding)
+        self._code_lines = splitlines(self._source)
         line = max(len(self._code_lines), 1) if line is None else line
         if not (0 < line <= len(self._code_lines)):
             raise ValueError('`line` parameter is not in a valid range.')


@@ -4,13 +4,14 @@ Helpers for the API
 import re
 from collections import namedtuple
-from jedi._compatibility import u
-from jedi.evaluate.helpers import evaluate_call_of_leaf
 from parso.python.parser import Parser
 from parso.python import tree
 from parso import tokenize
+from parso.utils import splitlines
+from jedi._compatibility import u
+from jedi.evaluate.helpers import evaluate_call_of_leaf
 from jedi.cache import time_cache
-from jedi import common
 CompletionParts = namedtuple('CompletionParts', ['path', 'has_dot', 'name'])
@@ -52,7 +53,7 @@ class OnErrorLeaf(Exception):
 def _is_on_comment(leaf, position):
-    comment_lines = common.splitlines(leaf.prefix)
+    comment_lines = splitlines(leaf.prefix)
     difference = leaf.start_pos[0] - position[0]
     prefix_start_pos = leaf.get_start_pos_of_prefix()
     if difference == 0:


@@ -2,10 +2,8 @@
 import sys
 import contextlib
 import functools
-import re
-from ast import literal_eval
-from jedi._compatibility import unicode, reraise
+from jedi._compatibility import reraise
 from jedi import settings
@@ -115,77 +113,6 @@ def ignored(*exceptions):
         pass
-
-
-def source_to_unicode(source, encoding=None):
-    def detect_encoding():
-        """
-        For the implementation of encoding definitions in Python, look at:
-        - http://www.python.org/dev/peps/pep-0263/
-        - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
-        """
-        byte_mark = literal_eval(r"b'\xef\xbb\xbf'")
-        if source.startswith(byte_mark):
-            # UTF-8 byte-order mark
-            return 'utf-8'
-
-        first_two_lines = re.match(br'(?:[^\n]*\n){0,2}', source).group(0)
-        possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)",
-                                      first_two_lines)
-        if possible_encoding:
-            return possible_encoding.group(1)
-        else:
-            # the default if nothing else has been set -> PEP 263
-            return encoding if encoding is not None else 'utf-8'
-
-    if isinstance(source, unicode):
-        # only cast str/bytes
-        return source
-
-    encoding = detect_encoding()
-    if not isinstance(encoding, unicode):
-        encoding = unicode(encoding, 'utf-8', 'replace')
-    # cast to unicode by default
-    return unicode(source, encoding, 'replace')
-
-
-def splitlines(string, keepends=False):
-    """
-    A splitlines for Python code. In contrast to Python's ``str.splitlines``,
-    looks at form feeds and other special characters as normal text. Just
-    splits ``\n`` and ``\r\n``.
-    Also different: Returns ``['']`` for an empty string input.
-
-    In Python 2.7 form feeds are used as normal characters when using
-    str.splitlines. However in Python 3 somewhere there was a decision to split
-    also on form feeds.
-    """
-    if keepends:
-        lst = string.splitlines(True)
-
-        # We have to merge lines that were broken by form feed characters.
-        merge = []
-        for i, line in enumerate(lst):
-            if line.endswith('\f'):
-                merge.append(i)
-
-        for index in reversed(merge):
-            try:
-                lst[index] = lst[index] + lst[index + 1]
-                del lst[index + 1]
-            except IndexError:
-                # index + 1 can be empty and therefore there's no need to
-                # merge.
-                pass
-
-        # The stdlib's implementation of the end is inconsistent when calling
-        # it with/without keepends. One time there's an empty string in the
-        # end, one time there's none.
-        if string.endswith('\n') or string == '':
-            lst.append('')
-        return lst
-    else:
-        return re.split('\n|\r\n', string)
-
-
 def unite(iterable):
     """Turns a two dimensional array into a one dimensional."""
     return set(typ for types in iterable for typ in types)
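
The two functions removed above are exactly the ones this commit delegates to parso. A short sketch of their documented behaviour, exercised through the new import location (this assumes parso.utils keeps the same semantics as the deleted jedi.common code, which is what the commit message implies):

    from parso.utils import source_to_unicode, splitlines

    # source_to_unicode() decodes bytes via a UTF-8 BOM or a PEP 263 coding
    # declaration, falling back to utf-8.
    text = source_to_unicode(b"# -*- coding: utf-8 -*-\nprint('ok')\n")

    # splitlines() only splits on '\n' and '\r\n'; form feeds stay inside the
    # line and the empty string yields [''] (these cases come from the deleted tests).
    assert splitlines('asd\r\n\f') == ['asd', '\f']
    assert splitlines('') == ['']
    # keepends=True keeps the line endings and still appends a trailing '':
    assert splitlines(text, keepends=True) == ["# -*- coding: utf-8 -*-\n", "print('ok')\n", '']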


@@ -16,14 +16,16 @@ import os
 import pkgutil
 import sys
-from jedi._compatibility import find_module, unicode, ImplicitNSInfo
-from jedi import debug
-from jedi import settings
-from jedi.common import source_to_unicode, unite
 from parso.python import parse
 from parso.python import tree
 from parso.tree import search_ancestor
 from parso.cache import parser_cache
+from parso.utils import source_to_unicode
+from jedi._compatibility import find_module, unicode, ImplicitNSInfo
+from jedi import debug
+from jedi import settings
+from jedi.common import unite
 from jedi.evaluate import sys_path
 from jedi.evaluate import helpers
 from jedi.evaluate import compiled


@@ -43,10 +43,11 @@ import imp
 import re
 from itertools import chain
-from jedi._compatibility import use_metaclass
 from parso.python import tree
+from parso.utils import source_to_unicode
+from jedi._compatibility import use_metaclass
 from jedi import debug
-from jedi import common
 from jedi.evaluate.cache import memoize_default, CachedMetaClass, NO_DEFAULT
 from jedi.evaluate import compiled
 from jedi.evaluate import recursion
@@ -546,7 +547,7 @@ class ModuleContext(use_metaclass(CachedMetaClass, context.TreeContext)):
         init_path = self.py__file__()
         if os.path.basename(init_path) == '__init__.py':
             with open(init_path, 'rb') as f:
-                content = common.source_to_unicode(f.read())
+                content = source_to_unicode(f.read())
                 # these are strings that need to be used for namespace packages,
                 # the first one is ``pkgutil``, the second ``pkg_resources``.
                 options = ('declare_namespace(__name__)', 'extend_path(__path__')


@@ -15,6 +15,7 @@ following functions (sometimes bug-prone):
 import difflib
 from jedi import common
+from parso.utils import source_to_unicode, splitlines
 from jedi.evaluate import helpers
@@ -82,7 +83,7 @@ def _rename(names, replace_str):
         with open(current_path) as f:
             source = f.read()
-        new_lines = common.splitlines(common.source_to_unicode(source))
+        new_lines = splitlines(source_to_unicode(source))
         old_lines = new_lines[:]
         nr, indent = name.line, name.column
@@ -100,7 +101,7 @@ def extract(script, new_name):
     :type source: str
     :return: list of changed lines/changed files
     """
-    new_lines = common.splitlines(common.source_to_unicode(script.source))
+    new_lines = splitlines(source_to_unicode(script.source))
     old_lines = new_lines[:]
     user_stmt = script._parser.user_stmt()
@@ -159,7 +160,7 @@ def inline(script):
""" """
:type script: api.Script :type script: api.Script
""" """
new_lines = common.splitlines(common.source_to_unicode(script.source)) new_lines = splitlines(source_to_unicode(script.source))
dct = {} dct = {}


@@ -11,9 +11,10 @@ import re
 import os
 import sys
+from parso.utils import splitlines
 from jedi import Interpreter
 from jedi.api.helpers import get_on_completion_name
-from jedi import common
 READLINE_DEBUG = False
@@ -85,7 +86,7 @@ def setup_readline(namespace_module=__main__):
                 logging.debug("Start REPL completion: " + repr(text))
                 interpreter = Interpreter(text, [namespace_module.__dict__])
-                lines = common.splitlines(text)
+                lines = splitlines(text)
                 position = (len(lines), len(lines[-1]))
                 name = get_on_completion_name(
                     interpreter._get_module_node(),


@@ -1,17 +0,0 @@
-from jedi.common import splitlines
-
-
-def test_splitlines_no_keepends():
-    assert splitlines('asd\r\n') == ['asd', '']
-    assert splitlines('asd\r\n\f') == ['asd', '\f']
-    assert splitlines('\fasd\r\n') == ['\fasd', '']
-    assert splitlines('') == ['']
-    assert splitlines('\n') == ['', '']
-
-
-def test_splitlines_keepends():
-    assert splitlines('asd\r\n', keepends=True) == ['asd\r\n', '']
-    assert splitlines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f']
-    assert splitlines('\fasd\r\n', keepends=True) == ['\fasd\r\n', '']
-    assert splitlines('', keepends=True) == ['']
-    assert splitlines('\n', keepends=True) == ['\n', '']


@@ -179,15 +179,6 @@ class TestRegression(TestCase):
         else:
             assert n == limit
 
-    def test_source_to_unicode_unicode_text(self):
-        source = (
-            b"# vim: fileencoding=utf-8\n"
-            b"# \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a\n"
-        )
-        actual = common.source_to_unicode(source)
-        expected = source.decode('utf-8')
-        assert actual == expected
-
 
 def test_loading_unicode_files_with_bad_global_charset(monkeypatch, tmpdir):
     dirname = str(tmpdir.mkdir('jedi-test'))