splitlines and source_to_unicode are utils of parso.

This commit is contained in:
Dave Halter
2017-05-20 09:55:16 -04:00
parent 0f4b7db56a
commit 50c7137437
9 changed files with 27 additions and 118 deletions

View File

@@ -16,6 +16,8 @@ import sys
from parso.python import load_grammar
from parso.python import tree
from parso.python import parse
from parso.utils import source_to_unicode, splitlines
from jedi.parser_utils import get_executable_nodes, get_statement_of_position
from jedi import debug
from jedi import settings
@@ -108,8 +110,8 @@ class Script(object):
with open(path, 'rb') as f:
source = f.read()
self._source = common.source_to_unicode(source, encoding)
self._code_lines = common.splitlines(self._source)
self._source = source_to_unicode(source, encoding)
self._code_lines = splitlines(self._source)
line = max(len(self._code_lines), 1) if line is None else line
if not (0 < line <= len(self._code_lines)):
raise ValueError('`line` parameter is not in a valid range.')

View File

@@ -4,13 +4,14 @@ Helpers for the API
import re
from collections import namedtuple
from jedi._compatibility import u
from jedi.evaluate.helpers import evaluate_call_of_leaf
from parso.python.parser import Parser
from parso.python import tree
from parso import tokenize
from parso.utils import splitlines
from jedi._compatibility import u
from jedi.evaluate.helpers import evaluate_call_of_leaf
from jedi.cache import time_cache
from jedi import common
CompletionParts = namedtuple('CompletionParts', ['path', 'has_dot', 'name'])
@@ -52,7 +53,7 @@ class OnErrorLeaf(Exception):
def _is_on_comment(leaf, position):
comment_lines = common.splitlines(leaf.prefix)
comment_lines = splitlines(leaf.prefix)
difference = leaf.start_pos[0] - position[0]
prefix_start_pos = leaf.get_start_pos_of_prefix()
if difference == 0:

View File

@@ -2,10 +2,8 @@
import sys
import contextlib
import functools
import re
from ast import literal_eval
from jedi._compatibility import unicode, reraise
from jedi._compatibility import reraise
from jedi import settings
@@ -115,77 +113,6 @@ def ignored(*exceptions):
pass
def source_to_unicode(source, encoding=None):
    """
    Cast ``source`` to ``unicode``, decoding bytes with a detected encoding.

    Decoding honours (in order): a UTF-8 byte-order mark, a PEP 263 coding
    declaration in the first two lines, the ``encoding`` argument, and
    finally UTF-8. Undecodable bytes are replaced instead of raising.
    """
    if isinstance(source, unicode):
        # Already text -- nothing to decode.
        return source

    def detected_encoding():
        # A UTF-8 byte-order mark wins over everything else.
        byte_mark = literal_eval(r"b'\xef\xbb\xbf'")
        if source.startswith(byte_mark):
            return 'utf-8'
        # PEP 263: a coding declaration may appear in the first two lines.
        head = re.match(br'(?:[^\n]*\n){0,2}', source).group(0)
        declaration = re.search(br"coding[=:]\s*([-\w.]+)", head)
        if declaration:
            return declaration.group(1)
        # Fall back to the caller's hint, then to the PEP 263 default.
        return 'utf-8' if encoding is None else encoding

    codec = detected_encoding()
    if not isinstance(codec, unicode):
        # The declaration regex yields bytes; the codec name must be text.
        codec = unicode(codec, 'utf-8', 'replace')
    return unicode(source, codec, 'replace')
def splitlines(string, keepends=False):
    r"""
    A splitlines for Python code. In contrast to Python's ``str.splitlines``,
    looks at form feeds and other special characters as normal text. Just
    splits ``\n`` and ``\r\n``.
    Also different: Returns ``['']`` for an empty string input.

    :param string: The code to split.
    :param keepends: If true, the line terminators are kept on the lines,
        mirroring ``str.splitlines(True)``.
    :return: List of lines; a trailing ``\n`` (or empty input) produces a
        final empty string, so the result always has ``len(lines)`` rows
        addressable by 1-based line numbers.
    """
    if keepends:
        # Match either a line terminated by '\n' (a preceding '\r' stays
        # inside '[^\n]*', so '\r\n' is kept together) or a final chunk
        # with no newline. Unlike str.splitlines(True), this never splits
        # on \v, \f, \x1c-\x1e, \x85, \u2028 or \u2029 -- the old
        # implementation only merged '\f' back and therefore still split
        # on the other special characters, contradicting the docstring
        # and the non-keepends branch.
        lst = re.findall(r'[^\n]*\n|[^\n]+', string)
        # str.splitlines is inconsistent about a trailing terminator;
        # normalize so a trailing '\n' (and the empty string) yields a
        # final empty line, matching the non-keepends branch.
        if string == '' or string.endswith('\n'):
            lst.append('')
        return lst
    else:
        # '\n' is tried first but cannot match the '\r' of '\r\n', so
        # '\r\n' is consumed as one separator.
        return re.split('\n|\r\n', string)
def unite(iterable):
    """Turns a two dimensional array into a one dimensional."""
    return {element for group in iterable for element in group}

View File

@@ -16,14 +16,16 @@ import os
import pkgutil
import sys
from jedi._compatibility import find_module, unicode, ImplicitNSInfo
from jedi import debug
from jedi import settings
from jedi.common import source_to_unicode, unite
from parso.python import parse
from parso.python import tree
from parso.tree import search_ancestor
from parso.cache import parser_cache
from parso.utils import source_to_unicode
from jedi._compatibility import find_module, unicode, ImplicitNSInfo
from jedi import debug
from jedi import settings
from jedi.common import unite
from jedi.evaluate import sys_path
from jedi.evaluate import helpers
from jedi.evaluate import compiled

View File

@@ -43,10 +43,11 @@ import imp
import re
from itertools import chain
from jedi._compatibility import use_metaclass
from parso.python import tree
from parso.utils import source_to_unicode
from jedi._compatibility import use_metaclass
from jedi import debug
from jedi import common
from jedi.evaluate.cache import memoize_default, CachedMetaClass, NO_DEFAULT
from jedi.evaluate import compiled
from jedi.evaluate import recursion
@@ -546,7 +547,7 @@ class ModuleContext(use_metaclass(CachedMetaClass, context.TreeContext)):
init_path = self.py__file__()
if os.path.basename(init_path) == '__init__.py':
with open(init_path, 'rb') as f:
content = common.source_to_unicode(f.read())
content = source_to_unicode(f.read())
# these are strings that need to be used for namespace packages,
# the first one is ``pkgutil``, the second ``pkg_resources``.
options = ('declare_namespace(__name__)', 'extend_path(__path__')

View File

@@ -15,6 +15,7 @@ following functions (sometimes bug-prone):
import difflib
from jedi import common
from parso.utils import source_to_unicode, splitlines
from jedi.evaluate import helpers
@@ -82,7 +83,7 @@ def _rename(names, replace_str):
with open(current_path) as f:
source = f.read()
new_lines = common.splitlines(common.source_to_unicode(source))
new_lines = splitlines(source_to_unicode(source))
old_lines = new_lines[:]
nr, indent = name.line, name.column
@@ -100,7 +101,7 @@ def extract(script, new_name):
:type source: str
:return: list of changed lines/changed files
"""
new_lines = common.splitlines(common.source_to_unicode(script.source))
new_lines = splitlines(source_to_unicode(script.source))
old_lines = new_lines[:]
user_stmt = script._parser.user_stmt()
@@ -159,7 +160,7 @@ def inline(script):
"""
:type script: api.Script
"""
new_lines = common.splitlines(common.source_to_unicode(script.source))
new_lines = splitlines(source_to_unicode(script.source))
dct = {}

View File

@@ -11,9 +11,10 @@ import re
import os
import sys
from parso.utils import splitlines
from jedi import Interpreter
from jedi.api.helpers import get_on_completion_name
from jedi import common
READLINE_DEBUG = False
@@ -85,7 +86,7 @@ def setup_readline(namespace_module=__main__):
logging.debug("Start REPL completion: " + repr(text))
interpreter = Interpreter(text, [namespace_module.__dict__])
lines = common.splitlines(text)
lines = splitlines(text)
position = (len(lines), len(lines[-1]))
name = get_on_completion_name(
interpreter._get_module_node(),

View File

@@ -1,17 +0,0 @@
from jedi.common import splitlines
def test_splitlines_no_keepends():
    # Each input maps to the exact line list expected without terminators.
    cases = [
        ('asd\r\n', ['asd', '']),
        ('asd\r\n\f', ['asd', '\f']),
        ('\fasd\r\n', ['\fasd', '']),
        ('', ['']),
        ('\n', ['', '']),
    ]
    for text, expected in cases:
        assert splitlines(text) == expected
def test_splitlines_keepends():
    # Same inputs as the no-keepends test, but terminators stay attached.
    cases = [
        ('asd\r\n', ['asd\r\n', '']),
        ('asd\r\n\f', ['asd\r\n', '\f']),
        ('\fasd\r\n', ['\fasd\r\n', '']),
        ('', ['']),
        ('\n', ['\n', '']),
    ]
    for text, expected in cases:
        assert splitlines(text, keepends=True) == expected

View File

@@ -179,15 +179,6 @@ class TestRegression(TestCase):
else:
assert n == limit
def test_source_to_unicode_unicode_text(self):
    # UTF-8 bytes with a vim modeline encoding declaration must decode
    # to exactly what a plain UTF-8 decode produces.
    source = (
        b"# vim: fileencoding=utf-8\n"
        b"# \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a\n"
    )
    assert common.source_to_unicode(source) == source.decode('utf-8')
def test_loading_unicode_files_with_bad_global_charset(monkeypatch, tmpdir):
dirname = str(tmpdir.mkdir('jedi-test'))