diff --git a/jedi/api/__init__.py b/jedi/api/__init__.py index 40d0dd55..bf105de9 100644 --- a/jedi/api/__init__.py +++ b/jedi/api/__init__.py @@ -16,6 +16,8 @@ import sys from parso.python import load_grammar from parso.python import tree from parso.python import parse +from parso.utils import source_to_unicode, splitlines + from jedi.parser_utils import get_executable_nodes, get_statement_of_position from jedi import debug from jedi import settings @@ -108,8 +110,8 @@ class Script(object): with open(path, 'rb') as f: source = f.read() - self._source = common.source_to_unicode(source, encoding) - self._code_lines = common.splitlines(self._source) + self._source = source_to_unicode(source, encoding) + self._code_lines = splitlines(self._source) line = max(len(self._code_lines), 1) if line is None else line if not (0 < line <= len(self._code_lines)): raise ValueError('`line` parameter is not in a valid range.') diff --git a/jedi/api/helpers.py b/jedi/api/helpers.py index dbb74adc..df8d35de 100644 --- a/jedi/api/helpers.py +++ b/jedi/api/helpers.py @@ -4,13 +4,14 @@ Helpers for the API import re from collections import namedtuple -from jedi._compatibility import u -from jedi.evaluate.helpers import evaluate_call_of_leaf from parso.python.parser import Parser from parso.python import tree from parso import tokenize +from parso.utils import splitlines + +from jedi._compatibility import u +from jedi.evaluate.helpers import evaluate_call_of_leaf from jedi.cache import time_cache -from jedi import common CompletionParts = namedtuple('CompletionParts', ['path', 'has_dot', 'name']) @@ -52,7 +53,7 @@ class OnErrorLeaf(Exception): def _is_on_comment(leaf, position): - comment_lines = common.splitlines(leaf.prefix) + comment_lines = splitlines(leaf.prefix) difference = leaf.start_pos[0] - position[0] prefix_start_pos = leaf.get_start_pos_of_prefix() if difference == 0: diff --git a/jedi/common.py b/jedi/common.py index 4f47a210..810ec127 100644 --- a/jedi/common.py +++ b/jedi/common.py @@ -2,10 +2,8 @@ import sys import contextlib import functools -import re -from ast import literal_eval -from jedi._compatibility import unicode, reraise +from jedi._compatibility import reraise from jedi import settings @@ -115,77 +113,6 @@ def ignored(*exceptions): pass -def source_to_unicode(source, encoding=None): - def detect_encoding(): - """ - For the implementation of encoding definitions in Python, look at: - - http://www.python.org/dev/peps/pep-0263/ - - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations - """ - byte_mark = literal_eval(r"b'\xef\xbb\xbf'") - if source.startswith(byte_mark): - # UTF-8 byte-order mark - return 'utf-8' - - first_two_lines = re.match(br'(?:[^\n]*\n){0,2}', source).group(0) - possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)", - first_two_lines) - if possible_encoding: - return possible_encoding.group(1) - else: - # the default if nothing else has been set -> PEP 263 - return encoding if encoding is not None else 'utf-8' - - if isinstance(source, unicode): - # only cast str/bytes - return source - - encoding = detect_encoding() - if not isinstance(encoding, unicode): - encoding = unicode(encoding, 'utf-8', 'replace') - # cast to unicode by default - return unicode(source, encoding, 'replace') - - -def splitlines(string, keepends=False): - """ - A splitlines for Python code. In contrast to Python's ``str.splitlines``, - looks at form feeds and other special characters as normal text. Just - splits ``\n`` and ``\r\n``. - Also different: Returns ``['']`` for an empty string input. - - In Python 2.7 form feeds are used as normal characters when using - str.splitlines. However in Python 3 somewhere there was a decision to split - also on form feeds. - """ - if keepends: - lst = string.splitlines(True) - - # We have to merge lines that were broken by form feed characters. - merge = [] - for i, line in enumerate(lst): - if line.endswith('\f'): - merge.append(i) - - for index in reversed(merge): - try: - lst[index] = lst[index] + lst[index + 1] - del lst[index + 1] - except IndexError: - # index + 1 can be empty and therefore there's no need to - # merge. - pass - - # The stdlib's implementation of the end is inconsistent when calling - # it with/without keepends. One time there's an empty string in the - # end, one time there's none. - if string.endswith('\n') or string == '': - lst.append('') - return lst - else: - return re.split('\n|\r\n', string) - - def unite(iterable): """Turns a two dimensional array into a one dimensional.""" return set(typ for types in iterable for typ in types) diff --git a/jedi/evaluate/imports.py b/jedi/evaluate/imports.py index 5ab9938b..ba92076b 100644 --- a/jedi/evaluate/imports.py +++ b/jedi/evaluate/imports.py @@ -16,14 +16,16 @@ import os import pkgutil import sys -from jedi._compatibility import find_module, unicode, ImplicitNSInfo -from jedi import debug -from jedi import settings -from jedi.common import source_to_unicode, unite from parso.python import parse from parso.python import tree from parso.tree import search_ancestor from parso.cache import parser_cache +from parso.utils import source_to_unicode + +from jedi._compatibility import find_module, unicode, ImplicitNSInfo +from jedi import debug +from jedi import settings +from jedi.common import unite from jedi.evaluate import sys_path from jedi.evaluate import helpers from jedi.evaluate import compiled diff --git a/jedi/evaluate/representation.py b/jedi/evaluate/representation.py index a3497278..fe8d5cfa 100644 --- a/jedi/evaluate/representation.py +++ b/jedi/evaluate/representation.py @@ -43,10 +43,11 @@ import imp import re from itertools import chain -from jedi._compatibility import use_metaclass from parso.python import tree +from parso.utils import source_to_unicode + +from jedi._compatibility import use_metaclass from jedi import debug -from jedi import common from jedi.evaluate.cache import memoize_default, CachedMetaClass, NO_DEFAULT from jedi.evaluate import compiled from jedi.evaluate import recursion @@ -546,7 +547,7 @@ class ModuleContext(use_metaclass(CachedMetaClass, context.TreeContext)): init_path = self.py__file__() if os.path.basename(init_path) == '__init__.py': with open(init_path, 'rb') as f: - content = common.source_to_unicode(f.read()) + content = source_to_unicode(f.read()) # these are strings that need to be used for namespace packages, # the first one is ``pkgutil``, the second ``pkg_resources``. options = ('declare_namespace(__name__)', 'extend_path(__path__') diff --git a/jedi/refactoring.py b/jedi/refactoring.py index 7db7da90..e81b0040 100644 --- a/jedi/refactoring.py +++ b/jedi/refactoring.py @@ -15,6 +15,7 @@ following functions (sometimes bug-prone): import difflib from jedi import common +from parso.utils import source_to_unicode, splitlines from jedi.evaluate import helpers @@ -82,7 +83,7 @@ def _rename(names, replace_str): with open(current_path) as f: source = f.read() - new_lines = common.splitlines(common.source_to_unicode(source)) + new_lines = splitlines(source_to_unicode(source)) old_lines = new_lines[:] nr, indent = name.line, name.column @@ -100,7 +101,7 @@ def extract(script, new_name): :type source: str :return: list of changed lines/changed files """ - new_lines = common.splitlines(common.source_to_unicode(script.source)) + new_lines = splitlines(source_to_unicode(script.source)) old_lines = new_lines[:] user_stmt = script._parser.user_stmt() @@ -159,7 +160,7 @@ def inline(script): """ :type script: api.Script """ - new_lines = common.splitlines(common.source_to_unicode(script.source)) + new_lines = splitlines(source_to_unicode(script.source)) dct = {} diff --git a/jedi/utils.py b/jedi/utils.py index a890dae0..655dc1ba 100644 --- a/jedi/utils.py +++ b/jedi/utils.py @@ -11,9 +11,10 @@ import re import os import sys +from parso.utils import splitlines + from jedi import Interpreter from jedi.api.helpers import get_on_completion_name -from jedi import common READLINE_DEBUG = False @@ -85,7 +86,7 @@ def setup_readline(namespace_module=__main__): logging.debug("Start REPL completion: " + repr(text)) interpreter = Interpreter(text, [namespace_module.__dict__]) - lines = common.splitlines(text) + lines = splitlines(text) position = (len(lines), len(lines[-1])) name = get_on_completion_name( interpreter._get_module_node(), diff --git a/test/test_common.py b/test/test_common.py deleted file mode 100644 index 217cdf52..00000000 --- a/test/test_common.py +++ /dev/null @@ -1,17 +0,0 @@ -from jedi.common import splitlines - - -def test_splitlines_no_keepends(): - assert splitlines('asd\r\n') == ['asd', ''] - assert splitlines('asd\r\n\f') == ['asd', '\f'] - assert splitlines('\fasd\r\n') == ['\fasd', ''] - assert splitlines('') == [''] - assert splitlines('\n') == ['', ''] - - -def test_splitlines_keepends(): - assert splitlines('asd\r\n', keepends=True) == ['asd\r\n', ''] - assert splitlines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f'] - assert splitlines('\fasd\r\n', keepends=True) == ['\fasd\r\n', ''] - assert splitlines('', keepends=True) == [''] - assert splitlines('\n', keepends=True) == ['\n', ''] diff --git a/test/test_regression.py b/test/test_regression.py index f32db6ca..32671b73 100644 --- a/test/test_regression.py +++ b/test/test_regression.py @@ -179,15 +179,6 @@ class TestRegression(TestCase): else: assert n == limit - def test_source_to_unicode_unicode_text(self): - source = ( - b"# vim: fileencoding=utf-8\n" - b"# \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a\n" - ) - actual = common.source_to_unicode(source) - expected = source.decode('utf-8') - assert actual == expected - def test_loading_unicode_files_with_bad_global_charset(monkeypatch, tmpdir): dirname = str(tmpdir.mkdir('jedi-test'))