mirror of
https://github.com/davidhalter/jedi.git
synced 2025-12-06 14:04:26 +08:00
splitlines and source_to_unicode are utils of parso.
This commit is contained in:
@@ -16,6 +16,8 @@ import sys
|
||||
from parso.python import load_grammar
|
||||
from parso.python import tree
|
||||
from parso.python import parse
|
||||
from parso.utils import source_to_unicode, splitlines
|
||||
|
||||
from jedi.parser_utils import get_executable_nodes, get_statement_of_position
|
||||
from jedi import debug
|
||||
from jedi import settings
|
||||
@@ -108,8 +110,8 @@ class Script(object):
|
||||
with open(path, 'rb') as f:
|
||||
source = f.read()
|
||||
|
||||
self._source = common.source_to_unicode(source, encoding)
|
||||
self._code_lines = common.splitlines(self._source)
|
||||
self._source = source_to_unicode(source, encoding)
|
||||
self._code_lines = splitlines(self._source)
|
||||
line = max(len(self._code_lines), 1) if line is None else line
|
||||
if not (0 < line <= len(self._code_lines)):
|
||||
raise ValueError('`line` parameter is not in a valid range.')
|
||||
|
||||
@@ -4,13 +4,14 @@ Helpers for the API
|
||||
import re
|
||||
from collections import namedtuple
|
||||
|
||||
from jedi._compatibility import u
|
||||
from jedi.evaluate.helpers import evaluate_call_of_leaf
|
||||
from parso.python.parser import Parser
|
||||
from parso.python import tree
|
||||
from parso import tokenize
|
||||
from parso.utils import splitlines
|
||||
|
||||
from jedi._compatibility import u
|
||||
from jedi.evaluate.helpers import evaluate_call_of_leaf
|
||||
from jedi.cache import time_cache
|
||||
from jedi import common
|
||||
|
||||
|
||||
CompletionParts = namedtuple('CompletionParts', ['path', 'has_dot', 'name'])
|
||||
@@ -52,7 +53,7 @@ class OnErrorLeaf(Exception):
|
||||
|
||||
|
||||
def _is_on_comment(leaf, position):
|
||||
comment_lines = common.splitlines(leaf.prefix)
|
||||
comment_lines = splitlines(leaf.prefix)
|
||||
difference = leaf.start_pos[0] - position[0]
|
||||
prefix_start_pos = leaf.get_start_pos_of_prefix()
|
||||
if difference == 0:
|
||||
|
||||
@@ -2,10 +2,8 @@
|
||||
import sys
|
||||
import contextlib
|
||||
import functools
|
||||
import re
|
||||
from ast import literal_eval
|
||||
|
||||
from jedi._compatibility import unicode, reraise
|
||||
from jedi._compatibility import reraise
|
||||
from jedi import settings
|
||||
|
||||
|
||||
@@ -115,77 +113,6 @@ def ignored(*exceptions):
|
||||
pass
|
||||
|
||||
|
||||
def source_to_unicode(source, encoding=None):
    """
    Convert Python source code to unicode text.

    :param source: The source code. Unicode input is returned unchanged,
        anything else is decoded.
    :param encoding: Fallback encoding, used only when the source neither
        starts with a UTF-8 byte-order mark nor declares an encoding within
        its first two lines. ``None`` means ``'utf-8'``.
    :return: The decoded source. Bytes that cannot be decoded are
        substituted (``'replace'`` error handler) instead of raising.
    """
    def detect_encoding():
        """
        For the implementation of encoding definitions in Python, look at:
        - http://www.python.org/dev/peps/pep-0263/
        - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
        """
        # Evaluates to the three bytes of the UTF-8 byte-order mark.
        byte_mark = literal_eval(r"b'\xef\xbb\xbf'")
        if source.startswith(byte_mark):
            # UTF-8 byte-order mark
            return 'utf-8'

        # Only the first two lines are searched for a coding declaration.
        first_two_lines = re.match(br'(?:[^\n]*\n){0,2}', source).group(0)
        possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)",
                                      first_two_lines)
        if possible_encoding:
            return possible_encoding.group(1)
        else:
            # the default if nothing else has been set -> PEP 263
            return encoding if encoding is not None else 'utf-8'

    if isinstance(source, unicode):
        # only cast str/bytes
        return source

    encoding = detect_encoding()
    if not isinstance(encoding, unicode):
        # A declared encoding comes out of a bytes regex; turn its name
        # into text before using it as a codec name.
        encoding = unicode(encoding, 'utf-8', 'replace')

    try:
        # cast to unicode by default
        return unicode(source, encoding, 'replace')
    except LookupError:
        # The declared (or passed) codec name does not exist. Decoding is
        # best effort everywhere else in this function, so fall back to the
        # PEP 263 default instead of failing on a bogus declaration.
        return unicode(source, 'utf-8', 'replace')
|
||||
|
||||
|
||||
def splitlines(string, keepends=False):
    r"""
    A splitlines for Python code. In contrast to Python's ``str.splitlines``,
    looks at form feeds and other special characters as normal text. Just
    splits ``\n`` and ``\r\n``.
    Also different: Returns ``['']`` for an empty string input.

    In Python 2.7 form feeds are used as normal characters when using
    str.splitlines. However in Python 3 somewhere there was a decision to split
    also on form feeds.

    :param string: The text to split.
    :param keepends: If true, every line keeps its trailing ``\n`` or
        ``\r\n``; joining the result then reproduces ``string`` exactly.
    :return: A list of lines. It always has one more entry than there are
        line breaks, so text ending in a line break yields a trailing ``''``.
    """
    if not keepends:
        return re.split(r'\r?\n', string)

    # Both modes use the same pattern so that they always agree on where a
    # line ends. ``str.splitlines`` is avoided on purpose: it also breaks on
    # form feeds, vertical tabs, lone carriage returns and several other
    # separators.
    #
    # The capturing group makes ``re.split`` keep the separators, so the
    # result alternates text, separator, text, ..., text (odd length).
    pieces = re.split(r'(\r?\n)', string)
    lines = [pieces[i] + pieces[i + 1] for i in range(0, len(pieces) - 1, 2)]
    # Whatever follows the last line break (possibly '') is the final line.
    lines.append(pieces[-1])
    return lines
|
||||
|
||||
|
||||
def unite(iterable):
    """
    Turns a two dimensional array into a one dimensional.

    :param iterable: An iterable whose items are themselves iterables.
    :return: A ``set`` holding every inner element, so duplicates collapse
        and the original order is not kept.
    """
    return set(typ for types in iterable for typ in types)
|
||||
|
||||
@@ -16,14 +16,16 @@ import os
|
||||
import pkgutil
|
||||
import sys
|
||||
|
||||
from jedi._compatibility import find_module, unicode, ImplicitNSInfo
|
||||
from jedi import debug
|
||||
from jedi import settings
|
||||
from jedi.common import source_to_unicode, unite
|
||||
from parso.python import parse
|
||||
from parso.python import tree
|
||||
from parso.tree import search_ancestor
|
||||
from parso.cache import parser_cache
|
||||
from parso.utils import source_to_unicode
|
||||
|
||||
from jedi._compatibility import find_module, unicode, ImplicitNSInfo
|
||||
from jedi import debug
|
||||
from jedi import settings
|
||||
from jedi.common import unite
|
||||
from jedi.evaluate import sys_path
|
||||
from jedi.evaluate import helpers
|
||||
from jedi.evaluate import compiled
|
||||
|
||||
@@ -43,10 +43,11 @@ import imp
|
||||
import re
|
||||
from itertools import chain
|
||||
|
||||
from jedi._compatibility import use_metaclass
|
||||
from parso.python import tree
|
||||
from parso.utils import source_to_unicode
|
||||
|
||||
from jedi._compatibility import use_metaclass
|
||||
from jedi import debug
|
||||
from jedi import common
|
||||
from jedi.evaluate.cache import memoize_default, CachedMetaClass, NO_DEFAULT
|
||||
from jedi.evaluate import compiled
|
||||
from jedi.evaluate import recursion
|
||||
@@ -546,7 +547,7 @@ class ModuleContext(use_metaclass(CachedMetaClass, context.TreeContext)):
|
||||
init_path = self.py__file__()
|
||||
if os.path.basename(init_path) == '__init__.py':
|
||||
with open(init_path, 'rb') as f:
|
||||
content = common.source_to_unicode(f.read())
|
||||
content = source_to_unicode(f.read())
|
||||
# these are strings that need to be used for namespace packages,
|
||||
# the first one is ``pkgutil``, the second ``pkg_resources``.
|
||||
options = ('declare_namespace(__name__)', 'extend_path(__path__')
|
||||
|
||||
@@ -15,6 +15,7 @@ following functions (sometimes bug-prone):
|
||||
import difflib
|
||||
|
||||
from jedi import common
|
||||
from parso.utils import source_to_unicode, splitlines
|
||||
from jedi.evaluate import helpers
|
||||
|
||||
|
||||
@@ -82,7 +83,7 @@ def _rename(names, replace_str):
|
||||
with open(current_path) as f:
|
||||
source = f.read()
|
||||
|
||||
new_lines = common.splitlines(common.source_to_unicode(source))
|
||||
new_lines = splitlines(source_to_unicode(source))
|
||||
old_lines = new_lines[:]
|
||||
|
||||
nr, indent = name.line, name.column
|
||||
@@ -100,7 +101,7 @@ def extract(script, new_name):
|
||||
:type source: str
|
||||
:return: list of changed lines/changed files
|
||||
"""
|
||||
new_lines = common.splitlines(common.source_to_unicode(script.source))
|
||||
new_lines = splitlines(source_to_unicode(script.source))
|
||||
old_lines = new_lines[:]
|
||||
|
||||
user_stmt = script._parser.user_stmt()
|
||||
@@ -159,7 +160,7 @@ def inline(script):
|
||||
"""
|
||||
:type script: api.Script
|
||||
"""
|
||||
new_lines = common.splitlines(common.source_to_unicode(script.source))
|
||||
new_lines = splitlines(source_to_unicode(script.source))
|
||||
|
||||
dct = {}
|
||||
|
||||
|
||||
@@ -11,9 +11,10 @@ import re
|
||||
import os
|
||||
import sys
|
||||
|
||||
from parso.utils import splitlines
|
||||
|
||||
from jedi import Interpreter
|
||||
from jedi.api.helpers import get_on_completion_name
|
||||
from jedi import common
|
||||
|
||||
|
||||
READLINE_DEBUG = False
|
||||
@@ -85,7 +86,7 @@ def setup_readline(namespace_module=__main__):
|
||||
logging.debug("Start REPL completion: " + repr(text))
|
||||
interpreter = Interpreter(text, [namespace_module.__dict__])
|
||||
|
||||
lines = common.splitlines(text)
|
||||
lines = splitlines(text)
|
||||
position = (len(lines), len(lines[-1]))
|
||||
name = get_on_completion_name(
|
||||
interpreter._get_module_node(),
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
from jedi.common import splitlines
|
||||
|
||||
|
||||
def test_splitlines_no_keepends():
    """Without ``keepends`` only line breaks split; form feeds stay text."""
    assert splitlines('asd\r\n') == ['asd', '']
    assert splitlines('asd\r\n\f') == ['asd', '\f']
    assert splitlines('\fasd\r\n') == ['\fasd', '']
    # Empty input still yields one (empty) line.
    assert splitlines('') == ['']
    assert splitlines('\n') == ['', '']
|
||||
|
||||
|
||||
def test_splitlines_keepends():
    """With ``keepends`` the line breaks stay attached to their line."""
    assert splitlines('asd\r\n', keepends=True) == ['asd\r\n', '']
    assert splitlines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f']
    assert splitlines('\fasd\r\n', keepends=True) == ['\fasd\r\n', '']
    # Empty input still yields one (empty) line.
    assert splitlines('', keepends=True) == ['']
    assert splitlines('\n', keepends=True) == ['\n', '']
|
||||
@@ -179,15 +179,6 @@ class TestRegression(TestCase):
|
||||
else:
|
||||
assert n == limit
|
||||
|
||||
def test_source_to_unicode_unicode_text(self):
    """A declared ``fileencoding=utf-8`` decodes non-ASCII bytes as UTF-8."""
    source = (
        b"# vim: fileencoding=utf-8\n"
        b"# \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a\n"
    )
    actual = common.source_to_unicode(source)
    expected = source.decode('utf-8')
    assert actual == expected
|
||||
|
||||
|
||||
def test_loading_unicode_files_with_bad_global_charset(monkeypatch, tmpdir):
|
||||
dirname = str(tmpdir.mkdir('jedi-test'))
|
||||
|
||||
Reference in New Issue
Block a user