splitlines and source_to_unicode are utils of parso.

This commit is contained in:
Dave Halter
2017-05-20 09:55:16 -04:00
parent 0f4b7db56a
commit 50c7137437
9 changed files with 27 additions and 118 deletions

View File

@@ -16,6 +16,8 @@ import sys
from parso.python import load_grammar
from parso.python import tree
from parso.python import parse
from parso.utils import source_to_unicode, splitlines
from jedi.parser_utils import get_executable_nodes, get_statement_of_position
from jedi import debug
from jedi import settings
@@ -108,8 +110,8 @@ class Script(object):
with open(path, 'rb') as f:
source = f.read()
self._source = common.source_to_unicode(source, encoding)
self._code_lines = common.splitlines(self._source)
self._source = source_to_unicode(source, encoding)
self._code_lines = splitlines(self._source)
line = max(len(self._code_lines), 1) if line is None else line
if not (0 < line <= len(self._code_lines)):
raise ValueError('`line` parameter is not in a valid range.')

View File

@@ -4,13 +4,14 @@ Helpers for the API
import re
from collections import namedtuple
from jedi._compatibility import u
from jedi.evaluate.helpers import evaluate_call_of_leaf
from parso.python.parser import Parser
from parso.python import tree
from parso import tokenize
from parso.utils import splitlines
from jedi._compatibility import u
from jedi.evaluate.helpers import evaluate_call_of_leaf
from jedi.cache import time_cache
from jedi import common
CompletionParts = namedtuple('CompletionParts', ['path', 'has_dot', 'name'])
@@ -52,7 +53,7 @@ class OnErrorLeaf(Exception):
def _is_on_comment(leaf, position):
comment_lines = common.splitlines(leaf.prefix)
comment_lines = splitlines(leaf.prefix)
difference = leaf.start_pos[0] - position[0]
prefix_start_pos = leaf.get_start_pos_of_prefix()
if difference == 0:

View File

@@ -2,10 +2,8 @@
import sys
import contextlib
import functools
import re
from ast import literal_eval
from jedi._compatibility import unicode, reraise
from jedi._compatibility import reraise
from jedi import settings
@@ -115,77 +113,6 @@ def ignored(*exceptions):
pass
def source_to_unicode(source, encoding=None):
    """
    Cast ``source`` to ``unicode``, decoding bytes with a detected encoding.

    Decoding honours (in order): a UTF-8 byte-order mark, a PEP 263 coding
    declaration in the first two lines, the ``encoding`` argument, and
    finally UTF-8. Undecodable bytes are replaced instead of raising.
    """
    if isinstance(source, unicode):
        # Already text -- nothing to decode.
        return source

    def detected_encoding():
        # A UTF-8 byte-order mark wins over everything else.
        byte_mark = literal_eval(r"b'\xef\xbb\xbf'")
        if source.startswith(byte_mark):
            return 'utf-8'
        # PEP 263: a coding declaration may appear in the first two lines.
        head = re.match(br'(?:[^\n]*\n){0,2}', source).group(0)
        declaration = re.search(br"coding[=:]\s*([-\w.]+)", head)
        if declaration:
            return declaration.group(1)
        # Fall back to the caller's hint, then to the PEP 263 default.
        return 'utf-8' if encoding is None else encoding

    codec = detected_encoding()
    if not isinstance(codec, unicode):
        # The declaration regex yields bytes; the codec name must be text.
        codec = unicode(codec, 'utf-8', 'replace')
    return unicode(source, codec, 'replace')
def splitlines(string, keepends=False):
    r"""
    A splitlines for Python code. In contrast to Python's ``str.splitlines``,
    looks at form feeds and other special characters as normal text. Just
    splits ``\n`` and ``\r\n``.
    Also different: Returns ``['']`` for an empty string input.

    :param string: The code to split.
    :param keepends: If true, the line terminators are kept on the lines,
        mirroring ``str.splitlines(True)``.
    :return: List of lines; a trailing ``\n`` (or empty input) produces a
        final empty string, so the result always has ``len(lines)`` rows
        addressable by 1-based line numbers.
    """
    if keepends:
        # Match either a line terminated by '\n' (a preceding '\r' stays
        # inside '[^\n]*', so '\r\n' is kept together) or a final chunk
        # with no newline. Unlike str.splitlines(True), this never splits
        # on \v, \f, \x1c-\x1e, \x85, \u2028 or \u2029 -- the old
        # implementation only merged '\f' back and therefore still split
        # on the other special characters, contradicting the docstring
        # and the non-keepends branch.
        lst = re.findall(r'[^\n]*\n|[^\n]+', string)
        # str.splitlines is inconsistent about a trailing terminator;
        # normalize so a trailing '\n' (and the empty string) yields a
        # final empty line, matching the non-keepends branch.
        if string == '' or string.endswith('\n'):
            lst.append('')
        return lst
    else:
        # '\n' is tried first but cannot match the '\r' of '\r\n', so
        # '\r\n' is consumed as one separator.
        return re.split('\n|\r\n', string)
def unite(iterable):
    """Turns a two dimensional array into a one dimensional."""
    return {element for group in iterable for element in group}

View File

@@ -16,14 +16,16 @@ import os
import pkgutil
import sys
from jedi._compatibility import find_module, unicode, ImplicitNSInfo
from jedi import debug
from jedi import settings
from jedi.common import source_to_unicode, unite
from parso.python import parse
from parso.python import tree
from parso.tree import search_ancestor
from parso.cache import parser_cache
from parso.utils import source_to_unicode
from jedi._compatibility import find_module, unicode, ImplicitNSInfo
from jedi import debug
from jedi import settings
from jedi.common import unite
from jedi.evaluate import sys_path
from jedi.evaluate import helpers
from jedi.evaluate import compiled

View File

@@ -43,10 +43,11 @@ import imp
import re
from itertools import chain
from jedi._compatibility import use_metaclass
from parso.python import tree
from parso.utils import source_to_unicode
from jedi._compatibility import use_metaclass
from jedi import debug
from jedi import common
from jedi.evaluate.cache import memoize_default, CachedMetaClass, NO_DEFAULT
from jedi.evaluate import compiled
from jedi.evaluate import recursion
@@ -546,7 +547,7 @@ class ModuleContext(use_metaclass(CachedMetaClass, context.TreeContext)):
init_path = self.py__file__()
if os.path.basename(init_path) == '__init__.py':
with open(init_path, 'rb') as f:
content = common.source_to_unicode(f.read())
content = source_to_unicode(f.read())
# these are strings that need to be used for namespace packages,
# the first one is ``pkgutil``, the second ``pkg_resources``.
options = ('declare_namespace(__name__)', 'extend_path(__path__')

View File

@@ -15,6 +15,7 @@ following functions (sometimes bug-prone):
import difflib
from jedi import common
from parso.utils import source_to_unicode, splitlines
from jedi.evaluate import helpers
@@ -82,7 +83,7 @@ def _rename(names, replace_str):
with open(current_path) as f:
source = f.read()
new_lines = common.splitlines(common.source_to_unicode(source))
new_lines = splitlines(source_to_unicode(source))
old_lines = new_lines[:]
nr, indent = name.line, name.column
@@ -100,7 +101,7 @@ def extract(script, new_name):
:type source: str
:return: list of changed lines/changed files
"""
new_lines = common.splitlines(common.source_to_unicode(script.source))
new_lines = splitlines(source_to_unicode(script.source))
old_lines = new_lines[:]
user_stmt = script._parser.user_stmt()
@@ -159,7 +160,7 @@ def inline(script):
"""
:type script: api.Script
"""
new_lines = common.splitlines(common.source_to_unicode(script.source))
new_lines = splitlines(source_to_unicode(script.source))
dct = {}

View File

@@ -11,9 +11,10 @@ import re
import os
import sys
from parso.utils import splitlines
from jedi import Interpreter
from jedi.api.helpers import get_on_completion_name
from jedi import common
READLINE_DEBUG = False
@@ -85,7 +86,7 @@ def setup_readline(namespace_module=__main__):
logging.debug("Start REPL completion: " + repr(text))
interpreter = Interpreter(text, [namespace_module.__dict__])
lines = common.splitlines(text)
lines = splitlines(text)
position = (len(lines), len(lines[-1]))
name = get_on_completion_name(
interpreter._get_module_node(),

View File

@@ -1,17 +0,0 @@
from jedi.common import splitlines
def test_splitlines_no_keepends():
    # Each input maps to the exact line list expected without terminators.
    cases = [
        ('asd\r\n', ['asd', '']),
        ('asd\r\n\f', ['asd', '\f']),
        ('\fasd\r\n', ['\fasd', '']),
        ('', ['']),
        ('\n', ['', '']),
    ]
    for text, expected in cases:
        assert splitlines(text) == expected
def test_splitlines_keepends():
    # Same inputs as the no-keepends test, but terminators stay attached.
    cases = [
        ('asd\r\n', ['asd\r\n', '']),
        ('asd\r\n\f', ['asd\r\n', '\f']),
        ('\fasd\r\n', ['\fasd\r\n', '']),
        ('', ['']),
        ('\n', ['\n', '']),
    ]
    for text, expected in cases:
        assert splitlines(text, keepends=True) == expected

View File

@@ -179,15 +179,6 @@ class TestRegression(TestCase):
else:
assert n == limit
def test_source_to_unicode_unicode_text(self):
    # UTF-8 bytes with a vim modeline encoding declaration must decode
    # to exactly what a plain UTF-8 decode produces.
    source = (
        b"# vim: fileencoding=utf-8\n"
        b"# \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a\n"
    )
    assert common.source_to_unicode(source) == source.decode('utf-8')
def test_loading_unicode_files_with_bad_global_charset(monkeypatch, tmpdir):
dirname = str(tmpdir.mkdir('jedi-test'))