
splitlines and source_to_unicode are utils of parso.

Author: Dave Halter
Date:   2017-05-20 09:55:16 -04:00
parent 0f4b7db56a
commit 50c7137437
9 changed files with 27 additions and 118 deletions
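
In short: jedi no longer ships its own splitlines() and source_to_unicode(); call sites now import both helpers directly from parso.utils. A minimal before/after sketch (the source bytes and variable names here are illustrative, not taken from the diff):

    # Before this commit the helpers lived in jedi/common.py:
    #     from jedi import common
    #     code_lines = common.splitlines(common.source_to_unicode(source))
    # After this commit the same helpers come from parso:
    from parso.utils import source_to_unicode, splitlines

    source = b"# -*- coding: utf-8 -*-\nprint('hi')\n"   # any source bytes
    code_lines = splitlines(source_to_unicode(source))
    line = max(len(code_lines), 1)   # mirrors the default-line handling in Script.__init__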


@@ -16,6 +16,8 @@ import sys
 from parso.python import load_grammar
 from parso.python import tree
 from parso.python import parse
+from parso.utils import source_to_unicode, splitlines
 from jedi.parser_utils import get_executable_nodes, get_statement_of_position
 from jedi import debug
 from jedi import settings
@@ -108,8 +110,8 @@ class Script(object):
             with open(path, 'rb') as f:
                 source = f.read()
-        self._source = common.source_to_unicode(source, encoding)
-        self._code_lines = common.splitlines(self._source)
+        self._source = source_to_unicode(source, encoding)
+        self._code_lines = splitlines(self._source)
         line = max(len(self._code_lines), 1) if line is None else line
         if not (0 < line <= len(self._code_lines)):
             raise ValueError('`line` parameter is not in a valid range.')


@@ -4,13 +4,14 @@ Helpers for the API
 import re
 from collections import namedtuple
-from jedi._compatibility import u
-from jedi.evaluate.helpers import evaluate_call_of_leaf
 from parso.python.parser import Parser
 from parso.python import tree
 from parso import tokenize
+from parso.utils import splitlines
+from jedi._compatibility import u
+from jedi.evaluate.helpers import evaluate_call_of_leaf
 from jedi.cache import time_cache
-from jedi import common
 CompletionParts = namedtuple('CompletionParts', ['path', 'has_dot', 'name'])
@@ -52,7 +53,7 @@ class OnErrorLeaf(Exception):
 def _is_on_comment(leaf, position):
-    comment_lines = common.splitlines(leaf.prefix)
+    comment_lines = splitlines(leaf.prefix)
     difference = leaf.start_pos[0] - position[0]
     prefix_start_pos = leaf.get_start_pos_of_prefix()
     if difference == 0:


@@ -2,10 +2,8 @@
 import sys
 import contextlib
 import functools
-import re
-from ast import literal_eval
-from jedi._compatibility import unicode, reraise
+from jedi._compatibility import reraise
 from jedi import settings
@@ -115,77 +113,6 @@ def ignored(*exceptions):
         pass
-
-
-def source_to_unicode(source, encoding=None):
-    def detect_encoding():
-        """
-        For the implementation of encoding definitions in Python, look at:
-        - http://www.python.org/dev/peps/pep-0263/
-        - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
-        """
-        byte_mark = literal_eval(r"b'\xef\xbb\xbf'")
-        if source.startswith(byte_mark):
-            # UTF-8 byte-order mark
-            return 'utf-8'
-
-        first_two_lines = re.match(br'(?:[^\n]*\n){0,2}', source).group(0)
-        possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)",
-                                      first_two_lines)
-        if possible_encoding:
-            return possible_encoding.group(1)
-        else:
-            # the default if nothing else has been set -> PEP 263
-            return encoding if encoding is not None else 'utf-8'
-
-    if isinstance(source, unicode):
-        # only cast str/bytes
-        return source
-
-    encoding = detect_encoding()
-    if not isinstance(encoding, unicode):
-        encoding = unicode(encoding, 'utf-8', 'replace')
-    # cast to unicode by default
-    return unicode(source, encoding, 'replace')
-
-
-def splitlines(string, keepends=False):
-    """
-    A splitlines for Python code. In contrast to Python's ``str.splitlines``,
-    looks at form feeds and other special characters as normal text. Just
-    splits ``\n`` and ``\r\n``.
-    Also different: Returns ``['']`` for an empty string input.
-
-    In Python 2.7 form feeds are used as normal characters when using
-    str.splitlines. However in Python 3 somewhere there was a decision to split
-    also on form feeds.
-    """
-    if keepends:
-        lst = string.splitlines(True)
-
-        # We have to merge lines that were broken by form feed characters.
-        merge = []
-        for i, line in enumerate(lst):
-            if line.endswith('\f'):
-                merge.append(i)
-
-        for index in reversed(merge):
-            try:
-                lst[index] = lst[index] + lst[index + 1]
-                del lst[index + 1]
-            except IndexError:
-                # index + 1 can be empty and therefore there's no need to
-                # merge.
-                pass
-
-        # The stdlib's implementation of the end is inconsistent when calling
-        # it with/without keepends. One time there's an empty string in the
-        # end, one time there's none.
-        if string.endswith('\n') or string == '':
-            lst.append('')
-        return lst
-    else:
-        return re.split('\n|\r\n', string)
-
-
 def unite(iterable):
     """Turns a two dimensional array into a one dimensional."""
     return set(typ for types in iterable for typ in types)
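
The two functions removed above are exactly the ones this commit delegates to parso. A short sketch of their documented behaviour, exercised through the new import location (this assumes parso.utils keeps the same semantics as the deleted jedi.common code, which is what the commit message implies):

    from parso.utils import source_to_unicode, splitlines

    # source_to_unicode() decodes bytes via a UTF-8 BOM or a PEP 263 coding
    # declaration, falling back to utf-8.
    text = source_to_unicode(b"# -*- coding: utf-8 -*-\nprint('ok')\n")

    # splitlines() only splits on '\n' and '\r\n'; form feeds stay inside the
    # line and the empty string yields [''] (these cases come from the deleted tests).
    assert splitlines('asd\r\n\f') == ['asd', '\f']
    assert splitlines('') == ['']
    # keepends=True keeps the line endings and still appends a trailing '':
    assert splitlines(text, keepends=True) == ["# -*- coding: utf-8 -*-\n", "print('ok')\n", '']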


@@ -16,14 +16,16 @@ import os
 import pkgutil
 import sys
-from jedi._compatibility import find_module, unicode, ImplicitNSInfo
-from jedi import debug
-from jedi import settings
-from jedi.common import source_to_unicode, unite
 from parso.python import parse
 from parso.python import tree
 from parso.tree import search_ancestor
 from parso.cache import parser_cache
+from parso.utils import source_to_unicode
+from jedi._compatibility import find_module, unicode, ImplicitNSInfo
+from jedi import debug
+from jedi import settings
+from jedi.common import unite
 from jedi.evaluate import sys_path
 from jedi.evaluate import helpers
 from jedi.evaluate import compiled


@@ -43,10 +43,11 @@ import imp
 import re
 from itertools import chain
-from jedi._compatibility import use_metaclass
 from parso.python import tree
+from parso.utils import source_to_unicode
+from jedi._compatibility import use_metaclass
 from jedi import debug
-from jedi import common
 from jedi.evaluate.cache import memoize_default, CachedMetaClass, NO_DEFAULT
 from jedi.evaluate import compiled
 from jedi.evaluate import recursion
@@ -546,7 +547,7 @@ class ModuleContext(use_metaclass(CachedMetaClass, context.TreeContext)):
         init_path = self.py__file__()
         if os.path.basename(init_path) == '__init__.py':
             with open(init_path, 'rb') as f:
-                content = common.source_to_unicode(f.read())
+                content = source_to_unicode(f.read())
                 # these are strings that need to be used for namespace packages,
                 # the first one is ``pkgutil``, the second ``pkg_resources``.
                 options = ('declare_namespace(__name__)', 'extend_path(__path__')


@@ -15,6 +15,7 @@ following functions (sometimes bug-prone):
 import difflib
 from jedi import common
+from parso.utils import source_to_unicode, splitlines
 from jedi.evaluate import helpers
@@ -82,7 +83,7 @@ def _rename(names, replace_str):
         with open(current_path) as f:
             source = f.read()
-        new_lines = common.splitlines(common.source_to_unicode(source))
+        new_lines = splitlines(source_to_unicode(source))
         old_lines = new_lines[:]
         nr, indent = name.line, name.column
@@ -100,7 +101,7 @@ def extract(script, new_name):
     :type source: str
     :return: list of changed lines/changed files
     """
-    new_lines = common.splitlines(common.source_to_unicode(script.source))
+    new_lines = splitlines(source_to_unicode(script.source))
     old_lines = new_lines[:]
     user_stmt = script._parser.user_stmt()
@@ -159,7 +160,7 @@ def inline(script):
""" """
:type script: api.Script :type script: api.Script
""" """
new_lines = common.splitlines(common.source_to_unicode(script.source)) new_lines = splitlines(source_to_unicode(script.source))
dct = {} dct = {}


@@ -11,9 +11,10 @@ import re
 import os
 import sys
+from parso.utils import splitlines
 from jedi import Interpreter
 from jedi.api.helpers import get_on_completion_name
-from jedi import common
 READLINE_DEBUG = False
@@ -85,7 +86,7 @@ def setup_readline(namespace_module=__main__):
                 logging.debug("Start REPL completion: " + repr(text))
                 interpreter = Interpreter(text, [namespace_module.__dict__])
-                lines = common.splitlines(text)
+                lines = splitlines(text)
                 position = (len(lines), len(lines[-1]))
                 name = get_on_completion_name(
                     interpreter._get_module_node(),


@@ -1,17 +0,0 @@
-from jedi.common import splitlines
-
-
-def test_splitlines_no_keepends():
-    assert splitlines('asd\r\n') == ['asd', '']
-    assert splitlines('asd\r\n\f') == ['asd', '\f']
-    assert splitlines('\fasd\r\n') == ['\fasd', '']
-    assert splitlines('') == ['']
-    assert splitlines('\n') == ['', '']
-
-
-def test_splitlines_keepends():
-    assert splitlines('asd\r\n', keepends=True) == ['asd\r\n', '']
-    assert splitlines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f']
-    assert splitlines('\fasd\r\n', keepends=True) == ['\fasd\r\n', '']
-    assert splitlines('', keepends=True) == ['']
-    assert splitlines('\n', keepends=True) == ['\n', '']


@@ -179,15 +179,6 @@ class TestRegression(TestCase):
         else:
             assert n == limit
 
-    def test_source_to_unicode_unicode_text(self):
-        source = (
-            b"# vim: fileencoding=utf-8\n"
-            b"# \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a\n"
-        )
-        actual = common.source_to_unicode(source)
-        expected = source.decode('utf-8')
-        assert actual == expected
-
 
 def test_loading_unicode_files_with_bad_global_charset(monkeypatch, tmpdir):
     dirname = str(tmpdir.mkdir('jedi-test'))