diff --git a/jedi/api.py b/jedi/api.py index e0a09b8a..4d0f1d7f 100644 --- a/jedi/api.py +++ b/jedi/api.py @@ -87,7 +87,7 @@ class Script(object): api_classes.clear_caches() debug.reset_time() - self.source = modules.source_to_unicode(source, encoding) + self.source = common.source_to_unicode(source, encoding) self._module = modules.ModuleWithCursor( path, source=self.source, position=self._pos) self._evaluator = Evaluator() @@ -671,7 +671,7 @@ def defined_names(source, path=None, encoding='utf-8'): :rtype: list of api_classes.Definition """ parser = Parser( - modules.source_to_unicode(source, encoding), + common.source_to_unicode(source, encoding), module_path=path, ) return api_classes._defined_names(Evaluator(), parser.module) diff --git a/jedi/common.py b/jedi/common.py index 48592585..54974344 100644 --- a/jedi/common.py +++ b/jedi/common.py @@ -2,7 +2,10 @@ import sys import contextlib import functools +import re +from ast import literal_eval +from jedi._compatibility import unicode from jedi.parser import tokenizer as tokenize from jedi._compatibility import next, reraise from jedi import settings @@ -197,7 +200,7 @@ def scale_speed_settings(factor): def indent_block(text, indention=' '): - """ This function indents a text block with a default of four spaces """ + """This function indents a text block with a default of four spaces.""" temp = '' while text and text[-1] == '\n': temp += text[-1] @@ -208,9 +211,41 @@ def indent_block(text, indention=' '): @contextlib.contextmanager def ignored(*exceptions): - """Context manager that ignores all of the specified exceptions. This will - be in the standard library starting with Python 3.4.""" + """ + Context manager that ignores all of the specified exceptions. This will + be in the standard library starting with Python 3.4. + """ try: yield except exceptions: pass + + +def source_to_unicode(source, encoding=None): + def detect_encoding(): + """ + For the implementation of encoding definitions in Python, look at: + http://www.python.org/dev/peps/pep-0263/ + http://docs.python.org/2/reference/lexical_analysis.html#encoding-\ + declarations + """ + byte_mark = literal_eval(r"b'\xef\xbb\xbf'") + if source.startswith(byte_mark): + # UTF-8 byte-order mark + return 'utf-8' + + first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', str(source)).group(0) + possible_encoding = re.search(r"coding[=:]\s*([-\w.]+)", + first_two_lines) + if possible_encoding: + return possible_encoding.group(1) + else: + # the default if nothing else has been set -> PEP 263 + return encoding if encoding is not None else 'iso-8859-1' + + if isinstance(source, unicode): + # only cast str/bytes + return source + + # cast to unicode by default + return unicode(source, detect_encoding(), 'replace') diff --git a/jedi/evaluate/dynamic.py b/jedi/evaluate/dynamic.py index 2a97f747..53d1c2db 100644 --- a/jedi/evaluate/dynamic.py +++ b/jedi/evaluate/dynamic.py @@ -54,6 +54,7 @@ check for -> a is a string). There's big potential in these checks. import os from jedi import cache +from jedi.common import source_to_unicode from jedi.parser import representation as pr from jedi import modules from jedi import settings @@ -81,7 +82,7 @@ def get_directory_modules_for_name(mods, name): def check_fs(path): with open(path) as f: - source = modules.source_to_unicode(f.read()) + source = source_to_unicode(f.read()) if name in source: return modules.load_module(path, source) diff --git a/jedi/modules.py b/jedi/modules.py index 940e053b..989b684f 100644 --- a/jedi/modules.py +++ b/jedi/modules.py @@ -18,10 +18,9 @@ from __future__ import with_statement import re import sys import os -from ast import literal_eval -from jedi._compatibility import unicode from jedi import cache +from jedi.common import source_to_unicode from jedi.parser import tokenizer as tokenize from jedi.parser import fast from jedi import debug @@ -225,30 +224,3 @@ class ModuleWithCursor(object): def get_position_line(self): return self.get_line(self.position[0])[:self.position[1]] -def source_to_unicode(source, encoding=None): - def detect_encoding(): - """ For the implementation of encoding definitions in Python, look at: - http://www.python.org/dev/peps/pep-0263/ - http://docs.python.org/2/reference/lexical_analysis.html#encoding-\ - declarations - """ - byte_mark = literal_eval(r"b'\xef\xbb\xbf'") - if source.startswith(byte_mark): - # UTF-8 byte-order mark - return 'utf-8' - - first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', str(source)).group(0) - possible_encoding = re.search(r"coding[=:]\s*([-\w.]+)", - first_two_lines) - if possible_encoding: - return possible_encoding.group(1) - else: - # the default if nothing else has been set -> PEP 263 - return encoding if encoding is not None else 'iso-8859-1' - - if isinstance(source, unicode): - # only cast str/bytes - return source - - # cast to unicode by default - return unicode(source, detect_encoding(), 'replace') diff --git a/jedi/refactoring.py b/jedi/refactoring.py index 4952cfd1..85d153ac 100644 --- a/jedi/refactoring.py +++ b/jedi/refactoring.py @@ -86,7 +86,7 @@ def _rename(names, replace_str): with open(current_path) as f: source = f.read() - new_lines = modules.source_to_unicode(source).splitlines() + new_lines = common.source_to_unicode(source).splitlines() old_lines = new_lines[:] nr, indent = name.line, name.column @@ -104,7 +104,7 @@ def extract(script, new_name): :type source: str :return: list of changed lines/changed files """ - new_lines = modules.source_to_unicode(script.source).splitlines() + new_lines = common.source_to_unicode(script.source).splitlines() old_lines = new_lines[:] user_stmt = script._parser.user_stmt @@ -163,7 +163,7 @@ def inline(script): """ :type script: api.Script """ - new_lines = modules.source_to_unicode(script.source).splitlines() + new_lines = common.source_to_unicode(script.source).splitlines() dct = {}