Move source_to_unicode from jedi/modules.py to jedi/common.py

2014-01-05 13:17:04 +01:00
parent 9523e70a71
commit fce36ebea4
5 changed files with 46 additions and 38 deletions
--- a/jedi/api.py
+++ b/jedi/api.py
@@ -87,7 +87,7 @@ class Script(object):

        api_classes.clear_caches()
        debug.reset_time()
-        self.source = modules.source_to_unicode(source, encoding)
+        self.source = common.source_to_unicode(source, encoding)
        self._module = modules.ModuleWithCursor(
            path, source=self.source, position=self._pos)
        self._evaluator = Evaluator()
@@ -671,7 +671,7 @@ def defined_names(source, path=None, encoding='utf-8'):
    :rtype: list of api_classes.Definition
    """
    parser = Parser(
-        modules.source_to_unicode(source, encoding),
+        common.source_to_unicode(source, encoding),
        module_path=path,
    )
    return api_classes._defined_names(Evaluator(), parser.module)
--- a/jedi/common.py
+++ b/jedi/common.py
@@ -2,7 +2,10 @@
 import sys
 import contextlib
 import functools
+import re
+from ast import literal_eval

+from jedi._compatibility import unicode
 from jedi.parser import tokenizer as tokenize
 from jedi._compatibility import next, reraise
 from jedi import settings
@@ -197,7 +200,7 @@ def scale_speed_settings(factor):


 def indent_block(text, indention='    '):
-    """ This function indents a text block with a default of four spaces """
+    """This function indents a text block with a default of four spaces."""
    temp = ''
    while text and text[-1] == '\n':
        temp += text[-1]
@@ -208,9 +211,41 @@ def indent_block(text, indention='    '):

@contextlib.contextmanager
 def ignored(*exceptions):
-    """Context manager that ignores all of the specified exceptions. This will
-    be in the standard library starting with Python 3.4."""
+    """
+    Context manager that ignores all of the specified exceptions. This will
+    be in the standard library starting with Python 3.4.
+    """
    try:
        yield
    except exceptions:
        pass
+
+
+def source_to_unicode(source, encoding=None):
+    """
+    Decode ``source`` to unicode; unicode input is returned unchanged.
+
+    The codec is 'utf-8' for a UTF-8 BOM, else a PEP 263 ``coding``
+    declaration in the first two lines, else ``encoding`` (or 'iso-8859-1');
+    undecodable bytes are replaced. See:
+    http://www.python.org/dev/peps/pep-0263/
+    http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
+    """
+    def detect_encoding():
+        if source.startswith(literal_eval(r"b'\xef\xbb\xbf'")):
+            return 'utf-8'  # UTF-8 byte-order mark
+
+        # Map bytes 1:1 to chars via latin-1 instead of str(): on Python 3
+        # str(bytes) is the repr, which has no real newlines to match on.
+        head = source.decode('iso-8859-1')
+        first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', head).group(0)
+        possible_encoding = re.search(r"coding[=:]\s*([-\w.]+)",
+                                      first_two_lines)
+        if possible_encoding:
+            return str(possible_encoding.group(1))
+        # the default if nothing else has been set -> PEP 263
+        return encoding if encoding is not None else 'iso-8859-1'
+
+    if isinstance(source, unicode):
+        return source  # only str/bytes need to be cast
+    return unicode(source, detect_encoding(), 'replace')
--- a/jedi/evaluate/dynamic.py
+++ b/jedi/evaluate/dynamic.py
@@ -54,6 +54,7 @@ check for -> a is a string). There's big potential in these checks.
 import os

 from jedi import cache
+from jedi.common import source_to_unicode
 from jedi.parser import representation as pr
 from jedi import modules
 from jedi import settings
@@ -81,7 +82,7 @@ def get_directory_modules_for_name(mods, name):

    def check_fs(path):
        with open(path) as f:
-            source = modules.source_to_unicode(f.read())
+            source = source_to_unicode(f.read())
            if name in source:
                return modules.load_module(path, source)

--- a/jedi/modules.py
+++ b/jedi/modules.py
@@ -18,10 +18,9 @@ from __future__ import with_statement
 import re
 import sys
 import os
-from ast import literal_eval

-from jedi._compatibility import unicode
 from jedi import cache
+from jedi.common import source_to_unicode
 from jedi.parser import tokenizer as tokenize
 from jedi.parser import fast
 from jedi import debug
@@ -225,30 +224,3 @@ class ModuleWithCursor(object):

    def get_position_line(self):
        return self.get_line(self.position[0])[:self.position[1]]
-def source_to_unicode(source, encoding=None):
-    def detect_encoding():
-        """ For the implementation of encoding definitions in Python, look at:
-        http://www.python.org/dev/peps/pep-0263/
-        http://docs.python.org/2/reference/lexical_analysis.html#encoding-\
-                                                                declarations
-        """
-        byte_mark = literal_eval(r"b'\xef\xbb\xbf'")
-        if source.startswith(byte_mark):
-            # UTF-8 byte-order mark
-            return 'utf-8'
-
-        first_two_lines = re.match(r'(?:[^\n]*\n){0,2}', str(source)).group(0)
-        possible_encoding = re.search(r"coding[=:]\s*([-\w.]+)",
-                                      first_two_lines)
-        if possible_encoding:
-            return possible_encoding.group(1)
-        else:
-            # the default if nothing else has been set -> PEP 263
-            return encoding if encoding is not None else 'iso-8859-1'
-
-    if isinstance(source, unicode):
-        # only cast str/bytes
-        return source
-
-    # cast to unicode by default
-    return unicode(source, detect_encoding(), 'replace')
--- a/jedi/refactoring.py
+++ b/jedi/refactoring.py
@@ -86,7 +86,7 @@ def _rename(names, replace_str):
                with open(current_path) as f:
                    source = f.read()

-            new_lines = modules.source_to_unicode(source).splitlines()
+            new_lines = common.source_to_unicode(source).splitlines()
            old_lines = new_lines[:]

        nr, indent = name.line, name.column
@@ -104,7 +104,7 @@ def extract(script, new_name):
    :type source: str
    :return: list of changed lines/changed files
    """
-    new_lines = modules.source_to_unicode(script.source).splitlines()
+    new_lines = common.source_to_unicode(script.source).splitlines()
    old_lines = new_lines[:]

    user_stmt = script._parser.user_stmt
@@ -163,7 +163,7 @@ def inline(script):
    """
    :type script: api.Script
    """
-    new_lines = modules.source_to_unicode(script.source).splitlines()
+    new_lines = common.source_to_unicode(script.source).splitlines()

    dct = {}