From 2616143d108c0bc80f4f30c346f1bec94887308f Mon Sep 17 00:00:00 2001
From: Dave Halter <davidhalter88@gmail.com>
Date: Fri, 18 Jul 2014 17:43:25 +0200
Subject: [PATCH] Always return docstrings as unicode (decode Python 2 byte strings as UTF-8 with 'replace'), fixes #420

---
 jedi/parser/representation.py   |  8 +++++++-
 test/test_parser/test_parser.py | 10 ++++++++--
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/jedi/parser/representation.py b/jedi/parser/representation.py
index da13dfba..cbfa037d 100644
--- a/jedi/parser/representation.py
+++ b/jedi/parser/representation.py
@@ -85,7 +85,13 @@ class DocstringMixin(object):
         """ Returns a cleaned version of the docstring token. """
         try:
             # Returns a literal cleaned version of the ``Token``.
-            return unicode(cleandoc(literal_eval(self._doc_token.string)))
+            cleaned = cleandoc(literal_eval(self._doc_token.string))
+            # Since we want the docstr output to be always unicode, just force
+            # it.
+            if is_py3 or isinstance(cleaned, unicode):
+                return cleaned
+            else:
+                return unicode(cleaned, 'UTF-8', 'replace')
         except AttributeError:
             return u('')
 
diff --git a/test/test_parser/test_parser.py b/test/test_parser/test_parser.py
index 7449b11e..895e7a02 100644
--- a/test/test_parser/test_parser.py
+++ b/test/test_parser/test_parser.py
@@ -1,4 +1,6 @@
-from jedi._compatibility import u
+# -*- coding: utf-8 -*-
+
+from jedi._compatibility import u, is_py3
 from jedi.parser import Parser
 from jedi.parser.user_context import UserContextParser
 from jedi.parser import representation as pr
@@ -136,4 +138,8 @@ def test_hex_values_in_docstring():
             return 1
         '''
 
-    assert Parser(dedent(u(source))).module.subscopes[0].raw_doc == '\xff'
+    doc = Parser(dedent(u(source))).module.subscopes[0].raw_doc
+    if is_py3:
+        assert doc == '\xff'
+    else:
+        assert doc == u('�')