From 2616143d108c0bc80f4f30c346f1bec94887308f Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 18 Jul 2014 17:43:25 +0200 Subject: [PATCH] unicode issues with docstrings should be gone, fixes #420 --- jedi/parser/representation.py | 8 +++++++- test/test_parser/test_parser.py | 10 ++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/jedi/parser/representation.py b/jedi/parser/representation.py index da13dfba..cbfa037d 100644 --- a/jedi/parser/representation.py +++ b/jedi/parser/representation.py @@ -85,7 +85,13 @@ class DocstringMixin(object): """ Returns a cleaned version of the docstring token. """ try: # Returns a literal cleaned version of the ``Token``. - return unicode(cleandoc(literal_eval(self._doc_token.string))) + cleaned = cleandoc(literal_eval(self._doc_token.string)) + # Since we want the docstr output to be always unicode, just force + # it. + if is_py3 or isinstance(cleaned, unicode): + return cleaned + else: + return unicode(cleaned, 'UTF-8', 'replace') except AttributeError: return u('') diff --git a/test/test_parser/test_parser.py b/test/test_parser/test_parser.py index 7449b11e..895e7a02 100644 --- a/test/test_parser/test_parser.py +++ b/test/test_parser/test_parser.py @@ -1,4 +1,6 @@ -from jedi._compatibility import u +# -*- coding: utf-8 -*- + +from jedi._compatibility import u, is_py3 from jedi.parser import Parser from jedi.parser.user_context import UserContextParser from jedi.parser import representation as pr @@ -136,4 +138,8 @@ def test_hex_values_in_docstring(): return 1 ''' - assert Parser(dedent(u(source))).module.subscopes[0].raw_doc == '\xff' + doc = Parser(dedent(u(source))).module.subscopes[0].raw_doc + if is_py3: + assert doc == '\xff' + else: + assert doc == u('�')