Unicode issues with docstrings should be gone; fixes #420

This commit is contained in:
Dave Halter
2014-07-18 17:43:25 +02:00
parent e07f51387f
commit 2616143d10
2 changed files with 15 additions and 3 deletions

View File

@@ -85,7 +85,13 @@ class DocstringMixin(object):
""" Returns a cleaned version of the docstring token. """
try:
# Returns a literal cleaned version of the ``Token``.
return unicode(cleandoc(literal_eval(self._doc_token.string)))
cleaned = cleandoc(literal_eval(self._doc_token.string))
# Since we want the docstr output to be always unicode, just force
# it.
if is_py3 or isinstance(cleaned, unicode):
return cleaned
else:
return unicode(cleaned, 'UTF-8', 'replace')
except AttributeError:
return u('')

View File

@@ -1,4 +1,6 @@
from jedi._compatibility import u
# -*- coding: utf-8 -*-
from jedi._compatibility import u, is_py3
from jedi.parser import Parser
from jedi.parser.user_context import UserContextParser
from jedi.parser import representation as pr
@@ -136,4 +138,8 @@ def test_hex_values_in_docstring():
return 1
'''
assert Parser(dedent(u(source))).module.subscopes[0].raw_doc == '\xff'
doc = Parser(dedent(u(source))).module.subscopes[0].raw_doc
if is_py3:
assert doc == '\xff'
else:
assert doc == u('<EFBFBD>')