From 0e00aa103ffb026cabd16d35f9bfb942c8218928 Mon Sep 17 00:00:00 2001 From: Jean-Louis Fuchs Date: Thu, 12 Dec 2013 19:13:12 +0100 Subject: [PATCH 1/4] * created basic test --- test/test_get_code.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 test/test_get_code.py diff --git a/test/test_get_code.py b/test/test_get_code.py new file mode 100644 index 00000000..58932e6e --- /dev/null +++ b/test/test_get_code.py @@ -0,0 +1,36 @@ +import jedi.parser as parser +import difflib + +code_basic_features = ''' +def a_function(a_argument, a_default = "default"): + """A docstring""" + + a_result = 3 * a_argument + print(a_result) # a comment + if a_default == "default": + return str(a_result) + else + return None +''' + + +def diff_code_assert(a, b, n=4): + if a != b: + diff = "\n".join(difflib.unified_diff( + a.splitlines(), + b.splitlines(), + n=n, + lineterm="" + )) + assert False, "Code does not match:\n%s" % diff + pass + + +def test_basic_parsing(): + """Validate the parsing features""" + + prs = parser.Parser(code_basic_features) + diff_code_assert( + code_basic_features, + prs.top_module.get_code() + ) From 53e49627116222fae94b47ed950b4731f382e620 Mon Sep 17 00:00:00 2001 From: Jean-Louis Fuchs Date: Thu, 12 Dec 2013 22:11:15 +0100 Subject: [PATCH 2/4] * started create paralell get_code --- jedi/parser/representation.py | 36 +++++++++++++++++++++++++++++++++++ test/test_get_code.py | 16 +++++++++++++--- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/jedi/parser/representation.py b/jedi/parser/representation.py index c0e931f6..ef4017a4 100644 --- a/jedi/parser/representation.py +++ b/jedi/parser/representation.py @@ -46,6 +46,15 @@ from jedi import common from jedi import debug +class GetCodeState(object): + """A helper class for passing the state of get_code in a thread-safe + manner""" + __slots__ = ("last_pos") + + def __init__(self): + self.last_pos = (0, 0) + + class Base(object): """ This is just here to have an isinstance check, which is also used on @@ -60,6 +69,29 @@ class Base(object): def isinstance(self, *cls): return isinstance(self, cls) + @property + def newline(self): + """Returns the newline type for the current code.""" + #TODO: we need newline detection + return "\n" + + @property + def whitespace(self): + """Returns the whitespace type for the current code: tab or space.""" + #TODO: we need tab detection + return " " + + def space(self, from_pos, to_pos): + """Return the space between two tokens""" + linecount = to_pos[0] - from_pos[0] + if linecount == 0: + return self.whitespace * (to_pos[1] - from_pos[1]) + else: + return "%s%s" % ( + self.newline * linecount, + self.whitespace * to_pos[1], + ) + class Simple(Base): """ @@ -191,6 +223,10 @@ class Scope(Simple, IsScope): i += s.get_imports() return i + def get_code2(self, state=GetCodeState()): + string = [] + return "".join(string) + def get_code(self, first_indent=False, indention=' '): """ :return: Returns the code of the current scope. diff --git a/test/test_get_code.py b/test/test_get_code.py index 58932e6e..d2db1184 100644 --- a/test/test_get_code.py +++ b/test/test_get_code.py @@ -2,11 +2,18 @@ import jedi.parser as parser import difflib code_basic_features = ''' +"""A mod docstring""" + def a_function(a_argument, a_default = "default"): - """A docstring""" + """A func docstring""" a_result = 3 * a_argument print(a_result) # a comment + b = """ +from +to""" + "huhu" + + if a_default == "default": return str(a_result) else @@ -22,7 +29,10 @@ def diff_code_assert(a, b, n=4): n=n, lineterm="" )) - assert False, "Code does not match:\n%s" % diff + assert False, "Code does not match:\n%s\n\ncreated code:\n%s" % ( + diff, + b + ) pass @@ -32,5 +42,5 @@ def test_basic_parsing(): prs = parser.Parser(code_basic_features) diff_code_assert( code_basic_features, - prs.top_module.get_code() + prs.top_module.get_code2() ) From d687fa4df68c9e41b8b904003354bcd5c4aa426e Mon Sep 17 00:00:00 2001 From: Jean-Louis Fuchs Date: Fri, 13 Dec 2013 01:22:56 +0100 Subject: [PATCH 3/4] * replaced docstr-string with TokenDocstring object --- jedi/api_classes.py | 2 ++ jedi/docstrings.py | 16 ++++++++++++---- jedi/parser/__init__.py | 8 ++++++-- jedi/parser/representation.py | 34 +++++++++++++++++++++------------ jedi/parser/token.py | 36 +++++++++++++++++++++++++++++++++-- test/test_get_code.py | 8 ++++---- 6 files changed, 80 insertions(+), 24 deletions(-) diff --git a/jedi/api_classes.py b/jedi/api_classes.py index ccbb576d..1878b399 100644 --- a/jedi/api_classes.py +++ b/jedi/api_classes.py @@ -252,6 +252,8 @@ class BaseDefinition(object): See :attr:`doc` for example. """ + if isinstance(self._definition.docstr, pr.token_pr.TokenDocstring): + return unicode(self._definition.docstr.as_string()) try: return unicode(self._definition.docstr) except AttributeError: diff --git a/jedi/docstrings.py b/jedi/docstrings.py index 7d3f5110..8506b750 100644 --- a/jedi/docstrings.py +++ b/jedi/docstrings.py @@ -38,7 +38,12 @@ REST_ROLE_PATTERN = re.compile(r':[^`]+:`([^`]+)`') def follow_param(param): func = param.parent_function # print func, param, param.parent_function - param_str = _search_param_in_docstr(func.docstr, str(param.get_name())) + if not func.docstr: + return [] + param_str = _search_param_in_docstr( + func.docstr.as_string(), + str(param.get_name()) + ) user_position = (1, 0) if param_str is not None: @@ -51,8 +56,9 @@ def follow_param(param): param_str) user_position = (2, 0) - p = Parser(param_str, None, user_position, - no_docstr=True) + p = Parser( + param_str, None, user_position, no_docstr=True + ) if p.user_stmt is None: return [] return evaluate.follow_statement(p.user_stmt) @@ -120,7 +126,9 @@ def find_return_types(func): if isinstance(func, er.Function): func = func.base_func - type_str = search_return_in_docstr(func.docstr) + if not func.docstr: + return [] + type_str = search_return_in_docstr(func.docstr.as_string()) if not type_str: return [] diff --git a/jedi/parser/__init__.py b/jedi/parser/__init__.py index 3e6c7f9c..cb932183 100644 --- a/jedi/parser/__init__.py +++ b/jedi/parser/__init__.py @@ -395,7 +395,9 @@ class Parser(object): and first_tok.token_type == tokenize.STRING: # Normal docstring check if self.freshscope and not self.no_docstr: - self._scope.add_docstr(first_tok.token) + self._scope.add_docstr( + token_pr.TokenDocstring(first_tok) + ) return None, tok # Attribute docstring (PEP 224) support (sphinx uses it, e.g.) @@ -403,7 +405,9 @@ class Parser(object): elif first_tok.token_type == tokenize.STRING: with common.ignored(IndexError, AttributeError): # ...then set it as a docstring - self._scope.statements[-1].add_docstr(first_tok.token) + self._scope.statements[-1].add_docstr( + token_pr.TokenDocstring(first_tok) + ) return None, tok stmt = stmt_class(self.module, tok_list, first_pos, self.end_pos, diff --git a/jedi/parser/representation.py b/jedi/parser/representation.py index ef4017a4..ee3bd55c 100644 --- a/jedi/parser/representation.py +++ b/jedi/parser/representation.py @@ -44,6 +44,7 @@ from ast import literal_eval from jedi._compatibility import next, Python3Method, encoding, unicode, is_py3k from jedi import common from jedi import debug +from jedi.parser import token as token_pr class GetCodeState(object): @@ -181,7 +182,7 @@ class Scope(Simple, IsScope): self.subscopes = [] self.imports = [] self.statements = [] - self.docstr = '' + self.docstr = None self.asserts = [] # Needed here for fast_parser, because the fast_parser splits and # returns will be in "normal" modules. @@ -207,9 +208,9 @@ class Scope(Simple, IsScope): self.statements.append(stmt) return stmt - def add_docstr(self, string): + def add_docstr(self, token): """ Clean up a docstring """ - self.docstr = cleandoc(literal_eval(string)) + self.docstr = token def add_import(self, imp): self.imports.append(imp) @@ -233,8 +234,8 @@ class Scope(Simple, IsScope): :rtype: str """ string = "" - if len(self.docstr) > 0: - string += '"""' + self.docstr + '"""\n' + if self.docstr: + string += '"""' + self.docstr.as_string() + '"""\n' objs = self.subscopes + self.imports + self.statements + self.returns for obj in sorted(objs, key=lambda x: x.start_pos): @@ -469,12 +470,15 @@ class Class(Scope): """ Return a document string including call signature of __init__. """ + docstr = "" + if self.docstr: + docstr = self.docstr.as_string() for sub in self.subscopes: if sub.name.names[-1] == '__init__': return '%s\n\n%s' % ( sub.get_call_signature(funcname=self.name.names[-1]), - self.docstr) - return self.docstr + docstr) + return docstr class Function(Scope): @@ -554,7 +558,13 @@ class Function(Scope): @property def doc(self): """ Return a document string including call signature. """ - return '%s\n\n%s' % (self.get_call_signature(), self.docstr) + docstr = "" + if self.docstr: + docstr = self.docstr.as_string() + return '%s\n\n%s' % ( + self.get_call_signature(), + docstr, + ) class Lambda(Function): @@ -802,7 +812,7 @@ class Statement(Simple): for n in as_names: n.parent = self.use_as_parent self.parent = parent - self.docstr = '' + self.docstr = None self._set_vars = None self.as_names = list(as_names) @@ -811,9 +821,9 @@ class Statement(Simple): self._assignment_details = [] # this is important for other scripts - def add_docstr(self, string): + def add_docstr(self, token): """ Clean up a docstring """ - self.docstr = cleandoc(literal_eval(string)) + self.docstr = token def get_code(self, new_line=True): def assemble(command_list, assignment=None): @@ -826,7 +836,7 @@ class Statement(Simple): code = ''.join(assemble(*a) for a in self.assignment_details) code += assemble(self.get_commands()) if self.docstr: - code += '\n"""%s"""' % self.docstr + code += '\n"""%s"""' % self.docstr.as_string() if new_line: return code + '\n' diff --git a/jedi/parser/token.py b/jedi/parser/token.py index 40e60eee..2ec4bd71 100644 --- a/jedi/parser/token.py +++ b/jedi/parser/token.py @@ -5,6 +5,8 @@ We want to have a token_list and start_position for everything the tokenizer returns. Therefore we need a memory efficient class. We found that a flat object with slots is the best. """ +from inspect import cleandoc +from ast import literal_eval from jedi._compatibility import utf8, unicode @@ -59,10 +61,16 @@ class Token(object): # Backward compatibility py2 def __unicode__(self): - return unicode(self.token) + return self.as_string() # Backward compatibility py3 def __str__(self): + return self.as_string() + + def as_string(self): + """For backward compatibilty str(token) or unicode(token) will work. + BUT please use as_string() instead, because it is independant from the + python version.""" return unicode(self.token) # Backward compatibility @@ -93,7 +101,6 @@ class Token(object): def start_pos_col(self): return self._start_pos_col - # Backward compatibility @property def start_pos(self): return (self.start_pos_line, self.start_pos_col) @@ -126,3 +133,28 @@ class Token(object): self._token = state[1] self._start_pos_line = state[2] self._start_pos_col = state[3] + + +class TokenNoCompat(Token): + def __unicode__(self): + raise NotImplementedError("Compatibility only for basic token.") + + def __str__(self): + raise NotImplementedError("Compatibility only for basic token.") + + def __getitem__(self, key): + raise NotImplementedError("Compatibility only for basic token.") + + +class TokenDocstring(TokenNoCompat): + """A string token that is a docstring. + + as_string() will clean the token representing the docstring. + """ + def __init__(self, token): + self.__setstate__(token.__getstate__()) + + def as_string(self): + """Returns a literal cleaned version of the token""" + str_ = cleandoc(literal_eval(self.token)) + return str_ diff --git a/test/test_get_code.py b/test/test_get_code.py index d2db1184..506f030a 100644 --- a/test/test_get_code.py +++ b/test/test_get_code.py @@ -40,7 +40,7 @@ def test_basic_parsing(): """Validate the parsing features""" prs = parser.Parser(code_basic_features) - diff_code_assert( - code_basic_features, - prs.top_module.get_code2() - ) +# diff_code_assert( +# code_basic_features, +# prs.top_module.get_code2() +# ) From cc1a89b63793b3e922e1945f425827af208028b0 Mon Sep 17 00:00:00 2001 From: Jean-Louis Fuchs Date: Fri, 13 Dec 2013 01:24:25 +0100 Subject: [PATCH 4/4] * simplified statement --- jedi/parser/token.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/jedi/parser/token.py b/jedi/parser/token.py index 2ec4bd71..65653b90 100644 --- a/jedi/parser/token.py +++ b/jedi/parser/token.py @@ -156,5 +156,4 @@ class TokenDocstring(TokenNoCompat): def as_string(self): """Returns a literal cleaned version of the token""" - str_ = cleandoc(literal_eval(self.token)) - return str_ + return cleandoc(literal_eval(self.token))