1
0
forked from VimPlug/jedi

fixed todo: get_code of two vars after each other makes whitespace

This commit is contained in:
David Halter
2012-02-29 16:27:25 +01:00
parent e83079d563
commit 3223d2e663
2 changed files with 263 additions and 173 deletions

View File

@@ -1,16 +1,18 @@
""""
TODO This is a parser
TODO Description: This is a parser
TODO be tolerant with indents
TODO dictionaries not working with statement parser
TODO except has local vars
TODO take special care for future imports
scope
imports
subscopes
statements
Ignored simple statements:
Ignored statements:
- print (no use for it)
- assert
- break, continue (because we avoid loops)
- del (also no used, since this script avoids loops and files)
- exec (dangerous - not controllable)
global is a special case and will not be used here
@@ -19,6 +21,7 @@ import sys
import tokenize
import cStringIO
import token
import re
def indent_block(text, indention=" "):
@@ -44,12 +47,21 @@ class Scope(object):
self.line_nr = line_nr
def add_scope(self, sub):
if sub == None:
print 'push scope: [%s@%s]' % (sub.name, sub.indent)
# print 'push scope: [%s@%s]' % (sub.name, sub.indent)
sub.parent = self
self.subscopes.append(sub)
return sub
def add_statement(self, stmt):
"""
Used to add a Statement or a Scope.
A statement would be a normal command (Statement) or a Scope (Flow).
"""
if isinstance(stmt, Scope):
stmt.parent = self
self.statements.append(stmt)
return stmt
def doc(self, str):
""" Clean up a docstring """
d = str.replace('\n', ' ')
@@ -83,11 +95,12 @@ class Scope(object):
if len(self.docstr) > 0:
string += '"""' + self.docstr + '"""\n'
for i in self.imports:
string += i.get_code() + '\n'
string += i.get_code()
for sub in self.subscopes:
string += str(sub.line_nr) + sub.get_code(first_indent=True, indention=indention)
for l in self.locals:
string += l + '\n'
#string += str(sub.line_nr)
string += sub.get_code(first_indent=True, indention=indention)
for stmt in self.statements:
string += stmt.get_code()
if first_indent:
string = indent_block(string, indention=indention)
@@ -97,7 +110,8 @@ class Scope(object):
"""
this function returns true if there are no subscopes, imports, locals.
"""
return not (self.locals or self.imports or self.subscopes)
return not (self.locals or self.imports or self.subscopes or \
self.statements)
class Class(Scope):
@@ -115,32 +129,67 @@ class Class(Scope):
str += "pass\n"
return str
class Flow(Scope):
"""
Used to describe programming structure - flow statements,
which indent code, but are not classes or functions:
- for
- while
- if
- try
- with
Therefore statements like else, except and finally are also here,
they are now saved in the root flow elements, but in the next variable.
:param command: The flow command, if, while, else, etc.
:type command: str
:param statement: The statement after the flow comand -> while 'statement'.
:type statement: Statement
:param indent: The indent level of the flow statement.
:type indent: int
:param line_nr: Line number of the flow statement.
:type line_nr: int
:param set_args: Local variables used in the for loop (only there).
:type set_args: list
"""
def __init__(self, code, functions, indent, line_nr):
super(Flow, self).__init__(name, indent, line_nr, None)
name = code
def __init__(self, command, statement, indent, line_nr, set_args=None):
name = "%s@%s" % (command, line_nr)
super(Flow, self).__init__(name, indent, line_nr, '')
self.command = command
self.statement = statement
self.set_args = set_args
self.next = None
def get_code(self, first_indent=False, indention=" "):
str = 'class %s' % (self.name)
str += ':\n'
str += super(Class, self).get_code(True, indention)
print "get_code class %s %i" % (self.name, self.is_empty())
if self.is_empty():
str += "pass\n"
if self.set_args:
args = ",".join(map(lambda x: x.get_code(), self.set_args))
args += ' in '
else:
args = ''
if self.statement:
stmt = self.statement.get_code(new_line=False)
else:
stmt = ''
str = "%s %s%s:\n" % (self.command, args, stmt)
str += super(Flow, self).get_code(True, indention)
if self.next:
str += self.next.get_code()
return str
def set_next(self, next):
""" Set the next element in the flow, those are else, except, etc. """
if self.next:
return self.next.set_next(next)
else:
self.next = next
next.parent = self.parent
return next
class Function(Scope):
def __init__(self, name, params, indent, line_nr, docstr=''):
Scope.__init__(self, name, indent, line_nr, docstr)
@@ -148,12 +197,9 @@ class Function(Scope):
def get_code(self, first_indent=False, indention=" "):
str = "def %s(%s):\n" % (self.name, ','.join(self.params))
#if len(self.docstr) > 0:
# str += self.childindent()+'"""'+self.docstr+'"""\n'
str += super(Function, self).get_code(True, indention)
if self.is_empty():
str += "pass\n"
#print "func", self.locals
return str
@@ -166,7 +212,7 @@ class Import(object):
:param line_nr: Line number.
:type line_nr: int
:param namespace: the import, as an array list, e.g. ['datetime', 'time']
:param namespace: the import, as an array list, e.g. ['datetime', 'time']
:type namespace: list
:param alias: the alias (valid in the current namespace).
:param from_ns: from declaration in an import.
@@ -185,61 +231,38 @@ class Import(object):
self.star = star
def get_code(self):
ns = ".".join(self.namespace)
if self.alias:
ns_str = "%s as %s" % (ns, self.alias)
ns_str = "%s as %s" % (self.namespace, self.alias)
else:
ns_str = ns
ns_str = str(self.namespace)
if self.from_ns:
if self.star:
ns_str = '*'
return "from %s import %s" % (self.from_ns, ns_str)
return "from %s import %s" % (self.from_ns, ns_str) + '\n'
else:
return "import " + ns_str
return "import " + ns_str + '\n'
class Statement(object):
"""
This is the class for Local and Functions
This is the class for all different statements.
:param code:
:param locals:
:param locals:
"""
def __init__(self, code, locals, functions):
def __init__(self, code, set_vars, used_funcs, used_vars, indent, line_nr):
self.code = code
self.locals = locals
self.functions = functions
self.set_vars = set_vars
self.used_funcs = used_funcs
self.used_vars = used_vars
def get_code(self):
raise NotImplementedError()
class Local(object):
"""
stores locals variables of any scopes
"""
def __init__(self, line_nr, left, right=None, is_global=False):
"""
@param line_nr
@param left: the left part of the local assignment
@param right: the right part of the assignment, must not be set
(in case of global)
@param is_global: defines a global variable
"""
self.indent = indent
self.line_nr = line_nr
self.left = left
self.right = right
def get_code(self):
if self.alias:
ns_str = "%s as %s" % (self.namespace, self.alias)
def get_code(self, new_line=True):
if new_line:
return self.code + '\n'
else:
ns_str = self.namespace
if self.from_ns:
if self.star:
ns_str = '*'
return "test from %s import %s" % (self.from_ns, ns_str)
else:
return "test import " + ns_str
return self.code
class Name(object):
@@ -249,13 +272,18 @@ class Name(object):
So a name like "module.class.function"
would result in an array of [module, class, function]
"""
def __init__(self, names):
def __init__(self, names, indent, line_nr):
super(Name, self).__init__()
self.names = names
self.indent = indent
self.line_nr = line_nr
def get_code(self):
""" returns the name again in a full string format """
return ".".join(names)
""" Returns the names in a full string format """
return ".".join(self.names)
def __str__(self):
return self.get_code()
class PyFuzzyParser(object):
@@ -266,27 +294,33 @@ class PyFuzzyParser(object):
def __init__(self):
self.top = Scope('global', 0, 0)
self.scope = self.top
self.current = (None, None, None)
def _parsedotname(self, pre_used_token=None):
""" @return (dottedname, nexttoken) """
"""
The dot name parser parses a name, variable or function and returns
their names.
:return: list of the names, token_type, nexttoken, start_indent.
:rtype: (Name, int, str, int)
"""
names = []
if pre_used_token is None:
tokentype, tok, indent = self.next()
if tokentype != tokenize.NAME and tok != '*':
token_type, tok, indent = self.next()
if token_type != tokenize.NAME and tok != '*':
return ([], tok)
else:
tokentype, tok, indent = pre_used_token
token_type, tok, indent = pre_used_token
names.append(tok)
start_indent = indent
while True:
tokentype, tok, indent = self.next()
token_type, tok, indent = self.next()
if tok != '.':
break
tokentype, tok, indent = self.next()
if tokentype != tokenize.NAME:
token_type, tok, indent = self.next()
if token_type != tokenize.NAME:
break
names.append(tok)
return (names, tok)
return (names, token_type, tok, start_indent)
def _parse_value_list(self, pre_used_token=None):
"""
@@ -295,32 +329,36 @@ class PyFuzzyParser(object):
"""
value_list = []
if pre_used_token:
tokentype, tok, indent = pre_used_token
n = self._parsedotname(tok)
token_type, tok, indent = pre_used_token
n, token_type, tok, start_indent = self._parsedotname(tok)
if n:
value_list.append(n)
value_list.append(Name(n, start_indent, self.line_nr))
tokentype, tok, indent = self.next()
while tok != 'in' and tokentype != tokenize.NEWLINE:
n = self._parsedotname(self.current)
token_type, tok, indent = self.next()
while tok != 'in' and token_type != tokenize.NEWLINE:
n, token_type, tok, start_indent = self._parsedotname(self.current)
if n:
value_list.append(n)
value_list.append(Name(n, start_indent, self.line_nr))
if tok == 'in':
break
tokentype, tok, indent = self.next()
print 'for_tok', tok
token_type, tok, indent = self.next()
return (value_list, tok)
def _parseimportlist(self):
imports = []
while True:
name, tok = self._parsedotname()
name, token_type, tok, start_indent = self._parsedotname()
if not name:
break
name2 = ''
name2 = None
if tok == 'as':
name2, tok = self._parsedotname()
imports.append((name, name2))
name2, token_type, tok, start_indent2 = self._parsedotname()
name2 = Name(name2, start_indent2, self.line_nr)
imports.append((Name(name, start_indent, self.line_nr), name2))
while tok != "," and "\n" not in tok:
tokentype, tok, indent = self.next()
token_type, tok, indent = self.next()
if tok != ",":
break
return imports
@@ -330,7 +368,7 @@ class PyFuzzyParser(object):
names = []
level = 1
while True:
tokentype, tok, indent = self.next()
token_type, tok, indent = self.next()
if tok in (')', ',') and level == 1:
if '=' not in name:
name = name.replace(' ', '')
@@ -351,43 +389,43 @@ class PyFuzzyParser(object):
name += "%s " % str(tok)
return names
def _parsefunction(self, indent):
tokentype, fname, ind = self.next()
if tokentype != tokenize.NAME:
token_type, fname, ind = self.next()
if token_type != tokenize.NAME:
return None
tokentype, open, ind = self.next()
token_type, open, ind = self.next()
if open != '(':
return None
params = self._parseparen()
tokentype, colon, ind = self.next()
token_type, colon, ind = self.next()
if colon != ':':
return None
return Function(fname, params, indent, self.line_nr)
def _parseclass(self, indent):
tokentype, cname, ind = self.next()
if tokentype != tokenize.NAME:
token_type, cname, ind = self.next()
if token_type != tokenize.NAME:
print "class: syntax error - token is not a name@%s (%s: %s)" \
% (self.line_nr, token.tok_name[token_type], cname)
return None
super = []
tokentype, next, ind = self.next()
token_type, next, ind = self.next()
if next == '(':
super = self._parseparen()
elif next != ':':
print "class: syntax error - %s@%s" % (cname, self.line_nr)
return None
return Class(cname, super, indent, self.line_nr)
def _parseassignment(self):
assign = ''
tokentype, tok, indent = self.next()
if tokentype == tokenize.STRING or tok == 'str':
token_type, tok, indent = self.next()
if token_type == tokenize.STRING or tok == 'str':
return '""'
elif tok == '(' or tok == 'tuple':
return '()'
@@ -395,7 +433,7 @@ class PyFuzzyParser(object):
return '[]'
elif tok == '{' or tok == 'dict':
return '{}'
elif tokentype == tokenize.NUMBER:
elif token_type == tokenize.NUMBER:
return '0'
elif tok == 'open' or tok == 'file':
return 'file'
@@ -407,7 +445,7 @@ class PyFuzzyParser(object):
assign += tok
level = 0
while True:
tokentype, tok, indent = self.next()
token_type, tok, indent = self.next()
if tok in ('(', '{', '['):
level += 1
elif tok in (']', '}', ')'):
@@ -420,19 +458,7 @@ class PyFuzzyParser(object):
assign += tok
return "%s" % assign
def _parse_words(self, pre_used_token):
"""
Used to parse a word, if the tokenizer returned a word at the start of
a new command.
:param pre_used_token: The pre parsed token.
:type pre_used_token: set
"""
return self._parse_statement(pre_used_token)
def _parse_statement(self, pre_used_token = None):
def _parse_statement(self, pre_used_token=None):
"""
Parses statements like:
@@ -451,30 +477,47 @@ class PyFuzzyParser(object):
used_funcs = []
used_vars = []
token_type, tok, indent = pre_used_token
while tok != '\n' and tok != ';':
if pre_used_token:
token_type, tok, indent = pre_used_token
else:
token_type, tok, indent = self.next()
is_break_token = lambda tok: tok in ['\n', ':', ';']
while not is_break_token(tok):
set_string = ''
print 'parse_stmt', tok, token.tok_name[token_type]
#print 'parse_stmt', tok, token.tok_name[token_type]
if token_type == tokenize.NAME:
print 'is_name', tok
if tok == 'pass':
set_string = ''
elif tok == 'return' or tok == 'del':
elif tok in ['return', 'yield', 'del', 'raise', 'assert']:
set_string = tok + ' '
elif tok == 'print':
set_string = ''
set_string = tok + ' '
else:
path, tok = self._parsedotname(self.current)
path, token_type, tok, start_indent = \
self._parsedotname(self.current)
print 'path', path
n = Name(path, start_indent, self.line_nr)
if tok == '(':
# it must be a function
used_funcs.append(path)
used_funcs.append(n)
else:
used_vars.append(path)
used_vars.append(n)
if string:
print 'str', string[-1]
if string and re.match(r'[\w\d]', string[-1]):
print 'yay'
string += ' '
#if token_type == tokenize.NAME \
# and self.last_token[0] == tokenize.NAME:
# print 'last_token', self.last_token, token_type
# string += ' ' + tok
string += ".".join(path)
print 'parse_stmt', tok, token.tok_name[token_type]
if tok == '\n' or tok == ';':
break
if ('=' in tok and not tok in ['>=', '<=', '==', '!=']):
#print 'parse_stmt', tok, token.tok_name[token_type]
continue
elif ('=' in tok and not tok in ['>=', '<=', '==', '!=']):
# there has been an assignement -> change vars
set_vars = used_vars
used_vars = []
@@ -483,37 +526,50 @@ class PyFuzzyParser(object):
string = set_string
else:
string += tok
token_type, tok, indent = self.next()
# caution: don't use indent anywhere,
# it's not working with the name parsing
token_type, tok, indent_dummy = self.next()
if not string:
return None, tok
print 'new_stat', string, set_vars, used_funcs, used_vars
#return Statement(), tok
#print 'new_stat', string, set_vars, used_funcs, used_vars
stmt = Statement(string, set_vars, used_funcs, used_vars,\
self.line_nr, indent)
return stmt, tok
def next(self):
type, tok, position, dummy, self.parserline = self.gen.next()
(self.line_nr, indent) = position
self.last_token = self.current
self.current = (type, tok, indent)
return self.current
def parse(self, text):
"""
The main part of the program. It analyzes the given code-text and
returns a tree-like scope. For a more detailed description, see the
class description.
"""
buf = cStringIO.StringIO(''.join(text) + '\n')
self.gen = tokenize.generate_tokens(buf.readline)
self.currentscope = self.scope
try:
extended_flow = ['else', 'except', 'finally']
statement_toks = ['{', '[', '(', '`']
freshscope = True
while True:
full_token = self.next()
tokentype, tok, indent = full_token
token_type, tok, indent = self.next()
dbg('main: tok=[%s] type=[%s] indent=[%s]'\
% (tok, tokentype, indent))
% (tok, token_type, indent))
if tokentype == tokenize.DEDENT:
if token_type == tokenize.DEDENT:
print 'dedent', self.scope.name
self.scope = self.scope.parent
elif tok == 'def':
func = self._parsefunction(indent)
if func is None:
print "function: syntax error..."
print "function: syntax error@%s" % self.line_nr
continue
dbg("new scope: function %s" % (func.name))
freshscope = True
@@ -521,7 +577,6 @@ class PyFuzzyParser(object):
elif tok == 'class':
cls = self._parseclass(indent)
if cls is None:
print "class: syntax error..."
continue
freshscope = True
dbg("new scope: class %s" % (cls.name))
@@ -533,10 +588,11 @@ class PyFuzzyParser(object):
self.scope.add_import(Import(self.line_nr, mod, alias))
freshscope = False
elif tok == 'from':
mod, tok = self._parsedotname()
mod, token_type, tok, start_indent = self._parsedotname()
if not mod or tok != "import":
print "from: syntax error..."
continue
mod = Name(mod, start_indent, self.line_nr)
names = self._parseimportlist()
for name, alias in names:
i = Import(self.line_nr, name, alias, mod)
@@ -544,33 +600,40 @@ class PyFuzzyParser(object):
freshscope = False
#loops
elif tok == 'for':
print tok, tokentype
value_list, tok = self._parse_value_list()
if tok == 'in':
statement, tok = self._parse_statement()
if tok == ':':
self.scope.append(statement)
f = Flow('for', statement, indent, self.line_nr, \
value_list)
dbg("new scope: flow %s" % (f.name))
self.scope = self.scope.add_statement(f)
elif tok in ['if', 'while', 'try', 'with'] + extended_flow:
# TODO with statement has local variables
command = tok
statement, tok = self._parse_statement()
if tok == ':':
f = Flow(command, statement, indent, self.line_nr)
dbg("new scope: flow %s" % (f.name))
if command in extended_flow:
# the last statement has to be another part of
# the flow statement
self.scope = self.scope.statements[-1].set_next(f)
else:
self.scope = self.scope.add_statement(f)
elif tok == 'while':
param_list = self._parse_while_loop()
elif tok == 'global':
self._parse_words(full_token)
elif tokentype == tokenize.STRING:
self._parse_statement(self.current)
pass
# TODO add suport for global
elif token_type == tokenize.STRING:
if freshscope:
self.scope.doc(tok)
elif tokentype == tokenize.NAME:
self._parse_words(full_token)
"""
name, tok = self._parsedotname(tok)
if tok == '=':
stmt = self._parseassignment()
dbg("parseassignment: %s = %s" % (name, stmt))
if stmt != None:
self.scope.add_local("%s = %s" % (name, stmt))
else:
#print "_not_implemented_", tok, self.parserline
pass
"""
elif token_type == tokenize.NAME or tok in statement_toks:
stmt, tok = self._parse_statement(self.current)
if stmt:
self.scope.add_statement(stmt)
freshscope = False
#else:
#print "_not_implemented_", tok, self.parserline

57
test.py
View File

@@ -34,41 +34,68 @@ class A():
a = A()
b = a.test()
c = a or b
class Empty():
pass
def blub():
#def blub():
cdef = 5
cdef cdef
def func():
def test:
return 2
#def test:
# return 2
cdef = A()
return test
#for i in range(3):
# asdf = aaa
# print 'blub'
for i in range(3):
asdf = aaa
print 'blub'
else:
a = 0
def ass_test(a):
"""docstring for assignment test"""
a -= 1
# (comment without indent)
b, c, d = (1,2,3)
del b
# test strange statements
[a,c] ; {1: a}; (1,); `a`
result = int((a+b)*2)
return result
matrix = [[1,2,3], [4,5,6], [7,8,9]]
def loop_test(a):
"""docstring for loop_test"""
def flow_test(a):
global matrix
for i in matrix:
print a
#while 1:
# del a
# print a
return Matrix[0,1]
else:
pass
while 1:
del a
print a
else:
pass
try:
if True or a:
m = 1
for i,j in enumerate(range(3)):
print i,j
for a in test(t):
p
else:
while 1:
m = 2
break
except IndexError, e:
raise e
yield e
except:
pass
finally:
pass
return Matrix[0,m]
if True or a:
print a