forked from VimPlug/jedi
601 lines
18 KiB
Python
601 lines
18 KiB
Python
""""
|
|
TODO This is a parser
|
|
|
|
scope
|
|
imports
|
|
subscopes
|
|
statements
|
|
|
|
Ignored simple statements:
|
|
- print (no use for it)
|
|
- assert
|
|
- break, continue (because we avoid loops)
|
|
- del (also no used, since this script avoids loops and files)
|
|
- exec (dangerous - not controllable)
|
|
|
|
global is a special case and will not be used here
|
|
"""
|
|
import sys
|
|
import tokenize
|
|
import cStringIO
|
|
import token
|
|
|
|
|
|
def indent_block(text, indention=" "):
|
|
""" This function indents a text block with a default of four spaces """
|
|
temp = ''
|
|
while text and text[-1] == '\n':
|
|
temp += text[-1]
|
|
text = text[:-1]
|
|
lines = text.split('\n')
|
|
return '\n'.join(map(lambda s: indention + s, lines)) + temp
|
|
|
|
|
|
class Scope(object):
|
|
def __init__(self, name, indent, line_nr, docstr=''):
|
|
self.subscopes = []
|
|
self.locals = []
|
|
self.imports = []
|
|
self.statements = []
|
|
self.docstr = docstr
|
|
self.parent = None
|
|
self.name = name
|
|
self.indent = indent
|
|
self.line_nr = line_nr
|
|
|
|
def add_scope(self, sub):
|
|
if sub == None:
|
|
print 'push scope: [%s@%s]' % (sub.name, sub.indent)
|
|
sub.parent = self
|
|
self.subscopes.append(sub)
|
|
return sub
|
|
|
|
def doc(self, str):
|
|
""" Clean up a docstring """
|
|
d = str.replace('\n', ' ')
|
|
d = d.replace('\t', ' ')
|
|
while d.find(' ') > -1:
|
|
d = d.replace(' ', ' ')
|
|
while d[0] in '"\'\t ':
|
|
d = d[1:]
|
|
while d[-1] in '"\'\t ':
|
|
d = d[:-1]
|
|
dbg("Scope(%s)::docstr = %s" % (self, d))
|
|
self.docstr = d
|
|
|
|
def add_local(self, loc):
|
|
self.locals.append(loc)
|
|
|
|
def add_import(self, imp):
|
|
self.imports.append(imp)
|
|
|
|
def _checkexisting(self, test):
|
|
"Convienance function... keep out duplicates"
|
|
if test.find('=') > -1:
|
|
var = test.split('=')[0].strip()
|
|
for l in self.locals:
|
|
if l.find('=') > -1 and var == l.split('=')[0].strip():
|
|
self.locals.remove(l)
|
|
|
|
def get_code(self, first_indent=False, indention=" "):
|
|
""" Returns the code of the current scope. """
|
|
string = ""
|
|
if len(self.docstr) > 0:
|
|
string += '"""' + self.docstr + '"""\n'
|
|
for i in self.imports:
|
|
string += i.get_code() + '\n'
|
|
for sub in self.subscopes:
|
|
string += str(sub.line_nr) + sub.get_code(first_indent=True, indention=indention)
|
|
for l in self.locals:
|
|
string += l + '\n'
|
|
|
|
if first_indent:
|
|
string = indent_block(string, indention=indention)
|
|
return string
|
|
|
|
def is_empty(self):
|
|
"""
|
|
this function returns true if there are no subscopes, imports, locals.
|
|
"""
|
|
return not (self.locals or self.imports or self.subscopes)
|
|
|
|
|
|
class Class(Scope):
|
|
def __init__(self, name, supers, indent, line_nr, docstr=''):
|
|
super(Class, self).__init__(name, indent, line_nr, docstr)
|
|
self.supers = supers
|
|
|
|
def get_code(self, first_indent=False, indention=" "):
|
|
str = 'class %s' % (self.name)
|
|
if len(self.supers) > 0:
|
|
str += '(%s)' % ','.join(self.supers)
|
|
str += ':\n'
|
|
str += super(Class, self).get_code(True, indention)
|
|
if self.is_empty():
|
|
str += "pass\n"
|
|
return str
|
|
|
|
class Flow(Scope):
|
|
"""
|
|
Used to describe programming structure - flow statements,
|
|
which indent code, but are not classes or functions:
|
|
- for
|
|
- while
|
|
- if
|
|
- try
|
|
- with
|
|
Therefore statements like else, except and finally are also here,
|
|
they are now saved in the root flow elements, but in the next variable.
|
|
"""
|
|
def __init__(self, code, functions, indent, line_nr):
|
|
super(Flow, self).__init__(name, indent, line_nr, None)
|
|
name = code
|
|
self.next = None
|
|
|
|
def get_code(self, first_indent=False, indention=" "):
|
|
str = 'class %s' % (self.name)
|
|
str += ':\n'
|
|
str += super(Class, self).get_code(True, indention)
|
|
print "get_code class %s %i" % (self.name, self.is_empty())
|
|
if self.is_empty():
|
|
str += "pass\n"
|
|
return str
|
|
|
|
class Function(Scope):
|
|
def __init__(self, name, params, indent, line_nr, docstr=''):
|
|
Scope.__init__(self, name, indent, line_nr, docstr)
|
|
self.params = params
|
|
|
|
def get_code(self, first_indent=False, indention=" "):
|
|
str = "def %s(%s):\n" % (self.name, ','.join(self.params))
|
|
#if len(self.docstr) > 0:
|
|
# str += self.childindent()+'"""'+self.docstr+'"""\n'
|
|
str += super(Function, self).get_code(True, indention)
|
|
if self.is_empty():
|
|
str += "pass\n"
|
|
#print "func", self.locals
|
|
return str
|
|
|
|
|
|
class Import(object):
|
|
"""
|
|
stores the imports of any scopes.
|
|
|
|
>>> 1+1
|
|
2
|
|
|
|
:param line_nr: Line number.
|
|
:type line_nr: int
|
|
:param namespace: the import, as an array list, e.g. ['datetime', 'time']
|
|
:type namespace: list
|
|
:param alias: the alias (valid in the current namespace).
|
|
:param from_ns: from declaration in an import.
|
|
:param star: if a star is used -> from time import *.
|
|
|
|
:returns: test
|
|
:raises:
|
|
|
|
TODO check star?
|
|
"""
|
|
def __init__(self, line_nr, namespace, alias='', from_ns='', star=False):
|
|
self.line_nr = line_nr
|
|
self.namespace = namespace
|
|
self.alias = alias
|
|
self.from_ns = from_ns
|
|
self.star = star
|
|
|
|
def get_code(self):
|
|
ns = ".".join(self.namespace)
|
|
if self.alias:
|
|
ns_str = "%s as %s" % (ns, self.alias)
|
|
else:
|
|
ns_str = ns
|
|
if self.from_ns:
|
|
if self.star:
|
|
ns_str = '*'
|
|
return "from %s import %s" % (self.from_ns, ns_str)
|
|
else:
|
|
return "import " + ns_str
|
|
|
|
|
|
class Statement(object):
|
|
"""
|
|
This is the class for Local and Functions
|
|
:param code:
|
|
:param locals:
|
|
"""
|
|
def __init__(self, code, locals, functions):
|
|
self.code = code
|
|
self.locals = locals
|
|
self.functions = functions
|
|
|
|
def get_code(self):
|
|
raise NotImplementedError()
|
|
|
|
|
|
class Local(object):
|
|
"""
|
|
stores locals variables of any scopes
|
|
"""
|
|
def __init__(self, line_nr, left, right=None, is_global=False):
|
|
"""
|
|
@param line_nr
|
|
@param left: the left part of the local assignment
|
|
@param right: the right part of the assignment, must not be set
|
|
(in case of global)
|
|
@param is_global: defines a global variable
|
|
"""
|
|
self.line_nr = line_nr
|
|
self.left = left
|
|
self.right = right
|
|
|
|
def get_code(self):
|
|
if self.alias:
|
|
ns_str = "%s as %s" % (self.namespace, self.alias)
|
|
else:
|
|
ns_str = self.namespace
|
|
if self.from_ns:
|
|
if self.star:
|
|
ns_str = '*'
|
|
return "test from %s import %s" % (self.from_ns, ns_str)
|
|
else:
|
|
return "test import " + ns_str
|
|
|
|
|
|
class Name(object):
|
|
"""
|
|
Used to define names in python.
|
|
Which means the whole namespace/class/function stuff.
|
|
So a name like "module.class.function"
|
|
would result in an array of [module, class, function]
|
|
"""
|
|
def __init__(self, names):
|
|
super(Name, self).__init__()
|
|
self.names = names
|
|
|
|
def get_code(self):
|
|
""" returns the name again in a full string format """
|
|
return ".".join(names)
|
|
|
|
|
|
class PyFuzzyParser(object):
|
|
"""
|
|
This class is used to parse a Python file, it then divides them into a
|
|
class structure of differnt scopes.
|
|
"""
|
|
def __init__(self):
|
|
self.top = Scope('global', 0, 0)
|
|
self.scope = self.top
|
|
|
|
def _parsedotname(self, pre_used_token=None):
|
|
""" @return (dottedname, nexttoken) """
|
|
names = []
|
|
if pre_used_token is None:
|
|
tokentype, tok, indent = self.next()
|
|
if tokentype != tokenize.NAME and tok != '*':
|
|
return ([], tok)
|
|
else:
|
|
tokentype, tok, indent = pre_used_token
|
|
names.append(tok)
|
|
while True:
|
|
tokentype, tok, indent = self.next()
|
|
if tok != '.':
|
|
break
|
|
tokentype, tok, indent = self.next()
|
|
if tokentype != tokenize.NAME:
|
|
break
|
|
names.append(tok)
|
|
return (names, tok)
|
|
|
|
|
|
def _parse_value_list(self, pre_used_token=None):
|
|
"""
|
|
A value list is a comma separated list. This is used for:
|
|
>>> for a,b,self.c in enumerate(test)
|
|
"""
|
|
value_list = []
|
|
if pre_used_token:
|
|
tokentype, tok, indent = pre_used_token
|
|
n = self._parsedotname(tok)
|
|
if n:
|
|
value_list.append(n)
|
|
|
|
tokentype, tok, indent = self.next()
|
|
while tok != 'in' and tokentype != tokenize.NEWLINE:
|
|
n = self._parsedotname(self.current)
|
|
if n:
|
|
value_list.append(n)
|
|
|
|
tokentype, tok, indent = self.next()
|
|
return (value_list, tok)
|
|
|
|
def _parseimportlist(self):
|
|
imports = []
|
|
while True:
|
|
name, tok = self._parsedotname()
|
|
if not name:
|
|
break
|
|
name2 = ''
|
|
if tok == 'as':
|
|
name2, tok = self._parsedotname()
|
|
imports.append((name, name2))
|
|
while tok != "," and "\n" not in tok:
|
|
tokentype, tok, indent = self.next()
|
|
if tok != ",":
|
|
break
|
|
return imports
|
|
|
|
def _parseparen(self):
|
|
name = ''
|
|
names = []
|
|
level = 1
|
|
while True:
|
|
tokentype, tok, indent = self.next()
|
|
if tok in (')', ',') and level == 1:
|
|
if '=' not in name:
|
|
name = name.replace(' ', '')
|
|
names.append(name.strip())
|
|
name = ''
|
|
if tok == '(':
|
|
level += 1
|
|
name += "("
|
|
elif tok == ')':
|
|
level -= 1
|
|
if level == 0:
|
|
break
|
|
else:
|
|
name += ")"
|
|
elif tok == ',' and level == 1:
|
|
pass
|
|
else:
|
|
name += "%s " % str(tok)
|
|
return names
|
|
|
|
|
|
def _parsefunction(self, indent):
|
|
tokentype, fname, ind = self.next()
|
|
if tokentype != tokenize.NAME:
|
|
return None
|
|
|
|
tokentype, open, ind = self.next()
|
|
if open != '(':
|
|
return None
|
|
params = self._parseparen()
|
|
|
|
tokentype, colon, ind = self.next()
|
|
if colon != ':':
|
|
return None
|
|
|
|
return Function(fname, params, indent, self.line_nr)
|
|
|
|
|
|
def _parseclass(self, indent):
|
|
tokentype, cname, ind = self.next()
|
|
if tokentype != tokenize.NAME:
|
|
return None
|
|
|
|
super = []
|
|
tokentype, next, ind = self.next()
|
|
if next == '(':
|
|
super = self._parseparen()
|
|
elif next != ':':
|
|
return None
|
|
|
|
return Class(cname, super, indent, self.line_nr)
|
|
|
|
|
|
def _parseassignment(self):
|
|
assign = ''
|
|
tokentype, tok, indent = self.next()
|
|
if tokentype == tokenize.STRING or tok == 'str':
|
|
return '""'
|
|
elif tok == '(' or tok == 'tuple':
|
|
return '()'
|
|
elif tok == '[' or tok == 'list':
|
|
return '[]'
|
|
elif tok == '{' or tok == 'dict':
|
|
return '{}'
|
|
elif tokentype == tokenize.NUMBER:
|
|
return '0'
|
|
elif tok == 'open' or tok == 'file':
|
|
return 'file'
|
|
elif tok == 'None':
|
|
return '_PyCmplNoType()'
|
|
elif tok == 'type':
|
|
return 'type(_PyCmplNoType)' # only for method resolution
|
|
else:
|
|
assign += tok
|
|
level = 0
|
|
while True:
|
|
tokentype, tok, indent = self.next()
|
|
if tok in ('(', '{', '['):
|
|
level += 1
|
|
elif tok in (']', '}', ')'):
|
|
level -= 1
|
|
if level == 0:
|
|
break
|
|
elif level == 0:
|
|
if tok in (';', '\n'):
|
|
break
|
|
assign += tok
|
|
return "%s" % assign
|
|
|
|
|
|
def _parse_words(self, pre_used_token):
|
|
"""
|
|
Used to parse a word, if the tokenizer returned a word at the start of
|
|
a new command.
|
|
|
|
:param pre_used_token: The pre parsed token.
|
|
:type pre_used_token: set
|
|
"""
|
|
return self._parse_statement(pre_used_token)
|
|
|
|
|
|
def _parse_statement(self, pre_used_token = None):
|
|
"""
|
|
Parses statements like:
|
|
|
|
>>> a = test(b)
|
|
>>> a += 3 - 2 or b
|
|
|
|
and so on. One row at a time.
|
|
|
|
:param pre_used_token: The pre parsed token.
|
|
:type pre_used_token: set
|
|
:return: Statement + last parsed token.
|
|
:rtype: (Statement, str)
|
|
"""
|
|
string = ''
|
|
set_vars = []
|
|
used_funcs = []
|
|
used_vars = []
|
|
|
|
token_type, tok, indent = pre_used_token
|
|
while tok != '\n' and tok != ';':
|
|
set_string = ''
|
|
print 'parse_stmt', tok, token.tok_name[token_type]
|
|
if token_type == tokenize.NAME:
|
|
if tok == 'pass':
|
|
set_string = ''
|
|
elif tok == 'return' or tok == 'del':
|
|
set_string = tok + ' '
|
|
elif tok == 'print':
|
|
set_string = ''
|
|
else:
|
|
path, tok = self._parsedotname(self.current)
|
|
if tok == '(':
|
|
# it must be a function
|
|
used_funcs.append(path)
|
|
else:
|
|
used_vars.append(path)
|
|
string += ".".join(path)
|
|
print 'parse_stmt', tok, token.tok_name[token_type]
|
|
if tok == '\n' or tok == ';':
|
|
break
|
|
|
|
if ('=' in tok and not tok in ['>=', '<=', '==', '!=']):
|
|
# there has been an assignement -> change vars
|
|
set_vars = used_vars
|
|
used_vars = []
|
|
|
|
if set_string:
|
|
string = set_string
|
|
else:
|
|
string += tok
|
|
token_type, tok, indent = self.next()
|
|
if not string:
|
|
return None, tok
|
|
print 'new_stat', string, set_vars, used_funcs, used_vars
|
|
#return Statement(), tok
|
|
|
|
def next(self):
|
|
type, tok, position, dummy, self.parserline = self.gen.next()
|
|
(self.line_nr, indent) = position
|
|
self.current = (type, tok, indent)
|
|
return self.current
|
|
|
|
def parse(self, text):
|
|
buf = cStringIO.StringIO(''.join(text) + '\n')
|
|
self.gen = tokenize.generate_tokens(buf.readline)
|
|
self.currentscope = self.scope
|
|
|
|
try:
|
|
freshscope = True
|
|
while True:
|
|
full_token = self.next()
|
|
tokentype, tok, indent = full_token
|
|
dbg('main: tok=[%s] type=[%s] indent=[%s]'\
|
|
% (tok, tokentype, indent))
|
|
|
|
if tokentype == tokenize.DEDENT:
|
|
self.scope = self.scope.parent
|
|
elif tok == 'def':
|
|
func = self._parsefunction(indent)
|
|
if func is None:
|
|
print "function: syntax error..."
|
|
continue
|
|
dbg("new scope: function %s" % (func.name))
|
|
freshscope = True
|
|
self.scope = self.scope.add_scope(func)
|
|
elif tok == 'class':
|
|
cls = self._parseclass(indent)
|
|
if cls is None:
|
|
print "class: syntax error..."
|
|
continue
|
|
freshscope = True
|
|
dbg("new scope: class %s" % (cls.name))
|
|
self.scope = self.scope.add_scope(cls)
|
|
# import stuff
|
|
elif tok == 'import':
|
|
imports = self._parseimportlist()
|
|
for mod, alias in imports:
|
|
self.scope.add_import(Import(self.line_nr, mod, alias))
|
|
freshscope = False
|
|
elif tok == 'from':
|
|
mod, tok = self._parsedotname()
|
|
if not mod or tok != "import":
|
|
print "from: syntax error..."
|
|
continue
|
|
names = self._parseimportlist()
|
|
for name, alias in names:
|
|
i = Import(self.line_nr, name, alias, mod)
|
|
self.scope.add_import(i)
|
|
freshscope = False
|
|
#loops
|
|
elif tok == 'for':
|
|
print tok, tokentype
|
|
value_list, tok = self._parse_value_list()
|
|
if tok == 'in':
|
|
statement, tok = self._parse_statement()
|
|
if tok == ':':
|
|
self.scope.append(statement)
|
|
|
|
elif tok == 'while':
|
|
param_list = self._parse_while_loop()
|
|
elif tok == 'global':
|
|
self._parse_words(full_token)
|
|
elif tokentype == tokenize.STRING:
|
|
if freshscope:
|
|
self.scope.doc(tok)
|
|
elif tokentype == tokenize.NAME:
|
|
self._parse_words(full_token)
|
|
"""
|
|
name, tok = self._parsedotname(tok)
|
|
if tok == '=':
|
|
stmt = self._parseassignment()
|
|
dbg("parseassignment: %s = %s" % (name, stmt))
|
|
if stmt != None:
|
|
self.scope.add_local("%s = %s" % (name, stmt))
|
|
else:
|
|
#print "_not_implemented_", tok, self.parserline
|
|
pass
|
|
"""
|
|
freshscope = False
|
|
#else:
|
|
#print "_not_implemented_", tok, self.parserline
|
|
except StopIteration: # thrown on EOF
|
|
pass
|
|
#except:
|
|
# dbg("parse error: %s, %s @ %s" %
|
|
# (sys.exc_info()[0], sys.exc_info()[1], self.parserline))
|
|
return self.top
|
|
|
|
|
|
def _sanitize(str):
|
|
val = ''
|
|
level = 0
|
|
for c in str:
|
|
if c in ('(', '{', '['):
|
|
level += 1
|
|
elif c in (']', '}', ')'):
|
|
level -= 1
|
|
elif level == 0:
|
|
val += c
|
|
return val
|
|
|
|
|
|
def dbg(*args):
|
|
#print args
|
|
pass
|