diff --git a/pyfuzzyparser.py b/pyfuzzyparser.py index 4a40e7bb..1310172b 100644 --- a/pyfuzzyparser.py +++ b/pyfuzzyparser.py @@ -1,16 +1,18 @@ """" -TODO This is a parser +TODO Description: This is a parser + +TODO be tolerant with indents +TODO dictionaries not working with statement parser +TODO except has local vars +TODO take special care for future imports scope imports subscopes statements -Ignored simple statements: +Ignored statements: - print (no use for it) - - assert - - break, continue (because we avoid loops) - - del (also no used, since this script avoids loops and files) - exec (dangerous - not controllable) global is a special case and will not be used here @@ -19,6 +21,7 @@ import sys import tokenize import cStringIO import token +import re def indent_block(text, indention=" "): @@ -44,12 +47,21 @@ class Scope(object): self.line_nr = line_nr def add_scope(self, sub): - if sub == None: - print 'push scope: [%s@%s]' % (sub.name, sub.indent) + # print 'push scope: [%s@%s]' % (sub.name, sub.indent) sub.parent = self self.subscopes.append(sub) return sub + def add_statement(self, stmt): + """ + Used to add a Statement or a Scope. + A statement would be a normal command (Statement) or a Scope (Flow). + """ + if isinstance(stmt, Scope): + stmt.parent = self + self.statements.append(stmt) + return stmt + def doc(self, str): """ Clean up a docstring """ d = str.replace('\n', ' ') @@ -83,11 +95,12 @@ class Scope(object): if len(self.docstr) > 0: string += '"""' + self.docstr + '"""\n' for i in self.imports: - string += i.get_code() + '\n' + string += i.get_code() for sub in self.subscopes: - string += str(sub.line_nr) + sub.get_code(first_indent=True, indention=indention) - for l in self.locals: - string += l + '\n' + #string += str(sub.line_nr) + string += sub.get_code(first_indent=True, indention=indention) + for stmt in self.statements: + string += stmt.get_code() if first_indent: string = indent_block(string, indention=indention) @@ -97,7 +110,8 @@ class Scope(object): """ this function returns true if there are no subscopes, imports, locals. """ - return not (self.locals or self.imports or self.subscopes) + return not (self.locals or self.imports or self.subscopes or \ + self.statements) class Class(Scope): @@ -115,32 +129,67 @@ class Class(Scope): str += "pass\n" return str + class Flow(Scope): """ Used to describe programming structure - flow statements, which indent code, but are not classes or functions: + - for - while - if - try - with + Therefore statements like else, except and finally are also here, they are now saved in the root flow elements, but in the next variable. + + :param command: The flow command, if, while, else, etc. + :type command: str + :param statement: The statement after the flow comand -> while 'statement'. + :type statement: Statement + :param indent: The indent level of the flow statement. + :type indent: int + :param line_nr: Line number of the flow statement. + :type line_nr: int + :param set_args: Local variables used in the for loop (only there). + :type set_args: list """ - def __init__(self, code, functions, indent, line_nr): - super(Flow, self).__init__(name, indent, line_nr, None) - name = code + def __init__(self, command, statement, indent, line_nr, set_args=None): + name = "%s@%s" % (command, line_nr) + super(Flow, self).__init__(name, indent, line_nr, '') + self.command = command + self.statement = statement + self.set_args = set_args self.next = None def get_code(self, first_indent=False, indention=" "): - str = 'class %s' % (self.name) - str += ':\n' - str += super(Class, self).get_code(True, indention) - print "get_code class %s %i" % (self.name, self.is_empty()) - if self.is_empty(): - str += "pass\n" + if self.set_args: + args = ",".join(map(lambda x: x.get_code(), self.set_args)) + args += ' in ' + else: + args = '' + + if self.statement: + stmt = self.statement.get_code(new_line=False) + else: + stmt = '' + str = "%s %s%s:\n" % (self.command, args, stmt) + str += super(Flow, self).get_code(True, indention) + if self.next: + str += self.next.get_code() return str + def set_next(self, next): + """ Set the next element in the flow, those are else, except, etc. """ + if self.next: + return self.next.set_next(next) + else: + self.next = next + next.parent = self.parent + return next + + class Function(Scope): def __init__(self, name, params, indent, line_nr, docstr=''): Scope.__init__(self, name, indent, line_nr, docstr) @@ -148,12 +197,9 @@ class Function(Scope): def get_code(self, first_indent=False, indention=" "): str = "def %s(%s):\n" % (self.name, ','.join(self.params)) - #if len(self.docstr) > 0: - # str += self.childindent()+'"""'+self.docstr+'"""\n' str += super(Function, self).get_code(True, indention) if self.is_empty(): str += "pass\n" - #print "func", self.locals return str @@ -166,7 +212,7 @@ class Import(object): :param line_nr: Line number. :type line_nr: int - :param namespace: the import, as an array list, e.g. ['datetime', 'time'] + :param namespace: the import, as an array list, e.g. ['datetime', 'time'] :type namespace: list :param alias: the alias (valid in the current namespace). :param from_ns: from declaration in an import. @@ -185,61 +231,38 @@ class Import(object): self.star = star def get_code(self): - ns = ".".join(self.namespace) if self.alias: - ns_str = "%s as %s" % (ns, self.alias) + ns_str = "%s as %s" % (self.namespace, self.alias) else: - ns_str = ns + ns_str = str(self.namespace) if self.from_ns: if self.star: ns_str = '*' - return "from %s import %s" % (self.from_ns, ns_str) + return "from %s import %s" % (self.from_ns, ns_str) + '\n' else: - return "import " + ns_str + return "import " + ns_str + '\n' class Statement(object): """ - This is the class for Local and Functions + This is the class for all different statements. :param code: - :param locals: + :param locals: """ - def __init__(self, code, locals, functions): + def __init__(self, code, set_vars, used_funcs, used_vars, indent, line_nr): self.code = code - self.locals = locals - self.functions = functions + self.set_vars = set_vars + self.used_funcs = used_funcs + self.used_vars = used_vars - def get_code(self): - raise NotImplementedError() - - -class Local(object): - """ - stores locals variables of any scopes - """ - def __init__(self, line_nr, left, right=None, is_global=False): - """ - @param line_nr - @param left: the left part of the local assignment - @param right: the right part of the assignment, must not be set - (in case of global) - @param is_global: defines a global variable - """ + self.indent = indent self.line_nr = line_nr - self.left = left - self.right = right - def get_code(self): - if self.alias: - ns_str = "%s as %s" % (self.namespace, self.alias) + def get_code(self, new_line=True): + if new_line: + return self.code + '\n' else: - ns_str = self.namespace - if self.from_ns: - if self.star: - ns_str = '*' - return "test from %s import %s" % (self.from_ns, ns_str) - else: - return "test import " + ns_str + return self.code class Name(object): @@ -249,13 +272,18 @@ class Name(object): So a name like "module.class.function" would result in an array of [module, class, function] """ - def __init__(self, names): + def __init__(self, names, indent, line_nr): super(Name, self).__init__() self.names = names + self.indent = indent + self.line_nr = line_nr def get_code(self): - """ returns the name again in a full string format """ - return ".".join(names) + """ Returns the names in a full string format """ + return ".".join(self.names) + + def __str__(self): + return self.get_code() class PyFuzzyParser(object): @@ -266,27 +294,33 @@ class PyFuzzyParser(object): def __init__(self): self.top = Scope('global', 0, 0) self.scope = self.top + self.current = (None, None, None) def _parsedotname(self, pre_used_token=None): - """ @return (dottedname, nexttoken) """ + """ + The dot name parser parses a name, variable or function and returns + their names. + :return: list of the names, token_type, nexttoken, start_indent. + :rtype: (Name, int, str, int) + """ names = [] if pre_used_token is None: - tokentype, tok, indent = self.next() - if tokentype != tokenize.NAME and tok != '*': + token_type, tok, indent = self.next() + if token_type != tokenize.NAME and tok != '*': return ([], tok) else: - tokentype, tok, indent = pre_used_token + token_type, tok, indent = pre_used_token names.append(tok) + start_indent = indent while True: - tokentype, tok, indent = self.next() + token_type, tok, indent = self.next() if tok != '.': break - tokentype, tok, indent = self.next() - if tokentype != tokenize.NAME: + token_type, tok, indent = self.next() + if token_type != tokenize.NAME: break names.append(tok) - return (names, tok) - + return (names, token_type, tok, start_indent) def _parse_value_list(self, pre_used_token=None): """ @@ -295,32 +329,36 @@ class PyFuzzyParser(object): """ value_list = [] if pre_used_token: - tokentype, tok, indent = pre_used_token - n = self._parsedotname(tok) + token_type, tok, indent = pre_used_token + n, token_type, tok, start_indent = self._parsedotname(tok) if n: - value_list.append(n) + value_list.append(Name(n, start_indent, self.line_nr)) - tokentype, tok, indent = self.next() - while tok != 'in' and tokentype != tokenize.NEWLINE: - n = self._parsedotname(self.current) + token_type, tok, indent = self.next() + while tok != 'in' and token_type != tokenize.NEWLINE: + n, token_type, tok, start_indent = self._parsedotname(self.current) if n: - value_list.append(n) + value_list.append(Name(n, start_indent, self.line_nr)) + if tok == 'in': + break - tokentype, tok, indent = self.next() + print 'for_tok', tok + token_type, tok, indent = self.next() return (value_list, tok) def _parseimportlist(self): imports = [] while True: - name, tok = self._parsedotname() + name, token_type, tok, start_indent = self._parsedotname() if not name: break - name2 = '' + name2 = None if tok == 'as': - name2, tok = self._parsedotname() - imports.append((name, name2)) + name2, token_type, tok, start_indent2 = self._parsedotname() + name2 = Name(name2, start_indent2, self.line_nr) + imports.append((Name(name, start_indent, self.line_nr), name2)) while tok != "," and "\n" not in tok: - tokentype, tok, indent = self.next() + token_type, tok, indent = self.next() if tok != ",": break return imports @@ -330,7 +368,7 @@ class PyFuzzyParser(object): names = [] level = 1 while True: - tokentype, tok, indent = self.next() + token_type, tok, indent = self.next() if tok in (')', ',') and level == 1: if '=' not in name: name = name.replace(' ', '') @@ -351,43 +389,43 @@ class PyFuzzyParser(object): name += "%s " % str(tok) return names - def _parsefunction(self, indent): - tokentype, fname, ind = self.next() - if tokentype != tokenize.NAME: + token_type, fname, ind = self.next() + if token_type != tokenize.NAME: return None - tokentype, open, ind = self.next() + token_type, open, ind = self.next() if open != '(': return None params = self._parseparen() - tokentype, colon, ind = self.next() + token_type, colon, ind = self.next() if colon != ':': return None return Function(fname, params, indent, self.line_nr) - def _parseclass(self, indent): - tokentype, cname, ind = self.next() - if tokentype != tokenize.NAME: + token_type, cname, ind = self.next() + if token_type != tokenize.NAME: + print "class: syntax error - token is not a name@%s (%s: %s)" \ + % (self.line_nr, token.tok_name[token_type], cname) return None super = [] - tokentype, next, ind = self.next() + token_type, next, ind = self.next() if next == '(': super = self._parseparen() elif next != ':': + print "class: syntax error - %s@%s" % (cname, self.line_nr) return None return Class(cname, super, indent, self.line_nr) - def _parseassignment(self): assign = '' - tokentype, tok, indent = self.next() - if tokentype == tokenize.STRING or tok == 'str': + token_type, tok, indent = self.next() + if token_type == tokenize.STRING or tok == 'str': return '""' elif tok == '(' or tok == 'tuple': return '()' @@ -395,7 +433,7 @@ class PyFuzzyParser(object): return '[]' elif tok == '{' or tok == 'dict': return '{}' - elif tokentype == tokenize.NUMBER: + elif token_type == tokenize.NUMBER: return '0' elif tok == 'open' or tok == 'file': return 'file' @@ -407,7 +445,7 @@ class PyFuzzyParser(object): assign += tok level = 0 while True: - tokentype, tok, indent = self.next() + token_type, tok, indent = self.next() if tok in ('(', '{', '['): level += 1 elif tok in (']', '}', ')'): @@ -420,19 +458,7 @@ class PyFuzzyParser(object): assign += tok return "%s" % assign - - def _parse_words(self, pre_used_token): - """ - Used to parse a word, if the tokenizer returned a word at the start of - a new command. - - :param pre_used_token: The pre parsed token. - :type pre_used_token: set - """ - return self._parse_statement(pre_used_token) - - - def _parse_statement(self, pre_used_token = None): + def _parse_statement(self, pre_used_token=None): """ Parses statements like: @@ -451,30 +477,47 @@ class PyFuzzyParser(object): used_funcs = [] used_vars = [] - token_type, tok, indent = pre_used_token - while tok != '\n' and tok != ';': + if pre_used_token: + token_type, tok, indent = pre_used_token + else: + token_type, tok, indent = self.next() + + is_break_token = lambda tok: tok in ['\n', ':', ';'] + + while not is_break_token(tok): set_string = '' - print 'parse_stmt', tok, token.tok_name[token_type] + #print 'parse_stmt', tok, token.tok_name[token_type] if token_type == tokenize.NAME: + print 'is_name', tok if tok == 'pass': set_string = '' - elif tok == 'return' or tok == 'del': + elif tok in ['return', 'yield', 'del', 'raise', 'assert']: set_string = tok + ' ' elif tok == 'print': - set_string = '' + set_string = tok + ' ' else: - path, tok = self._parsedotname(self.current) + path, token_type, tok, start_indent = \ + self._parsedotname(self.current) + print 'path', path + n = Name(path, start_indent, self.line_nr) if tok == '(': # it must be a function - used_funcs.append(path) + used_funcs.append(n) else: - used_vars.append(path) + used_vars.append(n) + if string: + print 'str', string[-1] + if string and re.match(r'[\w\d]', string[-1]): + print 'yay' + string += ' ' + #if token_type == tokenize.NAME \ + # and self.last_token[0] == tokenize.NAME: + # print 'last_token', self.last_token, token_type + # string += ' ' + tok string += ".".join(path) - print 'parse_stmt', tok, token.tok_name[token_type] - if tok == '\n' or tok == ';': - break - - if ('=' in tok and not tok in ['>=', '<=', '==', '!=']): + #print 'parse_stmt', tok, token.tok_name[token_type] + continue + elif ('=' in tok and not tok in ['>=', '<=', '==', '!=']): # there has been an assignement -> change vars set_vars = used_vars used_vars = [] @@ -483,37 +526,50 @@ class PyFuzzyParser(object): string = set_string else: string += tok - token_type, tok, indent = self.next() + # caution: don't use indent anywhere, + # it's not working with the name parsing + token_type, tok, indent_dummy = self.next() if not string: return None, tok - print 'new_stat', string, set_vars, used_funcs, used_vars - #return Statement(), tok + #print 'new_stat', string, set_vars, used_funcs, used_vars + stmt = Statement(string, set_vars, used_funcs, used_vars,\ + self.line_nr, indent) + return stmt, tok def next(self): type, tok, position, dummy, self.parserline = self.gen.next() (self.line_nr, indent) = position + self.last_token = self.current self.current = (type, tok, indent) return self.current def parse(self, text): + """ + The main part of the program. It analyzes the given code-text and + returns a tree-like scope. For a more detailed description, see the + class description. + """ buf = cStringIO.StringIO(''.join(text) + '\n') self.gen = tokenize.generate_tokens(buf.readline) self.currentscope = self.scope try: + extended_flow = ['else', 'except', 'finally'] + statement_toks = ['{', '[', '(', '`'] + freshscope = True while True: - full_token = self.next() - tokentype, tok, indent = full_token + token_type, tok, indent = self.next() dbg('main: tok=[%s] type=[%s] indent=[%s]'\ - % (tok, tokentype, indent)) + % (tok, token_type, indent)) - if tokentype == tokenize.DEDENT: + if token_type == tokenize.DEDENT: + print 'dedent', self.scope.name self.scope = self.scope.parent elif tok == 'def': func = self._parsefunction(indent) if func is None: - print "function: syntax error..." + print "function: syntax error@%s" % self.line_nr continue dbg("new scope: function %s" % (func.name)) freshscope = True @@ -521,7 +577,6 @@ class PyFuzzyParser(object): elif tok == 'class': cls = self._parseclass(indent) if cls is None: - print "class: syntax error..." continue freshscope = True dbg("new scope: class %s" % (cls.name)) @@ -533,10 +588,11 @@ class PyFuzzyParser(object): self.scope.add_import(Import(self.line_nr, mod, alias)) freshscope = False elif tok == 'from': - mod, tok = self._parsedotname() + mod, token_type, tok, start_indent = self._parsedotname() if not mod or tok != "import": print "from: syntax error..." continue + mod = Name(mod, start_indent, self.line_nr) names = self._parseimportlist() for name, alias in names: i = Import(self.line_nr, name, alias, mod) @@ -544,33 +600,40 @@ class PyFuzzyParser(object): freshscope = False #loops elif tok == 'for': - print tok, tokentype value_list, tok = self._parse_value_list() if tok == 'in': statement, tok = self._parse_statement() if tok == ':': - self.scope.append(statement) + f = Flow('for', statement, indent, self.line_nr, \ + value_list) + dbg("new scope: flow %s" % (f.name)) + self.scope = self.scope.add_statement(f) + + elif tok in ['if', 'while', 'try', 'with'] + extended_flow: + # TODO with statement has local variables + command = tok + statement, tok = self._parse_statement() + if tok == ':': + f = Flow(command, statement, indent, self.line_nr) + dbg("new scope: flow %s" % (f.name)) + if command in extended_flow: + # the last statement has to be another part of + # the flow statement + self.scope = self.scope.statements[-1].set_next(f) + else: + self.scope = self.scope.add_statement(f) - elif tok == 'while': - param_list = self._parse_while_loop() elif tok == 'global': - self._parse_words(full_token) - elif tokentype == tokenize.STRING: + self._parse_statement(self.current) + pass + # TODO add suport for global + elif token_type == tokenize.STRING: if freshscope: self.scope.doc(tok) - elif tokentype == tokenize.NAME: - self._parse_words(full_token) - """ - name, tok = self._parsedotname(tok) - if tok == '=': - stmt = self._parseassignment() - dbg("parseassignment: %s = %s" % (name, stmt)) - if stmt != None: - self.scope.add_local("%s = %s" % (name, stmt)) - else: - #print "_not_implemented_", tok, self.parserline - pass - """ + elif token_type == tokenize.NAME or tok in statement_toks: + stmt, tok = self._parse_statement(self.current) + if stmt: + self.scope.add_statement(stmt) freshscope = False #else: #print "_not_implemented_", tok, self.parserline diff --git a/test.py b/test.py index 9508a2b8..eed98ed4 100644 --- a/test.py +++ b/test.py @@ -34,41 +34,68 @@ class A(): a = A() b = a.test() +c = a or b class Empty(): pass -def blub(): +#def blub(): cdef = 5 +cdef cdef def func(): - def test: - return 2 + #def test: + # return 2 cdef = A() return test -#for i in range(3): -# asdf = aaa -# print 'blub' - +for i in range(3): + asdf = aaa + print 'blub' +else: + a = 0 def ass_test(a): """docstring for assignment test""" a -= 1 +# (comment without indent) b, c, d = (1,2,3) del b + # test strange statements + [a,c] ; {1: a}; (1,); `a` result = int((a+b)*2) return result - matrix = [[1,2,3], [4,5,6], [7,8,9]] -def loop_test(a): - """docstring for loop_test""" +def flow_test(a): global matrix for i in matrix: print a - #while 1: - # del a - # print a - - return Matrix[0,1] + else: + pass + while 1: + del a + print a + else: + pass + try: + if True or a: + m = 1 + for i,j in enumerate(range(3)): + print i,j + for a in test(t): + p + else: + while 1: + m = 2 + break + except IndexError, e: + raise e + yield e + except: + pass + finally: + pass + return Matrix[0,m] +if True or a: + print a