statement parser improved

This commit is contained in:
David Halter
2012-03-04 00:37:58 +01:00
parent 08acc2a2f0
commit 933e908fe2
3 changed files with 92 additions and 62 deletions

View File

@@ -5,7 +5,6 @@ import cStringIO
import sys import sys
import types import types
from pyfuzzyparser import PyFuzzyParser
import pyfuzzyparser import pyfuzzyparser
@@ -49,7 +48,7 @@ def _sanitize(str):
class Completer(object): class Completer(object):
def __init__(self): def __init__(self):
self.compldict = {} self.compldict = {}
self.parser = PyFuzzyParser() self.parser = pyfuzzyparser.PyFuzzyParser()
def evalsource(self, text, line=0): def evalsource(self, text, line=0):
sc = self.parser.parse(text) sc = self.parser.parse(text)
@@ -207,6 +206,7 @@ def showdbg():
print "DBG: %s " % d print "DBG: %s " % d
pyfuzzyparser.debug_function = pyfuzzyparser.dbg()
text = cStringIO.StringIO(open('test.py').read()) text = cStringIO.StringIO(open('test.py').read())
cmpl = Completer() cmpl = Completer()
cmpl.evalsource(text, 51) cmpl.evalsource(text, 51)
@@ -223,13 +223,32 @@ showdbg()
print cmpl.parser.top.get_code() print cmpl.parser.top.get_code()
#print cmpl.parser.top.subscopes[1].subscopes[0].get_code() #print cmpl.parser.top.subscopes[1].subscopes[0].get_code()
def handle_names(names):
#print names
for n in names:
try:
print n.names
except AttributeError:
print 'star!', n.from_ns
print 'global names:' print 'global names:'
names = cmpl.parser.top.get_names() names = cmpl.parser.top.get_names()
print [n.names for n in names] handle_names(names)
print
print 'func names:' print 'func names:'
names = cmpl.parser.top.subscopes[0].get_names() names = cmpl.parser.top.subscopes[0].get_names()
print [n.names for n in names] handle_names(names)
print
print 'class names:'
names = cmpl.parser.top.subscopes[2].get_names()
handle_names(names)
for s in cmpl.parser.top.subscopes[2].subscopes:
print 'method names:'
names = s.get_names()
handle_names(names)
p = cmpl.parser p = cmpl.parser
s = p.top s = p.top

View File

@@ -29,8 +29,8 @@ Ignored statements:
- exec (dangerous - not controllable) - exec (dangerous - not controllable)
TODO be tolerant with indents TODO be tolerant with indents
TODO dictionaries not working with statement parser
TODO take special care for future imports TODO take special care for future imports
TODO check meta classes
""" """
import sys import sys
@@ -65,7 +65,7 @@ class Scope(object):
:param docstr: The docstring for the current Scope. :param docstr: The docstring for the current Scope.
:type docstr: str :type docstr: str
""" """
def __init__(self, name, indent, line_nr, docstr=''): def __init__(self, indent, line_nr, docstr=''):
self.subscopes = [] self.subscopes = []
self.imports = [] self.imports = []
self.statements = [] self.statements = []
@@ -162,6 +162,10 @@ class Scope(object):
# function and class names # function and class names
n += [s.name for s in self.subscopes] n += [s.name for s in self.subscopes]
n += self.global_vars n += self.global_vars
for i in self.imports:
n += i.get_names()
return n return n
def is_empty(self): def is_empty(self):
@@ -192,10 +196,14 @@ class Class(Scope):
self.name = name self.name = name
self.supers = supers self.supers = supers
def __repr__(self):
return "<Class instance: %s@%s>" % (self.name, self.line_nr)
def get_code(self, first_indent=False, indention=" "): def get_code(self, first_indent=False, indention=" "):
str = 'class %s' % (self.name) str = 'class %s' % (self.name)
if len(self.supers) > 0: if len(self.supers) > 0:
str += '(%s)' % ','.join(self.supers) sup = ','.join([stmt.code for stmt in self.supers])
str += '(%s)' % sup
str += ':\n' str += ':\n'
str += super(Class, self).get_code(True, indention) str += super(Class, self).get_code(True, indention)
if self.is_empty(): if self.is_empty():
@@ -209,7 +217,7 @@ class Function(Scope):
:param name: The Function name. :param name: The Function name.
:type name: string :type name: string
:param params: The parameters (Name) of a Function. :param params: The parameters (Statement) of a Function.
:type name: list :type name: list
:param indent: The indent level of the flow statement. :param indent: The indent level of the flow statement.
:type indent: int :type indent: int
@@ -223,15 +231,22 @@ class Function(Scope):
self.name = name self.name = name
self.params = params self.params = params
def __repr__(self):
return "<Function instance: %s@%s>" % (self.name, self.line_nr)
def get_code(self, first_indent=False, indention=" "): def get_code(self, first_indent=False, indention=" "):
str = "def %s(%s):\n" % (self.name, ','.join(self.params)) params = ','.join([stmt.code for stmt in self.params])
str = "def %s(%s):\n" % (self.name, params)
str += super(Function, self).get_code(True, indention) str += super(Function, self).get_code(True, indention)
if self.is_empty(): if self.is_empty():
str += "pass\n" str += "pass\n"
return str return str
def get_names(self): def get_names(self):
n = self.params #n = self.params
n = []
for p in self.params:
n += p.set_vars or p.used_vars
n += super(Function, self).get_names() n += super(Function, self).get_names()
return n return n
@@ -258,32 +273,35 @@ class Flow(Scope):
:type indent: int :type indent: int
:param line_nr: Line number of the flow statement. :param line_nr: Line number of the flow statement.
:type line_nr: int :type line_nr: int
:param set_args: Local variables used in the for loop (only there). :param set_vars: Local variables used in the for loop (only there).
:type set_args: list :type set_vars: list
""" """
def __init__(self, command, statement, indent, line_nr, set_args=None): def __init__(self, command, statement, indent, line_nr, set_vars=None):
name = "%s@%s" % (command, line_nr) name = "%s@%s" % (command, line_nr)
super(Flow, self).__init__(indent, line_nr, '') super(Flow, self).__init__(indent, line_nr, '')
self.command = command self.command = command
self.statement = statement self.statement = statement
if set_args == None: if set_vars == None:
self.set_args = [] self.set_vars = []
else: else:
self.set_args = set_args self.set_vars = set_vars
self.next = None self.next = None
def __repr__(self):
return "<Flow instance: %s@%s>" % (self.command, self.line_nr)
def get_code(self, first_indent=False, indention=" "): def get_code(self, first_indent=False, indention=" "):
if self.set_args: if self.set_vars:
args = ",".join(map(lambda x: x.get_code(), self.set_args)) vars = ",".join(map(lambda x: x.get_code(), self.set_vars))
args += ' in ' vars += ' in '
else: else:
args = '' vars = ''
if self.statement: if self.statement:
stmt = self.statement.get_code(new_line=False) stmt = self.statement.get_code(new_line=False)
else: else:
stmt = '' stmt = ''
str = "%s %s%s:\n" % (self.command, args, stmt) str = "%s %s%s:\n" % (self.command, vars, stmt)
str += super(Flow, self).get_code(True, indention) str += super(Flow, self).get_code(True, indention)
if self.next: if self.next:
str += self.next.get_code() str += self.next.get_code()
@@ -294,7 +312,7 @@ class Flow(Scope):
Get the names for the flow. This also includes a call to the super Get the names for the flow. This also includes a call to the super
class. class.
""" """
n = self.set_args n = self.set_vars
if self.next: if self.next:
n += self.next.get_names() n += self.next.get_names()
n += super(Flow, self).get_names() n += super(Flow, self).get_names()
@@ -350,6 +368,11 @@ class Import(object):
else: else:
return "import " + ns_str + '\n' return "import " + ns_str + '\n'
def get_names(self):
if self.star:
return [self]
return [self.alias] if self.alias else [self.namespace]
class Statement(object): class Statement(object):
""" """
@@ -431,7 +454,8 @@ class PyFuzzyParser(object):
class structure of different scopes. class structure of different scopes.
""" """
def __init__(self): def __init__(self):
self.top = Scope('global', 0, 0) # initialize global Scope
self.top = Scope(0, 0)
self.scope = self.top self.scope = self.top
self.current = (None, None, None) self.current = (None, None, None)
@@ -521,43 +545,18 @@ class PyFuzzyParser(object):
def _parseparen(self): def _parseparen(self):
""" """
Functions and Classes have params (which means for classes Functions and Classes have params (which means for classes
super-classes). They are parsed here and returned as Names. super-classes). They are parsed here and returned as Statements.
TODO change behaviour, at the moment it's acting pretty weird and :return: List of Statements
doesn't return list(Name)
:return: List of Names
:rtype: list :rtype: list
""" """
name = ''
names = [] names = []
level = 1 tok = None
while tok not in [')', '\n', ':']:
stmt, tok = self._parse_statement(add_break=',')
if stmt:
names.append(stmt)
while True:
self._parse_statement()
while True:
break
token_type, tok, indent = self.next()
if tok in (')', ',') and level == 1:
if '=' in name:
pass
else:
name = name.replace(' ', '')
names.append(name.strip())
name = ''
if tok == '(':
level += 1
name += "("
elif tok == ')':
level -= 1
if level == 0:
break
else:
name += ")"
elif tok == ',' and level == 1:
pass
else:
name += "%s " % str(tok)
return names return names
def _parsefunction(self, indent): def _parsefunction(self, indent):
@@ -666,18 +665,20 @@ class PyFuzzyParser(object):
set_vars = [] set_vars = []
used_funcs = [] used_funcs = []
used_vars = [] used_vars = []
level = 0 # The level of parentheses
if pre_used_token: if pre_used_token:
token_type, tok, indent = pre_used_token token_type, tok, indent = pre_used_token
else: else:
token_type, tok, indent = self.next() token_type, tok, indent = self.next()
breaks = ['\n', ':', ';'] breaks = ['\n', ':', ';', ')']
if add_break: if add_break:
breaks += add_break breaks += add_break
is_break_token = lambda tok: tok in breaks is_break_token = lambda tok: tok in breaks
while not is_break_token(tok): while not (is_break_token(tok) and level <= 0):
set_string = '' set_string = ''
#print 'parse_stmt', tok, token.tok_name[token_type] #print 'parse_stmt', tok, token.tok_name[token_type]
if token_type == tokenize.NAME: if token_type == tokenize.NAME:
@@ -707,10 +708,14 @@ class PyFuzzyParser(object):
string += ".".join(path) string += ".".join(path)
#print 'parse_stmt', tok, token.tok_name[token_type] #print 'parse_stmt', tok, token.tok_name[token_type]
continue continue
elif ('=' in tok and not tok in ['>=', '<=', '==', '!=']): elif '=' in tok and not tok in ['>=', '<=', '==', '!=']:
# there has been an assignment -> change vars # there has been an assignment -> change vars
set_vars = used_vars set_vars = used_vars
used_vars = [] used_vars = []
elif tok in ['{', '(', '[']:
level += 1
elif tok in ['}', ')', ']']:
level -= 1
if set_string: if set_string:
string = set_string string = set_string
@@ -758,7 +763,7 @@ class PyFuzzyParser(object):
% (tok, token_type, indent)) % (tok, token_type, indent))
if token_type == tokenize.DEDENT: if token_type == tokenize.DEDENT:
print 'dedent' print 'dedent', self.scope
self.scope = self.scope.parent self.scope = self.scope.parent
elif tok == 'def': elif tok == 'def':
func = self._parsefunction(indent) func = self._parsefunction(indent)

12
test.py
View File

@@ -10,7 +10,7 @@ from token import OP as OP_TEST, INDENT as INDENT_TEST
aaa = 6; bbb = 13 aaa = 6; bbb = 13
ccc = bbb; d = open("test.py"); ccc = bbb; d = open("test.py");
def func(): def func(a,b,c=3):
#def test: #def test:
# return 2 # return 2
cdef = A() cdef = A()
@@ -27,10 +27,16 @@ class Supi(A, datetime.datetime):
static_var = 0 static_var = 0
def __init__(): def __init__():
pass self.b = A()
def test(self): def test(self):
import time import time
print 1 print 1
self.a = 1
self.b = (1,
2,[3,2
])
self.c = {1:3,
4:2,5:9}
return A() return A()
class A(): class A():
@@ -38,7 +44,7 @@ class A():
def test(self): def test(self):
return A() return A()
class init: class init:
def __init__(self, a, b=3, c = A()): def __init__(self, a, b=3, c = A):
self.a = a self.a = a
a = A() a = A()