statement parser - first version

This commit is contained in:
David Halter
2012-03-11 03:03:54 +01:00
parent 5332a87d39
commit b2a77acff6
4 changed files with 232 additions and 53 deletions

View File

@@ -1,9 +1,5 @@
import parsing import parsing
def Statement(object):
"""
Placeholder that accepts a single argument and does nothing.

NOTE(review): a ``def`` whose only parameter is named ``object`` looks
like it was meant to be ``class Statement(object)`` - confirm.
"""
pass
def get_names_for_scope(scope): def get_names_for_scope(scope):
""" Get all completions possible for the current scope. """ """ Get all completions possible for the current scope. """
comp = [] comp = []
@@ -38,6 +34,7 @@ def follow_path(scope, path):
scopes.append(c.parent) scopes.append(c.parent)
#elif p_class == parsing.Function: #elif p_class == parsing.Function:
elif p_class == parsing.Statement: elif p_class == parsing.Statement:
print 'state', c.parent.token_list, c.parent.get_assignment_calls()
pass pass
else: else:
print 'error follow_path:', p_class, repr(c.parent) print 'error follow_path:', p_class, repr(c.parent)
@@ -49,6 +46,42 @@ def follow_path(scope, path):
scopes = new_scopes scopes = new_scopes
return set(scopes) return set(scopes)
def _parseassignment(self):
"""
Return a short source-like placeholder for the value being assigned.

Marked by its author as unused (TODO remove or replace).

One token is read through ``self.next()``. A handful of recognised tokens
map to a fixed string (empty string, tuple, list or dict literal, ``0``,
``file``, and two ``_PyCmplNoType`` expressions). For any other token,
further tokens are read and concatenated, and that text is returned.

NOTE(review): indentation is lost in this view, so whether the final
``assign += tok`` runs for every token or only at bracket level 0 cannot
be told from here - confirm against the real file.
"""
assign = ''
# self.next() is unpacked as (token type, token text, indent).
token_type, tok, indent = self.next()
# A string literal or the name `str` stands for an empty string.
if token_type == tokenize.STRING or tok == 'str':
return '""'
# An opening bracket or the matching builtin name stands for an empty
# container of that kind.
elif tok == '(' or tok == 'tuple':
return '()'
elif tok == '[' or tok == 'list':
return '[]'
elif tok == '{' or tok == 'dict':
return '{}'
# Any number is represented as 0.
elif token_type == tokenize.NUMBER:
return '0'
elif tok == 'open' or tok == 'file':
return 'file'
elif tok == 'None':
return '_PyCmplNoType()'
elif tok == 'type':
return 'type(_PyCmplNoType)' # only for method resolution
else:
# Unknown start token: collect the text of the following tokens.
assign += tok
# `level` counts the brackets that are currently open.
level = 0
while True:
token_type, tok, indent = self.next()
if tok in ('(', '{', '['):
level += 1
elif tok in (']', '}', ')'):
level -= 1
# Stop once a closing bracket brings the count back to zero.
if level == 0:
break
elif level == 0:
# Outside of brackets a ';' or a newline token ends the scan.
if tok in (';', '\n'):
break
assign += tok
return "%s" % assign
def dbg(*args): def dbg(*args):
if debug_function: if debug_function:

View File

@@ -222,11 +222,15 @@ def dbg(*args):
debug_function(*args) debug_function(*args)
def set_debug_function(func): def set_debug_function(func_cb):
"""
You can define a callback debug function to get all the debug messages.
:param func_cb: The callback function for debug messages, with n params.
"""
global debug_function global debug_function
debug_function = func debug_function = func_cb
parsing.debug_function = func parsing.debug_function = func_cb
evaluate.debug_function = func evaluate.debug_function = func_cb
debug_function = None debug_function = None

View File

@@ -39,6 +39,14 @@ import cStringIO
import re import re
class TokenNotFoundError(Exception):
    """ Raised when a requested token cannot be found in a statement. """
class ParserError(Exception):
    """ Raised when the parser ends up in an inconsistent state. """
def indent_block(text, indention=" "): def indent_block(text, indention=" "):
""" This function indents a text block with a default of four spaces """ """ This function indents a text block with a default of four spaces """
temp = '' temp = ''
@@ -460,18 +468,21 @@ class Statement(Simple):
:param used_funcs: str :param used_funcs: str
:param used_vars: The variables which are used by the statement. :param used_vars: The variables which are used by the statement.
:param used_vars: str :param used_vars: str
:param token_list: Token list which is also peppered with Name.
:param token_list: list
:param indent: The indent level of the flow statement. :param indent: The indent level of the flow statement.
:type indent: int :type indent: int
:param line_nr: Line number of the flow statement. :param line_nr: Line number of the flow statement.
:type line_nr: int :type line_nr: int
""" """
def __init__(self, code, set_vars, used_funcs, used_vars, indent, line_nr, def __init__(self, code, set_vars, used_funcs, used_vars, token_list,
line_end): indent, line_nr, line_end):
super(Statement, self).__init__(indent, line_nr, line_end) super(Statement, self).__init__(indent, line_nr, line_end)
self.code = code self.code = code
self.set_vars = set_vars self.set_vars = set_vars
self.used_funcs = used_funcs self.used_funcs = used_funcs
self.used_vars = used_vars self.used_vars = used_vars
self.token_list = token_list
for s in set_vars + used_funcs + used_vars: for s in set_vars + used_funcs + used_vars:
s.parent = self s.parent = self
@@ -485,6 +496,170 @@ class Statement(Simple):
""" Get the names for the statement. """ """ Get the names for the statement. """
return self.set_vars return self.set_vars
def get_assignment_calls(self):
    """
    Build the structure of what is assigned in this statement.

    This is not done in the main parser, because it might be slow and
    most of the statements won't need this data anyway. This is something
    'like' a lazy execution: ``self.token_list`` is walked on every call.

    Everything before the assignment operator is ignored. After it, names
    become ``Call`` objects, brackets open nested ``Array`` objects and all
    other tokens are stored in the current field of the enclosing array.

    :return: The element that is current when the token list ends. This is
        not necessarily the outermost ``Array``; follow ``.parent`` to get
        there.
    :raises TokenNotFoundError: If the statement has no assignment operator.
    :raises ParserError: If opening and closing brackets don't balance.
    """
    # Loop invariants, built once instead of once per token.
    brackets = {'(': Array.EMPTY, '[': Array.LIST, '{': Array.SET}
    closing_brackets = (')', '}', ']')
    comparisons = ('>=', '<=', '==', '!=')

    result = None
    has_assignment = False
    level = 0
    is_chain = False

    for tok_temp in self.token_list:
        try:
            token_type, tok, indent = tok_temp
        except TypeError:
            # the token is a Name, which has already been parsed
            tok = tok_temp
        else:
            # Only operator tokens count: a string literal such as "a=b"
            # contains '=' as well, but is not an assignment.
            if token_type == tokenize.OP and '=' in tok \
                    and tok not in comparisons:
                # This means, there is an assignment here.
                # TODO there may be multiple assignments: a = b = 1
                has_assignment = True
                # initialize the first item
                result = Array(Array.EMPTY)
                continue

        if not has_assignment:
            # still on the left hand side of the assignment
            continue

        is_call = isinstance(result, Call)
        if isinstance(tok, Name):
            call = Call(tok, result)
            if is_chain:
                # `a.b`: hang the new call behind the previous one
                result = result.set_next_chain_call(call)
                is_chain = False
            else:
                result.add_to_current_field(call)
                result = call
        elif tok in brackets:
            level += 1
            result = Array(brackets[tok], result)
            if is_call:
                # brackets directly behind a name are an execution of it
                result = result.parent.add_execution(result)
            else:
                result.parent.add_to_current_field(result)
        elif tok == ':':
            if is_call:
                result = result.parent
            result.add_dictionary_key()
        elif tok == '.':
            is_chain = True
        elif tok == ',':
            if is_call:
                result = result.parent
            result.add_field()
            # important - it cannot be empty anymore
            if result.arr_type == Array.EMPTY:
                result.arr_type = Array.TUPLE
        elif tok in closing_brackets:
            level -= 1
            result = result.parent
        else:
            # TODO catch numbers and strings -> token_type and make
            # calls out of them
            if is_call:
                result = result.parent
            result.add_to_current_field(tok)

    if not has_assignment:
        raise TokenNotFoundError("You are requesting the result of an "
                    "assignment, where the token cannot be found")
    if level != 0:
        raise ParserError("Brackets don't match: %s. This is not normal "
                            "behaviour. Please submit a bug" % level)
    return result
class Array(object):
    """
    Describes the different python types for an array, but also empty
    statements. In the Python syntax definitions this type is named 'atom'.
    http://docs.python.org/release/3.0.1/reference/grammar.html
    Array saves sub-arrays as well as normal operators and calls to methods.

    ``values`` holds one list of tokens per field; ``keys`` always has the
    same length and holds the key tokens of a field, or None if the field
    has no key.

    :param arr_type: The type of an array, which can be one of the constants
        below.
    :type arr_type: object
    :param parent: The enclosing element, if there is one.
    """
    EMPTY = object()
    TUPLE = object()
    LIST = object()
    DICT = object()
    SET = object()

    def __init__(self, arr_type, parent=None):
        self.arr_type = arr_type
        self.values = []
        self.keys = []
        self.parent = parent

    def add_field(self):
        """
        Just add a new (empty) field to the values, without a key.
        """
        self.values.append([])
        self.keys.append(None)

    def add_to_current_field(self, tok):
        """ Adds a token to the latest field (in content). """
        if not self.values:
            # add the first field, this is done here, because if nothing
            # gets added, the list is empty, which is also needed sometimes.
            # add_field is used so that keys stays as long as values.
            self.add_field()
        self.values[-1].append(tok)

    def add_dictionary_key(self):
        """
        Only used for dictionaries, automatically moves the tokens added by
        now to the latest field from the values to the keys. The array type
        is switched to DICT.
        """
        self.arr_type = Array.DICT
        if not self.values:
            # a ':' without anything in front of it: the key is just empty
            self.add_field()
        self.keys[-1] = self.values[-1]
        self.values[-1] = []

    def __len__(self):
        return len(self.values)

    def __getitem__(self, key):
        return self.values[key]

    def __iter__(self):
        """
        Iterate over the fields: ``(key, value)`` pairs for dictionaries,
        the plain values for everything else.
        """
        if self.arr_type == self.DICT:
            return iter(zip(self.keys, self.values))
        return iter(self.values)
class Call(object):
    """
    One named element inside an assignment, e.g. a variable or a function.

    A call can be followed by another call (``next``) and can collect
    executions, i.e. the brackets that come directly after the name.

    :param name: The name of this element.
    :param parent: The element this call has been added to.
    """
    def __init__(self, name, parent):
        self.parent = parent
        self.name = name
        self.executions = []
        self.param_array = None
        self.next = None

    def set_next_chain_call(self, call):
        """
        Appends `call` as the following part of this chain. The new part gets
        the same parent as this one.

        :return: The call that was passed in.
        """
        call.parent = self.parent
        self.next = call
        return call

    def add_execution(self, call):
        """
        Remembers an execution of this name. An execution is nothing else
        than brackets with params in them, which show access on the internals
        of this name.

        :return: The execution that was passed in.
        """
        self.executions.append(call)
        return call
class Name(Simple): class Name(Simple):
""" """
@@ -704,43 +879,6 @@ class PyFuzzyParser(object):
return Class(cname, super, indent, start_line) return Class(cname, super, indent, start_line)
def _parseassignment(self):
"""
Return a short source-like placeholder for the value being assigned.

Marked by its author as unused (TODO remove or replace).

One token is read through ``self.next()``. A handful of recognised tokens
map to a fixed string (empty string, tuple, list or dict literal, ``0``,
``file``, and two ``_PyCmplNoType`` expressions). For any other token,
further tokens are read and concatenated, and that text is returned.

NOTE(review): indentation is lost in this view, so whether the final
``assign += tok`` runs for every token or only at bracket level 0 cannot
be told from here - confirm against the real file.
"""
assign = ''
# self.next() is unpacked as (token type, token text, indent).
token_type, tok, indent = self.next()
# A string literal or the name `str` stands for an empty string.
if token_type == tokenize.STRING or tok == 'str':
return '""'
# An opening bracket or the matching builtin name stands for an empty
# container of that kind.
elif tok == '(' or tok == 'tuple':
return '()'
elif tok == '[' or tok == 'list':
return '[]'
elif tok == '{' or tok == 'dict':
return '{}'
# Any number is represented as 0.
elif token_type == tokenize.NUMBER:
return '0'
elif tok == 'open' or tok == 'file':
return 'file'
elif tok == 'None':
return '_PyCmplNoType()'
elif tok == 'type':
return 'type(_PyCmplNoType)' # only for method resolution
else:
# Unknown start token: collect the text of the following tokens.
assign += tok
# `level` counts the brackets that are currently open.
level = 0
while True:
token_type, tok, indent = self.next()
if tok in ('(', '{', '['):
level += 1
elif tok in (']', '}', ')'):
level -= 1
# Stop once a closing bracket brings the count back to zero.
if level == 0:
break
elif level == 0:
# Outside of brackets a ';' or a newline token ends the scan.
if tok in (';', '\n'):
break
assign += tok
return "%s" % assign
def _parse_statement(self, pre_used_token=None, added_breaks=None): def _parse_statement(self, pre_used_token=None, added_breaks=None):
""" """
Parses statements like: Parses statements like:
@@ -778,9 +916,11 @@ class PyFuzzyParser(object):
if added_breaks: if added_breaks:
breaks += added_breaks breaks += added_breaks
tok_list = []
while not (tok in always_break or tok in breaks and level <= 0): while not (tok in always_break or tok in breaks and level <= 0):
set_string = None set_string = None
#print 'parse_stmt', tok, tokenize.tok_name[token_type] #print 'parse_stmt', tok, tokenize.tok_name[token_type]
tok_list.append(self.current)
if tok == 'as': if tok == 'as':
string += " %s " % tok string += " %s " % tok
token_type, tok, indent_dummy = self.next() token_type, tok, indent_dummy = self.next()
@@ -789,6 +929,7 @@ class PyFuzzyParser(object):
self._parsedotname(self.current) self._parsedotname(self.current)
n = Name(path, start_indent, start_line, self.line_nr) n = Name(path, start_indent, start_line, self.line_nr)
set_vars.append(n) set_vars.append(n)
tok_list.append(n)
string += ".".join(path) string += ".".join(path)
continue continue
elif token_type == tokenize.NAME: elif token_type == tokenize.NAME:
@@ -802,12 +943,13 @@ class PyFuzzyParser(object):
path, token_type, tok, start_indent, start_line = \ path, token_type, tok, start_indent, start_line = \
self._parsedotname(self.current) self._parsedotname(self.current)
n = Name(path, start_indent, start_line, self.line_nr) n = Name(path, start_indent, start_line, self.line_nr)
tok_list.pop() # remove last entry, because we add Name
tok_list.append(n)
if tok == '(': if tok == '(':
# it must be a function # it must be a function
used_funcs.append(n) used_funcs.append(n)
else: else:
if not n.names[0] in ['global']: used_vars.append(n)
used_vars.append(n)
if string and re.match(r'[\w\d\'"]', string[-1]): if string and re.match(r'[\w\d\'"]', string[-1]):
string += ' ' string += ' '
string += ".".join(path) string += ".".join(path)
@@ -833,7 +975,7 @@ class PyFuzzyParser(object):
return None, tok return None, tok
#print 'new_stat', string, set_vars, used_funcs, used_vars #print 'new_stat', string, set_vars, used_funcs, used_vars
stmt = Statement(string, set_vars, used_funcs, used_vars,\ stmt = Statement(string, set_vars, used_funcs, used_vars,\
indent, line_start, self.line_nr) tok_list, indent, line_start, self.line_nr)
return stmt, tok return stmt, tok
def next(self): def next(self):
@@ -887,7 +1029,7 @@ class PyFuzzyParser(object):
while indent <= self.scope.indent \ while indent <= self.scope.indent \
and token_type in [tokenize.NAME] \ and token_type in [tokenize.NAME] \
and self.scope != self.top: and self.scope != self.top:
dbg( 'syntax_err, dedent @%s - %s<=%s', \ dbg('syntax_err, dedent @%s - %s<=%s', \
(self.line_nr, indent, self.scope.indent)) (self.line_nr, indent, self.scope.indent))
self.scope.line_end = self.line_nr self.scope.line_end = self.line_nr
self.scope = self.scope.parent self.scope = self.scope.parent

View File

@@ -128,11 +128,11 @@ if True or a:
# completion # completion
import time import time
class c1(): class c1():
c2 = c1() c2,c5 = c1(), c1().c3()
def c3(self): def c3(self):
import time as c4 import time as c4
c5 = 3 c5 = 3
return 'asdf'