statement parser - first version

2026-05-13 10:08:44 +08:00 · 2012-03-11 03:03:54 +01:00
parent 5332a87d39
commit b2a77acff6
4 changed files with 232 additions and 53 deletions
@@ -1,9 +1,5 @@
 import parsing
 def Statement(object):
    """ Placeholder for the statement object of functions.

    Accepts a single argument, ignores it and returns None.
    """
    return None
 def get_names_for_scope(scope):
    """ Get all completions possible for the current scope. """
    comp = []
@@ -38,6 +34,7 @@ def follow_path(scope, path):
            scopes.append(c.parent)
        #elif p_class == parsing.Function:
        elif p_class == parsing.Statement:
            print 'state', c.parent.token_list, c.parent.get_assignment_calls()
            pass
        else:
            print 'error follow_path:', p_class, repr(c.parent)
@@ -49,6 +46,42 @@ def follow_path(scope, path):
        scopes = new_scopes
    return set(scopes)
 def _parseassignment(self):
    """ TODO remove or replace, at the moment not used

    Reads the tokens of an assigned value through ``self.next()`` and returns
    a short source string that stands in for it: a fixed placeholder for
    recognised literals and builtin names, otherwise the first token plus
    every following token found outside of brackets.
    """
    token_type, tok, indent = self.next()
    if token_type == tokenize.STRING:
        return '""'
    # placeholders selected by the token text alone
    for spellings, placeholder in ((('str',), '""'),
                                   (('(', 'tuple'), '()'),
                                   (('[', 'list'), '[]'),
                                   (('{', 'dict'), '{}')):
        if tok in spellings:
            return placeholder
    if token_type == tokenize.NUMBER:
        return '0'
    for spellings, placeholder in (
            (('open', 'file'), 'file'),
            (('None',), '_PyCmplNoType()'),
            # only for method resolution
            (('type',), 'type(_PyCmplNoType)')):
        if tok in spellings:
            return placeholder
    # Unknown start token: collect the text that sits outside of brackets.
    assign = '' + tok
    depth = 0
    while True:
        token_type, tok, indent = self.next()
        if tok in ('(', '{', '['):
            depth += 1
            continue
        if tok in (']', '}', ')'):
            depth -= 1
            if depth == 0:
                # the outermost bracket was closed, the value ends here
                break
            continue
        if depth != 0:
            # tokens inside brackets are skipped
            continue
        if tok in (';', '\n'):
            break
        assign += tok
    return "%s" % assign
 def dbg(*args):
    if debug_function:
@@ -222,11 +222,15 @@ def dbg(*args):
        debug_function(*args)
-def set_debug_function(func):
+def set_debug_function(func_cb):
    """
    You can define a callback debug function to get all the debug messages.
    :param func_cb: The callback function for debug messages, with n params.
    """
    global debug_function
-    debug_function = func
+    debug_function = func_cb
-    parsing.debug_function = func
+    parsing.debug_function = func_cb
-    evaluate.debug_function = func
+    evaluate.debug_function = func_cb
 debug_function = None
@@ -39,6 +39,14 @@ import cStringIO
 import re
 class TokenNotFoundError(Exception):
    """ Raised when a token that was asked for is not in a statement. """
 class ParserError(Exception):
    """ Raised when the parser reaches a state it cannot handle. """
 def indent_block(text, indention="    "):
    """ This function indents a text block with a default of four spaces """
    temp = ''
@@ -460,18 +468,21 @@ class Statement(Simple):
    :param used_funcs: str
    :param used_vars: The variables which are used by the statement.
    :type used_vars: str
    :param token_list: Token list which is also peppered with Name.
    :type token_list: list
    :param indent: The indent level of the flow statement.
    :type indent: int
    :param line_nr: Line number of the flow statement.
    :type line_nr: int
    """
-    def __init__(self, code, set_vars, used_funcs, used_vars, indent, line_nr,
+    def __init__(self, code, set_vars, used_funcs, used_vars, token_list,
-            line_end):
+            indent, line_nr, line_end):
        super(Statement, self).__init__(indent, line_nr, line_end)
        self.code = code
        self.set_vars = set_vars
        self.used_funcs = used_funcs
        self.used_vars = used_vars
        self.token_list = token_list
        for s in set_vars + used_funcs + used_vars:
            s.parent = self
@@ -485,6 +496,170 @@ class Statement(Simple):
        """ Get the names for the statement. """
        return self.set_vars
    def get_assignment_calls(self):
        """
        Build the call structure of the assigned value of this statement.

        This is not done in the main parser, because it might be slow and
        most of the statements won't need this data anyway. This is something
        'like' a lazy execution.

        Walks ``self.token_list``. Everything after the assignment operator
        is folded into nested Array and Call objects.

        :return: The Array or Call that was the current node when the token
            list ended.
        :raises TokenNotFoundError: If the statement has no assignment
            operator.
        :raises ParserError: If opening and closing brackets don't balance.
        """
        # lookup tables that do not change while iterating
        brackets = {'(': Array.EMPTY, '[': Array.LIST, '{': Array.SET}
        closing_brackets = (')', '}', ']')
        comparisons = ('>=', '<=', '==', '!=')
        # string literals and comments may contain '=' without assigning
        non_assigning_types = (tokenize.STRING, tokenize.COMMENT)

        result = None
        has_assignment = False
        level = 0
        is_chain = False
        for tok_temp in self.token_list:
            try:
                token_type, tok, indent = tok_temp
            except TypeError:
                # the token is a Name, which has already been parsed
                tok = tok_temp
            else:
                if token_type not in non_assigning_types and '=' in tok \
                        and tok not in comparisons:
                    # This means, there is an assignment here.
                    # TODO there may be multiple assignments: a = b = 1
                    has_assignment = True
                    # initialize the first item
                    result = Array(Array.EMPTY)
                    continue
            if has_assignment:
                is_call = isinstance(result, Call)
                if isinstance(tok, Name):
                    call = Call(tok, result)
                    if is_chain:
                        # `a.b`: hang the new name behind the previous call
                        result = result.set_next_chain_call(call)
                        is_chain = False
                    else:
                        result.add_to_current_field(call)
                        result = call
                elif tok in brackets:
                    level += 1
                    result = Array(brackets[tok], result)
                    if is_call:
                        # brackets directly after a name execute that name
                        result = result.parent.add_execution(result)
                    else:
                        result.parent.add_to_current_field(result)
                elif tok == ':':
                    if is_call:
                        result = result.parent
                    result.add_dictionary_key()
                elif tok == '.':
                    is_chain = True
                elif tok == ',':
                    if is_call:
                        result = result.parent
                    result.add_field()
                    # important - it cannot be empty anymore
                    if result.arr_type == Array.EMPTY:
                        result.arr_type = Array.TUPLE
                elif tok in closing_brackets:
                    level -= 1
                    result = result.parent
                else:
                    # TODO catch numbers and strings -> token_type and make
                    # calls out of them
                    if is_call:
                        result = result.parent
                    result.add_to_current_field(tok)
        if not has_assignment:
            raise TokenNotFoundError("You are requesting the result of an "
                            "assignment, where the token cannot be found")
        if level != 0:
            raise ParserError("Brackets don't match: %s. This is not normal "
                                "behaviour. Please submit a bug" % level)
        return result
 class Array(object):
    """
    Describes the different python types for an array, but also empty
    statements. In the Python syntax definitions this type is named 'atom'.
    http://docs.python.org/release/3.0.1/reference/grammar.html
    Array saves sub-arrays as well as normal operators and calls to methods.

    ``values`` and ``keys`` are parallel lists with one entry per field. A
    key stays ``None`` until `add_dictionary_key` fills it in.

    :param arr_type: The type of an array, which can be one of the constants
        below.
    :type arr_type: object
    :param parent: The object this array is nested in, if any.
    """
    EMPTY = object()
    TUPLE = object()
    LIST = object()
    DICT = object()
    SET = object()

    def __init__(self, arr_type, parent=None):
        self.arr_type = arr_type
        self.values = []
        self.keys = []
        self.parent = parent

    def add_field(self):
        """
        Just add a new field to the values (and an empty slot for its key).
        """
        self.values.append([])
        self.keys.append(None)

    def add_to_current_field(self, tok):
        """ Adds a token to the latest field (in content). """
        if not self.values:
            # add the first field, this is done here, because if nothing
            # gets added, the list is empty, which is also needed sometimes.
            # Going through add_field keeps `keys` as long as `values`.
            self.add_field()
        self.values[-1].append(tok)

    def add_dictionary_key(self):
        """
        Only used for dictionaries, automatically adds the tokens added by now
        from the values to keys.

        The array type switches to ``Array.DICT``. The tokens collected in
        the latest field become its key and the field starts empty again.
        """
        self.arr_type = Array.DICT
        if not self.values:
            # a ':' before any token: the key of the first field is empty
            self.add_field()
        self.keys[-1] = self.values[-1]
        self.values[-1] = []

    def __len__(self):
        """ The number of fields. """
        return len(self.values)

    def __getitem__(self, key):
        """ The token list of the field at index `key`. """
        return self.values[key]

    def __iter__(self):
        """
        Iterate over the fields: ``(key, value)`` pairs for dictionaries,
        otherwise just the values.
        """
        if self.arr_type is self.DICT:
            return iter(zip(self.keys, self.values))
        return iter(self.values)
 class Call(object):
    """
    A single name inside a statement, together with what follows it: the
    next name of a dotted chain and the brackets that execute it.

    :param name: The name this call stands for.
    :param parent: The object this call is nested in.
    """
    def __init__(self, name, parent):
        self.executions = []
        self.param_array = None
        self.next = None
        self.parent = parent
        self.name = name

    def set_next_chain_call(self, call):
        """ Appends `call` as the next part of the chain and returns it. """
        # the chained call lives in the same container as this one
        call.parent = self.parent
        self.next = call
        return call

    def add_execution(self, call):
        """
        Records an execution and returns it. An execution is nothing else
        than brackets, with params in them, which shows access on the
        internals of this name.
        """
        self.executions += [call]
        return call
 class Name(Simple):
    """
@@ -704,43 +879,6 @@ class PyFuzzyParser(object):
        return Class(cname, super, indent, start_line)
    def _parseassignment(self):
        """ TODO remove or replace, at the moment not used

        Reads the tokens of an assigned value through ``self.next()`` and
        returns a short source string that stands in for it: a fixed
        placeholder for recognised literals and builtin names, otherwise the
        first token plus every following token found outside of brackets.
        """
        token_type, tok, indent = self.next()
        if token_type == tokenize.STRING:
            return '""'
        # placeholders selected by the token text alone
        for spellings, placeholder in ((('str',), '""'),
                                       (('(', 'tuple'), '()'),
                                       (('[', 'list'), '[]'),
                                       (('{', 'dict'), '{}')):
            if tok in spellings:
                return placeholder
        if token_type == tokenize.NUMBER:
            return '0'
        for spellings, placeholder in (
                (('open', 'file'), 'file'),
                (('None',), '_PyCmplNoType()'),
                # only for method resolution
                (('type',), 'type(_PyCmplNoType)')):
            if tok in spellings:
                return placeholder
        # Unknown start token: collect the text that sits outside of brackets.
        assign = '' + tok
        depth = 0
        while True:
            token_type, tok, indent = self.next()
            if tok in ('(', '{', '['):
                depth += 1
                continue
            if tok in (']', '}', ')'):
                depth -= 1
                if depth == 0:
                    # the outermost bracket was closed, the value ends here
                    break
                continue
            if depth != 0:
                # tokens inside brackets are skipped
                continue
            if tok in (';', '\n'):
                break
            assign += tok
        return "%s" % assign
    def _parse_statement(self, pre_used_token=None, added_breaks=None):
        """
        Parses statements like:
@@ -778,9 +916,11 @@ class PyFuzzyParser(object):
        if added_breaks:
            breaks += added_breaks
        tok_list = []
        while not (tok in always_break or tok in breaks and level <= 0):
            set_string = None
            #print 'parse_stmt', tok, tokenize.tok_name[token_type]
            tok_list.append(self.current)
            if tok == 'as':
                string += " %s " % tok
                token_type, tok, indent_dummy = self.next()
@@ -789,6 +929,7 @@ class PyFuzzyParser(object):
                            self._parsedotname(self.current)
                    n = Name(path, start_indent, start_line, self.line_nr)
                    set_vars.append(n)
                    tok_list.append(n)
                    string += ".".join(path)
                continue
            elif token_type == tokenize.NAME:
@@ -802,12 +943,13 @@ class PyFuzzyParser(object):
                    path, token_type, tok, start_indent, start_line = \
                            self._parsedotname(self.current)
                    n = Name(path, start_indent, start_line, self.line_nr)
                    tok_list.pop()  # remove last entry, because we add Name
                    tok_list.append(n)
                    if tok == '(':
                        # it must be a function
                        used_funcs.append(n)
                    else:
-                        if not n.names[0] in ['global']:
+                        used_vars.append(n)
                            used_vars.append(n)
                    if string and re.match(r'[\w\d\'"]', string[-1]):
                        string += ' '
                    string += ".".join(path)
@@ -833,7 +975,7 @@ class PyFuzzyParser(object):
            return None, tok
        #print 'new_stat', string, set_vars, used_funcs, used_vars
        stmt = Statement(string, set_vars, used_funcs, used_vars,\
-                            indent, line_start, self.line_nr)
+                            tok_list, indent, line_start, self.line_nr)
        return stmt, tok
    def next(self):
@@ -887,7 +1029,7 @@ class PyFuzzyParser(object):
                while indent <= self.scope.indent \
                        and token_type in [tokenize.NAME] \
                        and self.scope != self.top:
-                    dbg( 'syntax_err, dedent @%s - %s<=%s', \
+                    dbg('syntax_err, dedent @%s - %s<=%s', \
                            (self.line_nr, indent, self.scope.indent))
                    self.scope.line_end = self.line_nr
                    self.scope = self.scope.parent
@@ -128,11 +128,11 @@ if True or a:
 # completion
 import time
 class c1():
-    c2 = c1()
+    c2,c5 = c1(), c1().c3()
    def c3(self): 
        import time as c4
        c5 = 3
-
+        return 'asdf'