mirror of
https://github.com/davidhalter/jedi.git
synced 2025-12-07 14:34:31 +08:00
statement parser - first version
This commit is contained in:
41
evaluate.py
41
evaluate.py
@@ -1,9 +1,5 @@
|
|||||||
import parsing
|
import parsing
|
||||||
|
|
||||||
def Statement(object):
|
|
||||||
""" The statement object of functions, to """
|
|
||||||
pass
|
|
||||||
|
|
||||||
def get_names_for_scope(scope):
|
def get_names_for_scope(scope):
|
||||||
""" Get all completions possible for the current scope. """
|
""" Get all completions possible for the current scope. """
|
||||||
comp = []
|
comp = []
|
||||||
@@ -38,6 +34,7 @@ def follow_path(scope, path):
|
|||||||
scopes.append(c.parent)
|
scopes.append(c.parent)
|
||||||
#elif p_class == parsing.Function:
|
#elif p_class == parsing.Function:
|
||||||
elif p_class == parsing.Statement:
|
elif p_class == parsing.Statement:
|
||||||
|
print 'state', c.parent.token_list, c.parent.get_assignment_calls()
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
print 'error follow_path:', p_class, repr(c.parent)
|
print 'error follow_path:', p_class, repr(c.parent)
|
||||||
@@ -49,6 +46,42 @@ def follow_path(scope, path):
|
|||||||
scopes = new_scopes
|
scopes = new_scopes
|
||||||
return set(scopes)
|
return set(scopes)
|
||||||
|
|
||||||
|
def _parseassignment(self):
|
||||||
|
""" TODO remove or replace, at the moment not used """
|
||||||
|
assign = ''
|
||||||
|
token_type, tok, indent = self.next()
|
||||||
|
if token_type == tokenize.STRING or tok == 'str':
|
||||||
|
return '""'
|
||||||
|
elif tok == '(' or tok == 'tuple':
|
||||||
|
return '()'
|
||||||
|
elif tok == '[' or tok == 'list':
|
||||||
|
return '[]'
|
||||||
|
elif tok == '{' or tok == 'dict':
|
||||||
|
return '{}'
|
||||||
|
elif token_type == tokenize.NUMBER:
|
||||||
|
return '0'
|
||||||
|
elif tok == 'open' or tok == 'file':
|
||||||
|
return 'file'
|
||||||
|
elif tok == 'None':
|
||||||
|
return '_PyCmplNoType()'
|
||||||
|
elif tok == 'type':
|
||||||
|
return 'type(_PyCmplNoType)' # only for method resolution
|
||||||
|
else:
|
||||||
|
assign += tok
|
||||||
|
level = 0
|
||||||
|
while True:
|
||||||
|
token_type, tok, indent = self.next()
|
||||||
|
if tok in ('(', '{', '['):
|
||||||
|
level += 1
|
||||||
|
elif tok in (']', '}', ')'):
|
||||||
|
level -= 1
|
||||||
|
if level == 0:
|
||||||
|
break
|
||||||
|
elif level == 0:
|
||||||
|
if tok in (';', '\n'):
|
||||||
|
break
|
||||||
|
assign += tok
|
||||||
|
return "%s" % assign
|
||||||
|
|
||||||
def dbg(*args):
|
def dbg(*args):
|
||||||
if debug_function:
|
if debug_function:
|
||||||
|
|||||||
12
functions.py
12
functions.py
@@ -222,11 +222,15 @@ def dbg(*args):
|
|||||||
debug_function(*args)
|
debug_function(*args)
|
||||||
|
|
||||||
|
|
||||||
def set_debug_function(func):
|
def set_debug_function(func_cb):
|
||||||
|
"""
|
||||||
|
You can define a callback debug function to get all the debug messages.
|
||||||
|
:param func_cb: The callback function for debug messages, with n params.
|
||||||
|
"""
|
||||||
global debug_function
|
global debug_function
|
||||||
debug_function = func
|
debug_function = func_cb
|
||||||
parsing.debug_function = func
|
parsing.debug_function = func_cb
|
||||||
evaluate.debug_function = func
|
evaluate.debug_function = func_cb
|
||||||
|
|
||||||
|
|
||||||
debug_function = None
|
debug_function = None
|
||||||
|
|||||||
228
parsing.py
228
parsing.py
@@ -39,6 +39,14 @@ import cStringIO
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class TokenNotFoundError(Exception):
    """ Signals that a requested token could not be found in a statement. """
|
||||||
|
|
||||||
|
|
||||||
|
class ParserError(Exception):
    """ Signals that the parser reached a state it cannot make sense of. """
|
||||||
|
|
||||||
|
|
||||||
def indent_block(text, indention=" "):
|
def indent_block(text, indention=" "):
|
||||||
""" This function indents a text block with a default of four spaces """
|
""" This function indents a text block with a default of four spaces """
|
||||||
temp = ''
|
temp = ''
|
||||||
@@ -460,18 +468,21 @@ class Statement(Simple):
|
|||||||
:param used_funcs: str
|
:param used_funcs: str
|
||||||
:param used_vars: The variables which are used by the statement.
|
:param used_vars: The variables which are used by the statement.
|
||||||
:param used_vars: str
|
:param used_vars: str
|
||||||
|
:param token_list: Token list which is also peppered with Name.
|
||||||
|
:param token_list: list
|
||||||
:param indent: The indent level of the flow statement.
|
:param indent: The indent level of the flow statement.
|
||||||
:type indent: int
|
:type indent: int
|
||||||
:param line_nr: Line number of the flow statement.
|
:param line_nr: Line number of the flow statement.
|
||||||
:type line_nr: int
|
:type line_nr: int
|
||||||
"""
|
"""
|
||||||
def __init__(self, code, set_vars, used_funcs, used_vars, indent, line_nr,
|
def __init__(self, code, set_vars, used_funcs, used_vars, token_list,
|
||||||
line_end):
|
indent, line_nr, line_end):
|
||||||
super(Statement, self).__init__(indent, line_nr, line_end)
|
super(Statement, self).__init__(indent, line_nr, line_end)
|
||||||
self.code = code
|
self.code = code
|
||||||
self.set_vars = set_vars
|
self.set_vars = set_vars
|
||||||
self.used_funcs = used_funcs
|
self.used_funcs = used_funcs
|
||||||
self.used_vars = used_vars
|
self.used_vars = used_vars
|
||||||
|
self.token_list = token_list
|
||||||
for s in set_vars + used_funcs + used_vars:
|
for s in set_vars + used_funcs + used_vars:
|
||||||
s.parent = self
|
s.parent = self
|
||||||
|
|
||||||
@@ -485,6 +496,170 @@ class Statement(Simple):
|
|||||||
""" Get the names for the statement. """
|
""" Get the names for the statement. """
|
||||||
return self.set_vars
|
return self.set_vars
|
||||||
|
|
||||||
|
def get_assignment_calls(self):
    """
    Build the structure of calls and arrays that follows an assignment.

    This is not done in the main parser, because it might be slow and
    most of the statements won't need this data anyway. This is something
    'like' a lazy execution.

    The entries of `self.token_list` are walked in order. An entry is
    either a `(token_type, tok, indent)` tuple or an already parsed `Name`
    object. Tokens in front of the first assignment operator are ignored;
    every assignment operator starts a fresh, empty `Array`.

    :return: The node (`Array` or `Call`) that is the current one after the
        last token has been processed.
    :raises TokenNotFoundError: If the token list contains no assignment.
    :raises ParserError: If opening and closing brackets don't add up.
    """
    # Which kind of array an opening bracket starts. Built once, because it
    # is the same for every token.
    brackets = {'(': Array.EMPTY, '[': Array.LIST, '{': Array.SET}

    result = None
    has_assignment = False
    level = 0
    is_chain = False

    for tok_temp in self.token_list:
        try:
            token_type, tok, indent = tok_temp
            # Any token containing '=' counts (this includes '+=' and
            # friends), except for the comparison operators.
            if '=' in tok and tok not in ['>=', '<=', '==', '!=']:
                # This means, there is an assignment here.
                # TODO there may be multiple assignments: a = b = 1
                has_assignment = True

                # initialize the first item
                result = Array(Array.EMPTY)
                continue
        except TypeError:
            # the token is a Name, which has already been parsed
            tok = tok_temp

        if not has_assignment:
            # still on the left hand side of the statement
            continue

        is_call = isinstance(result, Call)
        if isinstance(tok, Name):
            call = Call(tok, result)
            if is_chain:
                # `a.b`: hang the new call behind the previous one
                result = result.set_next_chain_call(call)
                is_chain = False
            else:
                result.add_to_current_field(call)
                result = call
        elif tok in brackets:
            level += 1
            result = Array(brackets[tok], result)
            if is_call:
                # brackets directly behind a name execute that name
                result = result.parent.add_execution(result)
            else:
                result.parent.add_to_current_field(result)
        elif tok == ':':
            if is_call:
                result = result.parent
            result.add_dictionary_key()
        elif tok == '.':
            is_chain = True
        elif tok == ',':
            if is_call:
                result = result.parent
            result.add_field()
            # important - it cannot be empty anymore
            if result.arr_type == Array.EMPTY:
                result.arr_type = Array.TUPLE
        elif tok in [')', '}', ']']:
            level -= 1
            if is_call:
                # First step out of the last call inside the brackets, so
                # that the step below really leaves the bracket itself.
                # Without it `foo(x)` stays on the inner array and a
                # following `.bar` has nothing to chain to.
                result = result.parent
            result = result.parent
        else:
            # TODO catch numbers and strings -> token_type and make
            # calls out of them
            if is_call:
                result = result.parent
            result.add_to_current_field(tok)

    if not has_assignment:
        raise TokenNotFoundError("You are requesting the result of an "
                    "assignment, where the token cannot be found")
    if level != 0:
        raise ParserError("Brackets don't match: %s. This is not normal "
                            "behaviour. Please submit a bug" % level)
    return result
|
||||||
|
|
||||||
|
|
||||||
|
class Array(object):
    """
    Describes the different python types for an array, but also empty
    statements. In the Python syntax definitions this type is named 'atom'.
    http://docs.python.org/release/3.0.1/reference/grammar.html
    Array saves sub-arrays as well as normal operators and calls to methods.

    `values` and `keys` are kept at the same length: `values[i]` is the
    list of tokens of field `i`, `keys[i]` is the token list of its
    dictionary key, or `None` if the field has no key.

    :param arr_type: The type of an array, which can be one of the constants\
    below.
    :type arr_type: object
    :param parent: The structure this array is nested in, if there is one.
    """
    # Unique marker objects, only meant to be compared with each other.
    EMPTY = object()
    TUPLE = object()
    LIST = object()
    DICT = object()
    SET = object()

    def __init__(self, arr_type, parent=None):
        self.arr_type = arr_type
        self.values = []
        self.keys = []
        self.parent = parent

    def add_field(self):
        """
        Just add a new field to the values.
        """
        self.values.append([])
        self.keys.append(None)

    def add_to_current_field(self, tok):
        """ Adds a token to the latest field (in content). """
        if not self.values:
            # add the first field, this is done here, because if nothing
            # gets added, the list is empty, which is also needed sometimes.
            # Going through add_field keeps `keys` aligned with `values`.
            self.add_field()
        self.values[-1].append(tok)

    def add_dictionary_key(self):
        """
        Only used for dictionaries, automatically adds the tokens added by now
        from the values to keys.
        """
        self.arr_type = Array.DICT
        if not self.values:
            # a ':' without anything in front of it: the key is empty
            self.add_field()
        # What was collected for the latest field turns out to be its key;
        # the value tokens are collected from scratch.
        self.keys[-1] = self.values[-1]
        self.values[-1] = []

    def __len__(self):
        return len(self.values)

    def __getitem__(self, key):
        return self.values[key]

    def __iter__(self):
        """
        Iterate over the fields. Dictionaries yield `(key, value)` pairs of
        token lists, all other types just the token list of each field.
        """
        if self.arr_type == self.DICT:
            return iter(zip(self.keys, self.values))
        return iter(self.values)
|
||||||
|
|
||||||
|
|
||||||
|
class Call(object):
    """
    A single name within a statement, together with what is attached to it:
    the next call of a dotted chain and the brackets that follow the name.

    :param name: The name this call is about.
    :param parent: The structure this call is part of.
    """
    def __init__(self, name, parent):
        # nothing is attached to a fresh call
        self.executions = []
        self.param_array = None
        self.next = None

        self.parent = parent
        self.name = name

    def set_next_chain_call(self, call):
        """
        Append `call` to this call as the next link of a chain. The new link
        gets the same parent as this call.

        :return: The call that was passed in.
        """
        call.parent = self.parent
        self.next = call
        return call

    def add_execution(self, call):
        """
        Remember one more execution of this name. An execution is nothing
        else than brackets, with params in them, which shows access on the
        internals of this name.

        :return: The execution that was passed in.
        """
        self.executions.append(call)
        return call
|
||||||
|
|
||||||
|
|
||||||
class Name(Simple):
|
class Name(Simple):
|
||||||
"""
|
"""
|
||||||
@@ -704,43 +879,6 @@ class PyFuzzyParser(object):
|
|||||||
|
|
||||||
return Class(cname, super, indent, start_line)
|
return Class(cname, super, indent, start_line)
|
||||||
|
|
||||||
def _parseassignment(self):
|
|
||||||
""" TODO remove or replace, at the moment not used """
|
|
||||||
assign = ''
|
|
||||||
token_type, tok, indent = self.next()
|
|
||||||
if token_type == tokenize.STRING or tok == 'str':
|
|
||||||
return '""'
|
|
||||||
elif tok == '(' or tok == 'tuple':
|
|
||||||
return '()'
|
|
||||||
elif tok == '[' or tok == 'list':
|
|
||||||
return '[]'
|
|
||||||
elif tok == '{' or tok == 'dict':
|
|
||||||
return '{}'
|
|
||||||
elif token_type == tokenize.NUMBER:
|
|
||||||
return '0'
|
|
||||||
elif tok == 'open' or tok == 'file':
|
|
||||||
return 'file'
|
|
||||||
elif tok == 'None':
|
|
||||||
return '_PyCmplNoType()'
|
|
||||||
elif tok == 'type':
|
|
||||||
return 'type(_PyCmplNoType)' # only for method resolution
|
|
||||||
else:
|
|
||||||
assign += tok
|
|
||||||
level = 0
|
|
||||||
while True:
|
|
||||||
token_type, tok, indent = self.next()
|
|
||||||
if tok in ('(', '{', '['):
|
|
||||||
level += 1
|
|
||||||
elif tok in (']', '}', ')'):
|
|
||||||
level -= 1
|
|
||||||
if level == 0:
|
|
||||||
break
|
|
||||||
elif level == 0:
|
|
||||||
if tok in (';', '\n'):
|
|
||||||
break
|
|
||||||
assign += tok
|
|
||||||
return "%s" % assign
|
|
||||||
|
|
||||||
def _parse_statement(self, pre_used_token=None, added_breaks=None):
|
def _parse_statement(self, pre_used_token=None, added_breaks=None):
|
||||||
"""
|
"""
|
||||||
Parses statements like:
|
Parses statements like:
|
||||||
@@ -778,9 +916,11 @@ class PyFuzzyParser(object):
|
|||||||
if added_breaks:
|
if added_breaks:
|
||||||
breaks += added_breaks
|
breaks += added_breaks
|
||||||
|
|
||||||
|
tok_list = []
|
||||||
while not (tok in always_break or tok in breaks and level <= 0):
|
while not (tok in always_break or tok in breaks and level <= 0):
|
||||||
set_string = None
|
set_string = None
|
||||||
#print 'parse_stmt', tok, tokenize.tok_name[token_type]
|
#print 'parse_stmt', tok, tokenize.tok_name[token_type]
|
||||||
|
tok_list.append(self.current)
|
||||||
if tok == 'as':
|
if tok == 'as':
|
||||||
string += " %s " % tok
|
string += " %s " % tok
|
||||||
token_type, tok, indent_dummy = self.next()
|
token_type, tok, indent_dummy = self.next()
|
||||||
@@ -789,6 +929,7 @@ class PyFuzzyParser(object):
|
|||||||
self._parsedotname(self.current)
|
self._parsedotname(self.current)
|
||||||
n = Name(path, start_indent, start_line, self.line_nr)
|
n = Name(path, start_indent, start_line, self.line_nr)
|
||||||
set_vars.append(n)
|
set_vars.append(n)
|
||||||
|
tok_list.append(n)
|
||||||
string += ".".join(path)
|
string += ".".join(path)
|
||||||
continue
|
continue
|
||||||
elif token_type == tokenize.NAME:
|
elif token_type == tokenize.NAME:
|
||||||
@@ -802,12 +943,13 @@ class PyFuzzyParser(object):
|
|||||||
path, token_type, tok, start_indent, start_line = \
|
path, token_type, tok, start_indent, start_line = \
|
||||||
self._parsedotname(self.current)
|
self._parsedotname(self.current)
|
||||||
n = Name(path, start_indent, start_line, self.line_nr)
|
n = Name(path, start_indent, start_line, self.line_nr)
|
||||||
|
tok_list.pop() # remove last entry, because we add Name
|
||||||
|
tok_list.append(n)
|
||||||
if tok == '(':
|
if tok == '(':
|
||||||
# it must be a function
|
# it must be a function
|
||||||
used_funcs.append(n)
|
used_funcs.append(n)
|
||||||
else:
|
else:
|
||||||
if not n.names[0] in ['global']:
|
used_vars.append(n)
|
||||||
used_vars.append(n)
|
|
||||||
if string and re.match(r'[\w\d\'"]', string[-1]):
|
if string and re.match(r'[\w\d\'"]', string[-1]):
|
||||||
string += ' '
|
string += ' '
|
||||||
string += ".".join(path)
|
string += ".".join(path)
|
||||||
@@ -833,7 +975,7 @@ class PyFuzzyParser(object):
|
|||||||
return None, tok
|
return None, tok
|
||||||
#print 'new_stat', string, set_vars, used_funcs, used_vars
|
#print 'new_stat', string, set_vars, used_funcs, used_vars
|
||||||
stmt = Statement(string, set_vars, used_funcs, used_vars,\
|
stmt = Statement(string, set_vars, used_funcs, used_vars,\
|
||||||
indent, line_start, self.line_nr)
|
tok_list, indent, line_start, self.line_nr)
|
||||||
return stmt, tok
|
return stmt, tok
|
||||||
|
|
||||||
def next(self):
|
def next(self):
|
||||||
@@ -887,7 +1029,7 @@ class PyFuzzyParser(object):
|
|||||||
while indent <= self.scope.indent \
|
while indent <= self.scope.indent \
|
||||||
and token_type in [tokenize.NAME] \
|
and token_type in [tokenize.NAME] \
|
||||||
and self.scope != self.top:
|
and self.scope != self.top:
|
||||||
dbg( 'syntax_err, dedent @%s - %s<=%s', \
|
dbg('syntax_err, dedent @%s - %s<=%s', \
|
||||||
(self.line_nr, indent, self.scope.indent))
|
(self.line_nr, indent, self.scope.indent))
|
||||||
self.scope.line_end = self.line_nr
|
self.scope.line_end = self.line_nr
|
||||||
self.scope = self.scope.parent
|
self.scope = self.scope.parent
|
||||||
|
|||||||
Reference in New Issue
Block a user