forked from VimPlug/jedi

start using tokens all the way through in the parser

Dave Halter
2014-02-24 20:35:36 +01:00
parent 6058e8b9c3
commit 50f8b8bf0c
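
The change replaces the parser's (token_type, token_string) tuples with a single token object. Judging from the attribute accesses in the diff below (tok.type, tok.string, and self._current.start / self._current.end in __next__), the object behaves like Python's tokenize.TokenInfo; a minimal sketch of that assumed shape, not the actual class from jedi's tokenizer:

    from collections import namedtuple

    # Assumed shape only, modeled on the stdlib's tokenize.TokenInfo;
    # jedi ships its own tokenizer and its real class may differ.
    Token = namedtuple('Token', ['type', 'string', 'start', 'end'])

    # e.g. a NAME token spanning columns 0-3 on line 1:
    #   Token(type=tokenize.NAME, string='def', start=(1, 0), end=(1, 3))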


@@ -97,8 +97,7 @@ class Parser(object):
         The dot name parser parses a name, variable or function and returns
         their names.

-        :return: Tuple of Name, token_type, nexttoken.
-        :rtype: tuple(Name, int, str)
+        :return: tuple of Name, next_token
         """
         def append(el):
             names.append(el)
@@ -106,30 +105,30 @@ class Parser(object):
         names = []
         if pre_used_token is None:
-            token_type, tok = self.next()
-            if token_type != tokenize.NAME and tok != '*':
-                return [], token_type, tok
+            tok = self.next()
+            if tok.type != tokenize.NAME and tok.string != '*':
+                return [], tok # TODO the fuck, why []?
         else:
-            token_type, tok = pre_used_token
+            tok = pre_used_token

-        if token_type != tokenize.NAME and tok != '*':
+        if tok.type != tokenize.NAME and tok.string != '*':
             # token maybe a name or star
-            return None, token_type, tok
+            return None, tok

-        append((tok, self.start_pos))
+        append((tok.string, self.start_pos))
         first_pos = self.start_pos
         while True:
             end_pos = self.end_pos
-            token_type, tok = self.next()
-            if tok != '.':
+            tok = self.next()
+            if tok.string != '.':
                 break
-            token_type, tok = self.next()
-            if token_type != tokenize.NAME:
+            tok = self.next()
+            if tok.type != tokenize.NAME:
                 break
-            append((tok, self.start_pos))
+            append((tok.string, self.start_pos))

         n = pr.Name(self.module, names, first_pos, end_pos) if names else None
-        return n, token_type, tok
+        return n, tok

     def _parse_import_list(self):
         """
@@ -153,22 +152,22 @@ class Parser(object):
                        + list(set(keyword.kwlist) - set(['as']))
         while True:
             defunct = False
-            token_type, tok = self.next()
-            if tok == '(': # python allows only one `(` in the statement.
+            tok = self.next()
+            if tok.string == '(': # python allows only one `(` in the statement.
                 brackets = True
-                token_type, tok = self.next()
-            if brackets and tok == '\n':
-                token_type, tok = self.next()
-            i, token_type, tok = self._parse_dot_name(self._current)
+                tok = self.next()
+            if brackets and tok.string == '\n':
+                tok = self.next()
+            i, tok = self._parse_dot_name(tok)
             if not i:
                 defunct = True
             name2 = None
-            if tok == 'as':
-                name2, token_type, tok = self._parse_dot_name()
+            if tok.string == 'as':
+                name2, tok = self._parse_dot_name()
             imports.append((i, name2, defunct))
-            while tok not in continue_kw:
-                token_type, tok = self.next()
-                if not (tok == "," or brackets and tok == '\n'):
+            while tok.string not in continue_kw:
+                tok = self.next()
+                if not (tok.string == "," or brackets and tok.string == '\n'):
                     break
         return imports
@@ -184,10 +183,10 @@ class Parser(object):
         tok = None
         pos = 0
         breaks = [',', ':']
-        while tok not in [')', ':']:
+        while tok is None or tok.string not in [')', ':']:
             param, tok = self._parse_statement(added_breaks=breaks,
                                                stmt_class=pr.Param)
-            if param and tok == ':':
+            if param and tok.string == ':':
                 # parse annotations
                 annotation, tok = self._parse_statement(added_breaks=breaks)
                 if annotation:
@@ -210,30 +209,30 @@ class Parser(object):
         :rtype: Function
         """
         first_pos = self.start_pos
-        token_type, fname = self.next()
-        if token_type != tokenize.NAME:
+        tok = self.next()
+        if tok.type != tokenize.NAME:
             return None

-        fname = pr.Name(self.module, [(fname, self.start_pos)], self.start_pos,
+        fname = pr.Name(self.module, [(tok.string, self.start_pos)], self.start_pos,
                         self.end_pos)

-        token_type, open = self.next()
-        if open != '(':
+        tok = self.next()
+        if tok.string != '(':
             return None
         params = self._parse_parentheses()

-        token_type, colon = self.next()
+        colon = self.next()
         annotation = None
-        if colon in ['-', '->']:
+        if colon.string in ['-', '->']:
             # parse annotations
-            if colon == '-':
+            if colon.string == '-':
                 # The Python 2 tokenizer doesn't understand this
-                token_type, colon = self.next()
-                if colon != '>':
+                colon = self.next()
+                if colon.string != '>':
                     return None
             annotation, colon = self._parse_statement(added_breaks=[':'])

-        if colon != ':':
+        if colon.string != ':':
             return None

         # because of 2 line func param definitions
@@ -248,22 +247,22 @@ class Parser(object):
         :rtype: Class
         """
         first_pos = self.start_pos
-        token_type, cname = self.next()
-        if token_type != tokenize.NAME:
+        cname = self.next()
+        if cname.type != tokenize.NAME:
             debug.warning("class: syntax err, token is not a name@%s (%s: %s)",
-                          self.start_pos[0], tokenize.tok_name[token_type], cname)
+                          self.start_pos[0], tokenize.tok_name[cname.type], cname.string)
             return None

-        cname = pr.Name(self.module, [(cname, self.start_pos)], self.start_pos,
-                        self.end_pos)
+        cname = pr.Name(self.module, [(cname.string, self.start_pos)],
+                        self.start_pos, self.end_pos)

         super = []
-        token_type, _next = self.next()
-        if _next == '(':
+        _next = self.next()
+        if _next.string == '(':
             super = self._parse_parentheses()
-            token_type, _next = self.next()
+            _next = self.next()

-        if _next != ':':
+        if _next.string != ':':
             debug.warning("class syntax: %s@%s", cname, self.start_pos[0])
             return None
@@ -288,14 +287,14 @@ class Parser(object):
         level = 0 # The level of parentheses

         if pre_used_token:
-            token_type, tok = pre_used_token
+            tok = pre_used_token
         else:
-            token_type, tok = self.next()
+            tok = self.next()

-        while token_type == tokenize.COMMENT:
+        while tok.type == tokenize.COMMENT:
             # remove newline and comment
             self.next()
-            token_type, tok = self.next()
+            tok = self.next()

         first_pos = self.start_pos
         opening_brackets = ['{', '(', '[']
@@ -314,9 +313,9 @@ class Parser(object):
         tok_list = []
         as_names = []
-        while not (tok in always_break
-                   or tok in not_first_break and not tok_list
-                   or tok in breaks and level <= 0):
+        while not (tok.string in always_break
+                   or tok.string in not_first_break and not tok_list
+                   or tok.string in breaks and level <= 0):
             try:
                 # print 'parse_stmt', tok, tokenize.tok_name[token_type]
                 tok_list.append(
@@ -324,10 +323,10 @@ class Parser(object):
                         self._current + (self.start_pos,)
                     )
                 )
-                if tok == 'as':
-                    token_type, tok = self.next()
-                    if token_type == tokenize.NAME:
-                        n, token_type, tok = self._parse_dot_name(
+                if tok.string == 'as':
+                    tok = self.next()
+                    if tok.type == tokenize.NAME:
+                        n, tok = self._parse_dot_name(
                             self._current
                         )
                         if n:
@@ -335,23 +334,23 @@ class Parser(object):
                             as_names.append(n)
                         tok_list.append(n)
                     continue
-                elif tok in ['lambda', 'for', 'in']:
+                elif tok.string in ['lambda', 'for', 'in']:
                     # don't parse these keywords, parse later in stmt.
-                    if tok == 'lambda':
+                    if tok.string == 'lambda':
                         breaks.discard(':')
-                elif token_type == tokenize.NAME:
-                    n, token_type, tok = self._parse_dot_name(self._current)
+                elif tok.type == tokenize.NAME:
+                    n, tok = self._parse_dot_name(self._current)
                     # removed last entry, because we add Name
                     tok_list.pop()
                     if n:
                         tok_list.append(n)
                     continue
-                elif tok in opening_brackets:
+                elif tok.string in opening_brackets:
                     level += 1
-                elif tok in closing_brackets:
+                elif tok.string in closing_brackets:
                     level -= 1

-                token_type, tok = self.next()
+                tok = self.next()
             except (StopIteration, common.MultiLevelStopIteration):
                 # comes from tokenizer
                 break
@@ -387,7 +386,7 @@ class Parser(object):
             stmt.parent = self._top_module
             self._check_user_stmt(stmt)

-        if tok in always_break + not_first_break:
+        if tok.string in always_break + not_first_break:
             self._gen.push_last_back()
         return stmt, tok
@@ -399,11 +398,13 @@ class Parser(object):
     def __next__(self):
         """ Generate the next tokenize pattern. """
-        typ, tok, start_pos, end_pos = next(self._gen)
+        #typ, tok, start_pos, end_pos = next(self._gen)
+        self._current = next(self._gen)
         # dedents shouldn't change positions
-        self.start_pos = start_pos
+        self.start_pos = self._current.start
+        self.end_pos = self._current.end
-        self._current = typ, tok
+        #self._current = typ, tok
         return self._current

     def _parse(self):
@@ -422,9 +423,11 @@ class Parser(object):
         self._decorators = []
         self.freshscope = True
-        self.iterator = iter(self)
-        # This iterator stuff is not intentional. It grew historically.
-        for token_type, tok in self.iterator:
+        self.iterator = iter(self)
+        for tok in self.iterator:
+            token_type = tok.type
+            tok_str = tok.string
             self.module.temp_used_names = []
             # debug.dbg('main: tok=[%s] type=[%s] indent=[%s]', \
             #           tok, tokenize.tok_name[token_type], start_position[0])
@@ -433,7 +436,7 @@ class Parser(object):
             # errors. only check for names, because thats relevant here. If
             # some docstrings are not indented, I don't care.
             while self.start_pos[1] <= self._scope.start_pos[1] \
-                    and (token_type == tokenize.NAME or tok in ['(', '['])\
+                    and (token_type == tokenize.NAME or tok_str in ['(', '['])\
                     and self._scope != self.module:
                 self._scope.end_pos = self.start_pos
                 self._scope = self._scope.parent
@@ -446,7 +449,7 @@ class Parser(object):
             else:
                 use_as_parent_scope = self._scope
             first_pos = self.start_pos
-            if tok == 'def':
+            if tok_str == 'def':
                 func = self._parse_function()
                 if func is None:
                     debug.warning("function: syntax error@%s", self.start_pos[0])
@@ -454,7 +457,7 @@ class Parser(object):
                 self.freshscope = True
                 self._scope = self._scope.add_scope(func, self._decorators)
                 self._decorators = []
-            elif tok == 'class':
+            elif tok_str == 'class':
                 cls = self._parse_class()
                 if cls is None:
                     debug.warning("class: syntax error@%s" % self.start_pos[0])
@@ -463,7 +466,7 @@ class Parser(object):
                 self._scope = self._scope.add_scope(cls, self._decorators)
                 self._decorators = []
             # import stuff
-            elif tok == 'import':
+            elif tok_str == 'import':
                 imports = self._parse_import_list()
                 for count, (m, alias, defunct) in enumerate(imports):
                     e = (alias or m or self).end_pos
@@ -477,25 +480,26 @@ class Parser(object):
                                   defunct=True)
                     self._check_user_stmt(i)
                 self.freshscope = False
-            elif tok == 'from':
+            elif tok_str == 'from':
                 defunct = False
                 # take care for relative imports
                 relative_count = 0
                 while True:
-                    token_type, tok = self.next()
-                    if tok != '.':
+                    tok = self.next()
+                    if tok.string != '.':
                         break
                     relative_count += 1
                 # the from import
-                mod, token_type, tok = self._parse_dot_name(self._current)
+                mod, tok = self._parse_dot_name(self._current)
+                tok_str = tok.string
                 if str(mod) == 'import' and relative_count:
                     self._gen.push_last_back()
-                    tok = 'import'
+                    tok_str = 'import'
                     mod = None
-                if not mod and not relative_count or tok != "import":
+                if not mod and not relative_count or tok_str != "import":
                     debug.warning("from: syntax error@%s", self.start_pos[0])
                     defunct = True
-                    if tok != 'import':
+                    if tok_str != 'import':
                         self._gen.push_last_back()
                 names = self._parse_import_list()
                 for count, (name, alias, defunct2) in enumerate(names):
@@ -511,10 +515,10 @@ class Parser(object):
                         self._scope.add_import(i)
                 self.freshscope = False
             # loops
-            elif tok == 'for':
+            elif tok_str == 'for':
                 set_stmt, tok = self._parse_statement(added_breaks=['in'],
                                                       names_are_set_vars=True)
-                if tok != 'in':
+                if tok.string != 'in':
                     debug.warning('syntax err, for flow incomplete @%s', self.start_pos[0])

                 try:
@@ -524,23 +528,23 @@ class Parser(object):
                 s = [] if statement is None else [statement]
                 f = pr.ForFlow(self.module, s, first_pos, set_stmt)
                 self._scope = self._scope.add_statement(f)
-                if tok != ':':
+                if tok is None or tok.string != ':':
                     debug.warning('syntax err, for flow started @%s', self.start_pos[0])
-            elif tok in ['if', 'while', 'try', 'with'] + extended_flow:
+            elif tok_str in ['if', 'while', 'try', 'with'] + extended_flow:
                 added_breaks = []
-                command = tok
+                command = tok_str
                 if command in ['except', 'with']:
                     added_breaks.append(',')
                 # multiple inputs because of with
                 inputs = []
                 first = True
-                while first or command == 'with' and tok not in [':', '\n']:
+                while first or command == 'with' and tok.string not in [':', '\n']:
                     statement, tok = \
                         self._parse_statement(added_breaks=added_breaks)
-                    if command == 'except' and tok == ',':
+                    if command == 'except' and tok.string == ',':
                         # the except statement defines a var
                         # this is only true for python 2
-                        n, token_type, tok = self._parse_dot_name()
+                        n, tok = self._parse_dot_name()
                         if n:
                             n.parent = statement
                             statement.as_names.append(n)
@@ -561,15 +565,15 @@ class Parser(object):
                 else:
                     s = self._scope.add_statement(f)
                 self._scope = s
-                if tok != ':':
+                if tok.string != ':':
                     debug.warning('syntax err, flow started @%s', self.start_pos[0])
             # returns
-            elif tok in ['return', 'yield']:
+            elif tok_str in ['return', 'yield']:
                 s = self.start_pos
                 self.freshscope = False
                 # add returns to the scope
                 func = self._scope.get_parent_until(pr.Function)
-                if tok == 'yield':
+                if tok_str == 'yield':
                     func.is_generator = True

                 stmt, tok = self._parse_statement()
@@ -582,7 +586,7 @@ class Parser(object):
                 except AttributeError:
                     debug.warning('return in non-function')
             # globals
-            elif tok == 'global':
+            elif tok_str == 'global':
                 stmt, tok = self._parse_statement(self._current)
                 if stmt:
                     self._scope.add_statement(stmt)
@@ -592,13 +596,13 @@ class Parser(object):
                         # important.
                         self.module.add_global(t)
             # decorator
-            elif tok == '@':
+            elif tok_str == '@':
                 stmt, tok = self._parse_statement()
                 if stmt is not None:
                     self._decorators.append(stmt)
-            elif tok == 'pass':
+            elif tok_str == 'pass':
                 continue
-            elif tok == 'assert':
+            elif tok_str == 'assert':
                 stmt, tok = self._parse_statement()
                 if stmt is not None:
                     stmt.parent = use_as_parent_scope
@@ -606,7 +610,7 @@ class Parser(object):
             # default
             elif token_type in [tokenize.NAME, tokenize.STRING,
                                 tokenize.NUMBER] \
-                    or tok in statement_toks:
+                    or tok_str in statement_toks:
                 # this is the main part - a name can be a function or a
                 # normal var, which can follow anything. but this is done
                 # by the statement parser.
@@ -616,7 +620,7 @@ class Parser(object):
                 self.freshscope = False
             else:
                 if token_type not in [tokenize.COMMENT, tokenize.NEWLINE]:
-                    debug.warning('Token not used: %s %s %s', tok,
+                    debug.warning('Token not used: %s %s %s', tok_str,
                                   tokenize.tok_name[token_type], self.start_pos)
                 continue
             self.no_docstr = False
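
The whole diff applies one mechanical pattern: token_type, tok = self.next() tuple unpacking becomes tok = self.next(), and comparisons move to tok.type and tok.string. A stdlib sketch of that access pattern for illustration (CPython's tokenize module, not jedi's own tokenizer):

    import io
    import tokenize

    # Each token is a single object carrying its type, text and both
    # positions - the same shape the parser switches to in this commit.
    for tok in tokenize.generate_tokens(io.StringIO('import os.path\n').readline):
        print(tokenize.tok_name[tok.type], repr(tok.string), tok.start, tok.end)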