mirror of
https://github.com/davidhalter/jedi.git
synced 2026-02-23 22:28:27 +08:00
move all the parser stuff into a separate package
This commit is contained in:
682
jedi/parser/__init__.py
Normal file
682
jedi/parser/__init__.py
Normal file
@@ -0,0 +1,682 @@
|
||||
"""
|
||||
The ``Parser`` tries to convert the available Python code in an easy to read
|
||||
format, something like an abstract syntax tree. The classes that represent this
|
||||
tree, are sitting in the :mod:`parsing_representation` module.
|
||||
|
||||
The Python module ``tokenize`` is a very important part in the ``Parser``,
|
||||
because it splits the code into different words (tokens). Sometimes it looks a
|
||||
bit messy. Sorry for that! You might ask now: "Why didn't you use the ``ast``
|
||||
module for this?" Well, ``ast`` does a very good job understanding proper Python
|
||||
code, but fails to work as soon as there's a single line of broken code.
|
||||
|
||||
There's one important optimization that needs to be known: Statements are not
|
||||
being parsed completely. ``Statement`` is just a representation of the tokens
|
||||
within the statement. This lowers memory usage and cpu time and reduces the
|
||||
complexity of the ``Parser`` (there's another parser sitting inside
|
||||
``Statement``, which produces ``Array`` and ``Call``).
|
||||
"""
|
||||
from __future__ import with_statement
|
||||
|
||||
import tokenizer as tokenize
|
||||
import keyword
|
||||
|
||||
from jedi._compatibility import next, StringIO
|
||||
from jedi import debug
|
||||
from jedi import common
|
||||
from jedi.parser import representation as pr
|
||||
|
||||
|
||||
class Parser(object):
|
||||
"""
|
||||
This class is used to parse a Python file; it then divides it into a
|
||||
class structure of different scopes.
|
||||
|
||||
:param source: The codebase for the parser.
|
||||
:type source: str
|
||||
:param module_path: The path of the module in the file system, may be None.
|
||||
:type module_path: str
|
||||
:param user_position: The line/column, the user is currently on.
|
||||
:type user_position: tuple(int, int)
|
||||
:param no_docstr: If True, a string at the beginning is not a docstr.
|
||||
:param is_fast_parser: -> for fast_parser
|
||||
:param top_module: Use this module as a parent instead of `self.module`.
|
||||
"""
|
||||
def __init__(self, source, module_path=None, user_position=None,
             no_docstr=False, offset=(0, 0), is_fast_parser=None,
             top_module=None):
    """
    Parse ``source`` immediately; the result is stored in ``self.module``.

    :param source: The code to parse.
    :param module_path: Handed to the ``pr.SubModule`` created here.
    :param user_position: The line/column the user is currently on.
    :param no_docstr: If True, a string at the beginning is not a docstr.
    :param offset: ``(line, column)`` offset; parsing positions start at
        ``(1 + offset[0], offset[1])``.
    :param is_fast_parser: Handed to ``common.NoErrorTokenizer``.
    :param top_module: Stored as ``self.top_module`` instead of
        ``self.module`` when given.
    """
    self.no_docstr = no_docstr
    self.user_position = user_position
    self.user_stmt = None
    self.user_scope = None

    initial_pos = 1 + offset[0], offset[1]
    self.start_pos = initial_pos
    self.end_pos = initial_pos

    # The global scope that everything parsed below gets attached to.
    root = pr.SubModule(module_path, initial_pos, top_module)
    self.module = root
    self._scope = root
    self._current = (None, None)
    self.top_module = top_module or root

    # The parser needs the code to end with a newline, so one is appended.
    readline = StringIO(source + '\n').readline
    self._gen = common.NoErrorTokenizer(readline, offset, is_fast_parser)
    try:
        self._parse()
    except (common.MultiLevelStopIteration, StopIteration):
        # Running out of tokens may surface here as an exception (Python 2
        # does not always swallow StopIteration); it is simply ignored.
        pass

    # Decorators that were never attached to a function/class still need a
    # parent, because of `self.module.used_names`.
    for leftover in self._decorators:
        leftover.parent = root

    last_type = self._current[0]
    if last_type in (tokenize.NL, tokenize.NEWLINE):
        # Undo the effect of the newline that was appended above.
        self.end_pos = self._gen.previous[2]
    elif last_type == tokenize.INDENT:
        self.end_pos = self._gen.last_previous[2]

    self.start_pos = root.start_pos
    root.end_pos = self.end_pos
    del self._gen
|
||||
|
||||
def __repr__(self):
|
||||
return "<%s: %s>" % (type(self).__name__, self.module)
|
||||
|
||||
def _check_user_stmt(self, simple):
|
||||
# this is not user checking, just update the used_names
|
||||
for tok_name in self.module.temp_used_names:
|
||||
try:
|
||||
self.module.used_names[tok_name].add(simple)
|
||||
except KeyError:
|
||||
self.module.used_names[tok_name] = set([simple])
|
||||
self.module.temp_used_names = []
|
||||
|
||||
if not self.user_position:
|
||||
return
|
||||
# the position is right
|
||||
if simple.start_pos <= self.user_position <= simple.end_pos:
|
||||
if self.user_stmt is not None:
|
||||
# if there is already a user position (another import, because
|
||||
# imports are splitted) the names are checked.
|
||||
for n in simple.get_set_vars():
|
||||
if n.start_pos < self.user_position <= n.end_pos:
|
||||
self.user_stmt = simple
|
||||
else:
|
||||
self.user_stmt = simple
|
||||
|
||||
def _parse_dot_name(self, pre_used_token=None):
|
||||
"""
|
||||
The dot name parser parses a name, variable or function and returns
|
||||
their names.
|
||||
|
||||
:return: Tuple of Name, token_type, nexttoken.
|
||||
:rtype: tuple(Name, int, str)
|
||||
"""
|
||||
def append(el):
|
||||
names.append(el)
|
||||
self.module.temp_used_names.append(el[0])
|
||||
|
||||
names = []
|
||||
if pre_used_token is None:
|
||||
token_type, tok = self.next()
|
||||
if token_type != tokenize.NAME and tok != '*':
|
||||
return [], token_type, tok
|
||||
else:
|
||||
token_type, tok = pre_used_token
|
||||
|
||||
if token_type != tokenize.NAME and tok != '*':
|
||||
# token maybe a name or star
|
||||
return None, token_type, tok
|
||||
|
||||
append((tok, self.start_pos))
|
||||
first_pos = self.start_pos
|
||||
while True:
|
||||
end_pos = self.end_pos
|
||||
token_type, tok = self.next()
|
||||
if tok != '.':
|
||||
break
|
||||
token_type, tok = self.next()
|
||||
if token_type != tokenize.NAME:
|
||||
break
|
||||
append((tok, self.start_pos))
|
||||
|
||||
n = pr.Name(self.module, names, first_pos, end_pos) if names else None
|
||||
return n, token_type, tok
|
||||
|
||||
def _parse_import_list(self):
|
||||
"""
|
||||
The parser for the imports. Unlike the class and function parse
|
||||
function, this returns no Import class, but rather an import list,
|
||||
which is then added later on.
|
||||
The reason, why this is not done in the same class lies in the nature
|
||||
of imports. There are two ways to write them:
|
||||
|
||||
- from ... import ...
|
||||
- import ...
|
||||
|
||||
To distinguish, this has to be processed after the parser.
|
||||
|
||||
:return: List of imports.
|
||||
:rtype: list
|
||||
"""
|
||||
imports = []
|
||||
brackets = False
|
||||
continue_kw = [",", ";", "\n", ')'] \
|
||||
+ list(set(keyword.kwlist) - set(['as']))
|
||||
while True:
|
||||
defunct = False
|
||||
token_type, tok = self.next()
|
||||
if tok == '(': # python allows only one `(` in the statement.
|
||||
brackets = True
|
||||
token_type, tok = self.next()
|
||||
if brackets and tok == '\n':
|
||||
self.next()
|
||||
i, token_type, tok = self._parse_dot_name(self._current)
|
||||
if not i:
|
||||
defunct = True
|
||||
name2 = None
|
||||
if tok == 'as':
|
||||
name2, token_type, tok = self._parse_dot_name()
|
||||
imports.append((i, name2, defunct))
|
||||
while tok not in continue_kw:
|
||||
token_type, tok = self.next()
|
||||
if not (tok == "," or brackets and tok == '\n'):
|
||||
break
|
||||
return imports
|
||||
|
||||
def _parse_parentheses(self):
    """
    Parse the content of the parentheses of a function or class
    definition (params, or super classes in the case of classes).

    :return: List of the parsed ``pr.Param`` statements, each with its
        ``position_nr`` set.
    :rtype: list
    """
    separators = [',', ':']
    params = []
    last_tok = None
    while last_tok not in [')', ':']:
        param, last_tok = self._parse_statement(added_breaks=separators,
                                                stmt_class=pr.Param)
        if not param:
            continue

        if last_tok == ':':
            # A colon after a param introduces an annotation.
            annotation, last_tok = \
                self._parse_statement(added_breaks=separators)
            if annotation:
                param.add_annotation(annotation)

        # params without vars are usually syntax errors.
        if param.get_set_vars():
            param.position_nr = len(params)
            params.append(param)
    return params
|
||||
|
||||
def _parse_function(self):
|
||||
"""
|
||||
The parser for a text functions. Process the tokens, which follow a
|
||||
function definition.
|
||||
|
||||
:return: Return a Scope representation of the tokens.
|
||||
:rtype: Function
|
||||
"""
|
||||
first_pos = self.start_pos
|
||||
token_type, fname = self.next()
|
||||
if token_type != tokenize.NAME:
|
||||
return None
|
||||
|
||||
fname = pr.Name(self.module, [(fname, self.start_pos)], self.start_pos,
|
||||
self.end_pos)
|
||||
|
||||
token_type, open = self.next()
|
||||
if open != '(':
|
||||
return None
|
||||
params = self._parse_parentheses()
|
||||
|
||||
token_type, colon = self.next()
|
||||
annotation = None
|
||||
if colon in ['-', '->']:
|
||||
# parse annotations
|
||||
if colon == '-':
|
||||
# The Python 2 tokenizer doesn't understand this
|
||||
token_type, colon = self.next()
|
||||
if colon != '>':
|
||||
return None
|
||||
annotation, colon = self._parse_statement(added_breaks=[':'])
|
||||
|
||||
if colon != ':':
|
||||
return None
|
||||
|
||||
# because of 2 line func param definitions
|
||||
scope = pr.Function(self.module, fname, params, first_pos, annotation)
|
||||
if self.user_scope and scope != self.user_scope \
|
||||
and self.user_position > first_pos:
|
||||
self.user_scope = scope
|
||||
return scope
|
||||
|
||||
def _parse_class(self):
    """
    Parse the tokens that follow a ``class`` keyword.

    :return: The class scope, or None if the definition is not
        syntactically complete (missing name or ``:``).
    :rtype: Class
    """
    first_pos = self.start_pos
    token_type, raw_name = self.next()
    if token_type != tokenize.NAME:
        details = (self.start_pos[0], tokenize.tok_name[token_type],
                   raw_name)
        debug.warning("class: syntax err, token is not a name@%s (%s: %s)"
                      % details)
        return None

    name_pos = self.start_pos
    cname = pr.Name(self.module, [(raw_name, name_pos)], name_pos,
                    self.end_pos)

    bases = []
    following = self.next()[1]
    if following == '(':
        bases = self._parse_parentheses()
        following = self.next()[1]

    if following != ':':
        debug.warning("class syntax: %s@%s" % (cname, self.start_pos[0]))
        return None

    # because of 2 line class initializations
    scope = pr.Class(self.module, cname, bases, first_pos)
    if self.user_scope and scope != self.user_scope \
            and self.user_position > first_pos:
        self.user_scope = scope
    return scope
|
||||
|
||||
def _parse_statement(self, pre_used_token=None, added_breaks=None,
                     stmt_class=pr.Statement, names_are_set_vars=False):
    """
    Parses statements like::

        a = test(b)
        a += 3 - 2 or b

    and so on. One line at a time.

    The tokens are only collected here (names are turned into ``Name``
    objects); the statement itself is not analysed any further.

    :param pre_used_token: The pre parsed token, a ``(token_type, token)``
        pair. If not given, the next token is fetched.
    :type pre_used_token: tuple
    :param added_breaks: Additional tokens that end the statement when
        they appear outside of brackets.
    :param stmt_class: The class that is instantiated for the result.
    :param names_are_set_vars: Passed on to ``stmt_class``.
    :return: Statement + last parsed token. The statement is None if no
        tokens were collected or if the statement was only a string that
        was stored as a docstring.
    :rtype: (Statement, str)
    """
    set_vars = []
    level = 0  # The level of parentheses

    if pre_used_token:
        token_type, tok = pre_used_token
    else:
        token_type, tok = self.next()

    while token_type == tokenize.COMMENT:
        # remove newline and comment
        self.next()
        token_type, tok = self.next()

    first_pos = self.start_pos
    opening_brackets = ['{', '(', '[']
    closing_brackets = ['}', ')', ']']

    # the difference between "break" and "always break" is that the latter
    # will even break in parentheses. This is true for typical flow
    # commands like def and class and the imports, which will never be used
    # in a statement.
    breaks = set(['\n', ':', ')'])
    always_break = [';', 'import', 'from', 'class', 'def', 'try', 'except',
                    'finally', 'while', 'return', 'yield']
    # These only end the statement if they are its very first token.
    not_first_break = ['del', 'raise']
    if added_breaks:
        breaks |= set(added_breaks)

    tok_list = []
    as_names = []
    while not (tok in always_break
               or tok in not_first_break and not tok_list
               or tok in breaks and level <= 0):
        try:
            # print 'parse_stmt', tok, tokenize.tok_name[token_type]
            tok_list.append(self._current + (self.start_pos,))
            if tok == 'as':
                token_type, tok = self.next()
                if token_type == tokenize.NAME:
                    n, token_type, tok = self._parse_dot_name(self._current)
                    if n:
                        set_vars.append(n)
                        as_names.append(n)
                    tok_list.append(n)
                continue
            elif tok in ['lambda', 'for', 'in']:
                # don't parse these keywords, parse later in stmt.
                if tok == 'lambda':
                    # The colon of a lambda must not end the statement.
                    breaks.discard(':')
            elif token_type == tokenize.NAME:
                n, token_type, tok = self._parse_dot_name(self._current)
                # removed last entry, because we add Name
                tok_list.pop()
                if n:
                    tok_list.append(n)
                continue
            elif tok in opening_brackets:
                level += 1
            elif tok in closing_brackets:
                level -= 1

            token_type, tok = self.next()
        except (StopIteration, common.MultiLevelStopIteration):
            # comes from tokenizer
            break

    if not tok_list:
        return None, tok

    first_tok = tok_list[0]
    # docstrings
    if len(tok_list) == 1 and not isinstance(first_tok, pr.Name) \
            and first_tok[0] == tokenize.STRING:
        # Normal docstring check
        if self.freshscope and not self.no_docstr:
            self._scope.add_docstr(first_tok[1])
            return None, tok

        # Attribute docstring (PEP 224) support (sphinx uses it, e.g.)
        # If string literal is being parsed...
        elif first_tok[0] == tokenize.STRING:
            with common.ignored(IndexError, AttributeError):
                # ...then set it as a docstring
                self._scope.statements[-1].add_docstr(first_tok[1])
                return None, tok

    stmt = stmt_class(self.module, tok_list, first_pos, self.end_pos,
                      as_names=as_names,
                      names_are_set_vars=names_are_set_vars)

    stmt.parent = self.top_module
    self._check_user_stmt(stmt)

    # The keyword that ended this statement starts the next one, so it is
    # handed back to the tokenizer.
    if tok in always_break + not_first_break:
        self._gen.push_last_back()
    return stmt, tok
|
||||
|
||||
def next(self):
    """Python 2 spelling of the iterator protocol, see ``__next__``."""
    token = self.__next__()
    return token
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
""" Generate the next tokenize pattern. """
|
||||
try:
|
||||
typ, tok, start_pos, end_pos, self.parserline = next(self._gen)
|
||||
# dedents shouldn't change positions
|
||||
if typ != tokenize.DEDENT:
|
||||
self.start_pos = start_pos
|
||||
if typ not in (tokenize.INDENT, tokenize.NEWLINE, tokenize.NL):
|
||||
self.start_pos, self.end_pos = start_pos, end_pos
|
||||
except (StopIteration, common.MultiLevelStopIteration):
|
||||
# on finish, set end_pos correctly
|
||||
s = self._scope
|
||||
while s is not None:
|
||||
if isinstance(s, pr.Module) \
|
||||
and not isinstance(s, pr.SubModule):
|
||||
self.module.end_pos = self.end_pos
|
||||
break
|
||||
s.end_pos = self.end_pos
|
||||
s = s.parent
|
||||
raise
|
||||
|
||||
if self.user_position and (self.start_pos[0] == self.user_position[0]
|
||||
or self.user_scope is None
|
||||
and self.start_pos[0] >= self.user_position[0]):
|
||||
debug.dbg('user scope found [%s] = %s' %
|
||||
(self.parserline.replace('\n', ''), repr(self._scope)))
|
||||
self.user_scope = self._scope
|
||||
|
||||
self._current = typ, tok
|
||||
return self._current
|
||||
|
||||
def _parse(self):
    """
    The main part of the program. It analyzes the tokens delivered by
    ``self`` (see ``__next__``) and builds up the tree-like scope structure
    in ``self.module``. For a more detailed description, see the class
    description.

    Nothing is returned. The loop ends when the tokenizer is exhausted;
    a stop exception raised by a nested ``self.next()`` call propagates to
    the caller.
    """
    extended_flow = ['else', 'elif', 'except', 'finally']
    statement_toks = ['{', '[', '(', '`']

    self._decorators = []
    self.freshscope = True
    self.iterator = iter(self)
    # This iterator stuff is not intentional. It grew historically.
    for token_type, tok in self.iterator:
        self.module.temp_used_names = []
        # debug.dbg('main: tok=[%s] type=[%s] indent=[%s]'\
        #           % (tok, tokenize.tok_name[token_type], start_position[0]))

        # A dedent may close the current scope (and further parent scopes).
        while token_type == tokenize.DEDENT and self._scope != self.module:
            token_type, tok = self.next()
            if self.start_pos[1] <= self._scope.start_pos[1]:
                self._scope.end_pos = self.start_pos
                self._scope = self._scope.parent
                if isinstance(self._scope, pr.Module) \
                        and not isinstance(self._scope, pr.SubModule):
                    self._scope = self.module

        # check again for unindented stuff. this is true for syntax
        # errors. only check for names, because thats relevant here. If
        # some docstrings are not indented, I don't care.
        while self.start_pos[1] <= self._scope.start_pos[1] \
                and (token_type == tokenize.NAME or tok in ['(', '['])\
                and self._scope != self.module:
            self._scope.end_pos = self.start_pos
            self._scope = self._scope.parent
            if isinstance(self._scope, pr.Module) \
                    and not isinstance(self._scope, pr.SubModule):
                self._scope = self.module

        use_as_parent_scope = self.top_module if isinstance(self._scope,
                                    pr.SubModule) else self._scope
        first_pos = self.start_pos
        if tok == 'def':
            func = self._parse_function()
            if func is None:
                debug.warning("function: syntax error@%s" %
                              self.start_pos[0])
                continue
            self.freshscope = True
            self._scope = self._scope.add_scope(func, self._decorators)
            self._decorators = []
        elif tok == 'class':
            cls = self._parse_class()
            if cls is None:
                debug.warning("class: syntax error@%s" % self.start_pos[0])
                continue
            self.freshscope = True
            self._scope = self._scope.add_scope(cls, self._decorators)
            self._decorators = []
        # import stuff
        elif tok == 'import':
            imports = self._parse_import_list()
            for count, (m, alias, defunct) in enumerate(imports):
                e = (alias or m or self).end_pos
                end_pos = self.end_pos if count + 1 == len(imports) else e
                i = pr.Import(self.module, first_pos, end_pos, m,
                              alias, defunct=defunct)
                self._check_user_stmt(i)
                self._scope.add_import(i)
            if not imports:
                i = pr.Import(self.module, first_pos, self.end_pos, None,
                              defunct=True)
                self._check_user_stmt(i)
            self.freshscope = False
        elif tok == 'from':
            defunct = False
            # take care for relative imports
            relative_count = 0
            while True:
                token_type, tok = self.next()
                if tok != '.':
                    break
                relative_count += 1
            # the from import
            mod, token_type, tok = self._parse_dot_name(self._current)
            if str(mod) == 'import' and relative_count:
                # `from . import x`: the supposed module name was already
                # the `import` keyword.
                self._gen.push_last_back()
                tok = 'import'
                mod = None
            if not mod and not relative_count or tok != "import":
                debug.warning("from: syntax error@%s" % self.start_pos[0])
                defunct = True
                if tok != 'import':
                    self._gen.push_last_back()
            names = self._parse_import_list()
            for count, (name, alias, defunct2) in enumerate(names):
                star = name is not None and name.names[0] == '*'
                if star:
                    name = None
                e = (alias or name or self).end_pos
                end_pos = self.end_pos if count + 1 == len(names) else e
                i = pr.Import(self.module, first_pos, end_pos, name,
                              alias, mod, star, relative_count,
                              defunct=defunct or defunct2)
                self._check_user_stmt(i)
                self._scope.add_import(i)
            self.freshscope = False
        # loops
        elif tok == 'for':
            set_stmt, tok = self._parse_statement(added_breaks=['in'],
                                                  names_are_set_vars=True)
            if tok == 'in':
                statement, tok = self._parse_statement()
                if tok == ':':
                    s = [] if statement is None else [statement]
                    f = pr.ForFlow(self.module, s, first_pos, set_stmt)
                    self._scope = self._scope.add_statement(f)
                else:
                    # Format with `%` like every other warning in this
                    # method, instead of passing the argument separately.
                    debug.warning('syntax err, for flow started @%s'
                                  % self.start_pos[0])
                    if statement is not None:
                        statement.parent = use_as_parent_scope
                    if set_stmt is not None:
                        set_stmt.parent = use_as_parent_scope
            else:
                debug.warning('syntax err, for flow incomplete @%s'
                              % self.start_pos[0])
                if set_stmt is not None:
                    set_stmt.parent = use_as_parent_scope

        elif tok in ['if', 'while', 'try', 'with'] + extended_flow:
            added_breaks = []
            command = tok
            if command in ['except', 'with']:
                added_breaks.append(',')
            # multiple inputs because of with
            inputs = []
            first = True
            while first or command == 'with' \
                    and tok not in [':', '\n']:
                statement, tok = \
                    self._parse_statement(added_breaks=added_breaks)
                if command == 'except' and tok == ',':
                    # the except statement defines a var
                    # this is only true for python 2
                    n, token_type, tok = self._parse_dot_name()
                    if n:
                        n.parent = statement
                        statement.as_names.append(n)
                if statement:
                    inputs.append(statement)
                first = False

            if tok == ':':
                f = pr.Flow(self.module, command, inputs, first_pos)
                if command in extended_flow:
                    # the last statement has to be another part of
                    # the flow statement, because a dedent releases the
                    # main scope, so just take the last statement.
                    try:
                        s = self._scope.statements[-1].set_next(f)
                    except (AttributeError, IndexError):
                        # If set_next doesn't exist, just add it.
                        s = self._scope.add_statement(f)
                else:
                    s = self._scope.add_statement(f)
                self._scope = s
            else:
                for i in inputs:
                    i.parent = use_as_parent_scope
                debug.warning('syntax err, flow started @%s'
                              % self.start_pos[0])
        # returns
        elif tok in ['return', 'yield']:
            s = self.start_pos
            self.freshscope = False
            # add returns to the scope
            func = self._scope.get_parent_until(pr.Function)
            if tok == 'yield':
                func.is_generator = True

            stmt, tok = self._parse_statement()
            if stmt is not None:
                stmt.parent = use_as_parent_scope
            try:
                func.returns.append(stmt)
                # start_pos is the one of the return statement
                stmt.start_pos = s
            except AttributeError:
                debug.warning('return in non-function')
        # globals
        elif tok == 'global':
            stmt, tok = self._parse_statement(self._current)
            if stmt:
                self._scope.add_statement(stmt)
                for t in stmt.token_list:
                    if isinstance(t, pr.Name):
                        # add the global to the top, because there it is
                        # important.
                        self.module.add_global(t)
        # decorator
        elif tok == '@':
            stmt, tok = self._parse_statement()
            if stmt is not None:
                self._decorators.append(stmt)
        elif tok == 'pass':
            continue
        elif tok == 'assert':
            stmt, tok = self._parse_statement()
            if stmt is not None:
                stmt.parent = use_as_parent_scope
                self._scope.asserts.append(stmt)
        # default
        elif token_type in [tokenize.NAME, tokenize.STRING,
                            tokenize.NUMBER] \
                or tok in statement_toks:
            # this is the main part - a name can be a function or a
            # normal var, which can follow anything. but this is done
            # by the statement parser.
            stmt, tok = self._parse_statement(self._current)
            if stmt:
                self._scope.add_statement(stmt)
            self.freshscope = False
        else:
            if token_type not in [tokenize.COMMENT, tokenize.INDENT,
                                  tokenize.NEWLINE, tokenize.NL]:
                debug.warning('token not classified', tok, token_type,
                              self.start_pos[0])
                continue
        self.no_docstr = False
|
||||
440
jedi/parser/fast.py
Normal file
440
jedi/parser/fast.py
Normal file
@@ -0,0 +1,440 @@
|
||||
"""
|
||||
Basically a parser that is faster, because it tries to parse only parts and if
|
||||
anything changes, it only reparses the changed parts. But because it's not
|
||||
finished (and still not working as I want), I won't document it any further.
|
||||
"""
|
||||
import re
|
||||
|
||||
from jedi._compatibility import use_metaclass
|
||||
from jedi import settings
|
||||
from jedi.parser import Parser
|
||||
from jedi.parser import representation as pr
|
||||
from jedi import cache
|
||||
from jedi import common
|
||||
|
||||
|
||||
SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns']
|
||||
|
||||
|
||||
class Module(pr.Simple, pr.Module):
    """
    A module that combines the sub-modules of several parsers.

    Attributes that are not found on this object are looked up on the
    module of the first parser (see ``__getattr__``).

    :param parsers: The list of parsers; the very same list object is kept,
        so parsers added to it later are seen here as well.
    """
    def __init__(self, parsers):
        super(Module, self).__init__(self, (1, 0))
        self.parsers = parsers
        self.reset_caches()

        self.start_pos = 1, 0
        self.end_pos = None, None

    def reset_caches(self):
        """ This module does a whole lot of caching, because it uses different
        parsers. """
        self._used_names = None
        for p in self.parsers:
            p.user_scope = None
            p.user_stmt = None

    def __getattr__(self, name):
        """
        Delegate unknown attributes to the module of the first parser.

        :raises AttributeError: For dunder names, for ``parsers`` itself
            (it is not set yet if this is reached) and if there is no
            parser to delegate to.
        """
        # `parsers` must never be delegated: looking it up below would call
        # `__getattr__` again and recurse endlessly.
        if name.startswith('__') or name == 'parsers':
            raise AttributeError('Not available!')
        try:
            first_parser = self.parsers[0]
        except IndexError:
            # An IndexError would escape `hasattr`/`getattr(..., default)`,
            # which only expect AttributeError.
            raise AttributeError('%s has no parsers to look up %r on'
                                 % (type(self).__name__, name))
        return getattr(first_parser.module, name)

    @property
    def used_names(self):
        """
        The merged ``used_names`` of all parser modules (name -> set of
        statements). The result is cached until ``reset_caches`` is called.
        """
        if self._used_names is None:
            dct = {}
            for p in self.parsers:
                for k, statement_set in p.module.used_names.items():
                    if k in dct:
                        dct[k] |= statement_set
                    else:
                        # Copy, so the sets of the parsers stay untouched.
                        dct[k] = set(statement_set)

            self._used_names = dct
        return self._used_names

    def __repr__(self):
        return "<%s: %s@%s-%s>" % (type(self).__name__, self.name,
                                   self.start_pos[0], self.end_pos[0])
|
||||
|
||||
|
||||
class CachedFastParser(type):
    """ This is a metaclass for caching `FastParser`. """
    def __call__(self, source, module_path=None, user_position=None):
        """
        Return a parser for ``source``.

        A plain ``Parser`` is returned if ``settings.fast_parser`` is off.
        Otherwise a cached fast parser for ``module_path`` is updated and
        reused if there is one, else a new instance is created.
        """
        if not settings.fast_parser:
            return Parser(source, module_path, user_position)

        cached = cache.parser_cache.get(module_path, None)
        # `cached` is a `cache.ParserCacheItem`
        if cached is not None and not isinstance(cached.parser, Parser):
            reused = cached.parser
            reused.update(source, user_position)
            return reused

        return super(CachedFastParser, self).__call__(source, module_path,
                                                      user_position)
|
||||
|
||||
|
||||
class ParserNode(object):
    """
    A node in the tree of parsers; it holds one parser, the code it was
    created from and the child nodes.
    """
    def __init__(self, parser, code, parent=None):
        self.parent = parent
        self.code = code
        self.hash = hash(code)

        # `children` must exist before `save_contents` is called.
        self.children = []
        self.save_contents(parser)

    def save_contents(self, parser):
        """
        Store ``parser`` and a snapshot of the contents of its scope, so
        they can be restored by ``reset_contents``.
        """
        self.parser = parser

        module = parser.module
        try:
            # with fast_parser we have either 1 subscope or only statements.
            scope = module.subscopes[0]
        except IndexError:
            scope = module
        self.content_scope = scope

        self._contents = dict((key, list(getattr(scope, key)))
                              for key in SCOPE_CONTENTS)
        self._is_generator = scope.is_generator

        self.old_children, self.children = self.children, []

    def reset_contents(self):
        """Restore the snapshot of ``save_contents``, also for children."""
        scope = self.content_scope
        for key, saved in self._contents.items():
            setattr(scope, key, list(saved))
        scope.is_generator = self._is_generator

        parser = self.parser
        parser.user_scope = parser.module

        if self.parent is None:
            # Global vars of the first one can be deleted, in the global scope
            # they make no sense.
            parser.module.global_vars = []

        for child in self.children:
            child.reset_contents()

    def parent_until_indent(self, indent=None):
        """
        Walk up the parents as long as the node is indented at least as far
        as ``indent`` (or up to the root, if ``indent`` is None). The
        ``old_children`` of all nodes that are left are cleared.

        :return: The node the walk ended at.
        """
        node = self
        while indent is None or node.indent >= indent and node.parent:
            node.old_children = []
            if node.parent is None:
                break
            node = node.parent
        return node

    @property
    def indent(self):
        """
        The column of the first subscope, statement, import or return (in
        that order) of the parser's module; 0 for a node without parent.
        """
        if not self.parent:
            return 0

        module = self.parser.module
        for attribute in ('subscopes', 'statements', 'imports'):
            try:
                first = getattr(module, attribute)[0]
            except IndexError:
                continue
            return first.start_pos[1]

        returns = [r for r in module.returns if r is not None]
        if not returns:
            # Nothing to measure, so it is one more than the parent.
            return self.parent.indent + 1
        return returns[0].start_pos[1]

    def _set_items(self, parser, set_parent=False):
        """Insert the objects of ``parser`` into the current structure."""
        scope = self.content_scope
        source_module = parser.module
        for key in SCOPE_CONTENTS:
            target = getattr(scope, key)
            new_items = getattr(source_module, key)
            if set_parent:
                for item in new_items:
                    if item is None:
                        continue  # happens with empty returns
                    item.parent = scope.use_as_parent
                    if isinstance(item, (pr.Function, pr.Class)):
                        for decorator in item.decorators:
                            decorator.parent = scope.use_as_parent
            target.extend(new_items)

        # global_vars are collected in the module of the root node.
        root = self
        while root.parent is not None:
            root = root.parent
        root.parser.module.global_vars += source_module.global_vars

        scope.is_generator |= source_module.is_generator

    def add_node(self, node, set_parent=False):
        """Adding a node means adding a node that was already added earlier"""
        self.children.append(node)
        self._set_items(node.parser, set_parent=set_parent)
        node.old_children, node.children = node.children, []
        return node

    def add_parser(self, parser, code):
        """Wrap ``parser`` in a new child node and add it."""
        child = ParserNode(parser, code, self)
        return self.add_node(child, True)
|
||||
|
||||
|
||||
class FastParser(use_metaclass(CachedFastParser)):
|
||||
def __init__(self, code, module_path=None, user_position=None):
    """
    Parse ``code`` right away.

    :param code: The source code to parse.
    :param module_path: Stored as ``self.module_path``.
    :param user_position: The line/column the user is currently on.
    """
    # set values like `pr.Module`.
    self.module_path = module_path
    self.user_position = user_position
    # Caches of the `user_scope`/`user_stmt` properties. Both are
    # initialised here, so that reading either property can never fail
    # with an AttributeError.
    self._user_scope = None
    self._user_stmt = None

    self.current_node = None
    self.parsers = []
    self.module = Module(self.parsers)
    self.reset_caches()

    try:
        self._parse(code)
    except:
        # FastParser is cached, be careful with exceptions: a half parsed
        # state must not survive, so the parsers are dropped (in place,
        # `self.module` shares the list) before re-raising.
        self.parsers[:] = []
        raise
|
||||
|
||||
@property
def user_scope(self):
    """
    The scope the user is in. It is the user scope of the last parser that
    has one which is not a ``pr.SubModule``; ``self.module`` otherwise.
    The result is cached in ``self._user_scope``.
    """
    if self._user_scope is None:
        for sub_parser in self.parsers:
            candidate = sub_parser.user_scope
            if candidate and not isinstance(candidate, pr.SubModule):
                self._user_scope = candidate

    found = self._user_scope
    if found is None or isinstance(found, pr.SubModule):
        self._user_scope = self.module
    return self._user_scope
|
||||
|
||||
@property
def user_stmt(self):
    """
    The user statement of the first parser that has one, or None. A found
    statement is cached in ``self._user_stmt``.
    """
    if self._user_stmt is not None:
        return self._user_stmt

    for sub_parser in self.parsers:
        stmt = sub_parser.user_stmt
        if stmt:
            self._user_stmt = stmt
            return stmt
    return self._user_stmt
|
||||
|
||||
def update(self, code, user_position=None):
    """
    Parse ``code`` again with a new user position.

    :param code: The new source code.
    :param user_position: The line/column the user is currently on.
    """
    self.user_position = user_position
    self.reset_caches()

    try:
        self._parse(code)
    except:
        # FastParser is cached, be careful with exceptions: empty the
        # parser list in place before passing the exception on.
        del self.parsers[:]
        raise
|
||||
|
||||
def _scan_user_scope(self, sub_module):
|
||||
""" Scan with self.user_position. """
|
||||
for scope in sub_module.statements + sub_module.subscopes:
|
||||
if isinstance(scope, pr.Scope):
|
||||
if scope.start_pos <= self.user_position <= scope.end_pos:
|
||||
return self._scan_user_scope(scope) or scope
|
||||
return None
|
||||
|
||||
def _split_parts(self, code):
|
||||
"""
|
||||
Split the code into different parts. This makes it possible to parse
|
||||
each part seperately and therefore cache parts of the file and not
|
||||
everything.
|
||||
"""
|
||||
def add_part():
|
||||
txt = '\n'.join(current_lines)
|
||||
if txt:
|
||||
if add_to_last and parts:
|
||||
parts[-1] += '\n' + txt
|
||||
else:
|
||||
parts.append(txt)
|
||||
current_lines[:] = []
|
||||
|
||||
r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(common.FLOWS)
|
||||
|
||||
self._lines = code.splitlines()
|
||||
current_lines = []
|
||||
parts = []
|
||||
is_decorator = False
|
||||
current_indent = 0
|
||||
old_indent = 0
|
||||
new_indent = False
|
||||
in_flow = False
|
||||
add_to_last = False
|
||||
# All things within flows are simply being ignored.
|
||||
for i, l in enumerate(self._lines):
|
||||
# check for dedents
|
||||
m = re.match('^([\t ]*)(.?)', l)
|
||||
indent = len(m.group(1))
|
||||
if m.group(2) in ['', '#']:
|
||||
current_lines.append(l) # just ignore comments and blank lines
|
||||
continue
|
||||
|
||||
if indent < current_indent: # -> dedent
|
||||
current_indent = indent
|
||||
new_indent = False
|
||||
if not in_flow or indent < old_indent:
|
||||
add_part()
|
||||
add_to_last = False
|
||||
in_flow = False
|
||||
elif new_indent:
|
||||
current_indent = indent
|
||||
new_indent = False
|
||||
|
||||
# Check lines for functions/classes and split the code there.
|
||||
if not in_flow:
|
||||
m = re.match(r_keyword, l)
|
||||
if m:
|
||||
in_flow = m.group(1) in common.FLOWS
|
||||
if not is_decorator and not in_flow:
|
||||
add_part()
|
||||
add_to_last = False
|
||||
is_decorator = '@' == m.group(1)
|
||||
if not is_decorator:
|
||||
old_indent = current_indent
|
||||
current_indent += 1 # it must be higher
|
||||
new_indent = True
|
||||
elif is_decorator:
|
||||
is_decorator = False
|
||||
add_to_last = True
|
||||
|
||||
current_lines.append(l)
|
||||
add_part()
|
||||
|
||||
return parts
|
||||
|
||||
def _parse(self, code):
|
||||
""" :type code: str """
|
||||
def empty_parser():
|
||||
new, temp = self._get_parser('', '', 0, [], False)
|
||||
return new
|
||||
|
||||
parts = self._split_parts(code)
|
||||
self.parsers[:] = []
|
||||
|
||||
line_offset = 0
|
||||
start = 0
|
||||
p = None
|
||||
is_first = True
|
||||
|
||||
for code_part in parts:
|
||||
lines = code_part.count('\n') + 1
|
||||
if is_first or line_offset >= p.end_pos[0]:
|
||||
indent = len(re.match(r'[ \t]*', code_part).group(0))
|
||||
if is_first and self.current_node is not None:
|
||||
nodes = [self.current_node]
|
||||
else:
|
||||
nodes = []
|
||||
if self.current_node is not None:
|
||||
|
||||
self.current_node = \
|
||||
self.current_node.parent_until_indent(indent)
|
||||
nodes += self.current_node.old_children
|
||||
|
||||
# check if code_part has already been parsed
|
||||
# print '#'*45,line_offset, p and p.end_pos, '\n', code_part
|
||||
p, node = self._get_parser(code_part, code[start:],
|
||||
line_offset, nodes, not is_first)
|
||||
|
||||
# The actual used code_part is different from the given code
|
||||
# part, because of docstrings for example there's a chance that
|
||||
# splits are wrong.
|
||||
used_lines = self._lines[line_offset:p.end_pos[0]]
|
||||
code_part_actually_used = '\n'.join(used_lines)
|
||||
|
||||
if is_first and p.module.subscopes:
|
||||
# special case, we cannot use a function subscope as a
|
||||
# base scope, subscopes would save all the other contents
|
||||
new = empty_parser()
|
||||
if self.current_node is None:
|
||||
self.current_node = ParserNode(new, '')
|
||||
else:
|
||||
self.current_node.save_contents(new)
|
||||
self.parsers.append(new)
|
||||
is_first = False
|
||||
|
||||
if is_first:
|
||||
if self.current_node is None:
|
||||
self.current_node = ParserNode(p, code_part_actually_used)
|
||||
else:
|
||||
self.current_node.save_contents(p)
|
||||
else:
|
||||
if node is None:
|
||||
self.current_node = \
|
||||
self.current_node.add_parser(p, code_part_actually_used)
|
||||
else:
|
||||
self.current_node = self.current_node.add_node(node)
|
||||
|
||||
if self.current_node.parent and (isinstance(p.user_scope,
|
||||
pr.SubModule) or p.user_scope is None) \
|
||||
and self.user_position \
|
||||
and p.start_pos <= self.user_position < p.end_pos:
|
||||
p.user_scope = self.current_node.parent.content_scope
|
||||
|
||||
self.parsers.append(p)
|
||||
|
||||
is_first = False
|
||||
#else:
|
||||
#print '#'*45, line_offset, p.end_pos, 'theheck\n', repr(code_part)
|
||||
|
||||
line_offset += lines
|
||||
start += len(code_part) + 1 # +1 for newline
|
||||
|
||||
if self.parsers:
|
||||
self.current_node = self.current_node.parent_until_indent()
|
||||
else:
|
||||
self.parsers.append(empty_parser())
|
||||
|
||||
self.module.end_pos = self.parsers[-1].end_pos
|
||||
|
||||
# print(self.parsers[0].module.get_code())
|
||||
del code
|
||||
|
||||
def _get_parser(self, code, parser_code, line_offset, nodes, no_docstr):
|
||||
h = hash(code)
|
||||
hashes = [n.hash for n in nodes]
|
||||
node = None
|
||||
try:
|
||||
index = hashes.index(h)
|
||||
if nodes[index].code != code:
|
||||
raise ValueError()
|
||||
except ValueError:
|
||||
p = Parser(parser_code, self.module_path,
|
||||
self.user_position, offset=(line_offset, 0),
|
||||
is_fast_parser=True, top_module=self.module,
|
||||
no_docstr=no_docstr)
|
||||
p.module.parent = self.module
|
||||
else:
|
||||
if nodes[index] != self.current_node:
|
||||
offset = int(nodes[0] == self.current_node)
|
||||
self.current_node.old_children.pop(index - offset)
|
||||
node = nodes.pop(index)
|
||||
p = node.parser
|
||||
m = p.module
|
||||
m.line_offset += line_offset + 1 - m.start_pos[0]
|
||||
if self.user_position is not None and \
|
||||
m.start_pos[0] <= self.user_position[0] <= m.end_pos[0]:
|
||||
# It's important to take care of the whole user
|
||||
# positioning stuff, if no reparsing is being done.
|
||||
p.user_stmt = m.get_statement_for_position(
|
||||
self.user_position, include_imports=True)
|
||||
if p.user_stmt:
|
||||
p.user_scope = p.user_stmt.parent
|
||||
else:
|
||||
p.user_scope = self._scan_user_scope(m) or m
|
||||
|
||||
return p, node
|
||||
|
||||
def reset_caches(self):
|
||||
self._user_scope = None
|
||||
self._user_stmt = None
|
||||
self.module.reset_caches()
|
||||
if self.current_node is not None:
|
||||
self.current_node.reset_contents()
|
||||
1411
jedi/parser/representation.py
Normal file
1411
jedi/parser/representation.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user