move all the parser stuff into a separate package

This commit is contained in:
David Halter
2013-09-06 00:58:40 +04:30
parent dd4d0bc619
commit 390442dc3b
3 changed files with 6 additions and 6 deletions

682
jedi/parser/__init__.py Normal file
View File

@@ -0,0 +1,682 @@
"""
The ``Parser`` tries to convert the available Python code into an easy to read
format, something like an abstract syntax tree. The classes that represent this
tree live in the :mod:`jedi.parser.representation` module.
The Python module ``tokenize`` is a very important part in the ``Parser``,
because it splits the code into different words (tokens). Sometimes it looks a
bit messy. Sorry for that! You might ask now: "Why didn't you use the ``ast``
module for this?" Well, ``ast`` does a very good job understanding proper Python
code, but fails to work as soon as there's a single line of broken code.
There's one important optimization that needs to be known: Statements are not
being parsed completely. ``Statement`` is just a representation of the tokens
within the statement. This lowers memory usage and cpu time and reduces the
complexity of the ``Parser`` (there's another parser sitting inside
``Statement``, which produces ``Array`` and ``Call``).
"""
from __future__ import with_statement
import tokenizer as tokenize
import keyword
from jedi._compatibility import next, StringIO
from jedi import debug
from jedi import common
from jedi.parser import representation as pr
class Parser(object):
"""
This class is used to parse a Python file, it then divides them into a
class structure of different scopes.
:param source: The codebase for the parser.
:type source: str
:param module_path: The path of the module in the file system, may be None.
:type module_path: str
:param user_position: The line/column, the user is currently on.
:type user_position: tuple(int, int)
:param no_docstr: If True, a string at the beginning is not a docstr.
:param is_fast_parser: -> for fast_parser
:param top_module: Use this module as a parent instead of `self.module`.
"""
def __init__(self, source, module_path=None, user_position=None,
             no_docstr=False, offset=(0, 0), is_fast_parser=None,
             top_module=None):
    """
    Tokenize ``source`` and parse it right away into ``self.module``.

    ``offset`` is a ``(line, column)`` pair; the line part shifts the
    initial position, which otherwise starts at line 1.
    """
    self.user_position = user_position
    self.user_scope = None
    self.user_stmt = None
    self.no_docstr = no_docstr

    initial_pos = (1 + offset[0], offset[1])
    self.start_pos = initial_pos
    self.end_pos = initial_pos

    # The sub module is the global scope everything else is added to.
    self.module = pr.SubModule(module_path, initial_pos, top_module)
    self._scope = self.module
    self._current = (None, None)

    # The parser needs the source to end with a newline.
    readline = StringIO(source + '\n').readline
    self._gen = common.NoErrorTokenizer(readline, offset, is_fast_parser)
    self.top_module = top_module or self.module

    try:
        self._parse()
    except (common.MultiLevelStopIteration, StopIteration):
        # StopIteration is listed as well, because python 2 handles it in
        # a strange way and it sometimes escapes. Just ignore it.
        pass

    # Decorators that never got attached to a scope still need a parent,
    # otherwise `self.module.used_names` would reference orphans.
    for decorator in self._decorators:
        decorator.parent = self.module

    last_type = self._current[0]
    if last_type in (tokenize.NL, tokenize.NEWLINE):
        # A newline was appended above, so "remove" it from the end again.
        self.end_pos = self._gen.previous[2]
    elif last_type == tokenize.INDENT:
        self.end_pos = self._gen.last_previous[2]

    self.start_pos = self.module.start_pos
    self.module.end_pos = self.end_pos
    del self._gen
def __repr__(self):
    """ Show the class name together with the parsed module. """
    class_name = type(self).__name__
    return "<%s: %s>" % (class_name, self.module)
def _check_user_stmt(self, simple):
    """
    Register ``simple`` for all temporarily collected names and, if a user
    position is set and lies within ``simple``, remember it as user
    statement.
    """
    # This part is not user checking, it just updates the used_names.
    for used_name in self.module.temp_used_names:
        self.module.used_names.setdefault(used_name, set()).add(simple)
    self.module.temp_used_names = []

    position = self.user_position
    if not position:
        return
    if not (simple.start_pos <= position <= simple.end_pos):
        return

    if self.user_stmt is None:
        self.user_stmt = simple
        return

    # There is already a user statement (another import, because imports
    # are split up), so only take over if one of the names matches.
    for name in simple.get_set_vars():
        if name.start_pos < position <= name.end_pos:
            self.user_stmt = simple
def _parse_dot_name(self, pre_used_token=None):
    """
    Parse a name that may consist of several parts separated by dots (or
    a single ``*``) and return it together with the token following it.

    Every collected part is also appended to
    ``self.module.temp_used_names``.

    :param pre_used_token: An already fetched ``(token_type, token)``
        pair to start with. If None, the next token is fetched.
    :return: Tuple of Name, token_type, nexttoken. The first item is
        ``[]`` (token fetched here) or ``None`` (pre used token) if the
        starting token is neither a name nor a star.
    :rtype: tuple(Name, int, str)
    """
    parts = []

    def remember(text):
        # store the part with the position it has just been read at
        parts.append((text, self.start_pos))
        self.module.temp_used_names.append(text)

    if pre_used_token is not None:
        token_type, tok = pre_used_token
    else:
        token_type, tok = self.next()
        if not (token_type == tokenize.NAME or tok == '*'):
            return [], token_type, tok

    if not (token_type == tokenize.NAME or tok == '*'):
        # the token has to be a name or a star
        return None, token_type, tok

    remember(tok)
    first_pos = self.start_pos
    while True:
        end_pos = self.end_pos
        token_type, tok = self.next()
        if tok != '.':
            break
        token_type, tok = self.next()
        if token_type != tokenize.NAME:
            break
        remember(tok)

    if parts:
        name = pr.Name(self.module, parts, first_pos, end_pos)
    else:
        name = None
    return name, token_type, tok
def _parse_import_list(self):
    """
    Parse the comma separated names of an import statement.

    No Import object is created here. The caller receives the raw list,
    because ``from ... import ...`` and ``import ...`` have to be told
    apart after parsing.

    :return: List of ``(name, alias, defunct)`` tuples, one per imported
        name. ``defunct`` is True if no name could be parsed.
    :rtype: list
    """
    result = []
    in_brackets = False
    # tokens that end the part belonging to a single imported name
    stop_tokens = [",", ";", "\n", ')'] \
        + list(set(keyword.kwlist) - set(['as']))

    while True:
        token_type, tok = self.next()
        if tok == '(':  # python allows only one `(` in the statement.
            in_brackets = True
            token_type, tok = self.next()
        if in_brackets and tok == '\n':
            self.next()

        name, token_type, tok = self._parse_dot_name(self._current)
        defunct = not name

        alias = None
        if tok == 'as':
            alias, token_type, tok = self._parse_dot_name()
        result.append((name, alias, defunct))

        # skip everything up to the next separator or keyword
        while tok not in stop_tokens:
            token_type, tok = self.next()

        goes_on = tok == "," or (in_brackets and tok == '\n')
        if not goes_on:
            break
    return result
def _parse_parentheses(self):
    """
    Parse the content of the parentheses of a function or class
    definition (params, respectively super classes for classes).

    :return: List of Statements
    :rtype: list
    """
    params = []
    stop_at = [',', ':']
    tok = None
    while tok not in [')', ':']:
        param, tok = self._parse_statement(added_breaks=stop_at,
                                           stmt_class=pr.Param)
        if param and tok == ':':
            # a colon after a param starts an annotation
            annotation, tok = self._parse_statement(added_breaks=stop_at)
            if annotation:
                param.add_annotation(annotation)

        # params without vars are usually syntax errors.
        if param and param.get_set_vars():
            # the position is the count of the params accepted so far
            param.position_nr = len(params)
            params.append(param)
    return params
def _parse_function(self):
    """
    Parse the tokens following a ``def`` keyword: the name, the params,
    an optional ``->`` annotation and the closing colon.

    :return: Return a Scope representation of the tokens, or None if the
        tokens do not form a function header.
    :rtype: Function
    """
    first_pos = self.start_pos

    token_type, name_str = self.next()
    if token_type != tokenize.NAME:
        return None
    fname = pr.Name(self.module, [(name_str, self.start_pos)],
                    self.start_pos, self.end_pos)

    token_type, opening = self.next()
    if opening != '(':
        return None
    params = self._parse_parentheses()

    annotation = None
    token_type, colon = self.next()
    if colon in ['-', '->']:
        # parse annotations
        if colon == '-':
            # The Python 2 tokenizer doesn't understand `->`, the arrow
            # arrives as two separate tokens.
            token_type, colon = self.next()
            if colon != '>':
                return None
        annotation, colon = self._parse_statement(added_breaks=[':'])

    if colon != ':':
        return None

    # because of 2 line func param definitions
    scope = pr.Function(self.module, fname, params, first_pos, annotation)
    if self.user_scope and scope != self.user_scope \
            and self.user_position > first_pos:
        self.user_scope = scope
    return scope
def _parse_class(self):
    """
    Parse the tokens following a ``class`` keyword: the name, optional
    super classes in parentheses and the closing colon.

    :return: Return a Scope representation of the tokens, or None if the
        tokens do not form a class header.
    :rtype: Class
    """
    first_pos = self.start_pos

    token_type, name_str = self.next()
    if token_type != tokenize.NAME:
        debug.warning("class: syntax err, token is not a name@%s (%s: %s)"
                      % (self.start_pos[0], tokenize.tok_name[token_type],
                         name_str))
        return None
    cname = pr.Name(self.module, [(name_str, self.start_pos)],
                    self.start_pos, self.end_pos)

    supers = []
    token_type, following = self.next()
    if following == '(':
        supers = self._parse_parentheses()
        token_type, following = self.next()

    if following != ':':
        debug.warning("class syntax: %s@%s" % (cname, self.start_pos[0]))
        return None

    # because of 2 line class initializations
    scope = pr.Class(self.module, cname, supers, first_pos)
    if self.user_scope and scope != self.user_scope \
            and self.user_position > first_pos:
        self.user_scope = scope
    return scope
def _parse_statement(self, pre_used_token=None, added_breaks=None,
                     stmt_class=pr.Statement, names_are_set_vars=False):
    """
    Parses statements like::

        a = test(b)
        a += 3 - 2 or b

    and so on. One line at a time.

    A lone string literal is not returned as a statement: it is added as
    docstring to the current scope or to the previous statement instead
    and ``None`` is returned in place of the statement.

    :param pre_used_token: The pre parsed token, a ``(token_type, token)``
        pair. If it is not given, the next token is fetched.
    :type pre_used_token: tuple
    :param added_breaks: Additional tokens that end the statement, as long
        as they are not enclosed in brackets.
    :param stmt_class: The class that is instantiated for the statement.
    :param names_are_set_vars: Passed on unchanged to ``stmt_class``.
    :return: Statement + last parsed token.
    :rtype: (Statement, str)
    """
    set_vars = []
    level = 0  # The level of parentheses

    if pre_used_token:
        token_type, tok = pre_used_token
    else:
        token_type, tok = self.next()

    while token_type == tokenize.COMMENT:
        # remove newline and comment
        self.next()
        token_type, tok = self.next()

    first_pos = self.start_pos
    opening_brackets = ['{', '(', '[']
    closing_brackets = ['}', ')', ']']

    # the difference between "break" and "always break" is that the latter
    # will even break in parentheses. This is true for typical flow
    # commands like def and class and the imports, which will never be used
    # in a statement.
    breaks = set(['\n', ':', ')'])
    always_break = [';', 'import', 'from', 'class', 'def', 'try', 'except',
                    'finally', 'while', 'return', 'yield']
    # these keywords only end a statement if they are its first token
    not_first_break = ['del', 'raise']
    if added_breaks:
        breaks |= set(added_breaks)

    tok_list = []
    as_names = []
    while not (tok in always_break
               or tok in not_first_break and not tok_list
               or tok in breaks and level <= 0):
        try:
            # print 'parse_stmt', tok, tokenize.tok_name[token_type]
            tok_list.append(self._current + (self.start_pos,))
            if tok == 'as':
                token_type, tok = self.next()
                if token_type == tokenize.NAME:
                    n, token_type, tok = self._parse_dot_name(self._current)
                    if n:
                        set_vars.append(n)
                        as_names.append(n)
                    tok_list.append(n)
                # the next token has already been fetched at this point
                continue
            elif tok in ['lambda', 'for', 'in']:
                # don't parse these keywords, parse later in stmt.
                if tok == 'lambda':
                    # the colon of a lambda must not end the statement
                    breaks.discard(':')
            elif token_type == tokenize.NAME:
                n, token_type, tok = self._parse_dot_name(self._current)
                # removed last entry, because we add Name
                tok_list.pop()
                if n:
                    tok_list.append(n)
                # _parse_dot_name already fetched the following token
                continue
            elif tok in opening_brackets:
                level += 1
            elif tok in closing_brackets:
                level -= 1

            token_type, tok = self.next()
        except (StopIteration, common.MultiLevelStopIteration):
            # comes from tokenizer
            break

    if not tok_list:
        return None, tok

    first_tok = tok_list[0]
    # docstrings
    if len(tok_list) == 1 and not isinstance(first_tok, pr.Name) \
            and first_tok[0] == tokenize.STRING:
        # Normal docstring check
        if self.freshscope and not self.no_docstr:
            self._scope.add_docstr(first_tok[1])
            return None, tok

        # Attribute docstring (PEP 224) support (sphinx uses it, e.g.)
        # If string literal is being parsed...
        elif first_tok[0] == tokenize.STRING:
            with common.ignored(IndexError, AttributeError):
                # ...then set it as a docstring
                self._scope.statements[-1].add_docstr(first_tok[1])
                # only reached if adding the docstring did not raise
                return None, tok

    stmt = stmt_class(self.module, tok_list, first_pos, self.end_pos,
                      as_names=as_names,
                      names_are_set_vars=names_are_set_vars)
    stmt.parent = self.top_module
    self._check_user_stmt(stmt)

    if tok in always_break + not_first_break:
        # the keyword belongs to the next statement, hand it back to the
        # tokenizer
        self._gen.push_last_back()
    return stmt, tok
def next(self):
    """ Return the next token pair by delegating to ``__next__``. """
    return self.__next__()
def __iter__(self):
    """ The parser is its own iterator over ``(token_type, token)``. """
    return self
def __next__(self):
    """
    Fetch the next token from the tokenizer, update the positions and the
    user scope and return the ``(token_type, token)`` pair.

    When the tokenizer is exhausted, the end positions of the enclosing
    scopes are set and the exception is re-raised.
    """
    try:
        token = next(self._gen)
        typ, tok, start_pos, end_pos, self.parserline = token
        # dedents shouldn't change positions
        if typ != tokenize.DEDENT:
            self.start_pos = start_pos
            if typ not in (tokenize.INDENT, tokenize.NEWLINE, tokenize.NL):
                self.end_pos = end_pos
    except (StopIteration, common.MultiLevelStopIteration):
        # on finish, set end_pos correctly
        scope = self._scope
        while scope is not None:
            is_real_module = isinstance(scope, pr.Module) \
                and not isinstance(scope, pr.SubModule)
            if is_real_module:
                self.module.end_pos = self.end_pos
                break
            scope.end_pos = self.end_pos
            scope = scope.parent
        raise

    if self.user_position:
        line = self.start_pos[0]
        user_line = self.user_position[0]
        if line == user_line \
                or (self.user_scope is None and line >= user_line):
            debug.dbg('user scope found [%s] = %s' %
                      (self.parserline.replace('\n', ''), repr(self._scope)))
            self.user_scope = self._scope

    self._current = typ, tok
    return self._current
def _parse(self):
    """
    The main part of the program. It iterates over the tokens of the
    source and adds scopes, imports, flows and statements to the module
    tree below ``self.module``. For a more detailed description, see the
    class description.

    Nothing is returned, the result is available in ``self.module``. When
    the tokenizer is exhausted inside one of the sub parsers, the
    resulting ``StopIteration`` leaves this method; the caller
    (``__init__``) catches it.
    """
    extended_flow = ['else', 'elif', 'except', 'finally']
    statement_toks = ['{', '[', '(', '`']

    self._decorators = []
    self.freshscope = True
    self.iterator = iter(self)
    # This iterator stuff is not intentional. It grew historically.
    for token_type, tok in self.iterator:
        self.module.temp_used_names = []
        # debug.dbg('main: tok=[%s] type=[%s] indent=[%s]'\
        #           % (tok, tokenize.tok_name[token_type], start_position[0]))

        # Leave the scopes that are closed by dedents.
        while token_type == tokenize.DEDENT and self._scope != self.module:
            token_type, tok = self.next()
            if self.start_pos[1] <= self._scope.start_pos[1]:
                self._scope.end_pos = self.start_pos
                self._scope = self._scope.parent
                if isinstance(self._scope, pr.Module) \
                        and not isinstance(self._scope, pr.SubModule):
                    self._scope = self.module

        # check again for unindented stuff. this is true for syntax
        # errors. only check for names, because thats relevant here. If
        # some docstrings are not indented, I don't care.
        while self.start_pos[1] <= self._scope.start_pos[1] \
                and (token_type == tokenize.NAME or tok in ['(', '['])\
                and self._scope != self.module:
            self._scope.end_pos = self.start_pos
            self._scope = self._scope.parent
            if isinstance(self._scope, pr.Module) \
                    and not isinstance(self._scope, pr.SubModule):
                self._scope = self.module

        use_as_parent_scope = self.top_module if isinstance(self._scope,
                                    pr.SubModule) else self._scope
        first_pos = self.start_pos
        if tok == 'def':
            func = self._parse_function()
            if func is None:
                debug.warning("function: syntax error@%s" %
                              self.start_pos[0])
                continue
            self.freshscope = True
            self._scope = self._scope.add_scope(func, self._decorators)
            self._decorators = []
        elif tok == 'class':
            cls = self._parse_class()
            if cls is None:
                debug.warning("class: syntax error@%s" % self.start_pos[0])
                continue
            self.freshscope = True
            self._scope = self._scope.add_scope(cls, self._decorators)
            self._decorators = []
        # import stuff
        elif tok == 'import':
            imports = self._parse_import_list()
            for count, (m, alias, defunct) in enumerate(imports):
                e = (alias or m or self).end_pos
                # the last import ends where the parser currently is
                end_pos = self.end_pos if count + 1 == len(imports) else e
                i = pr.Import(self.module, first_pos, end_pos, m,
                              alias, defunct=defunct)
                self._check_user_stmt(i)
                self._scope.add_import(i)
            if not imports:
                i = pr.Import(self.module, first_pos, self.end_pos, None,
                              defunct=True)
                self._check_user_stmt(i)
            self.freshscope = False
        elif tok == 'from':
            defunct = False
            # take care for relative imports
            relative_count = 0
            while True:
                token_type, tok = self.next()
                if tok != '.':
                    break
                relative_count += 1
            # the from import
            mod, token_type, tok = self._parse_dot_name(self._current)
            if str(mod) == 'import' and relative_count:
                # `from . import x`: the keyword was parsed as module name
                self._gen.push_last_back()
                tok = 'import'
                mod = None
            if not mod and not relative_count or tok != "import":
                debug.warning("from: syntax error@%s" % self.start_pos[0])
                defunct = True
                if tok != 'import':
                    self._gen.push_last_back()
            names = self._parse_import_list()
            for count, (name, alias, defunct2) in enumerate(names):
                star = name is not None and name.names[0] == '*'
                if star:
                    name = None
                e = (alias or name or self).end_pos
                end_pos = self.end_pos if count + 1 == len(names) else e
                i = pr.Import(self.module, first_pos, end_pos, name,
                              alias, mod, star, relative_count,
                              defunct=defunct or defunct2)
                self._check_user_stmt(i)
                self._scope.add_import(i)
            self.freshscope = False
        # loops
        elif tok == 'for':
            set_stmt, tok = self._parse_statement(added_breaks=['in'],
                                                  names_are_set_vars=True)
            if tok == 'in':
                statement, tok = self._parse_statement()
                if tok == ':':
                    s = [] if statement is None else [statement]
                    f = pr.ForFlow(self.module, s, first_pos, set_stmt)
                    self._scope = self._scope.add_statement(f)
                else:
                    # The message is formatted here, like in all other
                    # warnings of this method.
                    debug.warning('syntax err, for flow started @%s'
                                  % self.start_pos[0])
                    if statement is not None:
                        statement.parent = use_as_parent_scope
                    if set_stmt is not None:
                        set_stmt.parent = use_as_parent_scope
            else:
                debug.warning('syntax err, for flow incomplete @%s'
                              % self.start_pos[0])
                if set_stmt is not None:
                    set_stmt.parent = use_as_parent_scope

        elif tok in ['if', 'while', 'try', 'with'] + extended_flow:
            added_breaks = []
            command = tok
            if command in ['except', 'with']:
                added_breaks.append(',')
            # multiple inputs because of with
            inputs = []
            first = True
            while first or command == 'with' \
                    and tok not in [':', '\n']:
                statement, tok = \
                    self._parse_statement(added_breaks=added_breaks)
                if command == 'except' and tok == ',':
                    # the except statement defines a var
                    # this is only true for python 2
                    n, token_type, tok = self._parse_dot_name()
                    # `statement` is None for broken code like
                    # `except, e:`; there is nothing to attach the name
                    # to in that case.
                    if n and statement is not None:
                        n.parent = statement
                        statement.as_names.append(n)
                if statement:
                    inputs.append(statement)
                first = False

            if tok == ':':
                f = pr.Flow(self.module, command, inputs, first_pos)
                if command in extended_flow:
                    # the last statement has to be another part of
                    # the flow statement, because a dedent releases the
                    # main scope, so just take the last statement.
                    try:
                        s = self._scope.statements[-1].set_next(f)
                    except (AttributeError, IndexError):
                        # If set_next doesn't exist, just add it.
                        s = self._scope.add_statement(f)
                else:
                    s = self._scope.add_statement(f)
                self._scope = s
            else:
                for i in inputs:
                    i.parent = use_as_parent_scope
                debug.warning('syntax err, flow started @%s'
                              % self.start_pos[0])
        # returns
        elif tok in ['return', 'yield']:
            s = self.start_pos
            self.freshscope = False
            # add returns to the scope
            func = self._scope.get_parent_until(pr.Function)
            if tok == 'yield':
                func.is_generator = True

            stmt, tok = self._parse_statement()
            if stmt is not None:
                stmt.parent = use_as_parent_scope
            try:
                func.returns.append(stmt)
            except AttributeError:
                debug.warning('return in non-function')
            else:
                # An empty return is stored as None and has no position;
                # it must not be reported as "return in non-function".
                if stmt is not None:
                    # start_pos is the one of the return statement
                    stmt.start_pos = s
        # globals
        elif tok == 'global':
            stmt, tok = self._parse_statement(self._current)
            if stmt:
                self._scope.add_statement(stmt)
                for t in stmt.token_list:
                    if isinstance(t, pr.Name):
                        # add the global to the top, because there it is
                        # important.
                        self.module.add_global(t)
        # decorator
        elif tok == '@':
            stmt, tok = self._parse_statement()
            if stmt is not None:
                self._decorators.append(stmt)
        elif tok == 'pass':
            continue
        elif tok == 'assert':
            stmt, tok = self._parse_statement()
            if stmt is not None:
                stmt.parent = use_as_parent_scope
                self._scope.asserts.append(stmt)
        # default
        elif token_type in [tokenize.NAME, tokenize.STRING,
                            tokenize.NUMBER] \
                or tok in statement_toks:
            # this is the main part - a name can be a function or a
            # normal var, which can follow anything. but this is done
            # by the statement parser.
            stmt, tok = self._parse_statement(self._current)
            if stmt:
                self._scope.add_statement(stmt)
            self.freshscope = False
        else:
            if token_type not in [tokenize.COMMENT, tokenize.INDENT,
                                  tokenize.NEWLINE, tokenize.NL]:
                debug.warning('token not classified', tok, token_type,
                              self.start_pos[0])
            continue
        # Anything that got here was real code, so strings that follow
        # may be docstrings again.
        self.no_docstr = False

440
jedi/parser/fast.py Normal file
View File

@@ -0,0 +1,440 @@
"""
Basically a parser that is faster, because it tries to parse only parts and if
anything changes, it only reparses the changed parts. But because it's not
finished (and still not working as I want), I won't document it any further.
"""
import re
from jedi._compatibility import use_metaclass
from jedi import settings
from jedi.parser import Parser
from jedi.parser import representation as pr
from jedi import cache
from jedi import common
SCOPE_CONTENTS = ['asserts', 'subscopes', 'imports', 'statements', 'returns']
class Module(pr.Simple, pr.Module):
    """
    A module that is backed by a list of parsers. Attributes it does not
    have itself are looked up on the module of the first parser.
    """
    def __init__(self, parsers):
        super(Module, self).__init__(self, (1, 0))
        self.parsers = parsers
        self.reset_caches()

        self.start_pos = (1, 0)
        self.end_pos = (None, None)

    def reset_caches(self):
        """ This module does a whole lot of caching, because it uses different
        parsers. """
        self._used_names = None
        for parser in self.parsers:
            parser.user_scope = None
            parser.user_stmt = None

    def __getattr__(self, name):
        # double underscore names are never forwarded
        if name.startswith('__'):
            raise AttributeError('Not available!')
        first_module = self.parsers[0].module
        return getattr(first_module, name)

    @property
    def used_names(self):
        """ The merged (and cached) ``used_names`` of all parsers. """
        if self._used_names is not None:
            return self._used_names

        merged = {}
        for parser in self.parsers:
            for key, statement_set in parser.module.used_names.items():
                merged.setdefault(key, set()).update(statement_set)

        self._used_names = merged
        return merged

    def __repr__(self):
        details = (type(self).__name__, self.name,
                   self.start_pos[0], self.end_pos[0])
        return "<%s: %s@%s-%s>" % details
class CachedFastParser(type):
    """ This is a metaclass for caching `FastParser`. """
    def __call__(self, source, module_path=None, user_position=None):
        # With the fast parser switched off, a normal parser is returned.
        if not settings.fast_parser:
            return Parser(source, module_path, user_position)

        cached = cache.parser_cache.get(module_path, None)
        if cached is not None and not isinstance(cached.parser, Parser):
            # `cached` is a `cache.ParserCacheItem`; reuse its parser and
            # just feed it the new source.
            fast_parser = cached.parser
            fast_parser.update(source, user_position)
            return fast_parser

        return super(CachedFastParser, self).__call__(source, module_path,
                                                      user_position)
class ParserNode(object):
    """
    A node in the tree of parsers. It remembers the parsed code (with its
    hash) and a copy of the scope contents, so that they can be restored.
    """
    def __init__(self, parser, code, parent=None):
        self.parent = parent
        self.code = code
        self.hash = hash(code)

        self.children = []
        # must be created before new things are added to it.
        self.save_contents(parser)

    def save_contents(self, parser):
        """ Use ``parser`` for this node and copy its scope contents. """
        self.parser = parser

        module = parser.module
        try:
            # with fast_parser we have either 1 subscope or only statements.
            scope = module.subscopes[0]
        except IndexError:
            scope = module
        self.content_scope = scope

        self._contents = dict((key, list(getattr(scope, key)))
                              for key in SCOPE_CONTENTS)
        self._is_generator = scope.is_generator

        self.old_children = self.children
        self.children = []

    def reset_contents(self):
        """ Restore the saved scope contents, for all children as well. """
        scope = self.content_scope
        for key, saved in self._contents.items():
            setattr(scope, key, list(saved))
        scope.is_generator = self._is_generator
        self.parser.user_scope = self.parser.module

        if self.parent is None:
            # Global vars of the first one can be deleted, in the global scope
            # they make no sense.
            self.parser.module.global_vars = []

        for child in self.children:
            child.reset_contents()

    def parent_until_indent(self, indent=None):
        """
        Walk up the parents while the nodes are indented at least as far
        as ``indent`` (without ``indent`` up to the root) and clear the
        ``old_children`` of every node that is left.
        """
        leave = indent is None or (self.indent >= indent and self.parent)
        if not leave:
            return self

        self.old_children = []
        if self.parent is None:
            return self
        return self.parent.parent_until_indent(indent)

    @property
    def indent(self):
        """ The column of the first element of the parsed code. """
        if not self.parent:
            return 0

        module = self.parser.module
        for key in ('subscopes', 'statements', 'imports'):
            try:
                first = getattr(module, key)[0]
            except IndexError:
                continue
            return first.start_pos[1]

        # empty returns are stored as None and have no position
        returns = [r for r in module.returns if r is not None]
        if returns:
            return returns[0].start_pos[1]
        return self.parent.indent + 1

    def _set_items(self, parser, set_parent=False):
        # insert parser objects into current structure
        scope = self.content_scope
        for key in SCOPE_CONTENTS:
            content = getattr(scope, key)
            items = getattr(parser.module, key)
            if set_parent:
                new_parent = scope.use_as_parent
                for item in items:
                    if item is None:
                        continue  # happens with empty returns
                    item.parent = new_parent
                    if isinstance(item, (pr.Function, pr.Class)):
                        for decorator in item.decorators:
                            decorator.parent = new_parent
            content += items

        # global_vars are collected in the root node
        root = self
        while root.parent is not None:
            root = root.parent
        root.parser.module.global_vars += parser.module.global_vars

        scope.is_generator |= parser.module.is_generator

    def add_node(self, node, set_parent=False):
        """Adding a node means adding a node that was already added earlier"""
        self.children.append(node)
        self._set_items(node.parser, set_parent=set_parent)
        node.old_children, node.children = node.children, []
        return node

    def add_parser(self, parser, code):
        """ Wrap ``parser`` in a new child node and add it. """
        child = ParserNode(parser, code, self)
        return self.add_node(child, True)
class FastParser(use_metaclass(CachedFastParser)):
def __init__(self, code, module_path=None, user_position=None):
    """ Set up the empty parser structure and parse ``code``. """
    # set values like `pr.Module`.
    self.module_path = module_path
    self.user_position = user_position

    self.parsers = []
    self.module = Module(self.parsers)
    self.current_node = None
    self._user_scope = None
    self.reset_caches()

    try:
        self._parse(code)
    except:
        # FastParser is cached, be careful with exceptions: don't keep
        # half finished parsers around.
        del self.parsers[:]
        raise
@property
def user_scope(self):
    """
    The cached user scope. It is taken from the last parser that has one
    which is no sub module; the module of this parser is the fallback.
    """
    if self._user_scope is None:
        for parser in self.parsers:
            scope = parser.user_scope
            if scope and not isinstance(scope, pr.SubModule):
                self._user_scope = scope

    found = self._user_scope
    if found is None or isinstance(found, pr.SubModule):
        self._user_scope = self.module
    return self._user_scope
@property
def user_stmt(self):
    """ The cached user statement of the first parser that has one. """
    if self._user_stmt is not None:
        return self._user_stmt

    for parser in self.parsers:
        stmt = parser.user_stmt
        if stmt:
            self._user_stmt = stmt
            break
    return self._user_stmt
def update(self, code, user_position=None):
    """ Parse ``code`` again, with a new user position and fresh caches. """
    self.user_position = user_position
    self.reset_caches()

    try:
        self._parse(code)
    except:
        # FastParser is cached, be careful with exceptions: don't keep
        # half finished parsers around.
        del self.parsers[:]
        raise
def _scan_user_scope(self, sub_module):
    """
    Scan with self.user_position: return the innermost scope below
    ``sub_module`` that contains the user position, or None.
    """
    position = self.user_position
    for candidate in sub_module.statements + sub_module.subscopes:
        if not isinstance(candidate, pr.Scope):
            continue
        if candidate.start_pos <= position <= candidate.end_pos:
            # prefer a nested scope over the candidate itself
            return self._scan_user_scope(candidate) or candidate
    return None
def _split_parts(self, code):
    """
    Split the code into different parts. This makes it possible to parse
    each part separately and therefore cache parts of the file and not
    everything.

    The lines of ``code`` are also stored in ``self._lines``.

    :param code: The source code to split.
    :return: The list of code parts (strings of joined lines).
    """
    def add_part():
        # Flush the collected lines: either as a new part or, if
        # `add_to_last` is set, appended to the previous part.
        txt = '\n'.join(current_lines)
        if txt:
            if add_to_last and parts:
                parts[-1] += '\n' + txt
            else:
                parts.append(txt)
            current_lines[:] = []

    # matches lines starting with def, class, a decorator or a flow keyword
    r_keyword = '^[ \t]*(def|class|@|%s)' % '|'.join(common.FLOWS)

    self._lines = code.splitlines()
    current_lines = []
    parts = []
    is_decorator = False
    current_indent = 0
    old_indent = 0
    # True right after a keyword line, until the indentation of the
    # following line has been recorded.
    new_indent = False
    in_flow = False
    add_to_last = False
    # All things within flows are simply being ignored.
    for i, l in enumerate(self._lines):
        # check for dedents
        m = re.match('^([\t ]*)(.?)', l)
        indent = len(m.group(1))
        if m.group(2) in ['', '#']:
            current_lines.append(l)  # just ignore comments and blank lines
            continue

        if indent < current_indent:  # -> dedent
            current_indent = indent
            new_indent = False
            if not in_flow or indent < old_indent:
                add_part()
                add_to_last = False
            in_flow = False
        elif new_indent:
            current_indent = indent
            new_indent = False

        # Check lines for functions/classes and split the code there.
        if not in_flow:
            m = re.match(r_keyword, l)
            if m:
                in_flow = m.group(1) in common.FLOWS
                if not is_decorator and not in_flow:
                    add_part()
                    add_to_last = False
                is_decorator = '@' == m.group(1)
                if not is_decorator:
                    old_indent = current_indent
                    current_indent += 1  # it must be higher
                    new_indent = True
            elif is_decorator:
                # a decorator that is not followed by a keyword line: the
                # lines are added to the previous part
                is_decorator = False
                add_to_last = True

        current_lines.append(l)
    # flush whatever is left after the last line
    add_part()

    return parts
def _parse(self, code):
    """
    Split ``code`` into parts and get a parser for each of them, either a
    new one or one of an earlier run (see ``_get_parser``). The parsers
    are collected in ``self.parsers`` and arranged as a tree of
    ``ParserNode`` objects below ``self.current_node``.

    :type code: str
    """
    def empty_parser():
        # a parser for empty code, the returned node is not needed
        new, temp = self._get_parser('', '', 0, [], False)
        return new

    parts = self._split_parts(code)
    self.parsers[:] = []

    line_offset = 0
    start = 0
    p = None
    is_first = True

    for code_part in parts:
        lines = code_part.count('\n') + 1
        # Parts that start before the end of the last parser are skipped.
        if is_first or line_offset >= p.end_pos[0]:
            indent = len(re.match(r'[ \t]*', code_part).group(0))
            if is_first and self.current_node is not None:
                nodes = [self.current_node]
            else:
                nodes = []
            if self.current_node is not None:

                self.current_node = \
                    self.current_node.parent_until_indent(indent)
                nodes += self.current_node.old_children

            # check if code_part has already been parsed
            # print '#'*45,line_offset, p and p.end_pos, '\n', code_part
            p, node = self._get_parser(code_part, code[start:],
                                       line_offset, nodes, not is_first)

            # The actual used code_part is different from the given code
            # part, because of docstrings for example there's a chance that
            # splits are wrong.
            used_lines = self._lines[line_offset:p.end_pos[0]]
            code_part_actually_used = '\n'.join(used_lines)

            if is_first and p.module.subscopes:
                # special case, we cannot use a function subscope as a
                # base scope, subscopes would save all the other contents
                new = empty_parser()
                if self.current_node is None:
                    self.current_node = ParserNode(new, '')
                else:
                    self.current_node.save_contents(new)
                self.parsers.append(new)
                is_first = False

            if is_first:
                if self.current_node is None:
                    self.current_node = ParserNode(p, code_part_actually_used)
                else:
                    self.current_node.save_contents(p)
            else:
                if node is None:
                    self.current_node = \
                        self.current_node.add_parser(p, code_part_actually_used)
                else:
                    self.current_node = self.current_node.add_node(node)

            # If the parser found no user scope of its own (or only its
            # sub module), use the content scope of the parent node.
            if self.current_node.parent and (isinstance(p.user_scope,
                    pr.SubModule) or p.user_scope is None) \
                    and self.user_position \
                    and p.start_pos <= self.user_position < p.end_pos:
                p.user_scope = self.current_node.parent.content_scope

            self.parsers.append(p)

            is_first = False
        #else:
            #print '#'*45, line_offset, p.end_pos, 'theheck\n', repr(code_part)

        line_offset += lines
        start += len(code_part) + 1  # +1 for newline

    if self.parsers:
        self.current_node = self.current_node.parent_until_indent()
    else:
        # there has to be at least one parser
        self.parsers.append(empty_parser())

    self.module.end_pos = self.parsers[-1].end_pos

    # print(self.parsers[0].module.get_code())
    del code
def _get_parser(self, code, parser_code, line_offset, nodes, no_docstr):
    """
    Return a parser for ``code``. If one of ``nodes`` holds the same
    code, its parser is reused, otherwise ``parser_code`` is parsed with
    a new ``Parser``.

    :param code: The code part that is used to look up existing nodes.
    :param parser_code: The code that is handed to a newly created parser.
    :param line_offset: The line offset of the code part.
    :param nodes: Candidate nodes for reuse; a reused node is removed from
        this list.
    :param no_docstr: Passed on to a newly created ``Parser``.
    :return: Tuple of the parser and the reused node (None if a new parser
        was created).
    """
    h = hash(code)
    hashes = [n.hash for n in nodes]
    node = None
    try:
        index = hashes.index(h)
        if nodes[index].code != code:
            # same hash, but different code: handle it like "not found"
            raise ValueError()
    except ValueError:
        p = Parser(parser_code, self.module_path,
                   self.user_position, offset=(line_offset, 0),
                   is_fast_parser=True, top_module=self.module,
                   no_docstr=no_docstr)
        p.module.parent = self.module
    else:
        if nodes[index] != self.current_node:
            # `nodes` may start with the current node, which is not part
            # of `old_children`, so the index has to be shifted by one.
            offset = int(nodes[0] == self.current_node)
            self.current_node.old_children.pop(index - offset)
        node = nodes.pop(index)
        p = node.parser
        m = p.module
        # adjust the offset so that the module starts at `line_offset + 1`
        m.line_offset += line_offset + 1 - m.start_pos[0]
        if self.user_position is not None and \
                m.start_pos[0] <= self.user_position[0] <= m.end_pos[0]:
            # It's important to take care of the whole user
            # positioning stuff, if no reparsing is being done.
            p.user_stmt = m.get_statement_for_position(
                self.user_position, include_imports=True)
            if p.user_stmt:
                p.user_scope = p.user_stmt.parent
            else:
                p.user_scope = self._scan_user_scope(m) or m

    return p, node
def reset_caches(self):
    """
    Forget the cached user scope/statement, reset the caches of the
    module and restore the contents of the current node, if there is one.
    """
    self._user_scope = None
    self._user_stmt = None
    self.module.reset_caches()

    node = self.current_node
    if node is None:
        return
    node.reset_contents()

File diff suppressed because it is too large Load Diff