# jedi-fork/jedi/parser/__init__.py
"""
The ``Parser`` tries to convert the available Python code in an easy to read
format, something like an abstract syntax tree. The classes who represent this
tree, are sitting in the :mod:`jedi.parser.representation` module.
The Python module ``tokenize`` is a very important part in the ``Parser``,
because it splits the code into different words (tokens). Sometimes it looks a
bit messy. Sorry for that! You might ask now: "Why didn't you use the ``ast``
module for this? Well, ``ast`` does a very good job understanding proper Python
code, but fails to work as soon as there's a single line of broken code.
There's one important optimization that needs to be known: Statements are not
being parsed completely. ``Statement`` is just a representation of the tokens
within the statement. This lowers memory usage and cpu time and reduces the
complexity of the ``Parser`` (there's another parser sitting inside
``Statement``, which produces ``Array`` and ``Call``).
"""
import keyword
import logging
from jedi._compatibility import next, unicode
from jedi import debug
from jedi import common
from jedi.parser import representation as pr
from jedi.parser import tokenize
from jedi.parser import pytree
from jedi.parser.pgen2 import Driver
from jedi.parser import pgen2

OPERATOR_KEYWORDS = 'and', 'for', 'if', 'else', 'in', 'is', 'lambda', 'not', 'or'
# Not used yet. In the future I intend to add something like KeywordStatement
STATEMENT_KEYWORDS = 'assert', 'del', 'global', 'nonlocal', 'raise', \
    'return', 'yield', 'pass', 'continue', 'break'


class Parser(object):
"""
This class is used to parse a Python file, it then divides them into a
class structure of different scopes.
:param source: The codebase for the parser.
:type source: str
:param module_path: The path of the module in the file system, may be None.
:type module_path: str
:param no_docstr: If True, a string at the beginning is not a docstr.
:param top_module: Use this module as a parent instead of `self.module`.
"""
    def __init__(self, source, module_path=None, no_docstr=False,
                 tokenizer=None, top_module=None):
        if not source.endswith('\n'):
            source += '\n'

        _ast_mapping = {
            'expr_stmt': pr.ExprStmt,
            'classdef': pr.Class,
            'funcdef': pr.Function,
            'file_input': pr.SubModule,
            'import_name': pr.ImportName,
            'import_from': pr.ImportFrom,
            'break_stmt': pr.KeywordStatement,
            'continue_stmt': pr.KeywordStatement,
            'return_stmt': pr.ReturnStmt,
            'raise_stmt': pr.KeywordStatement,
            'yield_expr': pr.YieldExpr,
            'del_stmt': pr.KeywordStatement,
            'pass_stmt': pr.KeywordStatement,
            'global_stmt': pr.GlobalStmt,
            'nonlocal_stmt': pr.KeywordStatement,
            'assert_stmt': pr.KeywordStatement,
            'if_stmt': pr.IfStmt,
            'with_stmt': pr.WithStmt,
            'for_stmt': pr.ForStmt,
            'while_stmt': pr.WhileStmt,
            'try_stmt': pr.TryStmt,
            'comp_for': pr.CompFor,
            'decorator': pr.Decorator,
        }
        self._ast_mapping = dict((getattr(pytree.python_symbols, k), v)
                                 for k, v in _ast_mapping.items())

        self.global_names = []
        #if self.options["print_function"]:
        #    python_grammar = pygram.python_grammar_no_print_statement
        #else:
        self.used_names = {}
        self.scope_names_stack = [{}]

        logger = logging.getLogger("Jedi-Parser")
        d = Driver(pytree.python_grammar, self.convert_node, self.convert_leaf,
                   self.error_recovery, logger=logger)
        self.module = d.parse_string(source).get_parent_until()
        self.module.used_names = self.used_names
        self.module.path = module_path
        self.module.set_global_names(self.global_names)
    def convert_node(self, grammar, type, children):
        """
        Convert raw node information to a Node instance.

        This is passed to the parser driver, which calls it whenever a
        reduction of a grammar rule produces a new complete node, so that
        the tree is built strictly bottom-up.
        """
        #print(type, children, pytree.type_repr(type))
        try:
            new_node = self._ast_mapping[type](children)
        except KeyError:
            new_node = pr.Node(type, children)

        # We need to check raw_node always, because the same node can be
        # returned by convert multiple times.
        if type == pytree.python_symbols.global_stmt:
            self.global_names += new_node.get_defined_names()
        elif isinstance(new_node, (pr.ClassOrFunc, pr.Module)) \
                and type in (pytree.python_symbols.funcdef,
                             pytree.python_symbols.classdef,
                             pytree.python_symbols.file_input):
            # scope_names_stack handling
            scope_names = self.scope_names_stack.pop()
            if isinstance(new_node, pr.ClassOrFunc):
                n = new_node.name
                scope_names[n.value].remove(n)
                # Set the func name of the current node
                arr = self.scope_names_stack[-1].setdefault(n.value, [])
                arr.append(n)
            new_node.names_dict = scope_names
        elif isinstance(new_node, pr.CompFor):
            # The name definitions of comprehensions shouldn't be part of the
            # current scope. They are part of the comprehension scope.
            for n in new_node.get_defined_names():
                self.scope_names_stack[-1][n.value].remove(n)
        return new_node

    def convert_leaf(self, grammar, type, value, prefix, start_pos):
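        # Counterpart of convert_node for terminals: wraps a single token in
        # the matching leaf class and, for plain names, records the occurrence
        # both in `used_names` and in the names dict of the innermost open
        # scope (the top of `scope_names_stack`).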
        #print('leaf', value, pytree.type_repr(type))
        if type == tokenize.NAME:
            if value in grammar.keywords:
                if value in ('def', 'class'):
                    self.scope_names_stack.append({})
                return pr.Keyword(value, start_pos, prefix)
            else:
                name = pr.Name(value, start_pos, prefix)
                # Keep a listing of all used names
                arr = self.used_names.setdefault(name.value, [])
                arr.append(name)
                arr = self.scope_names_stack[-1].setdefault(name.value, [])
                arr.append(name)
                return name
        elif type == tokenize.STRING:
            return pr.String(value, start_pos, prefix)
        elif type == tokenize.NUMBER:
            return pr.Number(value, start_pos, prefix)
        elif type in (tokenize.NEWLINE, tokenize.ENDMARKER):
            return pr.Whitespace(value, start_pos, prefix)
        else:
            return pr.Operator(value, start_pos, prefix)
    def error_recovery(self, grammar, stack, type, value):
        """
        This parser is written in a dynamic way, meaning it allows using
        different grammars (even non-Python ones). However, error recovery
        is written purely for Python.
        """
        # For now just discard everything that is not a suite or
        # file_input, if we detect an error.
        for i, (dfa, state, (type, _)) in reversed(list(enumerate(stack))):
            # `suite` can sometimes be only simple_stmt, not stmt.
            if type in (grammar.symbol2number['file_input'],
                        grammar.symbol2number['suite']):
                index = i
                break
        self._stack_removal(stack, index + 1)

        # No success finding a transition
        #raise ParseError("bad input", type, value, context)

    def _stack_removal(self, stack, start_index):
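        # Drops every parser-stack entry from `start_index` on and
        # un-registers any pr.Name leaves those dead entries contributed, so
        # error recovery doesn't leave stale entries in the name bookkeeping.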
        def clear_names(children):
            for c in children:
                try:
                    clear_names(c.children)
                except AttributeError:
                    if isinstance(c, pr.Name):
                        try:
                            self.scope_names_stack[-1][c.value].remove(c)
                            self.used_names[c.value].remove(c)
                        except ValueError:
                            pass  # This may happen with CompFor.

        for dfa, state, node in stack[start_index:]:
            clear_names(children=node[1])
        stack[start_index:] = []

    def __init__old__(self, source, module_path=None, no_docstr=False,
                      tokenizer=None, top_module=None):
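        # Apparently the legacy, tokenizer-driven constructor, kept around
        # while the pgen2-based __init__ above is developed; nothing in this
        # module calls it.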
        self.no_docstr = no_docstr

        tokenizer = tokenizer or tokenize.source_tokens(source)
        self._gen = PushBackTokenizer(tokenizer)

        # initialize global Scope
        start_pos = next(self._gen).start_pos
        self._gen.push_last_back()
        self.module = pr.SubModule(module_path, start_pos, top_module)
        self._scope = self.module
        self._top_module = top_module or self.module

        try:
            self._parse()
        except (common.MultiLevelStopIteration, StopIteration):
            # StopIteration needs to be caught as well, because Python 2 has
            # a strange way of handling it; sometimes it isn't caught at all.
            # Just ignore it.
            pass

        # on finish, set end_pos correctly
        s = self._scope
        while s is not None:
            s.end_pos = self._gen.current.end_pos
            s = s.parent

        # clean up unused decorators
        for d in self._decorators:
            # Set a parent for unused decorators to avoid AttributeErrors
            # later, e.g. through `self.module.used_names`.
            d.parent = self.module

        self.module.end_pos = self._gen.current.end_pos
        if self._gen.current.type == tokenize.NEWLINE:
            # This case is only relevant with the FastTokenizer, because
            # otherwise there's always an ENDMARKER.
            # We added a newline before, so we need to "remove" it again.
            #
            # NOTE: end_pos should be kept as-is if the last token of the
            # source is a NEWLINE; otherwise the newline at the end of the
            # source is not included in ParserNode.code.
            if self._gen.previous.type != tokenize.NEWLINE:
                self.module.end_pos = self._gen.previous.end_pos

        del self._gen

    def __repr__(self):
        return "<%s: %s>" % (type(self).__name__, self.module)

    def _check_user_stmt(self, simple):
        # Despite the name, this does no user-specific checking; it just
        # updates used_names.
        for tok_name in self.module.temp_used_names:
            try:
                self.module.used_names[tok_name].add(simple)
            except KeyError:
                self.module.used_names[tok_name] = set([simple])
        self.module.temp_used_names = []

        if isinstance(simple, pr.Statement):
            for name, calls in simple.get_names_dict().items():
                self._scope.add_name_calls(name, calls)


class PushBackTokenizer(object):
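    """
    Wraps a token generator so that the last token returned can be pushed
    back and yielded again on the next call; used, for example, to peek at
    the first token, as ``__init__old__`` above does.
    """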
    def __init__(self, tokenizer):
        self._tokenizer = tokenizer
        self._push_backs = []
        self.current = self.previous = tokenize.Token(None, '', (0, 0))

    def push_last_back(self):
        self._push_backs.append(self.current)

    def next(self):
        """ Python 2 compatibility """
        return self.__next__()

    def __next__(self):
        if self._push_backs:
            return self._push_backs.pop(0)

        previous = self.current
        self.current = next(self._tokenizer)
        self.previous = previous
        return self.current

    def __iter__(self):
        return self
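

# A minimal usage sketch for PushBackTokenizer (assuming, as __init__old__
# above does, that tokenize.source_tokens() yields Token objects): peek at
# the first token, then hand it back so the next consumer sees the full
# stream.
#
#     gen = PushBackTokenizer(tokenize.source_tokens('x = 1\n'))
#     first = next(gen)      # consume the first token
#     gen.push_last_back()   # make it available again
#     assert next(gen) is first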