forked from VimPlug/jedi

Starting to create a way of making context-sensitive completions.

This involves working heavily with the pgen2 parser. We use its stack to check for all possible tokens/keywords.
Dave Halter
2016-05-23 18:11:44 +02:00
parent 36a135c347
commit d4a10929e2
8 changed files with 264 additions and 59 deletions
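The approach, roughly: tokenize the source only up to the cursor, feed it to the pgen2 parser without an ENDMARKER so parsing halts right at that point, and then read the arcs of the topmost DFA state on the parser stack to see which keywords and token types may follow. A minimal sketch of that last step is below; it mirrors get_possible_completion_types from this commit, and attribute names such as grammar.keywords, grammar.tokens, grammar.dfas and grammar.labels follow pgen2's conventions, so treat them as assumptions rather than a fixed API.

def possible_next_labels(grammar, stack):
    """Collect the keyword strings and token types allowed after `stack`."""
    inv_keywords = dict((value, key) for key, value in grammar.keywords.items())
    inv_tokens = dict((value, key) for key, value in grammar.tokens.items())
    keywords, token_types = [], []

    def add(label_index):
        if label_index in inv_tokens:
            token_types.append(inv_tokens[label_index])
        elif label_index in inv_keywords:
            keywords.append(inv_keywords[label_index])
        else:
            # A nonterminal label: anything in its first set could start it.
            symbol_number, _ = grammar.labels[label_index]
            _, first = grammar.dfas[symbol_number]
            for first_label_index in first:
                add(first_label_index)

    dfa, state, _node = stack[-1]  # topmost entry of the pgen2 parser stack
    states, _first = dfa
    for label_index, _new_state in states[state]:  # outgoing arcs of that state
        add(label_index)
    return keywords, token_types

A completion engine can then offer the returned keywords directly and fall back to ordinary name completion whenever token.NAME is among the allowed token types, which is what the new _get_context_completions method does.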

View File

@@ -186,7 +186,7 @@ class Script(object):
return new_defs
goto_path = self._user_context.get_path_under_cursor()
context = self._user_context.get_context()
context = self._user_context.get_reverse_context()
definitions = []
if next(context) in ('class', 'def'):
definitions = [self._evaluator.wrap(self._parser.user_scope())]
@@ -253,7 +253,7 @@ class Script(object):
return definitions
goto_path = self._user_context.get_path_under_cursor()
context = self._user_context.get_context()
context = self._user_context.get_reverse_context()
user_stmt = self._parser.user_stmt()
user_scope = self._parser.user_scope()

View File

@@ -1,6 +1,7 @@
from itertools import chain
import re
from jedi.parser import token
from jedi.parser import tree
from jedi import debug
from jedi import settings
@@ -70,7 +71,7 @@ class Completion:
user_stmt = self._parser.user_stmt_with_whitespace()
completion_names = self.get_completions(user_stmt, completion_parts)
completion_names = self._get_context_completions(user_stmt, completion_parts)
if not completion_parts.has_dot:
call_signatures = self._call_signatures_method()
@@ -85,30 +86,73 @@ class Completion:
x.name.startswith('_'),
x.name.lower()))
def get_completions(self, user_stmt, completion_parts):
# TODO this closure is ugly. it also doesn't work with
# simple_complete (used for Interpreter), somehow redo.
def _get_context_completions(self, user_stmt, completion_parts):
"""
Analyzes the context that a completion is made in and decides what to
return.
Could provide context for:
- from/import completions
- as nothing
- statements that always start on a new line
'import', 'class', 'def', 'try', 'except',
'finally', 'while', 'with'
- statements that always start on a new line, or after ';' or ':'
return raise continue break del pass global nonlocal assert
- def/class nothing
- async for/def/with
- \n@/del/return/raise no keyword (after keyword no keyword)?
- after keyword
- continue/break/pass nothing
- global/nonlocal search global
- after operator no keyword: return
- yield like return + after ( and =
- almost always ok
'and', 'for', 'if', 'else', 'in', 'is', 'lambda', 'not', 'or'
- after operations no keyword:
+ = * ** - etc. Maybe work with the parser state?
# hard:
- await
- yield from / raise from / from import difference
- In args: */**: no completion
- In params (also lambda): no completion before =
"""
module = self._evaluator.wrap(self._parser.module())
names, level, only_modules, unfinished_dotted = \
helpers.check_error_statements(module, self._pos)
grammar = self._evaluator.grammar
stack = helpers.get_stack_at_position(grammar, module, self._pos)
allowed_keywords, allowed_tokens = \
helpers.get_possible_completion_types(grammar, stack)
completion_names = list(self._get_keyword_completion_names(allowed_keywords))
if token.NAME in allowed_tokens:
# Differentiate between import names and other names.
completion_names += self._simple_complete(completion_parts)
completion_names = []
if names is not None:
imp_names = tuple(str(n) for n in names if n.end_pos < self._pos)
i = imports.Importer(self._evaluator, imp_names, module, level)
completion_names = i.completion_names(self._evaluator, only_modules)
return completion_names
# TODO this paragraph is necessary, but not sure it works.
context = self._user_context.get_context()
if not next(context).startswith('.'): # skip the path
if next(context) == 'from':
# The completion is just 'import' if a 'from ...' stands before it.
if unfinished_dotted:
return completion_names
else:
return [keywords.keyword(self._evaluator, 'import').name]
context = self._user_context.get_backwards_context_tokens()
x = next(context, None)
#print(x)
#if not x.string.startswith('.'): # skip the path
if next(context, None).string == 'from':
# The completion is just 'import' if a 'from ...' stands before it.
if unfinished_dotted:
return completion_names
else:
return [keywords.keyword(self._evaluator, 'import').name]
if isinstance(user_stmt, tree.Import):
module = self._parser.module()
completion_names += imports.completion_names(self._evaluator,
user_stmt, self._pos)
return completion_names
@@ -126,6 +170,10 @@ class Completion:
completion_names += self._simple_complete(completion_parts)
return completion_names
def _get_keyword_completion_names(self, keywords_):
# The parameter must not shadow the `keywords` module used below.
for k in keywords_:
yield keywords.keyword(self._evaluator, k).name
def _simple_complete(self, completion_parts):
if not completion_parts.path and not completion_parts.has_dot:
scope = self._parser.user_scope()

View File

@@ -4,8 +4,11 @@ Helpers for the API
import re
from collections import namedtuple
from jedi import common
from jedi.parser import tree as pt
from jedi.evaluate import imports
from jedi import parser
from jedi.parser import tokenize, token
CompletionParts = namedtuple('CompletionParts', ['path', 'has_dot', 'name'])
@@ -46,6 +49,83 @@ def check_error_statements(module, pos):
return None, 0, False, False
def get_code_until(code, start_pos, end_pos):
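"""Cut `code` (assumed to start at start_pos) down to everything before end_pos."""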
lines = common.splitlines(code)
line_difference = end_pos[0] - start_pos[0]
if line_difference == 0:
end_line_length = end_pos[1] - start_pos[1]
else:
end_line_length = end_pos[1]
if line_difference > len(lines) or end_line_length > len(lines[-1]):
raise ValueError("The end_pos seems to be after the code part.")
new_lines = lines[:line_difference] + [lines[-1][:end_line_length]]
return '\n'.join(new_lines)
def get_stack_at_position(grammar, module, pos):
"""
Returns the pgen2 parser stack at the given position. Its nodes carry grammar symbols such as import_from, xor_test or yield_stmt.
"""
for error_statement in module.error_statement_stacks:
if error_statement.first_pos < pos <= error_statement.next_start_pos:
code = error_statement.get_code()
code = get_code_until(code, error_statement.first_pos, pos)
break
else:
raise NotImplementedError
class EndMarkerReached(Exception):
pass
def tokenize_without_endmarker(code):
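"""Yield tokens, but raise EndMarkerReached instead of yielding the ENDMARKER."""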
for token_ in tokenize.source_tokens(code):
if token_[0] == token.ENDMARKER:
raise EndMarkerReached()
else:
yield token_
p = parser.Parser(grammar, code, tokenizer=tokenize_without_endmarker(code),
start_parsing=False)
try:
p.parse()
except EndMarkerReached:
return p.pgen_parser.stack
def get_possible_completion_types(grammar, stack):
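"""
Return a (keywords, token_types) pair of everything that could be a
syntactically valid next token, given the current parser stack.
"""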
def add_results(label_index):
try:
grammar_labels.append(inversed_tokens[label_index])
except KeyError:
try:
keywords.append(inversed_keywords[label_index])
except KeyError:
t, v = grammar.labels[label_index]
assert t >= 256
# See if it's a symbol and if we're in its first set
itsdfa = grammar.dfas[t]
itsstates, itsfirst = itsdfa
for first_label_index in itsfirst.keys():
add_results(first_label_index)
dfa, state, node = stack[-1]
states, first = dfa
arcs = states[state]
inversed_keywords = dict((v, k) for k, v in grammar.keywords.items())
inversed_tokens = dict((v, k) for k, v in grammar.tokens.items())
keywords = []
grammar_labels = []
for label_index, new_state in arcs:
add_results(label_index)
return keywords, grammar_labels
def importer_from_error_statement(error_statement, pos):
def check_dotted(children):
for name in children[::2]:

View File

@@ -24,7 +24,7 @@ from jedi.parser import token
from jedi.parser.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
STRING, OP, ERRORTOKEN)
from jedi.parser.pgen2.pgen import generate_grammar
from jedi.parser.pgen2.parse import PgenParser
from jedi.parser.pgen2.parse import PgenParser, token_to_ilabel
OPERATOR_KEYWORDS = 'and', 'for', 'if', 'else', 'in', 'is', 'lambda', 'not', 'or'
# Not used yet. In the future I intend to add something like KeywordStatement
@@ -60,12 +60,20 @@ def load_grammar(version='3.4'):
class ErrorStatement(object):
def __init__(self, stack, next_token, position_modifier, next_start_pos):
def __init__(self, stack, arcs, next_token, position_modifier, next_start_pos):
self.stack = stack
self.arcs = arcs
self._position_modifier = position_modifier
self.next_token = next_token
self._next_start_pos = next_start_pos
def __repr__(self):
return '<%s next: %s@%s>' % (
type(self).__name__,
repr(self.next_token),
self.next_start_pos
)
@property
def next_start_pos(self):
s = self._next_start_pos
@@ -81,6 +89,16 @@ class ErrorStatement(object):
first_type, nodes = self.stack[0]
return first_type
def is_a_valid_token(self, grammar, type_, value):
# The grammar has to be passed in, because ErrorStatement does not store it.
ilabel = token_to_ilabel(grammar, type_, value)
for i, newstate in self.arcs:
if ilabel == i:
return True
return False
def get_code(self):
return ''.join(node.get_code() for _, nodes in self.stack for node in nodes)
class ParserSyntaxError(object):
def __init__(self, message, position):
@@ -119,8 +137,10 @@ class Parser(object):
'lambdef_nocond': pt.Lambda,
}
def __init__(self, grammar, source, start, tokenizer=None):
start_number = grammar.symbol2number[start]
def __init__(self, grammar, source, start_symbol='file_input',
tokenizer=None, start_parsing=True):
# Todo Remove start_parsing (with False)
start_number = grammar.symbol2number[start_symbol]
self._used_names = {}
self._scope_names_stack = [{}]
@@ -131,27 +151,42 @@ class Parser(object):
# For the fast parser.
self.position_modifier = pt.PositionModifier()
added_newline = False
self._added_newline = False
# The Python grammar needs a newline at the end of each statement.
if not source.endswith('\n') and start == 'file_input':
if not source.endswith('\n') and start_symbol == 'file_input':
source += '\n'
added_newline = True
self._added_newline = True
p = PgenParser(grammar, self.convert_node, self.convert_leaf,
self.error_recovery, start_number)
self.pgen_parser = PgenParser(
grammar, self.convert_node, self.convert_leaf,
self.error_recovery, start_number
)
self._start_symbol = start_symbol
self._grammar = grammar
self._tokenizer = tokenizer
if tokenizer is None:
tokenizer = tokenize.source_tokens(source)
self._tokenizer = tokenize.source_tokens(source, use_exact_op_types=True)
self._parsed = p.parse(self._tokenize(tokenizer))
self._parsed = None
if start == 'file_input' != self._parsed.type:
if start_parsing:
self.parse()
def parse(self):
if self._parsed is not None:
return self._parsed
self._parsed = self.pgen_parser.parse(self._tokenize(self._tokenizer))
if self._start_symbol == 'file_input' != self._parsed.type:
# If there's only one statement, we get back a non-module. That's
# not what we want, we want a module, so we add it here:
self._parsed = self.convert_node(grammar,
grammar.symbol2number['file_input'],
self._parsed = self.convert_node(self._grammar,
self._grammar.symbol2number['file_input'],
[self._parsed])
if added_newline:
if self._added_newline:
self.remove_last_newline()
def get_parsed_node(self):
@@ -161,8 +196,6 @@ class Parser(object):
for typ, value, start_pos, prefix in tokenizer:
if typ == ERRORTOKEN:
raise ParseError
elif typ == OP:
typ = token.opmap[value]
yield typ, value, prefix, start_pos
def error_recovery(self, grammar, stack, typ, value, start_pos, prefix,
@@ -301,7 +334,7 @@ class ParserWithRecovery(Parser):
#if self.options["print_function"]:
# python_grammar = pygram.python_grammar_no_print_statement
#else:
super(ParserWithRecovery, self).__init__(grammar, source, 'file_input', tokenizer)
super(ParserWithRecovery, self).__init__(grammar, source, tokenizer=tokenizer)
self.module = self._parsed
self.module.used_names = self._used_names
@@ -309,7 +342,7 @@ class ParserWithRecovery(Parser):
self.module.global_names = self._global_names
self.module.error_statement_stacks = self._error_statement_stacks
def error_recovery(self, grammar, stack, typ, value, start_pos, prefix,
def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback):
"""
This parser is written in a dynamic way, meaning that this parser
@@ -345,7 +378,7 @@ class ParserWithRecovery(Parser):
stack[index]
#print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
self._stack_removal(grammar, stack, index + 1, value, start_pos)
self._stack_removal(grammar, stack, arcs, index + 1, value, start_pos)
if typ == INDENT:
# For every deleted INDENT we have to delete a DEDENT as well.
# Otherwise the parser will get into trouble and DEDENT too early.
@@ -366,7 +399,7 @@ class ParserWithRecovery(Parser):
# doesn't stop you from defining `continue` in a module, etc.
add_token_callback(typ, value, prefix, start_pos)
def _stack_removal(self, grammar, stack, start_index, value, start_pos):
def _stack_removal(self, grammar, stack, arcs, start_index, value, start_pos):
def clear_names(children):
for c in children:
try:
@@ -393,7 +426,7 @@ class ParserWithRecovery(Parser):
if nodes and nodes[0] in ('def', 'class', 'lambda'):
self._scope_names_stack.pop()
if failed_stack:
err = ErrorStatement(failed_stack, value, self.position_modifier, start_pos)
err = ErrorStatement(failed_stack, arcs, value, self.position_modifier, start_pos)
self._error_statement_stacks.append(err)
self._last_failed_start_pos = start_pos
@@ -418,8 +451,6 @@ class ParserWithRecovery(Parser):
self._add_syntax_error('Strange token', start_pos)
continue
if typ == OP:
typ = token.opmap[value]
yield typ, value, prefix, start_pos
def _add_syntax_error(self, message, position):

View File

@@ -451,7 +451,7 @@ class FastTokenizer(object):
"""
def __init__(self, source):
self.source = source
self._gen = source_tokens(source)
self._gen = source_tokens(source, use_exact_op_types=True)
self._closed = False
# fast parser options

View File

@@ -34,6 +34,18 @@ class InternalParseError(Exception):
self.start_pos = start_pos
def token_to_ilabel(grammar, type_, value):
# Map from token to label
if type_ == tokenize.NAME:
# Check for reserved words (keywords)
try:
return grammar.keywords[value]
except KeyError:
pass
return grammar.tokens[type_]
class PgenParser(object):
"""Parser engine.
@@ -118,15 +130,7 @@ class PgenParser(object):
def addtoken(self, type_, value, prefix, start_pos):
"""Add a token; return True if this is the end of the program."""
# Map from token to label
if type_ == tokenize.NAME:
# Check for reserved words (keywords)
try:
ilabel = self.grammar.keywords[value]
except KeyError:
ilabel = self.grammar.tokens[type_]
else:
ilabel = self.grammar.tokens[type_]
ilabel = token_to_ilabel(self.grammar, type_, value)
# Loop until the token is shifted; may raise exceptions
while True:
@@ -168,7 +172,7 @@ class PgenParser(object):
# Done parsing, but another token is input
raise InternalParseError("too much input", type_, value, start_pos)
else:
self.error_recovery(self.grammar, self.stack, type_,
self.error_recovery(self.grammar, self.stack, arcs, type_,
value, start_pos, prefix, self.addtoken)
break

View File

@@ -14,7 +14,7 @@ from __future__ import absolute_import
import string
import re
from io import StringIO
from jedi.parser.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER,
from jedi.parser.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, opmap,
NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT)
from jedi._compatibility import is_py3
@@ -143,18 +143,19 @@ del _compile
tabsize = 8
# TODO add with?
ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
'finally', 'while', 'return')
def source_tokens(source):
def source_tokens(source, use_exact_op_types=False):
"""Generate tokens from a the source code (string)."""
source = source
readline = StringIO(source).readline
return generate_tokens(readline)
return generate_tokens(readline, use_exact_op_types)
def generate_tokens(readline):
def generate_tokens(readline, use_exact_op_types=False):
"""
A heavily modified Python standard library tokenizer.
@@ -285,7 +286,12 @@ def generate_tokens(readline):
paren_level += 1
elif token in ')]}':
paren_level -= 1
yield OP, token, spos, prefix
if use_exact_op_types:
typ = opmap[token]
else:
typ = OP
yield typ, token, spos, prefix
if new_line:
end_pos = lnum + 1, 0

View File

@@ -1,16 +1,20 @@
import re
import os
import keyword
from collections import namedtuple
from jedi import cache
from jedi import common
from jedi.parser import tokenize, ParserWithRecovery
from jedi._compatibility import u
from jedi.parser import token
from jedi.parser.fast import FastParser
from jedi.parser import tree
from jedi import debug
from jedi.common import PushBackIterator
# TODO this should be part of the tokenizer not just of this user_context.
Token = namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])
REPLACE_STR = r"[bBuU]?[rR]?" + (r"(?:(')[^\n'\\]*(?:\\.[^\n'\\]*)*(?:'|$)" +
'|' +
@@ -66,7 +70,7 @@ class UserContext(object):
first_line = common.splitlines(tok_str)[0]
column -= len(first_line)
# Reverse the token again, so that it is in normal order again.
yield typ, tok_str[::-1], (self._line_temp, column), prefix[::-1]
yield Token(typ, tok_str[::-1], (self._line_temp, column), prefix[::-1])
def _calc_path_until_cursor(self, start_pos):
"""
@@ -214,11 +218,14 @@ class UserContext(object):
next_is_key = True
return None, 0, None, (0, 0)
def get_context(self, yield_positions=False):
def get_reverse_context(self, yield_positions=False):
"""
Returns the token strings in reverse order from the start position.
"""
self.get_path_until_cursor() # In case _start_cursor_pos is undefined.
pos = self._start_cursor_pos
while True:
# remove non important white space
# Remove unimportant whitespace.
line = self.get_line(pos[0])
while True:
if pos[1] == 0:
@@ -246,6 +253,35 @@ class UserContext(object):
else:
yield ''
def get_backwards_context_tokens(self):
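"""Yield Token namedtuples backwards from the cursor, skipping whitespace and line continuations."""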
self.get_path_until_cursor() # In case _start_cursor_pos is undefined.
pos = self._start_cursor_pos
while True:
# Remove unimportant whitespace.
line = self.get_line(pos[0])
while True:
if pos[1] == 0:
line = self.get_line(pos[0] - 1)
if line and line[-1] == '\\':
pos = pos[0] - 1, len(line) - 1
continue
else:
break
if line[pos[1] - 1].isspace():
pos = pos[0], pos[1] - 1
else:
break
try:
token_ = next(self._get_backwards_tokenizer(pos))
pos = token_.start_pos
yield token_
except StopIteration:
# Make it clear that there's nothing coming anymore.
#yield Token('', token.ENDMARKER, (1, 0), '')
break
def get_line(self, line_nr):
if not self._line_cache:
self._line_cache = common.splitlines(self.source)
@@ -310,7 +346,7 @@ class UserContextParser(object):
# process it - probably a Syntax Error (or in a comment).
debug.warning('No statement under the cursor.')
return
pos = next(self._user_context.get_context(yield_positions=True))
pos = next(self._user_context.get_reverse_context(yield_positions=True))
user_stmt = self.module().get_statement_for_position(pos)
return user_stmt