From f3e4bf9ed1b097b66889a94ce844b9c1699adab0 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 4 Nov 2014 18:54:25 +0100 Subject: [PATCH] Splitting up the convert function into leaves and nodes. --- jedi/parser/__init__.py | 38 +++++++++++++++++++++++++---------- jedi/parser/pgen2/__init__.py | 7 ++++--- jedi/parser/pgen2/parse.py | 23 ++++++++++++--------- jedi/parser/pytree.py | 14 +------------ 4 files changed, 45 insertions(+), 37 deletions(-) diff --git a/jedi/parser/__init__.py b/jedi/parser/__init__.py index 283af772..6d8dcfe7 100644 --- a/jedi/parser/__init__.py +++ b/jedi/parser/__init__.py @@ -60,28 +60,19 @@ class Parser(object): self.used_names = {} self.scope_names_stack = [{}] logger = logging.getLogger("Jedi-Parser") - d = Driver(pytree.python_grammar, self.convert, + d = Driver(pytree.python_grammar, self.convert_node, self.convert_leaf, self.error_recovery, logger=logger) self.module = d.parse_string(source).get_parent_until() self.module.used_names = self.used_names self.module.set_global_names(self.global_names) - def convert(self, grammar, raw_node): - if raw_node[1] in ('def', 'class') and raw_node[0] == pgen2.tokenize.NAME: - self.scope_names_stack.append({}) - + def convert_node(self, grammar, raw_node): new_node = pytree.convert(grammar, raw_node) # We need to check raw_node always, because the same node can be # returned by convert multiple times. if raw_node[0] == pytree.python_symbols.global_stmt: self.global_names += new_node.names() - elif isinstance(new_node, pr.Name) and raw_node[0] == pgen2.tokenize.NAME: - # Keep a listing of all used names - arr = self.used_names.setdefault(new_node.value, []) - arr.append(new_node) - arr = self.scope_names_stack[-1].setdefault(new_node.value, []) - arr.append(new_node) elif isinstance(new_node, (pr.ClassOrFunc, pr.Module)) \ and raw_node[0] in (pytree.python_symbols.funcdef, pytree.python_symbols.classdef, @@ -97,6 +88,31 @@ class Parser(object): new_node.names_dict = scope_names return new_node + def convert_leaf(self, grammar, raw_node): + type, value, context, children = raw_node + #print('leaf', raw_node, type_repr(type)) + prefix, start_pos = context + if type == tokenize.NAME: + if value in grammar.keywords: + if value in ('def', 'class'): + self.scope_names_stack.append({}) + + return pr.Keyword(value, start_pos, prefix) + else: + name = pr.Name(value, start_pos, prefix) + # Keep a listing of all used names + arr = self.used_names.setdefault(name.value, []) + arr.append(name) + arr = self.scope_names_stack[-1].setdefault(name.value, []) + arr.append(name) + return name + elif type in (tokenize.STRING, tokenize.NUMBER): + return pr.Literal(value, start_pos, prefix) + elif type in (tokenize.NEWLINE, tokenize.ENDMARKER): + return pr.Whitespace(value, start_pos, prefix) + else: + return pr.Operator(value, start_pos, prefix) + def error_recovery(self, grammar, stack, type, value): """ This parser is written in a dynamic way, meaning that this parser diff --git a/jedi/parser/pgen2/__init__.py b/jedi/parser/pgen2/__init__.py index 6e5458f8..ec2b6740 100644 --- a/jedi/parser/pgen2/__init__.py +++ b/jedi/parser/pgen2/__init__.py @@ -21,18 +21,19 @@ from . import tokenize class Driver(object): - def __init__(self, grammar, convert, error_recovery, logger=None): + def __init__(self, grammar, convert_node, convert_leaf, error_recovery, logger=None): self.grammar = grammar if logger is None: logger = logging.getLogger() self.logger = logger - self.convert = convert + self.convert_node = convert_node + self.convert_leaf = convert_leaf self.error_recovery = error_recovery def parse_tokens(self, tokens): """Parse a series of tokens and return the syntax tree.""" # XXX Move the prefix computation into a wrapper around tokenize. - p = parse.Parser(self.grammar, self.convert, self.error_recovery) + p = parse.Parser(self.grammar, self.convert_node, self.convert_leaf, self.error_recovery) lineno = 1 column = 0 type = value = start = end = line_text = None diff --git a/jedi/parser/pgen2/parse.py b/jedi/parser/pgen2/parse.py index 5614081a..30395ab2 100644 --- a/jedi/parser/pgen2/parse.py +++ b/jedi/parser/pgen2/parse.py @@ -56,7 +56,7 @@ class Parser(object): """ - def __init__(self, grammar, convert, error_recovery): + def __init__(self, grammar, convert_node, convert_leaf, error_recovery): """Constructor. The grammar argument is a grammar.Grammar instance; see the @@ -86,7 +86,8 @@ class Parser(object): """ self.grammar = grammar - self.convert = convert or (lambda grammar, node: node) + self.convert_node = convert_node + self.convert_leaf = convert_leaf # Prepare for parsing. start = self.grammar.start @@ -163,9 +164,8 @@ class Parser(object): """Shift a token. (Internal)""" dfa, state, node = self.stack[-1] newnode = (type, value, context, None) - newnode = self.convert(self.grammar, newnode) - if newnode is not None: - node[-1].append(newnode) + newnode = self.convert_leaf(self.grammar, newnode) + node[-1].append(newnode) self.stack[-1] = (dfa, newstate, node) def push(self, type, newdfa, newstate, context): @@ -181,12 +181,15 @@ class Parser(object): children = popnode[3] if len(children) != 1 or popnode[0] in (self.grammar.symbol2number['expr_stmt'], self.grammar.symbol2number['file_input']): - newnode = self.convert(self.grammar, popnode) + newnode = self.convert_node(self.grammar, popnode) else: newnode = children[0] - if self.stack: - dfa, state, node = self.stack[-1] - node[-1].append(newnode) - else: + try: + # Equal to: + # dfa, state, node = self.stack[-1] + # symbol, value, context, children = node + self.stack[-1][2][3].append(newnode) + except IndexError: + # stack is empty, set the rootnode. self.rootnode = newnode diff --git a/jedi/parser/pytree.py b/jedi/parser/pytree.py index 16fb6690..e7657457 100644 --- a/jedi/parser/pytree.py +++ b/jedi/parser/pytree.py @@ -117,16 +117,4 @@ def convert(grammar, raw_node): except KeyError: return pr.Node(type, children) else: - #print('leaf', raw_node, type_repr(type)) - prefix, start_pos = context - if type == tokenize.NAME: - if value in grammar.keywords: - return pr.Keyword(value, start_pos, prefix) - else: - return pr.Name(value, start_pos, prefix) - elif type in (tokenize.STRING, tokenize.NUMBER): - return pr.Literal(value, start_pos, prefix) - elif type in (tokenize.NEWLINE, tokenize.ENDMARKER): - return pr.Whitespace(value, start_pos, prefix) - else: - return pr.Operator(value, start_pos, prefix) + raise NotImplementedError