From f92e675400c8f5589cb9b84a3f037532a606ddbd Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 19 May 2017 10:26:24 -0400 Subject: [PATCH] Remove the whole parser. --- jedi/parser/__init__.py | 8 - jedi/parser/cache.py | 147 ---- jedi/parser/parser.py | 77 --- jedi/parser/pgen2/__init__.py | 8 - jedi/parser/pgen2/grammar.py | 127 ---- jedi/parser/pgen2/parse.py | 218 ------ jedi/parser/pgen2/pgen.py | 394 ----------- jedi/parser/python/__init__.py | 124 ---- jedi/parser/python/diff.py | 603 ----------------- jedi/parser/python/grammar2.7.txt | 152 ----- jedi/parser/python/grammar3.4.txt | 135 ---- jedi/parser/python/grammar3.5.txt | 154 ----- jedi/parser/python/grammar3.6.txt | 161 ----- jedi/parser/python/parser.py | 232 ------- jedi/parser/python/tree.py | 1045 ----------------------------- jedi/parser/token.py | 90 --- jedi/parser/tokenize.py | 369 ---------- jedi/parser/tree.py | 328 --------- 18 files changed, 4372 deletions(-) delete mode 100644 jedi/parser/__init__.py delete mode 100644 jedi/parser/cache.py delete mode 100644 jedi/parser/parser.py delete mode 100644 jedi/parser/pgen2/__init__.py delete mode 100644 jedi/parser/pgen2/grammar.py delete mode 100644 jedi/parser/pgen2/parse.py delete mode 100644 jedi/parser/pgen2/pgen.py delete mode 100644 jedi/parser/python/__init__.py delete mode 100644 jedi/parser/python/diff.py delete mode 100644 jedi/parser/python/grammar2.7.txt delete mode 100644 jedi/parser/python/grammar3.4.txt delete mode 100644 jedi/parser/python/grammar3.5.txt delete mode 100644 jedi/parser/python/grammar3.6.txt delete mode 100644 jedi/parser/python/parser.py delete mode 100644 jedi/parser/python/tree.py delete mode 100644 jedi/parser/token.py delete mode 100644 jedi/parser/tokenize.py delete mode 100644 jedi/parser/tree.py diff --git a/jedi/parser/__init__.py b/jedi/parser/__init__.py deleted file mode 100644 index d9e7c8db..00000000 --- a/jedi/parser/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from jedi.parser.parser import ParserSyntaxError -from jedi.parser.pgen2.pgen import generate_grammar -from jedi.parser import python - - -def parse(grammar, code): - raise NotImplementedError - Parser(grammar, code) diff --git a/jedi/parser/cache.py b/jedi/parser/cache.py deleted file mode 100644 index 182a8a47..00000000 --- a/jedi/parser/cache.py +++ /dev/null @@ -1,147 +0,0 @@ -import time -import os -import sys -import hashlib -import gc -import shutil -import pickle -import platform -import errno - -from jedi import settings -from jedi import debug -from jedi._compatibility import FileNotFoundError - - -_PICKLE_VERSION = 30 -""" -Version number (integer) for file system cache. - -Increment this number when there are any incompatible changes in -the parser tree classes. For example, the following changes -are regarded as incompatible. - -- A class name is changed. -- A class is moved to another module. -- A __slot__ of a class is changed. -""" - -_VERSION_TAG = '%s-%s%s-%s' % ( - platform.python_implementation(), - sys.version_info[0], - sys.version_info[1], - _PICKLE_VERSION -) -""" -Short name for distinguish Python implementations and versions. - -It's like `sys.implementation.cache_tag` but for Python < 3.3 -we generate something similar. 
See: -http://docs.python.org/3/library/sys.html#sys.implementation -""" - -# for fast_parser, should not be deleted -parser_cache = {} - - - -class _NodeCacheItem(object): - def __init__(self, node, lines, change_time=None): - self.node = node - self.lines = lines - if change_time is None: - change_time = time.time() - self.change_time = change_time - - -def load_module(grammar, path): - """ - Returns a module or None, if it fails. - """ - try: - p_time = os.path.getmtime(path) - except FileNotFoundError: - return None - - try: - # TODO Add grammar sha256 - module_cache_item = parser_cache[path] - if p_time <= module_cache_item.change_time: - return module_cache_item.node - except KeyError: - if not settings.use_filesystem_cache: - return None - - return _load_from_file_system(grammar, path, p_time) - - -def _load_from_file_system(grammar, path, p_time): - cache_path = _get_hashed_path(grammar, path) - try: - try: - if p_time > os.path.getmtime(cache_path): - # Cache is outdated - return None - except OSError as e: - if e.errno == errno.ENOENT: - # In Python 2 instead of an IOError here we get an OSError. - raise FileNotFoundError - else: - raise - - with open(cache_path, 'rb') as f: - gc.disable() - try: - module_cache_item = pickle.load(f) - finally: - gc.enable() - except FileNotFoundError: - return None - else: - parser_cache[path] = module_cache_item - debug.dbg('pickle loaded: %s', path) - return module_cache_item.node - - -def save_module(grammar, path, module, lines, pickling=True): - try: - p_time = None if path is None else os.path.getmtime(path) - except OSError: - p_time = None - pickling = False - - item = _NodeCacheItem(module, lines, p_time) - parser_cache[path] = item - if settings.use_filesystem_cache and pickling and path is not None: - _save_to_file_system(grammar, path, item) - - -def _save_to_file_system(grammar, path, item): - with open(_get_hashed_path(grammar, path), 'wb') as f: - pickle.dump(item, f, pickle.HIGHEST_PROTOCOL) - - -def remove_old_modules(self): - """ - # TODO Might want to use such a function to clean up the cache (if it's - # too old). We could potentially also scan for old files in the - # directory and delete those. - """ - - -def clear_cache(self): - shutil.rmtree(settings.cache_directory) - parser_cache.clear() - - -def _get_hashed_path(grammar, path): - file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest() - directory = _get_cache_directory_path() - return os.path.join(directory, '%s-%s.pkl' % (grammar.sha256, file_hash)) - - -def _get_cache_directory_path(): - directory = os.path.join(settings.cache_directory, _VERSION_TAG) - if not os.path.exists(directory): - os.makedirs(directory) - return directory diff --git a/jedi/parser/parser.py b/jedi/parser/parser.py deleted file mode 100644 index 05217d89..00000000 --- a/jedi/parser/parser.py +++ /dev/null @@ -1,77 +0,0 @@ -""" -The ``Parser`` tries to convert the available Python code in an easy to read -format, something like an abstract syntax tree. The classes who represent this -tree, are sitting in the :mod:`jedi.parser.tree` module. - -The Python module ``tokenize`` is a very important part in the ``Parser``, -because it splits the code into different words (tokens). Sometimes it looks a -bit messy. Sorry for that! You might ask now: "Why didn't you use the ``ast`` -module for this? Well, ``ast`` does a very good job understanding proper Python -code, but fails to work as soon as there's a single line of broken code. 
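A small, self-contained illustration of that trade-off (editor's example
using only the standard library; it is not part of this module)::

    import ast
    import io
    import tokenize

    broken = "x = = 5\n"  # one line of invalid syntax

    try:
        ast.parse(broken)  # ``ast`` rejects the whole input
    except SyntaxError as e:
        print("ast:", e.msg)

    # ``tokenize`` still yields a token stream that a parser can run
    # error recovery on.
    for tok in tokenize.generate_tokens(io.StringIO(broken).readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))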
- -There's one important optimization that needs to be known: Statements are not -being parsed completely. ``Statement`` is just a representation of the tokens -within the statement. This lowers memory usage and cpu time and reduces the -complexity of the ``Parser`` (there's another parser sitting inside -``Statement``, which produces ``Array`` and ``Call``). -""" -from jedi.parser import tree -from jedi.parser.pgen2.parse import PgenParser - - -class ParserSyntaxError(Exception): - """ - Contains error information about the parser tree. - - May be raised as an exception. - """ - def __init__(self, message, position): - self.message = message - self.position = position - - -class BaseParser(object): - node_map = {} - default_node = tree.Node - - leaf_map = { - } - default_leaf = tree.Leaf - - def __init__(self, grammar, start_symbol='file_input', error_recovery=False): - self._grammar = grammar - self._start_symbol = start_symbol - self._error_recovery = error_recovery - - def parse(self, tokens): - start_number = self._grammar.symbol2number[self._start_symbol] - self.pgen_parser = PgenParser( - self._grammar, self.convert_node, self.convert_leaf, - self.error_recovery, start_number - ) - - node = self.pgen_parser.parse(tokens) - # The stack is empty now, we don't need it anymore. - del self.pgen_parser - return node - - def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix, - add_token_callback): - if self._error_recovery: - raise NotImplementedError("Error Recovery is not implemented") - else: - raise ParserSyntaxError('SyntaxError: invalid syntax', start_pos) - - def convert_node(self, grammar, type_, children): - # TODO REMOVE symbol, we don't want type here. - symbol = grammar.number2symbol[type_] - try: - return self.node_map[symbol](children) - except KeyError: - return self.default_node(symbol, children) - - def convert_leaf(self, grammar, type_, value, prefix, start_pos): - try: - return self.leaf_map[type_](value, start_pos, prefix) - except KeyError: - return self.default_leaf(value, start_pos, prefix) diff --git a/jedi/parser/pgen2/__init__.py b/jedi/parser/pgen2/__init__.py deleted file mode 100644 index 1ddae5fe..00000000 --- a/jedi/parser/pgen2/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -# Modifications: -# Copyright 2006 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. -# Copyright 2014 David Halter. Integration into Jedi. -# Modifications are dual-licensed: MIT and PSF. diff --git a/jedi/parser/pgen2/grammar.py b/jedi/parser/pgen2/grammar.py deleted file mode 100644 index 44214f93..00000000 --- a/jedi/parser/pgen2/grammar.py +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -# Modifications: -# Copyright 2014 David Halter. Integration into Jedi. -# Modifications are dual-licensed: MIT and PSF. - -"""This module defines the data structures used to represent a grammar. - -These are a bit arcane because they are derived from the data -structures used by Python's 'pgen' parser generator. - -There's also a table here mapping operators to their names in the -token module; the Python tokenize module reports all operators as the -fallback token code OP, but the parser needs the actual token code. 
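A hedged illustration of that remapping (editor's example; it uses the
standard library's ``exact_type`` attribute, available on Python 3, instead
of the ``tokens`` table defined in this module)::

    import io
    import token
    import tokenize

    for tok in tokenize.generate_tokens(io.StringIO("a + b * c\n").readline):
        if tok.type == token.OP:
            # tokenize reports OP; the parser needs PLUS, STAR, ...
            print(tok.string, "->", token.tok_name[tok.exact_type])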
- -""" - -import pickle -import hashlib - - - -class Grammar(object): - """Pgen parsing tables conversion class. - - Once initialized, this class supplies the grammar tables for the - parsing engine implemented by parse.py. The parsing engine - accesses the instance variables directly. The class here does not - provide initialization of the tables; several subclasses exist to - do this (see the conv and pgen modules). - - The load() method reads the tables from a pickle file, which is - much faster than the other ways offered by subclasses. The pickle - file is written by calling dump() (after loading the grammar - tables using a subclass). The report() method prints a readable - representation of the tables to stdout, for debugging. - - The instance variables are as follows: - - symbol2number -- a dict mapping symbol names to numbers. Symbol - numbers are always 256 or higher, to distinguish - them from token numbers, which are between 0 and - 255 (inclusive). - - number2symbol -- a dict mapping numbers to symbol names; - these two are each other's inverse. - - states -- a list of DFAs, where each DFA is a list of - states, each state is a list of arcs, and each - arc is a (i, j) pair where i is a label and j is - a state number. The DFA number is the index into - this list. (This name is slightly confusing.) - Final states are represented by a special arc of - the form (0, j) where j is its own state number. - - dfas -- a dict mapping symbol numbers to (DFA, first) - pairs, where DFA is an item from the states list - above, and first is a set of tokens that can - begin this grammar rule (represented by a dict - whose values are always 1). - - labels -- a list of (x, y) pairs where x is either a token - number or a symbol number, and y is either None - or a string; the strings are keywords. The label - number is the index in this list; label numbers - are used to mark state transitions (arcs) in the - DFAs. - - start -- the number of the grammar's start symbol. - - keywords -- a dict mapping keyword strings to arc labels. - - tokens -- a dict mapping token numbers to arc labels. - - """ - - def __init__(self, bnf_text): - self.symbol2number = {} - self.number2symbol = {} - self.states = [] - self.dfas = {} - self.labels = [(0, "EMPTY")] - self.keywords = {} - self.tokens = {} - self.symbol2label = {} - self.start = 256 - self.sha256 = hashlib.sha256(bnf_text.encode("utf-8")).hexdigest() - - def dump(self, filename): - """Dump the grammar tables to a pickle file.""" - with open(filename, "wb") as f: - pickle.dump(self.__dict__, f, 2) - - def load(self, filename): - """Load the grammar tables from a pickle file.""" - with open(filename, "rb") as f: - d = pickle.load(f) - self.__dict__.update(d) - - def copy(self): - """ - Copy the grammar. 
- """ - new = self.__class__() - for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords", - "tokens", "symbol2label"): - setattr(new, dict_attr, getattr(self, dict_attr).copy()) - new.labels = self.labels[:] - new.states = self.states[:] - new.start = self.start - return new - - def report(self): - """Dump the grammar tables to standard output, for debugging.""" - from pprint import pprint - print("s2n") - pprint(self.symbol2number) - print("n2s") - pprint(self.number2symbol) - print("states") - pprint(self.states) - print("dfas") - pprint(self.dfas) - print("labels") - pprint(self.labels) - print("start", self.start) diff --git a/jedi/parser/pgen2/parse.py b/jedi/parser/pgen2/parse.py deleted file mode 100644 index 8fa15f1d..00000000 --- a/jedi/parser/pgen2/parse.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -# Modifications: -# Copyright 2014 David Halter. Integration into Jedi. -# Modifications are dual-licensed: MIT and PSF. - -""" -Parser engine for the grammar tables generated by pgen. - -The grammar table must be loaded first. - -See Parser/parser.c in the Python distribution for additional info on -how this parsing engine works. -""" - -# Local imports -from jedi.parser import tokenize - - -class InternalParseError(Exception): - """ - Exception to signal the parser is stuck and error recovery didn't help. - Basically this shouldn't happen. It's a sign that something is really - wrong. - """ - - def __init__(self, msg, type, value, start_pos): - Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" % - (msg, tokenize.tok_name[type], value, start_pos)) - self.msg = msg - self.type = type - self.value = value - self.start_pos = start_pos - - -def token_to_ilabel(grammar, type_, value): - # Map from token to label - if type_ == tokenize.NAME: - # Check for reserved words (keywords) - try: - return grammar.keywords[value] - except KeyError: - pass - - try: - return grammar.tokens[type_] - except KeyError: - return None - - -class PgenParser(object): - """Parser engine. - - The proper usage sequence is: - - p = Parser(grammar, [converter]) # create instance - p.setup([start]) # prepare for parsing - : - if p.addtoken(...): # parse a token - break - root = p.rootnode # root of abstract syntax tree - - A Parser instance may be reused by calling setup() repeatedly. - - A Parser instance contains state pertaining to the current token - sequence, and should not be used concurrently by different threads - to parse separate token sequences. - - See driver.py for how to get input tokens by tokenizing a file or - string. - - Parsing is complete when addtoken() returns True; the root of the - abstract syntax tree can then be retrieved from the rootnode - instance variable. When a syntax error occurs, error_recovery() - is called. There is no error recovery; the parser cannot be used - after a syntax error was reported (but it can be reinitialized by - calling setup()). - - """ - - def __init__(self, grammar, convert_node, convert_leaf, error_recovery, start): - """Constructor. - - The grammar argument is a grammar.Grammar instance; see the - grammar module for more information. - - The parser is not ready yet for parsing; you must call the - setup() method to get it started. - - The optional convert argument is a function mapping concrete - syntax tree nodes to abstract syntax tree nodes. 
If not - given, no conversion is done and the syntax tree produced is - the concrete syntax tree. If given, it must be a function of - two arguments, the first being the grammar (a grammar.Grammar - instance), and the second being the concrete syntax tree node - to be converted. The syntax tree is converted from the bottom - up. - - A concrete syntax tree node is a (type, nodes) tuple, where - type is the node type (a token or symbol number) and nodes - is a list of children for symbols, and None for tokens. - - An abstract syntax tree node may be anything; this is entirely - up to the converter function. - - """ - self.grammar = grammar - self.convert_node = convert_node - self.convert_leaf = convert_leaf - - # Each stack entry is a tuple: (dfa, state, node). - # A node is a tuple: (type, children), - # where children is a list of nodes or None - newnode = (start, []) - stackentry = (self.grammar.dfas[start], 0, newnode) - self.stack = [stackentry] - self.rootnode = None - self.error_recovery = error_recovery - - def parse(self, tokens): - for type_, value, start_pos, prefix in tokens: - if self.addtoken(type_, value, start_pos, prefix): - break - else: - # We never broke out -- EOF is too soon -- Unfinished statement. - # However, the error recovery might have added the token again, if - # the stack is empty, we're fine. - if self.stack: - raise InternalParseError("incomplete input", type_, value, start_pos) - return self.rootnode - - def addtoken(self, type_, value, start_pos, prefix): - """Add a token; return True if this is the end of the program.""" - ilabel = token_to_ilabel(self.grammar, type_, value) - - # Loop until the token is shifted; may raise exceptions - _gram = self.grammar - _labels = _gram.labels - _push = self._push - _pop = self._pop - _shift = self._shift - while True: - dfa, state, node = self.stack[-1] - states, first = dfa - arcs = states[state] - # Look for a state with this label - for i, newstate in arcs: - t, v = _labels[i] - if ilabel == i: - # Look it up in the list of labels - assert t < 256 - # Shift a token; we're done with it - _shift(type_, value, newstate, prefix, start_pos) - # Pop while we are in an accept-only state - state = newstate - while states[state] == [(0, state)]: - _pop() - if not self.stack: - # Done parsing! - return True - dfa, state, node = self.stack[-1] - states, first = dfa - # Done with this token - return False - elif t >= 256: - # See if it's a symbol and if we're in its first set - itsdfa = _gram.dfas[t] - itsstates, itsfirst = itsdfa - if ilabel in itsfirst: - # Push a symbol - _push(t, itsdfa, newstate) - break # To continue the outer while loop - else: - if (0, state) in arcs: - # An accepting state, pop it and try something else - _pop() - if not self.stack: - # Done parsing, but another token is input - raise InternalParseError("too much input", type_, value, start_pos) - else: - self.error_recovery(self.grammar, self.stack, arcs, type_, - value, start_pos, prefix, self.addtoken) - break - - def _shift(self, type_, value, newstate, prefix, start_pos): - """Shift a token. (Internal)""" - dfa, state, node = self.stack[-1] - newnode = self.convert_leaf(self.grammar, type_, value, prefix, start_pos) - node[-1].append(newnode) - self.stack[-1] = (dfa, newstate, node) - - def _push(self, type_, newdfa, newstate): - """Push a nonterminal. 
(Internal)""" - dfa, state, node = self.stack[-1] - newnode = (type_, []) - self.stack[-1] = (dfa, newstate, node) - self.stack.append((newdfa, 0, newnode)) - - def _pop(self): - """Pop a nonterminal. (Internal)""" - popdfa, popstate, (type_, children) = self.stack.pop() - # If there's exactly one child, return that child instead of creating a - # new node. We still create expr_stmt and file_input though, because a - # lot of Jedi depends on its logic. - if len(children) == 1: - newnode = children[0] - else: - newnode = self.convert_node(self.grammar, type_, children) - - try: - # Equal to: - # dfa, state, node = self.stack[-1] - # symbol, children = node - self.stack[-1][2][1].append(newnode) - except IndexError: - # Stack is empty, set the rootnode. - self.rootnode = newnode diff --git a/jedi/parser/pgen2/pgen.py b/jedi/parser/pgen2/pgen.py deleted file mode 100644 index 3317a7cd..00000000 --- a/jedi/parser/pgen2/pgen.py +++ /dev/null @@ -1,394 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -# Modifications: -# Copyright 2014 David Halter. Integration into Jedi. -# Modifications are dual-licensed: MIT and PSF. - -from . import grammar -from jedi.parser import token -from jedi.parser import tokenize - - -class ParserGenerator(object): - def __init__(self, bnf_text): - self._bnf_text = bnf_text - self.generator = tokenize.source_tokens(bnf_text) - self.gettoken() # Initialize lookahead - self.dfas, self.startsymbol = self.parse() - self.first = {} # map from symbol name to set of tokens - self.addfirstsets() - - def make_grammar(self): - c = grammar.Grammar(self._bnf_text) - names = list(self.dfas.keys()) - names.sort() - names.remove(self.startsymbol) - names.insert(0, self.startsymbol) - for name in names: - i = 256 + len(c.symbol2number) - c.symbol2number[name] = i - c.number2symbol[i] = name - for name in names: - dfa = self.dfas[name] - states = [] - for state in dfa: - arcs = [] - for label, next in state.arcs.items(): - arcs.append((self.make_label(c, label), dfa.index(next))) - if state.isfinal: - arcs.append((0, dfa.index(state))) - states.append(arcs) - c.states.append(states) - c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name)) - c.start = c.symbol2number[self.startsymbol] - return c - - def make_first(self, c, name): - rawfirst = self.first[name] - first = {} - for label in rawfirst: - ilabel = self.make_label(c, label) - ##assert ilabel not in first # XXX failed on <> ... != - first[ilabel] = 1 - return first - - def make_label(self, c, label): - # XXX Maybe this should be a method on a subclass of converter? 
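        # (Editor's summary of the cases below.)
        #   bare NAME of a rule    -> nonterminal: label (symbol_number, None)
        #   bare NAME of a token   -> named token: label (token_number, None)
        #   quoted alphabetic text -> keyword:     label (NAME, keyword_string)
        #   quoted operator        -> operator:    label (opmap[value], None)
        # Each label is created at most once and cached in symbol2label,
        # tokens or keywords respectively.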
- ilabel = len(c.labels) - if label[0].isalpha(): - # Either a symbol name or a named token - if label in c.symbol2number: - # A symbol name (a non-terminal) - if label in c.symbol2label: - return c.symbol2label[label] - else: - c.labels.append((c.symbol2number[label], None)) - c.symbol2label[label] = ilabel - return ilabel - else: - # A named token (NAME, NUMBER, STRING) - itoken = getattr(token, label, None) - assert isinstance(itoken, int), label - assert itoken in token.tok_name, label - if itoken in c.tokens: - return c.tokens[itoken] - else: - c.labels.append((itoken, None)) - c.tokens[itoken] = ilabel - return ilabel - else: - # Either a keyword or an operator - assert label[0] in ('"', "'"), label - value = eval(label) - if value[0].isalpha(): - # A keyword - if value in c.keywords: - return c.keywords[value] - else: - c.labels.append((token.NAME, value)) - c.keywords[value] = ilabel - return ilabel - else: - # An operator (any non-numeric token) - itoken = token.opmap[value] # Fails if unknown token - if itoken in c.tokens: - return c.tokens[itoken] - else: - c.labels.append((itoken, None)) - c.tokens[itoken] = ilabel - return ilabel - - def addfirstsets(self): - names = list(self.dfas.keys()) - names.sort() - for name in names: - if name not in self.first: - self.calcfirst(name) - #print name, self.first[name].keys() - - def calcfirst(self, name): - dfa = self.dfas[name] - self.first[name] = None # dummy to detect left recursion - state = dfa[0] - totalset = {} - overlapcheck = {} - for label, next in state.arcs.items(): - if label in self.dfas: - if label in self.first: - fset = self.first[label] - if fset is None: - raise ValueError("recursion for rule %r" % name) - else: - self.calcfirst(label) - fset = self.first[label] - totalset.update(fset) - overlapcheck[label] = fset - else: - totalset[label] = 1 - overlapcheck[label] = {label: 1} - inverse = {} - for label, itsfirst in overlapcheck.items(): - for symbol in itsfirst: - if symbol in inverse: - raise ValueError("rule %s is ambiguous; %s is in the" - " first sets of %s as well as %s" % - (name, symbol, label, inverse[symbol])) - inverse[symbol] = label - self.first[name] = totalset - - def parse(self): - dfas = {} - startsymbol = None - # MSTART: (NEWLINE | RULE)* ENDMARKER - while self.type != token.ENDMARKER: - while self.type == token.NEWLINE: - self.gettoken() - # RULE: NAME ':' RHS NEWLINE - name = self.expect(token.NAME) - self.expect(token.OP, ":") - a, z = self.parse_rhs() - self.expect(token.NEWLINE) - #self.dump_nfa(name, a, z) - dfa = self.make_dfa(a, z) - #self.dump_dfa(name, dfa) - # oldlen = len(dfa) - self.simplify_dfa(dfa) - # newlen = len(dfa) - dfas[name] = dfa - #print name, oldlen, newlen - if startsymbol is None: - startsymbol = name - return dfas, startsymbol - - def make_dfa(self, start, finish): - # To turn an NFA into a DFA, we define the states of the DFA - # to correspond to *sets* of states of the NFA. Then do some - # state reduction. Let's represent sets as dicts with 1 for - # values. 
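        # (Editor's gloss.) This is the classic subset construction: 'closure'
        # computes the epsilon-closure of an NFA state (arcs labelled None are
        # epsilon moves), and the loop below creates one DFA state per
        # distinct reachable set of NFA states.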
- assert isinstance(start, NFAState) - assert isinstance(finish, NFAState) - - def closure(state): - base = {} - addclosure(state, base) - return base - - def addclosure(state, base): - assert isinstance(state, NFAState) - if state in base: - return - base[state] = 1 - for label, next in state.arcs: - if label is None: - addclosure(next, base) - - states = [DFAState(closure(start), finish)] - for state in states: # NB states grows while we're iterating - arcs = {} - for nfastate in state.nfaset: - for label, next in nfastate.arcs: - if label is not None: - addclosure(next, arcs.setdefault(label, {})) - for label, nfaset in arcs.items(): - for st in states: - if st.nfaset == nfaset: - break - else: - st = DFAState(nfaset, finish) - states.append(st) - state.addarc(st, label) - return states # List of DFAState instances; first one is start - - def dump_nfa(self, name, start, finish): - print("Dump of NFA for", name) - todo = [start] - for i, state in enumerate(todo): - print(" State", i, state is finish and "(final)" or "") - for label, next in state.arcs: - if next in todo: - j = todo.index(next) - else: - j = len(todo) - todo.append(next) - if label is None: - print(" -> %d" % j) - else: - print(" %s -> %d" % (label, j)) - - def dump_dfa(self, name, dfa): - print("Dump of DFA for", name) - for i, state in enumerate(dfa): - print(" State", i, state.isfinal and "(final)" or "") - for label, next in state.arcs.items(): - print(" %s -> %d" % (label, dfa.index(next))) - - def simplify_dfa(self, dfa): - # This is not theoretically optimal, but works well enough. - # Algorithm: repeatedly look for two states that have the same - # set of arcs (same labels pointing to the same nodes) and - # unify them, until things stop changing. - - # dfa is a list of DFAState instances - changes = True - while changes: - changes = False - for i, state_i in enumerate(dfa): - for j in range(i + 1, len(dfa)): - state_j = dfa[j] - if state_i == state_j: - #print " unify", i, j - del dfa[j] - for state in dfa: - state.unifystate(state_j, state_i) - changes = True - break - - def parse_rhs(self): - # RHS: ALT ('|' ALT)* - a, z = self.parse_alt() - if self.value != "|": - return a, z - else: - aa = NFAState() - zz = NFAState() - aa.addarc(a) - z.addarc(zz) - while self.value == "|": - self.gettoken() - a, z = self.parse_alt() - aa.addarc(a) - z.addarc(zz) - return aa, zz - - def parse_alt(self): - # ALT: ITEM+ - a, b = self.parse_item() - while (self.value in ("(", "[") or - self.type in (token.NAME, token.STRING)): - c, d = self.parse_item() - b.addarc(c) - b = d - return a, b - - def parse_item(self): - # ITEM: '[' RHS ']' | ATOM ['+' | '*'] - if self.value == "[": - self.gettoken() - a, z = self.parse_rhs() - self.expect(token.OP, "]") - a.addarc(z) - return a, z - else: - a, z = self.parse_atom() - value = self.value - if value not in ("+", "*"): - return a, z - self.gettoken() - z.addarc(a) - if value == "+": - return a, z - else: - return a, a - - def parse_atom(self): - # ATOM: '(' RHS ')' | NAME | STRING - if self.value == "(": - self.gettoken() - a, z = self.parse_rhs() - self.expect(token.OP, ")") - return a, z - elif self.type in (token.NAME, token.STRING): - a = NFAState() - z = NFAState() - a.addarc(z, self.value) - self.gettoken() - return a, z - else: - self.raise_error("expected (...) 
or NAME or STRING, got %s/%s", - self.type, self.value) - - def expect(self, type, value=None): - if self.type != type or (value is not None and self.value != value): - self.raise_error("expected %s/%s, got %s/%s", - type, value, self.type, self.value) - value = self.value - self.gettoken() - return value - - def gettoken(self): - tup = next(self.generator) - while tup[0] in (token.COMMENT, token.NL): - tup = next(self.generator) - self.type, self.value, self.begin, prefix = tup - #print tokenize.tok_name[self.type], repr(self.value) - - def raise_error(self, msg, *args): - if args: - try: - msg = msg % args - except: - msg = " ".join([msg] + list(map(str, args))) - line = open(self.filename).readlines()[self.begin[0]] - raise SyntaxError(msg, (self.filename, self.begin[0], - self.begin[1], line)) - - -class NFAState(object): - def __init__(self): - self.arcs = [] # list of (label, NFAState) pairs - - def addarc(self, next, label=None): - assert label is None or isinstance(label, str) - assert isinstance(next, NFAState) - self.arcs.append((label, next)) - - -class DFAState(object): - def __init__(self, nfaset, final): - assert isinstance(nfaset, dict) - assert isinstance(next(iter(nfaset)), NFAState) - assert isinstance(final, NFAState) - self.nfaset = nfaset - self.isfinal = final in nfaset - self.arcs = {} # map from label to DFAState - - def addarc(self, next, label): - assert isinstance(label, str) - assert label not in self.arcs - assert isinstance(next, DFAState) - self.arcs[label] = next - - def unifystate(self, old, new): - for label, next in self.arcs.items(): - if next is old: - self.arcs[label] = new - - def __eq__(self, other): - # Equality test -- ignore the nfaset instance variable - assert isinstance(other, DFAState) - if self.isfinal != other.isfinal: - return False - # Can't just return self.arcs == other.arcs, because that - # would invoke this method recursively, with cycles... - if len(self.arcs) != len(other.arcs): - return False - for label, next in self.arcs.items(): - if next is not other.arcs.get(label): - return False - return True - - __hash__ = None # For Py3 compatibility. - - -def generate_grammar(bnf_text): - """ - ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for - at-least-once repetition, [] for optional parts, | for alternatives and () - for grouping). - - It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its - own parser. - """ - p = ParserGenerator(bnf_text) - return p.make_grammar() diff --git a/jedi/parser/python/__init__.py b/jedi/parser/python/__init__.py deleted file mode 100644 index d60531fa..00000000 --- a/jedi/parser/python/__init__.py +++ /dev/null @@ -1,124 +0,0 @@ -""" -Parsers for Python -""" -import os - -from jedi import settings -from jedi._compatibility import FileNotFoundError -from jedi.parser.pgen2.pgen import generate_grammar -from jedi.parser.python.parser import Parser, _remove_last_newline -from jedi.parser.python.diff import DiffParser -from jedi.parser.tokenize import generate_tokens -from jedi.parser.cache import parser_cache, load_module, save_module -from jedi.common import splitlines, source_to_unicode - - -_loaded_grammars = {} - - -def load_grammar(version=None): - """ - Loads a Python grammar. The default version is always the latest. - - If you need support for a specific version, please use e.g. - `version='3.3'`. 
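    A minimal usage sketch (editor's example; ``parse`` is defined later in
    this module)::

        grammar = load_grammar(version='3.4')
        module_node = parse('x = 1\n', grammar=grammar)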
- """ - if version is None: - version = '3.6' - - if version in ('3.2', '3.3'): - version = '3.4' - elif version == '2.6': - version = '2.7' - - file = 'grammar' + version + '.txt' - - global _loaded_grammars - path = os.path.join(os.path.dirname(__file__), file) - try: - return _loaded_grammars[path] - except KeyError: - try: - with open(path) as f: - bnf_text = f.read() - grammar = generate_grammar(bnf_text) - return _loaded_grammars.setdefault(path, grammar) - except FileNotFoundError: - # Just load the default if the file does not exist. - return load_grammar() - - -def parse(code=None, path=None, grammar=None, error_recovery=True, - start_symbol='file_input', cache=False, diff_cache=False): - """ - If you want to parse a Python file you want to start here, most likely. - - If you need finer grained control over the parsed instance, there will be - other ways to access it. - - :param code: A unicode string that contains Python code. - :param path: The path to the file you want to open. Only needed for caching. - :param grammar: A Python grammar file, created with load_grammar. You may - not specify it. In that case it's the current Python version. - :param error_recovery: If enabled, any code will be returned. If it is - invalid, it will be returned as an error node. If disabled, you will - get a ParseError when encountering syntax errors in your code. - :param start_symbol: The grammar symbol that you want to parse. Only - allowed to be used when error_recovery is disabled. - - :return: A syntax tree node. Typically the module. - """ - if code is None and path is None: - raise TypeError("Please provide either code or a path.") - - if grammar is None: - grammar = load_grammar() - - if cache and not code and path is not None: - # In this case we do actual caching. We just try to load it. - module_node = load_module(grammar, path) - if module_node is not None: - return module_node - - if code is None: - with open(path, 'rb') as f: - code = source_to_unicode(f.read()) - - if diff_cache and settings.fast_parser: - try: - module_cache_item = parser_cache[path] - except KeyError: - pass - else: - lines = splitlines(code, keepends=True) - module_node = module_cache_item.node - old_lines = module_cache_item.lines - if old_lines == lines: - save_module(grammar, path, module_node, lines, pickling=False) - return module_node - - new_node = DiffParser(grammar, module_node).update( - old_lines=old_lines, - new_lines=lines - ) - save_module(grammar, path, new_node, lines, pickling=cache) - return new_node - - added_newline = not code.endswith('\n') - lines = tokenize_lines = splitlines(code, keepends=True) - if added_newline: - code += '\n' - tokenize_lines = list(tokenize_lines) - tokenize_lines[-1] += '\n' - tokenize_lines.append('') - - tokens = generate_tokens(tokenize_lines, use_exact_op_types=True) - - p = Parser(grammar, error_recovery=error_recovery, start_symbol=start_symbol) - root_node = p.parse(tokens=tokens) - if added_newline: - _remove_last_newline(root_node) - - if cache or diff_cache: - save_module(grammar, path, root_node, lines, pickling=cache) - return root_node diff --git a/jedi/parser/python/diff.py b/jedi/parser/python/diff.py deleted file mode 100644 index e9fa4b0c..00000000 --- a/jedi/parser/python/diff.py +++ /dev/null @@ -1,603 +0,0 @@ -""" -Basically a contains parser that is faster, because it tries to parse only -parts and if anything changes, it only reparses the changed parts. - -It works with a simple diff in the beginning and will try to reuse old parser -fragments. 
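Intended call pattern (editor's sketch, derived from the ``DiffParser`` API
defined below; ``grammar``, ``old_module_node`` and the two line lists are
assumed inputs)::

    parser = DiffParser(grammar, old_module_node)
    new_module_node = parser.update(old_lines=old_lines, new_lines=new_lines)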
-""" -import re -import difflib -from collections import namedtuple - -from jedi.common import splitlines -from jedi.parser.python.parser import Parser, _remove_last_newline -from jedi.parser.python.tree import EndMarker -from jedi import debug -from jedi.parser.tokenize import (generate_tokens, NEWLINE, TokenInfo, - ENDMARKER, INDENT, DEDENT) - - -def _get_last_line(node_or_leaf): - last_leaf = node_or_leaf.get_last_leaf() - if _ends_with_newline(last_leaf): - return last_leaf.start_pos[0] - else: - return last_leaf.end_pos[0] - - -def _ends_with_newline(leaf, suffix=''): - if leaf.type == 'error_leaf': - typ = leaf.original_type - else: - typ = leaf.type - - return typ == 'newline' or suffix.endswith('\n') - - -def _flows_finished(grammar, stack): - """ - if, while, for and try might not be finished, because another part might - still be parsed. - """ - for dfa, newstate, (symbol_number, nodes) in stack: - if grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt', - 'for_stmt', 'try_stmt'): - return False - return True - - -def suite_or_file_input_is_valid(grammar, stack): - if not _flows_finished(grammar, stack): - return False - - for dfa, newstate, (symbol_number, nodes) in reversed(stack): - if grammar.number2symbol[symbol_number] == 'suite': - # If only newline is in the suite, the suite is not valid, yet. - return len(nodes) > 1 - # Not reaching a suite means that we're dealing with file_input levels - # where there's no need for a valid statement in it. It can also be empty. - return True - - -def _is_flow_node(node): - try: - value = node.children[0].value - except AttributeError: - return False - return value in ('if', 'for', 'while', 'try') - - -class _PositionUpdatingFinished(Exception): - pass - - -def _update_positions(nodes, line_offset, last_leaf): - for node in nodes: - try: - children = node.children - except AttributeError: - # Is a leaf - node.line += line_offset - if node is last_leaf: - raise _PositionUpdatingFinished - else: - _update_positions(children, line_offset, last_leaf) - - -class DiffParser(object): - """ - An advanced form of parsing a file faster. Unfortunately comes with huge - side effects. It changes the given module. - """ - def __init__(self, grammar, module): - self._grammar = grammar - self._module = module - - def _reset(self): - self._copy_count = 0 - self._parser_count = 0 - - self._nodes_stack = _NodesStack(self._module) - - def update(self, old_lines, new_lines): - ''' - The algorithm works as follows: - - Equal: - - Assure that the start is a newline, otherwise parse until we get - one. - - Copy from parsed_until_line + 1 to max(i2 + 1) - - Make sure that the indentation is correct (e.g. add DEDENT) - - Add old and change positions - Insert: - - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not - much more. - - Returns the new module node. - ''' - debug.speed('diff parser start') - # Reset the used names cache so they get regenerated. - self._module._used_names = None - - self._parser_lines_new = new_lines - self._added_newline = False - if new_lines[-1] != '': - # The Python grammar needs a newline at the end of a file, but for - # everything else we keep working with new_lines here. 
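            # (Editor's gloss.) ``new_lines[-1] != ''`` means the source did
            # not end in a newline, so one is added for the grammar and the ''
            # sentinel that splitlines(keepends=True) yields after a trailing
            # newline is appended.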
- self._parser_lines_new = list(new_lines) - self._parser_lines_new[-1] += '\n' - self._parser_lines_new.append('') - self._added_newline = True - - self._reset() - - line_length = len(new_lines) - sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new) - opcodes = sm.get_opcodes() - debug.speed('diff parser calculated') - debug.dbg('diff: line_lengths old: %s, new: %s' % (len(old_lines), line_length)) - - for operation, i1, i2, j1, j2 in opcodes: - debug.dbg('diff %s old[%s:%s] new[%s:%s]', - operation, i1 + 1, i2, j1 + 1, j2) - - if j2 == line_length + int(self._added_newline): - # The empty part after the last newline is not relevant. - j2 -= 1 - - if operation == 'equal': - line_offset = j1 - i1 - self._copy_from_old_parser(line_offset, i2, j2) - elif operation == 'replace': - self._parse(until_line=j2) - elif operation == 'insert': - self._parse(until_line=j2) - else: - assert operation == 'delete' - - # With this action all change will finally be applied and we have a - # changed module. - self._nodes_stack.close() - - if self._added_newline: - _remove_last_newline(self._module) - - # Good for debugging. - if debug.debug_function: - self._enabled_debugging(old_lines, new_lines) - last_pos = self._module.end_pos[0] - if last_pos != line_length: - current_lines = splitlines(self._module.get_code(), keepends=True) - diff = difflib.unified_diff(current_lines, new_lines) - raise Exception( - "There's an issue (%s != %s) with the diff parser. Please report:\n%s" - % (last_pos, line_length, ''.join(diff)) - ) - - debug.speed('diff parser end') - return self._module - - def _enabled_debugging(self, old_lines, lines_new): - if self._module.get_code() != ''.join(lines_new): - debug.warning('parser issue:\n%s\n%s', ''.join(old_lines), - ''.join(lines_new)) - - def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new): - copied_nodes = [None] - - last_until_line = -1 - while until_line_new > self._nodes_stack.parsed_until_line: - parsed_until_line_old = self._nodes_stack.parsed_until_line - line_offset - line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1) - if line_stmt is None: - # Parse 1 line at least. We don't need more, because we just - # want to get into a state where the old parser has statements - # again that can be copied (e.g. not lines within parentheses). - self._parse(self._nodes_stack.parsed_until_line + 1) - elif not copied_nodes: - # We have copied as much as possible (but definitely not too - # much). Therefore we just parse the rest. - # We might not reach the end, because there's a statement - # that is not finished. - self._parse(until_line_new) - else: - p_children = line_stmt.parent.children - index = p_children.index(line_stmt) - - copied_nodes = self._nodes_stack.copy_nodes( - p_children[index:], - until_line_old, - line_offset - ) - # Match all the nodes that are in the wanted range. - if copied_nodes: - self._copy_count += 1 - - from_ = copied_nodes[0].get_start_pos_of_prefix()[0] + line_offset - to = self._nodes_stack.parsed_until_line - - debug.dbg('diff actually copy %s to %s', from_, to) - # Since there are potential bugs that might loop here endlessly, we - # just stop here. 
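            # (Editor's gloss.) Progress guard: every pass through this loop
            # must either advance parsed_until_line or have copied nothing;
            # anything else would mean looping without consuming input.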
- assert last_until_line != self._nodes_stack.parsed_until_line \ - or not copied_nodes, last_until_line - last_until_line = self._nodes_stack.parsed_until_line - - def _get_old_line_stmt(self, old_line): - leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True) - - if _ends_with_newline(leaf): - leaf = leaf.get_next_leaf() - if leaf.get_start_pos_of_prefix()[0] == old_line: - node = leaf - while node.parent.type not in ('file_input', 'suite'): - node = node.parent - return node - # Must be on the same line. Otherwise we need to parse that bit. - return None - - def _get_before_insertion_node(self): - if self._nodes_stack.is_empty(): - return None - - line = self._nodes_stack.parsed_until_line + 1 - node = self._new_module.get_last_leaf() - while True: - parent = node.parent - if parent.type in ('suite', 'file_input'): - assert node.end_pos[0] <= line - assert node.end_pos[1] == 0 or '\n' in self._prefix - return node - node = parent - - def _parse(self, until_line): - """ - Parses at least until the given line, but might just parse more until a - valid state is reached. - """ - last_until_line = 0 - while until_line > self._nodes_stack.parsed_until_line: - node = self._try_parse_part(until_line) - nodes = self._get_children_nodes(node) - #self._insert_nodes(nodes) - - self._nodes_stack.add_parsed_nodes(nodes) - debug.dbg( - 'parse part %s to %s (to %s in parser)', - nodes[0].get_start_pos_of_prefix()[0], - self._nodes_stack.parsed_until_line, - node.end_pos[0] - 1 - ) - # Since the tokenizer sometimes has bugs, we cannot be sure that - # this loop terminates. Therefore assert that there's always a - # change. - assert last_until_line != self._nodes_stack.parsed_until_line, last_until_line - last_until_line = self._nodes_stack.parsed_until_line - - def _get_children_nodes(self, node): - nodes = node.children - first_element = nodes[0] - # TODO this looks very strange... - if first_element.type == 'error_leaf' and \ - first_element.original_type == 'indent': - assert False, str(nodes) - - return nodes - - def _try_parse_part(self, until_line): - """ - Sets up a normal parser that uses a spezialized tokenizer to only parse - until a certain position (or a bit longer if the statement hasn't - ended. - """ - self._parser_count += 1 - # TODO speed up, shouldn't copy the whole list all the time. - # memoryview? - parsed_until_line = self._nodes_stack.parsed_until_line - lines_after = self._parser_lines_new[parsed_until_line:] - #print('parse_content', parsed_until_line, lines_after, until_line) - tokens = self._diff_tokenize( - lines_after, - until_line, - line_offset=parsed_until_line - ) - self._active_parser = Parser( - self._grammar, - error_recovery=True - ) - return self._active_parser.parse(tokens=tokens) - - def _diff_tokenize(self, lines, until_line, line_offset=0): - is_first_token = True - omitted_first_indent = False - indents = [] - tokens = generate_tokens(lines, use_exact_op_types=True) - stack = self._active_parser.pgen_parser.stack - for typ, string, start_pos, prefix in tokens: - start_pos = start_pos[0] + line_offset, start_pos[1] - if typ == INDENT: - indents.append(start_pos[1]) - if is_first_token: - omitted_first_indent = True - # We want to get rid of indents that are only here because - # we only parse part of the file. These indents would only - # get parsed as error leafs, which doesn't make any sense. 
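                    # (Editor's gloss.) The ``continue`` below swallows that
                    # leading INDENT; ``omitted_first_indent`` compensates for
                    # it again in the DEDENT and NEWLINE branches further down.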
- is_first_token = False - continue - is_first_token = False - - if typ == DEDENT: - indents.pop() - if omitted_first_indent and not indents: - # We are done here, only thing that can come now is an - # endmarker or another dedented code block. - typ, string, start_pos, prefix = next(tokens) - if '\n' in prefix: - prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix) - else: - prefix = '' - yield TokenInfo(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix) - break - elif typ == NEWLINE and start_pos[0] >= until_line: - yield TokenInfo(typ, string, start_pos, prefix) - # Check if the parser is actually in a valid suite state. - if suite_or_file_input_is_valid(self._grammar, stack): - start_pos = start_pos[0] + 1, 0 - while len(indents) > int(omitted_first_indent): - indents.pop() - yield TokenInfo(DEDENT, '', start_pos, '') - - yield TokenInfo(ENDMARKER, '', start_pos, '') - break - else: - continue - - yield TokenInfo(typ, string, start_pos, prefix) - - -class _NodesStackNode(object): - ChildrenGroup = namedtuple('ChildrenGroup', 'children line_offset last_line_offset_leaf') - - def __init__(self, tree_node, parent=None): - self.tree_node = tree_node - self.children_groups = [] - self.parent = parent - - def close(self): - children = [] - for children_part, line_offset, last_line_offset_leaf in self.children_groups: - if line_offset != 0: - try: - _update_positions( - children_part, line_offset, last_line_offset_leaf) - except _PositionUpdatingFinished: - pass - children += children_part - self.tree_node.children = children - # Reset the parents - for node in children: - node.parent = self.tree_node - - def add(self, children, line_offset=0, last_line_offset_leaf=None): - group = self.ChildrenGroup(children, line_offset, last_line_offset_leaf) - self.children_groups.append(group) - - def get_last_line(self, suffix): - line = 0 - if self.children_groups: - children_group = self.children_groups[-1] - last_leaf = children_group.children[-1].get_last_leaf() - line = last_leaf.end_pos[0] - - # Calculate the line offsets - offset = children_group.line_offset - if offset: - # In case the line_offset is not applied to this specific leaf, - # just ignore it. - if last_leaf.line <= children_group.last_line_offset_leaf.line: - line += children_group.line_offset - - # Newlines end on the next line, which means that they would cover - # the next line. That line is not fully parsed at this point. - if _ends_with_newline(last_leaf, suffix): - line -= 1 - line += suffix.count('\n') - return line - - -class _NodesStack(object): - endmarker_type = 'endmarker' - - def __init__(self, module): - # Top of stack - self._tos = self._base_node = _NodesStackNode(module) - self._module = module - self._last_prefix = '' - self.prefix = '' - - def is_empty(self): - return not self._base_node.children - - @property - def parsed_until_line(self): - return self._tos.get_last_line(self.prefix) - - def _get_insertion_node(self, indentation_node): - indentation = indentation_node.start_pos[1] - - # find insertion node - node = self._tos - while True: - tree_node = node.tree_node - if tree_node.type == 'suite': - # A suite starts with NEWLINE, ... - node_indentation = tree_node.children[1].start_pos[1] - - if indentation >= node_indentation: # Not a Dedent - # We might be at the most outer layer: modules. We - # don't want to depend on the first statement - # having the right indentation. 
- return node - - elif tree_node.type == 'file_input': - return node - - node = self._close_tos() - - def _close_tos(self): - self._tos.close() - self._tos = self._tos.parent - return self._tos - - def add_parsed_nodes(self, tree_nodes): - tree_nodes = self._remove_endmarker(tree_nodes) - if not tree_nodes: - return - - assert tree_nodes[0].type != 'newline' - - node = self._get_insertion_node(tree_nodes[0]) - assert node.tree_node.type in ('suite', 'file_input') - node.add(tree_nodes) - self._update_tos(tree_nodes[-1]) - - def _remove_endmarker(self, tree_nodes): - """ - Helps cleaning up the tree nodes that get inserted. - """ - last_leaf = tree_nodes[-1].get_last_leaf() - is_endmarker = last_leaf.type == self.endmarker_type - self._last_prefix = '' - if is_endmarker: - try: - separation = last_leaf.prefix.rindex('\n') - except ValueError: - pass - else: - # Remove the whitespace part of the prefix after a newline. - # That is not relevant if parentheses were opened. Always parse - # until the end of a line. - last_leaf.prefix, self._last_prefix = \ - last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:] - - first_leaf = tree_nodes[0].get_first_leaf() - first_leaf.prefix = self.prefix + first_leaf.prefix - self.prefix = '' - - if is_endmarker: - self.prefix = last_leaf.prefix - - tree_nodes = tree_nodes[:-1] - - return tree_nodes - - def copy_nodes(self, tree_nodes, until_line, line_offset): - """ - Copies tree nodes from the old parser tree. - - Returns the number of tree nodes that were copied. - """ - tos = self._get_insertion_node(tree_nodes[0]) - - new_nodes, self._tos = self._copy_nodes(tos, tree_nodes, until_line, line_offset) - return new_nodes - - def _copy_nodes(self, tos, nodes, until_line, line_offset): - new_nodes = [] - - new_tos = tos - for node in nodes: - if node.type == 'endmarker': - # Endmarkers just distort all the checks below. Remove them. - break - - if node.start_pos[0] > until_line: - break - # TODO this check might take a bit of time for large files. We - # might want to change this to do more intelligent guessing or - # binary search. - if _get_last_line(node) > until_line: - # We can split up functions and classes later. - if node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite': - new_nodes.append(node) - break - - new_nodes.append(node) - - if not new_nodes: - return [], tos - - last_node = new_nodes[-1] - line_offset_index = -1 - if last_node.type in ('classdef', 'funcdef'): - suite = last_node.children[-1] - if suite.type == 'suite': - suite_tos = _NodesStackNode(suite) - # Don't need to pass line_offset here, it's already done by the - # parent. - suite_nodes, recursive_tos = self._copy_nodes( - suite_tos, suite.children, until_line, line_offset) - if len(suite_nodes) < 2: - # A suite only with newline is not valid. - new_nodes.pop() - else: - suite_tos.parent = tos - new_tos = recursive_tos - line_offset_index = -2 - - elif (new_nodes[-1].type in ('error_leaf', 'error_node') or - _is_flow_node(new_nodes[-1])): - # Error leafs/nodes don't have a defined start/end. Error - # nodes might not end with a newline (e.g. if there's an - # open `(`). Therefore ignore all of them unless they are - # succeeded with valid parser state. - # If we copy flows at the end, they might be continued - # after the copy limit (in the new parser). - # In this while loop we try to remove until we find a newline. 
- new_nodes.pop() - while new_nodes: - last_node = new_nodes[-1] - if last_node.get_last_leaf().type == 'newline': - break - new_nodes.pop() - - if new_nodes: - try: - last_line_offset_leaf = new_nodes[line_offset_index].get_last_leaf() - except IndexError: - line_offset = 0 - # In this case we don't have to calculate an offset, because - # there's no children to be managed. - last_line_offset_leaf = None - tos.add(new_nodes, line_offset, last_line_offset_leaf) - return new_nodes, new_tos - - def _update_tos(self, tree_node): - if tree_node.type in ('suite', 'file_input'): - self._tos = _NodesStackNode(tree_node, self._tos) - self._tos.add(list(tree_node.children)) - self._update_tos(tree_node.children[-1]) - elif tree_node.type in ('classdef', 'funcdef'): - self._update_tos(tree_node.children[-1]) - - def close(self): - while self._tos is not None: - self._close_tos() - - # Add an endmarker. - try: - last_leaf = self._module.get_last_leaf() - end_pos = list(last_leaf.end_pos) - except IndexError: - end_pos = [1, 0] - lines = splitlines(self.prefix) - assert len(lines) > 0 - if len(lines) == 1: - end_pos[1] += len(lines[0]) - else: - end_pos[0] += len(lines) - 1 - end_pos[1] = len(lines[-1]) - - endmarker = EndMarker('', tuple(end_pos), self.prefix + self._last_prefix) - endmarker.parent = self._module - self._module.children.append(endmarker) diff --git a/jedi/parser/python/grammar2.7.txt b/jedi/parser/python/grammar2.7.txt deleted file mode 100644 index 515dea64..00000000 --- a/jedi/parser/python/grammar2.7.txt +++ /dev/null @@ -1,152 +0,0 @@ -# Grammar for 2to3. This grammar supports Python 2.x and 3.x. - -# Note: Changing the grammar specified in this file will most likely -# require corresponding changes in the parser module -# (../Modules/parsermodule.c). If you can't make the changes to -# that module yourself, please co-ordinate the required changes -# with someone who can; ask around on python-dev for help. Fred -# Drake will probably be listening there. - -# NOTE WELL: You should also follow all the steps listed in PEP 306, -# "How to Change Python's Grammar" - - -# Start symbols for the grammar: -# file_input is a module or sequence of commands read from an input file; -# single_input is a single interactive statement; -# eval_input is the input for the eval() and input() functions. -# NB: compound_stmt in single_input is followed by extra NEWLINE! 
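# (Editor's note) The notation in this file is pgen's EBNF dialect, as
# described in pgen.generate_grammar(): '*' repetition, '+' at-least-once
# repetition, '[]' optional parts, '|' alternatives, '()' grouping. It is not
# EBNF according to ISO/IEC 14977.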
-file_input: (NEWLINE | stmt)* ENDMARKER -single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE -eval_input: testlist NEWLINE* ENDMARKER - -decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE -decorators: decorator+ -decorated: decorators (classdef | funcdef) -funcdef: 'def' NAME parameters ['->' test] ':' suite -parameters: '(' [typedargslist] ')' -typedargslist: ((tfpdef ['=' test] ',')* - ('*' [tname] (',' tname ['=' test])* [',' '**' tname] | '**' tname) - | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) -tname: NAME [':' test] -tfpdef: tname | '(' tfplist ')' -tfplist: tfpdef (',' tfpdef)* [','] -varargslist: ((vfpdef ['=' test] ',')* - ('*' [vname] (',' vname ['=' test])* [',' '**' vname] | '**' vname) - | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) -vname: NAME -vfpdef: vname | '(' vfplist ')' -vfplist: vfpdef (',' vfpdef)* [','] - -stmt: simple_stmt | compound_stmt -simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | - import_stmt | global_stmt | exec_stmt | assert_stmt) -expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist_star_expr))*) -testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] -augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | - '<<=' | '>>=' | '**=' | '//=') -# For normal assignments, additional restrictions enforced by the interpreter -print_stmt: 'print' ( [ test (',' test)* [','] ] | - '>>' test [ (',' test)+ [','] ] ) -del_stmt: 'del' exprlist -pass_stmt: 'pass' -flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt -break_stmt: 'break' -continue_stmt: 'continue' -return_stmt: 'return' [testlist] -yield_stmt: yield_expr -raise_stmt: 'raise' [test [',' test [',' test]]] -import_stmt: import_name | import_from -import_name: 'import' dotted_as_names -# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS -import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) - 'import' ('*' | '(' import_as_names ')' | import_as_names)) -import_as_name: NAME ['as' NAME] -dotted_as_name: dotted_name ['as' NAME] -import_as_names: import_as_name (',' import_as_name)* [','] -dotted_as_names: dotted_as_name (',' dotted_as_name)* -dotted_name: NAME ('.' NAME)* -global_stmt: 'global' NAME (',' NAME)* -exec_stmt: 'exec' expr ['in' test [',' test]] -assert_stmt: 'assert' test [',' test] - -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated -if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] -while_stmt: 'while' test ':' suite ['else' ':' suite] -for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] -try_stmt: ('try' ':' suite - ((except_clause ':' suite)+ - ['else' ':' suite] - ['finally' ':' suite] | - 'finally' ':' suite)) -with_stmt: 'with' with_item (',' with_item)* ':' suite -with_item: test ['as' expr] -with_var: 'as' expr -# NB compile.c makes sure that the default except clause is last -except_clause: 'except' [test [(',' | 'as') test]] -# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows -# classes and functions to be empty, which is beneficial for autocompletion. 
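# (Editor's note) Upstream CPython uses 'stmt+' here; 'stmt*' below is the
# relaxation the comment above describes.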
-suite: simple_stmt | NEWLINE INDENT stmt* DEDENT - -# Backward compatibility cruft to support: -# [ x for x in lambda: True, lambda: False if x() ] -# even while also allowing: -# lambda x: 5 if x else 2 -# (But not a mix of the two) -testlist_safe: old_test [(',' old_test)+ [',']] -old_test: or_test | old_lambdef -old_lambdef: 'lambda' [varargslist] ':' old_test - -test: or_test ['if' or_test 'else' test] | lambdef -or_test: and_test ('or' and_test)* -and_test: not_test ('and' not_test)* -not_test: 'not' not_test | comparison -comparison: expr (comp_op expr)* -comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' -star_expr: '*' expr -expr: xor_expr ('|' xor_expr)* -xor_expr: and_expr ('^' and_expr)* -and_expr: shift_expr ('&' shift_expr)* -shift_expr: arith_expr (('<<'|'>>') arith_expr)* -arith_expr: term (('+'|'-') term)* -term: factor (('*'|'/'|'%'|'//') factor)* -factor: ('+'|'-'|'~') factor | power -power: atom trailer* ['**' factor] -atom: ('(' [yield_expr|testlist_comp] ')' | - '[' [testlist_comp] ']' | - '{' [dictorsetmaker] '}' | - '`' testlist1 '`' | - NAME | NUMBER | STRING+ | '.' '.' '.') -# Modification by David Halter, remove `testlist_gexp` and `listmaker` -testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) -lambdef: 'lambda' [varargslist] ':' test -trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME -subscriptlist: subscript (',' subscript)* [','] -subscript: test | [test] ':' [test] [sliceop] -sliceop: ':' [test] -exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] -testlist: test (',' test)* [','] -# Modification by David Halter, dictsetmaker -> dictorsetmaker (so that it's -# the same as in the 3.4 grammar). -dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | - (test (comp_for | (',' test)* [','])) ) - -classdef: 'class' NAME ['(' [arglist] ')'] ':' suite - -arglist: (argument ',')* (argument [','] - |'*' test (',' argument)* [',' '**' test] - |'**' test) -argument: test [comp_for] | test '=' test # Really [keyword '='] test - -comp_iter: comp_for | comp_if -comp_for: 'for' exprlist 'in' testlist_safe [comp_iter] -comp_if: 'if' old_test [comp_iter] - -testlist1: test (',' test)* - -# not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: NAME - -yield_expr: 'yield' [testlist] diff --git a/jedi/parser/python/grammar3.4.txt b/jedi/parser/python/grammar3.4.txt deleted file mode 100644 index d4a32b8e..00000000 --- a/jedi/parser/python/grammar3.4.txt +++ /dev/null @@ -1,135 +0,0 @@ -# Grammar for Python - -# Note: Changing the grammar specified in this file will most likely -# require corresponding changes in the parser module -# (../Modules/parsermodule.c). If you can't make the changes to -# that module yourself, please co-ordinate the required changes -# with someone who can; ask around on python-dev for help. Fred -# Drake will probably be listening there. - -# NOTE WELL: You should also follow all the steps listed in PEP 306, -# "How to Change Python's Grammar" - -# Start symbols for the grammar: -# single_input is a single interactive statement; -# file_input is a module or sequence of commands read from an input file; -# eval_input is the input for the eval() functions. -# NB: compound_stmt in single_input is followed by extra NEWLINE! 
-file_input: (NEWLINE | stmt)* ENDMARKER -single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE -eval_input: testlist NEWLINE* ENDMARKER - -decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE -decorators: decorator+ -decorated: decorators (classdef | funcdef) -funcdef: 'def' NAME parameters ['->' test] ':' suite -parameters: '(' [typedargslist] ')' -typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' - ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] - | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) -tfpdef: NAME [':' test] -varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' - ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] - | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) -vfpdef: NAME - -stmt: simple_stmt | compound_stmt -simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | - import_stmt | global_stmt | nonlocal_stmt | assert_stmt) -expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist_star_expr))*) -testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] -augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | - '<<=' | '>>=' | '**=' | '//=') -# For normal assignments, additional restrictions enforced by the interpreter -del_stmt: 'del' exprlist -pass_stmt: 'pass' -flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt -break_stmt: 'break' -continue_stmt: 'continue' -return_stmt: 'return' [testlist] -yield_stmt: yield_expr -raise_stmt: 'raise' [test ['from' test]] -import_stmt: import_name | import_from -import_name: 'import' dotted_as_names -# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS -import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) - 'import' ('*' | '(' import_as_names ')' | import_as_names)) -import_as_name: NAME ['as' NAME] -dotted_as_name: dotted_name ['as' NAME] -import_as_names: import_as_name (',' import_as_name)* [','] -dotted_as_names: dotted_as_name (',' dotted_as_name)* -dotted_name: NAME ('.' NAME)* -global_stmt: 'global' NAME (',' NAME)* -nonlocal_stmt: 'nonlocal' NAME (',' NAME)* -assert_stmt: 'assert' test [',' test] - -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated -if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] -while_stmt: 'while' test ':' suite ['else' ':' suite] -for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] -try_stmt: ('try' ':' suite - ((except_clause ':' suite)+ - ['else' ':' suite] - ['finally' ':' suite] | - 'finally' ':' suite)) -with_stmt: 'with' with_item (',' with_item)* ':' suite -with_item: test ['as' expr] -# NB compile.c makes sure that the default except clause is last -except_clause: 'except' [test ['as' NAME]] -# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows -# classes and functions to be empty, which is beneficial for autocompletion. -suite: simple_stmt | NEWLINE INDENT stmt* DEDENT - -test: or_test ['if' or_test 'else' test] | lambdef -test_nocond: or_test | lambdef_nocond -lambdef: 'lambda' [varargslist] ':' test -lambdef_nocond: 'lambda' [varargslist] ':' test_nocond -or_test: and_test ('or' and_test)* -and_test: not_test ('and' not_test)* -not_test: 'not' not_test | comparison -comparison: expr (comp_op expr)* -# <> isn't actually a valid comparison operator in Python. 
-# It's here for the
-# sake of a __future__ import described in PEP 401
-comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
-star_expr: '*' expr
-expr: xor_expr ('|' xor_expr)*
-xor_expr: and_expr ('^' and_expr)*
-and_expr: shift_expr ('&' shift_expr)*
-shift_expr: arith_expr (('<<'|'>>') arith_expr)*
-arith_expr: term (('+'|'-') term)*
-term: factor (('*'|'/'|'%'|'//') factor)*
-factor: ('+'|'-'|'~') factor | power
-power: atom trailer* ['**' factor]
-atom: ('(' [yield_expr|testlist_comp] ')' |
- '[' [testlist_comp] ']' |
- '{' [dictorsetmaker] '}' |
- NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
-testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
-trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
-subscriptlist: subscript (',' subscript)* [',']
-subscript: test | [test] ':' [test] [sliceop]
-sliceop: ':' [test]
-exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
-testlist: test (',' test)* [',']
-dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
- (test (comp_for | (',' test)* [','])) )
-
-classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
-
-arglist: (argument ',')* (argument [',']
- |'*' test (',' argument)* [',' '**' test]
- |'**' test)
-# The reason that keywords are test nodes instead of NAME is that using NAME
-# results in an ambiguity. ast.c makes sure it's a NAME.
-argument: test [comp_for] | test '=' test  # Really [keyword '='] test
-comp_iter: comp_for | comp_if
-comp_for: 'for' exprlist 'in' or_test [comp_iter]
-comp_if: 'if' test_nocond [comp_iter]
-
-# not used in grammar, but may appear in "node" passed from Parser to Compiler
-encoding_decl: NAME
-
-yield_expr: 'yield' [yield_arg]
-yield_arg: 'from' test | testlist
diff --git a/jedi/parser/python/grammar3.5.txt b/jedi/parser/python/grammar3.5.txt
deleted file mode 100644
index 96a72718..00000000
--- a/jedi/parser/python/grammar3.5.txt
+++ /dev/null
@@ -1,154 +0,0 @@
-# Grammar for Python
-
-# Note: Changing the grammar specified in this file will most likely
-# require corresponding changes in the parser module
-# (../Modules/parsermodule.c). If you can't make the changes to
-# that module yourself, please co-ordinate the required changes
-# with someone who can; ask around on python-dev for help. Fred
-# Drake will probably be listening there.
-
-# NOTE WELL: You should also follow all the steps listed at
-# https://docs.python.org/devguide/grammar.html
-
-# Start symbols for the grammar:
-# single_input is a single interactive statement;
-# file_input is a module or sequence of commands read from an input file;
-# eval_input is the input for the eval() functions.
-# NB: compound_stmt in single_input is followed by extra NEWLINE!
-file_input: (NEWLINE | stmt)* ENDMARKER -single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE -eval_input: testlist NEWLINE* ENDMARKER - -decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE -decorators: decorator+ -decorated: decorators (classdef | funcdef | async_funcdef) - -# NOTE: Reinoud Elhorst, using ASYNC/AWAIT keywords instead of tokens -# skipping python3.5 compatibility, in favour of 3.7 solution -async_funcdef: 'async' funcdef -funcdef: 'def' NAME parameters ['->' test] ':' suite - -parameters: '(' [typedargslist] ')' -typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' - ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] - | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) -tfpdef: NAME [':' test] -varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' - ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] - | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) -vfpdef: NAME - -stmt: simple_stmt | compound_stmt -simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | - import_stmt | global_stmt | nonlocal_stmt | assert_stmt) -expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist_star_expr))*) -testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] -augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | - '<<=' | '>>=' | '**=' | '//=') -# For normal assignments, additional restrictions enforced by the interpreter -del_stmt: 'del' exprlist -pass_stmt: 'pass' -flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt -break_stmt: 'break' -continue_stmt: 'continue' -return_stmt: 'return' [testlist] -yield_stmt: yield_expr -raise_stmt: 'raise' [test ['from' test]] -import_stmt: import_name | import_from -import_name: 'import' dotted_as_names -# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS -import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) - 'import' ('*' | '(' import_as_names ')' | import_as_names)) -import_as_name: NAME ['as' NAME] -dotted_as_name: dotted_name ['as' NAME] -import_as_names: import_as_name (',' import_as_name)* [','] -dotted_as_names: dotted_as_name (',' dotted_as_name)* -dotted_name: NAME ('.' NAME)* -global_stmt: 'global' NAME (',' NAME)* -nonlocal_stmt: 'nonlocal' NAME (',' NAME)* -assert_stmt: 'assert' test [',' test] - -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt -async_stmt: 'async' (funcdef | with_stmt | for_stmt) -if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] -while_stmt: 'while' test ':' suite ['else' ':' suite] -for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] -try_stmt: ('try' ':' suite - ((except_clause ':' suite)+ - ['else' ':' suite] - ['finally' ':' suite] | - 'finally' ':' suite)) -with_stmt: 'with' with_item (',' with_item)* ':' suite -with_item: test ['as' expr] -# NB compile.c makes sure that the default except clause is last -except_clause: 'except' [test ['as' NAME]] -# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows -# classes and functions to be empty, which is beneficial for autocompletion. 
-suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
-
-test: or_test ['if' or_test 'else' test] | lambdef
-test_nocond: or_test | lambdef_nocond
-lambdef: 'lambda' [varargslist] ':' test
-lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
-or_test: and_test ('or' and_test)*
-and_test: not_test ('and' not_test)*
-not_test: 'not' not_test | comparison
-comparison: expr (comp_op expr)*
-# <> isn't actually a valid comparison operator in Python. It's here for the
-# sake of a __future__ import described in PEP 401 (which really works :-)
-comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
-star_expr: '*' expr
-expr: xor_expr ('|' xor_expr)*
-xor_expr: and_expr ('^' and_expr)*
-and_expr: shift_expr ('&' shift_expr)*
-shift_expr: arith_expr (('<<'|'>>') arith_expr)*
-arith_expr: term (('+'|'-') term)*
-term: factor (('*'|'@'|'/'|'%'|'//') factor)*
-factor: ('+'|'-'|'~') factor | power
-power: atom_expr ['**' factor]
-atom_expr: ['await'] atom trailer*
-atom: ('(' [yield_expr|testlist_comp] ')' |
- '[' [testlist_comp] ']' |
- '{' [dictorsetmaker] '}' |
- NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
-testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
-trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
-subscriptlist: subscript (',' subscript)* [',']
-subscript: test | [test] ':' [test] [sliceop]
-sliceop: ':' [test]
-exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
-testlist: test (',' test)* [',']
-dictorsetmaker: ( ((test ':' test | '**' expr)
- (comp_for | (',' (test ':' test | '**' expr))* [','])) |
- ((test | star_expr)
- (comp_for | (',' (test | star_expr))* [','])) )
-
-classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
-
-arglist: argument (',' argument)* [',']
-
-# The reason that keywords are test nodes instead of NAME is that using NAME
-# results in an ambiguity. ast.c makes sure it's a NAME.
-# "test '=' test" is really "keyword '=' test", but we have no such token.
-# These need to be in a single rule to avoid grammar that is ambiguous
-# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
-# we explicitly match '*' here, too, to give it proper precedence.
-# Illegal combinations and orderings are blocked in ast.c:
-# multiple (test comp_for) arguments are blocked; keyword unpackings
-# that precede iterable unpackings are blocked; etc.
-argument: ( test [comp_for] |
- test '=' test |
- '**' test |
- '*' test )
-
-comp_iter: comp_for | comp_if
-comp_for: 'for' exprlist 'in' or_test [comp_iter]
-comp_if: 'if' test_nocond [comp_iter]
-
-# not used in grammar, but may appear in "node" passed from Parser to Compiler
-encoding_decl: NAME
-
-yield_expr: 'yield' [yield_arg]
-yield_arg: 'from' test | testlist
diff --git a/jedi/parser/python/grammar3.6.txt b/jedi/parser/python/grammar3.6.txt
deleted file mode 100644
index b44a5698..00000000
--- a/jedi/parser/python/grammar3.6.txt
+++ /dev/null
@@ -1,161 +0,0 @@
-# Grammar for Python
-
-# Note: Changing the grammar specified in this file will most likely
-# require corresponding changes in the parser module
-# (../Modules/parsermodule.c). If you can't make the changes to
-# that module yourself, please co-ordinate the required changes
-# with someone who can; ask around on python-dev for help. Fred
-# Drake will probably be listening there.
-
-# NOTE WELL: You should also follow all the steps listed at
-# https://docs.python.org/devguide/grammar.html
-
-# Start symbols for the grammar:
-# file_input is a module or sequence of commands read from an input file;
-# single_input is a single interactive statement;
-# eval_input is the input for the eval() functions.
-# NB: compound_stmt in single_input is followed by extra NEWLINE!
-file_input: (NEWLINE | stmt)* ENDMARKER
-single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
-eval_input: testlist NEWLINE* ENDMARKER
-
-decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
-decorators: decorator+
-decorated: decorators (classdef | funcdef | async_funcdef)
-
-# NOTE: Francisco Souza/Reinoud Elhorst, using ASYNC/'await' keywords instead of
-# skipping python3.5+ compatibility, in favour of 3.7 solution
-async_funcdef: 'async' funcdef
-funcdef: 'def' NAME parameters ['->' test] ':' suite
-
-parameters: '(' [typedargslist] ')'
-typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
- '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
- | '**' tfpdef [',']]]
- | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
- | '**' tfpdef [','])
-tfpdef: NAME [':' test]
-varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
- '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
- | '**' vfpdef [',']]]
- | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
- | '**' vfpdef [',']
-)
-vfpdef: NAME
-
-stmt: simple_stmt | compound_stmt
-simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
-small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
- import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
-expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
- ('=' (yield_expr|testlist_star_expr))*)
-annassign: ':' test ['=' test]
-testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
-augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
- '<<=' | '>>=' | '**=' | '//=')
-# For normal and annotated assignments, additional restrictions enforced by the interpreter
-del_stmt: 'del' exprlist
-pass_stmt: 'pass'
-flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
-break_stmt: 'break'
-continue_stmt: 'continue'
-return_stmt: 'return' [testlist]
-yield_stmt: yield_expr
-raise_stmt: 'raise' [test ['from' test]]
-import_stmt: import_name | import_from
-import_name: 'import' dotted_as_names
-# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
-import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
- 'import' ('*' | '(' import_as_names ')' | import_as_names))
-import_as_name: NAME ['as' NAME]
-dotted_as_name: dotted_name ['as' NAME]
-import_as_names: import_as_name (',' import_as_name)* [',']
-dotted_as_names: dotted_as_name (',' dotted_as_name)*
-dotted_name: NAME ('.' NAME)*
-global_stmt: 'global' NAME (',' NAME)*
-nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
-assert_stmt: 'assert' test [',' test]
-
-compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
-async_stmt: 'async' (funcdef | with_stmt | for_stmt)
-if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
-while_stmt: 'while' test ':' suite ['else' ':' suite]
-for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
-try_stmt: ('try' ':' suite
- ((except_clause ':' suite)+
- ['else' ':' suite]
- ['finally' ':' suite] |
- 'finally' ':' suite))
-with_stmt: 'with' with_item (',' with_item)* ':' suite
-with_item: test ['as' expr]
-# NB compile.c makes sure that the default except clause is last
-except_clause: 'except' [test ['as' NAME]]
-# Edit by Francisco Souza/David Halter: The stmt is now optional. This reflects
-# how Jedi allows classes and functions to be empty, which is beneficial for
-# autocompletion.
-suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
-
-test: or_test ['if' or_test 'else' test] | lambdef
-test_nocond: or_test | lambdef_nocond
-lambdef: 'lambda' [varargslist] ':' test
-lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
-or_test: and_test ('or' and_test)*
-and_test: not_test ('and' not_test)*
-not_test: 'not' not_test | comparison
-comparison: expr (comp_op expr)*
-# <> isn't actually a valid comparison operator in Python. It's here for the
-# sake of a __future__ import described in PEP 401 (which really works :-)
-comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
-star_expr: '*' expr
-expr: xor_expr ('|' xor_expr)*
-xor_expr: and_expr ('^' and_expr)*
-and_expr: shift_expr ('&' shift_expr)*
-shift_expr: arith_expr (('<<'|'>>') arith_expr)*
-arith_expr: term (('+'|'-') term)*
-term: factor (('*'|'@'|'/'|'%'|'//') factor)*
-factor: ('+'|'-'|'~') factor | power
-power: atom_expr ['**' factor]
-atom_expr: ['await'] atom trailer*
-atom: ('(' [yield_expr|testlist_comp] ')' |
- '[' [testlist_comp] ']' |
- '{' [dictorsetmaker] '}' |
- NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
-testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
-trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
-subscriptlist: subscript (',' subscript)* [',']
-subscript: test | [test] ':' [test] [sliceop]
-sliceop: ':' [test]
-exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
-testlist: test (',' test)* [',']
-dictorsetmaker: ( ((test ':' test | '**' expr)
- (comp_for | (',' (test ':' test | '**' expr))* [','])) |
- ((test | star_expr)
- (comp_for | (',' (test | star_expr))* [','])) )
-
-classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
-
-arglist: argument (',' argument)* [',']
-
-# The reason that keywords are test nodes instead of NAME is that using NAME
-# results in an ambiguity. ast.c makes sure it's a NAME.
-# "test '=' test" is really "keyword '=' test", but we have no such token.
-# These need to be in a single rule to avoid grammar that is ambiguous
-# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
-# we explicitly match '*' here, too, to give it proper precedence.
-# Illegal combinations and orderings are blocked in ast.c:
-# multiple (test comp_for) arguments are blocked; keyword unpackings
-# that precede iterable unpackings are blocked; etc.
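
The "blocked" orderings above parse under the LL(1) `argument` rule that
follows, but stock CPython rejects them afterwards. A small demonstration
(illustrative only, not part of this patch):

    # Both strings satisfy the grammar rule below, yet the compiler refuses them:
    for src in ("f(x for x in y, 1)",    # a comp_for next to another argument
                "f(**kwargs, *args)"):   # iterable unpacking after keyword unpacking
        try:
            compile(src, "<demo>", "eval")
        except SyntaxError as e:
            print(src, "->", e.msg)
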
-argument: ( test [comp_for] |
- test '=' test |
- '**' test |
- '*' test )
-
-comp_iter: comp_for | comp_if
-comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter]
-comp_if: 'if' test_nocond [comp_iter]
-
-# not used in grammar, but may appear in "node" passed from Parser to Compiler
-encoding_decl: NAME
-
-yield_expr: 'yield' [yield_arg]
-yield_arg: 'from' test | testlist
diff --git a/jedi/parser/python/parser.py b/jedi/parser/python/parser.py
deleted file mode 100644
index 7b91e268..00000000
--- a/jedi/parser/python/parser.py
+++ /dev/null
@@ -1,232 +0,0 @@
-from jedi.parser.python import tree
-from jedi.parser import tokenize
-from jedi.parser.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
- STRING, tok_name)
-from jedi.parser.parser import BaseParser
-from jedi.common import splitlines
-
-
-class Parser(BaseParser):
- """
- This class is used to parse a Python file; it then divides it into a
- class structure of different scopes.
-
- :param grammar: The grammar object of pgen2. Loaded by load_grammar.
- """
-
- node_map = {
- 'expr_stmt': tree.ExprStmt,
- 'classdef': tree.Class,
- 'funcdef': tree.Function,
- 'file_input': tree.Module,
- 'import_name': tree.ImportName,
- 'import_from': tree.ImportFrom,
- 'break_stmt': tree.KeywordStatement,
- 'continue_stmt': tree.KeywordStatement,
- 'return_stmt': tree.ReturnStmt,
- 'raise_stmt': tree.KeywordStatement,
- 'yield_expr': tree.YieldExpr,
- 'del_stmt': tree.KeywordStatement,
- 'pass_stmt': tree.KeywordStatement,
- 'global_stmt': tree.GlobalStmt,
- 'nonlocal_stmt': tree.KeywordStatement,
- 'print_stmt': tree.KeywordStatement,
- 'assert_stmt': tree.AssertStmt,
- 'if_stmt': tree.IfStmt,
- 'with_stmt': tree.WithStmt,
- 'for_stmt': tree.ForStmt,
- 'while_stmt': tree.WhileStmt,
- 'try_stmt': tree.TryStmt,
- 'comp_for': tree.CompFor,
- 'decorator': tree.Decorator,
- 'lambdef': tree.Lambda,
- 'old_lambdef': tree.Lambda,
- 'lambdef_nocond': tree.Lambda,
- }
- default_node = tree.PythonNode
-
- def __init__(self, grammar, error_recovery=True, start_symbol='file_input'):
- super(Parser, self).__init__(grammar, start_symbol, error_recovery=error_recovery)
-
- self.syntax_errors = []
- self._omit_dedent_list = []
- self._indent_counter = 0
-
- # TODO do print absolute import detection here.
- # try:
- # del python_grammar_no_print_statement.keywords["print"]
- # except KeyError:
- # pass # Doesn't exist in the Python 3 grammar.
-
- # if self.options["print_function"]:
- # python_grammar = pygram.python_grammar_no_print_statement
- # else:
-
- def parse(self, tokens):
- if self._error_recovery:
- if self._start_symbol != 'file_input':
- raise NotImplementedError
-
- tokens = self._recovery_tokenize(tokens)
-
- node = super(Parser, self).parse(tokens)
-
- if self._start_symbol == 'file_input' != node.type:
- # If there's only one statement, we get back a non-module. That's
- # not what we want, we want a module, so we add it here:
- node = self.convert_node(
- self._grammar,
- self._grammar.symbol2number['file_input'],
- [node]
- )
-
- return node
-
- def convert_node(self, grammar, type, children):
- """
- Convert raw node information to a PythonBaseNode instance.
-
- This is passed to the parser driver which calls it whenever a reduction of a
- grammar rule produces a new complete node, so that the tree is built
- strictly bottom-up.
- """
- # TODO REMOVE symbol, we don't want type here.
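
In outline, the dispatch this docstring describes can be sketched as follows
(hypothetical stand-in classes, not the real `tree` module):

    class PythonNode(object):                   # stand-in for default_node
        def __init__(self, type_, children):
            self.type, self.children = type_, children

    class Function(PythonNode):                 # stand-in for a node_map entry
        def __init__(self, children):
            super(Function, self).__init__('funcdef', children)

    node_map = {'funcdef': Function}

    def convert_node(symbol, children):
        # Called once per completed reduction, bottom-up, so the children
        # passed in here have already been converted.
        try:
            return node_map[symbol](children)
        except KeyError:
            return PythonNode(symbol, children)
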
- symbol = grammar.number2symbol[type] - try: - return self.node_map[symbol](children) - except KeyError: - if symbol == 'suite': - # We don't want the INDENT/DEDENT in our parser tree. Those - # leaves are just cancer. They are virtual leaves and not real - # ones and therefore have pseudo start/end positions and no - # prefixes. Just ignore them. - children = [children[0]] + children[2:-1] - return self.default_node(symbol, children) - - def convert_leaf(self, grammar, type, value, prefix, start_pos): - # print('leaf', repr(value), token.tok_name[type]) - if type == tokenize.NAME: - if value in grammar.keywords: - return tree.Keyword(value, start_pos, prefix) - else: - return tree.Name(value, start_pos, prefix) - elif type == STRING: - return tree.String(value, start_pos, prefix) - elif type == NUMBER: - return tree.Number(value, start_pos, prefix) - elif type == NEWLINE: - return tree.Newline(value, start_pos, prefix) - elif type == ENDMARKER: - return tree.EndMarker(value, start_pos, prefix) - else: - return tree.Operator(value, start_pos, prefix) - - def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix, - add_token_callback): - """ - This parser is written in a dynamic way, meaning that this parser - allows using different grammars (even non-Python). However, error - recovery is purely written for Python. - """ - if not self._error_recovery: - return super(Parser, self).error_recovery( - grammar, stack, arcs, typ, value, start_pos, prefix, - add_token_callback) - - def current_suite(stack): - # For now just discard everything that is not a suite or - # file_input, if we detect an error. - for index, (dfa, state, (type_, nodes)) in reversed(list(enumerate(stack))): - # `suite` can sometimes be only simple_stmt, not stmt. - symbol = grammar.number2symbol[type_] - if symbol == 'file_input': - break - elif symbol == 'suite' and len(nodes) > 1: - # suites without an indent in them get discarded. - break - return index, symbol, nodes - - index, symbol, nodes = current_suite(stack) - - # print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index) - if self._stack_removal(grammar, stack, arcs, index + 1, value, start_pos): - add_token_callback(typ, value, start_pos, prefix) - else: - if typ == INDENT: - # For every deleted INDENT we have to delete a DEDENT as well. - # Otherwise the parser will get into trouble and DEDENT too early. - self._omit_dedent_list.append(self._indent_counter) - else: - error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix) - stack[-1][2][1].append(error_leaf) - - def _stack_removal(self, grammar, stack, arcs, start_index, value, start_pos): - failed_stack = [] - found = False - all_nodes = [] - for dfa, state, (typ, nodes) in stack[start_index:]: - if nodes: - found = True - if found: - symbol = grammar.number2symbol[typ] - failed_stack.append((symbol, nodes)) - all_nodes += nodes - if failed_stack: - stack[start_index - 1][2][1].append(tree.PythonErrorNode(all_nodes)) - - stack[start_index:] = [] - return failed_stack - - def _recovery_tokenize(self, tokens): - for typ, value, start_pos, prefix in tokens: - # print(tokenize.tok_name[typ], repr(value), start_pos, repr(prefix)) - if typ == DEDENT: - # We need to count indents, because if we just omit any DEDENT, - # we might omit them in the wrong place. 
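
The counting trick is easiest to see in isolation (a simplified sketch with
plain strings instead of the real 4-tuples used in this module):

    def recovery_tokenize(tokens, omit_dedent_list):
        indent_counter = 0
        for tok in tokens:
            if tok == 'INDENT':
                indent_counter += 1
            elif tok == 'DEDENT':
                if omit_dedent_list and omit_dedent_list[-1] == indent_counter:
                    # This DEDENT pairs with an INDENT that error recovery
                    # dropped at exactly this depth, so swallow it.
                    omit_dedent_list.pop()
                    continue
                indent_counter -= 1
            yield tok

    # An INDENT dropped at depth 2 removes exactly one DEDENT at depth 2:
    print(list(recovery_tokenize(['INDENT', 'INDENT', 'DEDENT', 'DEDENT'], [2])))
    # -> ['INDENT', 'INDENT', 'DEDENT']
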
- o = self._omit_dedent_list
- if o and o[-1] == self._indent_counter:
- o.pop()
- continue
-
- self._indent_counter -= 1
- elif typ == INDENT:
- self._indent_counter += 1
-
- yield typ, value, start_pos, prefix
-
-
-def _remove_last_newline(node):
- endmarker = node.children[-1]
- # The newline is either in the endmarker as a prefix or the previous
- # leaf as a newline token.
- prefix = endmarker.prefix
- leaf = endmarker.get_previous_leaf()
- if prefix:
- text = prefix
- else:
- if leaf is None:
- raise ValueError("You're trying to remove a newline from an empty module.")
-
- text = leaf.value
-
- if not text.endswith('\n'):
- raise ValueError("There's no newline at the end, cannot remove it.")
-
- text = text[:-1]
- if prefix:
- endmarker.prefix = text
-
- if leaf is None:
- end_pos = (1, 0)
- else:
- end_pos = leaf.end_pos
-
- lines = splitlines(text, keepends=True)
- if len(lines) == 1:
- end_pos = end_pos[0], end_pos[1] + len(lines[0])
- else:
- end_pos = end_pos[0] + len(lines) - 1, len(lines[-1])
- endmarker.start_pos = end_pos
- else:
- leaf.value = text
- endmarker.start_pos = leaf.end_pos
diff --git a/jedi/parser/python/tree.py b/jedi/parser/python/tree.py
deleted file mode 100644
index 8257f0a5..00000000
--- a/jedi/parser/python/tree.py
+++ /dev/null
@@ -1,1045 +0,0 @@
-"""
-If you know what a syntax tree is, you'll see that this module is pretty much
-that. The classes represent syntax elements like functions and imports.
-
-This is the "business logic" part of the parser. There's a lot of logic here
-that makes it easier for Jedi (and other libraries) to deal with a Python syntax
-tree.
-
-By using `get_code` on a module, you can get back the 1-to-1 representation of
-the input given to the parser. This is important if you are using refactoring.
-
-The easiest way to play with this module is to use :class:`parsing.Parser`.
-:attr:`parsing.Parser.module` holds an instance of :class:`Module`:
-
->>> from jedi.parser.python import parse
->>> parser = parse('import os')
->>> module = parser.get_root_node()
->>> module
-<Module: @1-1>
-
-Any subclass of :class:`Scope`, including :class:`Module`, has an attribute
-:attr:`iter_imports <Scope.iter_imports>`:
-
->>> list(module.iter_imports())
-[<ImportName: import os@1,0>]
-"""
-
-from jedi._compatibility import utf8_repr, unicode
-from jedi.parser.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \
- search_ancestor
-
-
-class DocstringMixin(object):
- __slots__ = ()
-
- def get_doc_node(self):
- """
- Returns the string leaf of a docstring. e.g. ``r'''foo'''``.
- """
- if self.type == 'file_input':
- node = self.children[0]
- elif isinstance(self, ClassOrFunc):
- node = self.children[self.children.index(':') + 1]
- if node.type == 'suite': # Normally a suite
- node = node.children[1] # -> NEWLINE stmt
- else: # ExprStmt
- simple_stmt = self.parent
- c = simple_stmt.parent.children
- index = c.index(simple_stmt)
- if not index:
- return None
- node = c[index - 1]
-
- if node.type == 'simple_stmt':
- node = node.children[0]
- if node.type == 'string':
- return node
- return None
-
-
-class PythonMixin(object):
- """
- Some Python specific utilities.
- """ - __slots__ = () - - def get_definition(self): - if self.type in ('newline', 'endmarker'): - raise ValueError('Cannot get the indentation of whitespace or indentation.') - scope = self - while scope.parent is not None: - parent = scope.parent - if isinstance(scope, (PythonNode, PythonLeaf)) and parent.type != 'simple_stmt': - if scope.type == 'testlist_comp': - try: - if scope.children[1].type == 'comp_for': - return scope.children[1] - except IndexError: - pass - scope = parent - else: - break - return scope - - def get_name_of_position(self, position): - for c in self.children: - if isinstance(c, Leaf): - if c.type == 'name' and c.start_pos <= position <= c.end_pos: - return c - else: - result = c.get_name_of_position(position) - if result is not None: - return result - return None - - -class PythonLeaf(Leaf, PythonMixin): - __slots__ = () - - -class _LeafWithoutNewlines(PythonLeaf): - """ - Simply here to optimize performance. - """ - __slots__ = () - - @property - def end_pos(self): - return self.line, self.indent + len(self.value) - - -# Python base classes -class PythonBaseNode(BaseNode, PythonMixin): - __slots__ = () - - -class PythonNode(Node, PythonMixin): - __slots__ = () - - -class PythonErrorNode(ErrorNode, PythonMixin): - __slots__ = () - - -class PythonErrorLeaf(ErrorLeaf, PythonMixin): - __slots__ = () - - -class EndMarker(_LeafWithoutNewlines): - __slots__ = () - type = 'endmarker' - - -class Newline(PythonLeaf): - """Contains NEWLINE and ENDMARKER tokens.""" - __slots__ = () - type = 'newline' - - @utf8_repr - def __repr__(self): - return "<%s: %s>" % (type(self).__name__, repr(self.value)) - - -class Name(_LeafWithoutNewlines): - """ - A string. Sometimes it is important to know if the string belongs to a name - or not. - """ - type = 'name' - __slots__ = () - - def __repr__(self): - return "<%s: %s@%s,%s>" % (type(self).__name__, self.value, - self.line, self.indent) - - def is_definition(self): - if self.parent.type in ('power', 'atom_expr'): - # In `self.x = 3` self is not a definition, but x is. - return False - - stmt = self.get_definition() - if stmt.type in ('funcdef', 'classdef', 'param'): - return self == stmt.name - elif stmt.type == 'for_stmt': - return self.start_pos < stmt.children[2].start_pos - elif stmt.type == 'try_stmt': - return self.get_previous_sibling() == 'as' - else: - return stmt.type in ('expr_stmt', 'import_name', 'import_from', - 'comp_for', 'with_stmt') \ - and self in stmt.get_defined_names() - - -class Literal(PythonLeaf): - __slots__ = () - - -class Number(Literal): - type = 'number' - __slots__ = () - - -class String(Literal): - type = 'string' - __slots__ = () - - -class _StringComparisonMixin(object): - def __eq__(self, other): - """ - Make comparisons with strings easy. - Improves the readability of the parser. - """ - if isinstance(other, (str, unicode)): - return self.value == other - - return self is other - - def __ne__(self, other): - """Python 2 compatibility.""" - return not self.__eq__(other) - - def __hash__(self): - return hash(self.value) - - -class Operator(_LeafWithoutNewlines, _StringComparisonMixin): - type = 'operator' - __slots__ = () - - -class Keyword(_LeafWithoutNewlines, _StringComparisonMixin): - type = 'keyword' - __slots__ = () - - -class Scope(PythonBaseNode, DocstringMixin): - """ - Super class for the parser tree, which represents the state of a python - text file. - A Scope is either a function, class or lambda. 
- """ - __slots__ = () - - def __init__(self, children): - super(Scope, self).__init__(children) - - def iter_funcdefs(self): - """ - Returns a generator of `funcdef` nodes. - """ - return self._search_in_scope('funcdef') - - def iter_classdefs(self): - """ - Returns a generator of `classdef` nodes. - """ - return self._search_in_scope('classdef') - - def iter_imports(self): - """ - Returns a generator of `import_name` and `import_from` nodes. - """ - return self._search_in_scope('import_name', 'import_from') - - def _search_in_scope(self, *names): - def scan(children): - for element in children: - if element.type in names: - yield element - if element.type in ('suite', 'simple_stmt', 'decorated') \ - or isinstance(element, Flow): - for e in scan(element.children): - yield e - - return scan(self.children) - - def get_suite(self): - """ - Returns the part that is executed by the function. - """ - return self.children[-1] - - def __repr__(self): - try: - name = self.name.value - except AttributeError: - name = '' - - return "<%s: %s@%s-%s>" % (type(self).__name__, name, - self.start_pos[0], self.end_pos[0]) - - -class Module(Scope): - """ - The top scope, which is always a module. - Depending on the underlying parser this may be a full module or just a part - of a module. - """ - __slots__ = ('_used_names',) - type = 'file_input' - - def __init__(self, children): - super(Module, self).__init__(children) - self._used_names = None - - def iter_future_import_names(self): - """ - :return list of str: A list of future import names. - """ - # TODO this is a strange scan and not fully correct. I think Python's - # parser does it in a different way and scans for the first - # statement/import with a tokenizer (to check for syntax changes like - # the future print statement). - for imp in self.iter_imports(): - if imp.type == 'import_from' and imp.level == 0: - for path in imp.get_paths(): - names = [name.value for name in path] - if len(names) == 2 and names[0] == '__future__': - yield names[1] - - def has_explicit_absolute_import(self): - """ - Checks if imports in this module are explicitly absolute, i.e. there - is a ``__future__`` import. - :return bool: - """ - for name in self.iter_future_import_names(): - if name == 'absolute_import': - return True - return False - - def get_used_names(self): - """ - Returns all the `Name` leafs that exist in this module. Tihs includes - both definitions and references of names. - """ - if self._used_names is None: - # Don't directly use self._used_names to eliminate a lookup. - dct = {} - - def recurse(node): - try: - children = node.children - except AttributeError: - if node.type == 'name': - arr = dct.setdefault(node.value, []) - arr.append(node) - else: - for child in children: - recurse(child) - - recurse(self) - self._used_names = dct - return self._used_names - - -class Decorator(PythonBaseNode): - type = 'decorator' - __slots__ = () - - -class ClassOrFunc(Scope): - __slots__ = () - - @property - def name(self): - """ - Returns the `Name` leaf that defines the function or class name. - """ - return self.children[1] - - def get_decorators(self): - """ - :return list of Decorator: - """ - decorated = self.parent - if decorated.type == 'decorated': - if decorated.children[0].type == 'decorators': - return decorated.children[0].children - else: - return decorated.children[:1] - else: - return [] - - -class Class(ClassOrFunc): - """ - Used to store the parsed contents of a python class. - - :param name: The Class name. 
- :type name: str
- :param supers: The super classes of a Class.
- :type supers: list
- :param start_pos: The start position (line, column) of the class.
- :type start_pos: tuple(int, int)
- """
- type = 'classdef'
- __slots__ = ()
-
- def __init__(self, children):
- super(Class, self).__init__(children)
-
- def get_super_arglist(self):
- """
- Returns the `arglist` node that defines the super classes. It returns
- None if there are no arguments.
- """
- if self.children[2] != '(': # Has no parentheses
- return None
- else:
- if self.children[3] == ')': # Empty parentheses
- return None
- else:
- return self.children[3]
-
-
-def _create_params(parent, argslist_list):
- """
- `argslist_list` is a list that can contain an argslist as a first item, but
- may not. It's basically the items between the parameter brackets (which is
- at most one item).
- This function modifies the parser structure. It generates `Param` objects
- from the normal ast. Those param objects do not exist in a normal ast, but
- make the evaluation of the ast tree so much easier.
- You could also say that this function replaces the argslist node with a
- list of Param objects.
- """
- def check_python2_nested_param(node):
- """
- Python 2 allows params to look like ``def x(a, (b, c))``, which is
- basically a way of unpacking tuples in params. Python 3 has ditched
- this behavior. Jedi currently just ignores those constructs.
- """
- return node.type == 'tfpdef' and node.children[0] == '('
-
- try:
- first = argslist_list[0]
- except IndexError:
- return []
-
- if first.type in ('name', 'tfpdef'):
- if check_python2_nested_param(first):
- return [first]
- else:
- return [Param([first], parent)]
- elif first == '*':
- return [first]
- else: # argslist is a `typedargslist` or a `varargslist`.
- children = first.children
- new_children = []
- start = 0
- # Start with offset 1, because the end is higher.
- for end, child in enumerate(children + [None], 1):
- if child is None or child == ',':
- param_children = children[start:end]
- if param_children: # Could as well be comma and then end.
- if check_python2_nested_param(param_children[0]):
- new_children += param_children
- elif param_children[0] == '*' and param_children[1] == ',':
- new_children += param_children
- else:
- new_children.append(Param(param_children, parent))
- start = end
- return new_children
-
-
-class Function(ClassOrFunc):
- """
- Used to store the parsed contents of a python function.
-
- Children::
-
- 0. <Keyword: def>
- 1. <Name>
- 2. parameter list (including open-paren and close-paren <Operator>s)
- 3. or 5. <Operator: :>
- 4. or 6. Node() representing function body
- 3. -> (if annotation is also present)
- 4. annotation (if present)
- """
- type = 'funcdef'
-
- def __init__(self, children):
- super(Function, self).__init__(children)
- parameters = self.children[2] # After `def foo`
- parameters.children[1:-1] = _create_params(parameters, parameters.children[1:-1])
-
- def _get_param_nodes(self):
- return self.children[2].children
-
- @property
- def params(self):
- """
- Returns a list of `Param()`.
- """
- return [p for p in self._get_param_nodes() if p.type == 'param']
-
- @property
- def name(self):
- return self.children[1] # First token after `def`
-
- def iter_yield_exprs(self):
- """
- Returns a generator of `yield_expr`.
- """
- # TODO This is incorrect, yields are also possible in a statement.
- return self._search_in_scope('yield_expr')
-
- def iter_return_stmts(self):
- """
- Returns a generator of `return_stmt`.
- """ - return self._search_in_scope('return_stmt') - - def is_generator(self): - """ - :return bool: Checks if a function is a generator or not. - """ - return next(self.iter_yield_exprs(), None) is not None - - @property - def annotation(self): - """ - Returns the test node after `->` or `None` if there is no annotation. - """ - try: - if self.children[3] == "->": - return self.children[4] - assert self.children[3] == ":" - return None - except IndexError: - return None - -class Lambda(Function): - """ - Lambdas are basically trimmed functions, so give it the same interface. - - Children:: - - 0. - *. for each argument x - -2. - -1. Node() representing body - """ - type = 'lambdef' - __slots__ = () - - def __init__(self, children): - # We don't want to call the Function constructor, call its parent. - super(Function, self).__init__(children) - # Everything between `lambda` and the `:` operator is a parameter. - self.children[1:-2] = _create_params(self, self.children[1:-2]) - - @property - def name(self): - """ - Raises an AttributeError. Lambdas don't have a defined name. - """ - raise AttributeError("lambda is not named.") - - def _get_param_nodes(self): - return self.children[1:-2] - - @property - def annotation(self): - """ - Returns `None`, lambdas don't have annotations. - """ - return None - - def __repr__(self): - return "<%s@%s>" % (self.__class__.__name__, self.start_pos) - - -class Flow(PythonBaseNode): - __slots__ = () - - -class IfStmt(Flow): - type = 'if_stmt' - __slots__ = () - - def get_test_nodes(self): - """ - E.g. returns all the `test` nodes that are named as x, below: - - if x: - pass - elif x: - pass - """ - for i, c in enumerate(self.children): - if c in ('elif', 'if'): - yield self.children[i + 1] - - def get_corresponding_test_node(self, node): - """ - Searches for the branch in which the node is and returns the - corresponding test node (see function above). However if the node is in - the test node itself and not in the suite return None. - """ - start_pos = node.start_pos - for check_node in reversed(list(self.get_test_nodes())): - if check_node.start_pos < start_pos: - if start_pos < check_node.end_pos: - return None - # In this case the node is within the check_node itself, - # not in the suite - else: - return check_node - - def is_node_after_else(self, node): - """ - Checks if a node is defined after `else`. - """ - for c in self.children: - if c == 'else': - if node.start_pos > c.start_pos: - return True - else: - return False - - -class WhileStmt(Flow): - type = 'while_stmt' - __slots__ = () - - -class ForStmt(Flow): - type = 'for_stmt' - __slots__ = () - - def get_testlist(self): - """ - Returns the input node ``y`` from: ``for x in y:``. - """ - return self.children[3] - - -class TryStmt(Flow): - type = 'try_stmt' - __slots__ = () - - def get_except_clause_tests(self): - """ - Returns the ``test`` nodes found in ``except_clause`` nodes. - Returns ``[None]`` for except clauses without an exception given. - """ - for node in self.children: - # TODO this is not correct. We're not returning an except clause. - if node.type == 'except_clause': - yield node.children[1] - elif node == 'except': - yield None - - -class WithStmt(Flow): - type = 'with_stmt' - __slots__ = () - - def get_defined_names(self): - """ - Returns the a list of `Name` that the with statement defines. The - defined names are set after `as`. - """ - names = [] - for with_item in self.children[1:-2:2]: - # Check with items for 'as' names. 
- if with_item.type == 'with_item':
- names += _defined_names(with_item.children[2])
- return names
-
- def get_context_manager_from_name(self, name):
- # TODO Replace context_manager with test?
- node = name.parent
- if node.type != 'with_item':
- raise ValueError('The name is not actually part of a with statement.')
- return node.children[0]
-
-
-class Import(PythonBaseNode):
- __slots__ = ()
-
- def get_path_for_name(self, name):
- """
- The path is the list of names that leads to the searched name.
-
- :return list of Name:
- """
- try:
- # The name may be an alias. If it is, just map it back to the name.
- name = self._aliases()[name]
- except KeyError:
- pass
-
- for path in self.get_paths():
- if name in path:
- return path[:path.index(name) + 1]
- raise ValueError('Name should be defined in the import itself')
-
- def is_nested(self):
- return False # By default, sub classes may overwrite this behavior
-
- def is_star_import(self):
- return self.children[-1] == '*'
-
-
-class ImportFrom(Import):
- type = 'import_from'
- __slots__ = ()
-
- def get_defined_names(self):
- """
- Returns a list of `Name` that the import defines. The
- defined names are set after `import` or, in case an alias - `as` - is
- present, that name is returned.
- """
- return [alias or name for name, alias in self._as_name_tuples()]
-
- def _aliases(self):
- """Mapping from alias to its corresponding name."""
- return dict((alias, name) for name, alias in self._as_name_tuples()
- if alias is not None)
-
- def get_from_names(self):
- for n in self.children[1:]:
- if n not in ('.', '...'):
- break
- if n.type == 'dotted_name': # from x.y import
- return n.children[::2]
- elif n == 'import': # from . import
- return []
- else: # from x import
- return [n]
-
- @property
- def level(self):
- """The level parameter of ``__import__``."""
- level = 0
- for n in self.children[1:]:
- if n in ('.', '...'):
- level += len(n.value)
- else:
- break
- return level
-
- def _as_name_tuples(self):
- last = self.children[-1]
- if last == ')':
- last = self.children[-2]
- elif last == '*':
- return # No names defined directly.
-
- if last.type == 'import_as_names':
- as_names = last.children[::2]
- else:
- as_names = [last]
- for as_name in as_names:
- if as_name.type == 'name':
- yield as_name, None
- else:
- yield as_name.children[::2] # yields x, y -> ``x as y``
-
- def get_paths(self):
- """
- The import paths defined in an import statement. Typically an array
- like this: ``[<Name: datetime>, <Name: date>]``.
-
- :return list of list of Name:
- """
- dotted = self.get_from_names()
-
- if self.children[-1] == '*':
- return [dotted]
- return [dotted + [name] for name, alias in self._as_name_tuples()]
-
-
-class ImportName(Import):
- """For ``import_name`` nodes. Covers normal imports without ``from``."""
- type = 'import_name'
- __slots__ = ()
-
- def get_defined_names(self):
- """
- Returns a list of `Name` that the import defines. The defined name
- is always the first name after `import` or, in case an alias - `as` -
- is present, that name is returned.
- """
- return [alias or path[0] for path, alias in self._dotted_as_names()]
-
- @property
- def level(self):
- """The level parameter of ``__import__``."""
- return 0 # Obviously 0 for imports without from.
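
As a concrete reference for the decomposition implemented around here, a
standalone mimic (plain lists and strings instead of tree nodes; the names
are made up for illustration):

    # import foo.bar as baz, os  ->  [(path, alias), ...]
    pairs = [(['foo', 'bar'], 'baz'), (['os'], None)]

    # get_defined_names(): the alias wins, otherwise the first dotted name.
    print([alias or path[0] for path, alias in pairs])    # ['baz', 'os']

    # is_nested(): an unaliased dotted path like `import foo.bar` counts.
    print(bool([1 for path, alias in pairs
                if alias is None and len(path) > 1]))     # False
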
-
- def get_paths(self):
- return [path for path, alias in self._dotted_as_names()]
-
- def _dotted_as_names(self):
- """Generator of (list(path), alias) where alias may be None."""
- dotted_as_names = self.children[1]
- if dotted_as_names.type == 'dotted_as_names':
- as_names = dotted_as_names.children[::2]
- else:
- as_names = [dotted_as_names]
-
- for as_name in as_names:
- if as_name.type == 'dotted_as_name':
- alias = as_name.children[2]
- as_name = as_name.children[0]
- else:
- alias = None
- if as_name.type == 'name':
- yield [as_name], alias
- else:
- # dotted_names
- yield as_name.children[::2], alias
-
- def is_nested(self):
- """
- This checks for the special case of nested imports, without aliases and
- from statement::
-
- import foo.bar
- """
- return bool([1 for path, alias in self._dotted_as_names()
- if alias is None and len(path) > 1])
-
- def _aliases(self):
- """
- :return dict: A mapping from alias to its corresponding name.
- """
- return dict((alias, path[-1]) for path, alias in self._dotted_as_names()
- if alias is not None)
-
-
-class KeywordStatement(PythonBaseNode):
- """
- For the following statements: `assert`, `del`, `global`, `nonlocal`,
- `raise`, `return` and `yield`.
-
- `pass`, `continue` and `break` are not in there, because they are just
- simple keywords and the parser reduces them to keywords.
- """
- __slots__ = ()
-
- @property
- def type(self):
- """
- Keyword statements start with the keyword and end with `_stmt`. You can
- cross-check this with the Python grammar.
- """
- return '%s_stmt' % self.keyword
-
- @property
- def keyword(self):
- return self.children[0].value
-
-
-class AssertStmt(KeywordStatement):
- __slots__ = ()
-
- @property
- def assertion(self):
- return self.children[1]
-
-
-class GlobalStmt(KeywordStatement):
- __slots__ = ()
-
- def get_global_names(self):
- return self.children[1::2]
-
-
-class ReturnStmt(KeywordStatement):
- __slots__ = ()
-
-
-class YieldExpr(PythonBaseNode):
- type = 'yield_expr'
- __slots__ = ()
-
-
-def _defined_names(current):
- """
- A helper function to find the defined names in statements, for loops and
- list comprehensions.
- """
- names = []
- if current.type in ('testlist_star_expr', 'testlist_comp', 'exprlist'):
- for child in current.children[::2]:
- names += _defined_names(child)
- elif current.type in ('atom', 'star_expr'):
- names += _defined_names(current.children[1])
- elif current.type in ('power', 'atom_expr'):
- if current.children[-2] != '**': # Just if there's no operation
- trailer = current.children[-1]
- if trailer.children[0] == '.':
- names.append(trailer.children[1])
- else:
- names.append(current)
- return names
-
-
-class ExprStmt(PythonBaseNode, DocstringMixin):
- type = 'expr_stmt'
- __slots__ = ()
-
- def get_defined_names(self):
- """
- Returns a list of `Name` defined before the `=` sign.
- """
- names = []
- if self.children[1].type == 'annassign':
- names = _defined_names(self.children[0])
- return [
- name
- for i in range(0, len(self.children) - 2, 2)
- if '=' in self.children[i + 1].value
- for name in _defined_names(self.children[i])
- ] + names
-
- def get_rhs(self):
- """Returns the right-hand-side of the equals."""
- return self.children[-1]
-
- def yield_operators(self):
- """
- Returns a generator of `+=`, `=`, etc., or nothing if there is no
- operation.
- """
- first = self.children[1]
- if first.type == 'annassign':
- if len(first.children) <= 2:
- return # No operator is available, it's just PEP 484.
-
- first = first.children[2]
- yield first
-
- for operator in self.children[3::2]:
- yield operator
-
-
-class Param(PythonBaseNode):
- """
- It's a helper class that makes business logic with params much easier. The
- Python grammar defines no ``param`` node. It defines parameters in a
- different way, one that is not really suited to working with them.
- """
- type = 'param'
-
- def __init__(self, children, parent):
- super(Param, self).__init__(children)
- self.parent = parent
- for child in children:
- child.parent = self
-
- @property
- def star_count(self):
- """
- Is `0` in case of `foo`, `1` in case of `*foo` or `2` in case of
- `**foo`.
- """
- first = self.children[0]
- if first in ('*', '**'):
- return len(first.value)
- return 0
-
- @property
- def default(self):
- """
- The default is the test node that appears after the `=`. Is `None` in
- case no default is present.
- """
- try:
- return self.children[int(self.children[0] in ('*', '**')) + 2]
- except IndexError:
- return None
-
- @property
- def annotation(self):
- """
- The annotation is the test node that appears after the `:`. Is `None`
- in case no annotation is present.
- """
- tfpdef = self._tfpdef()
- if tfpdef.type == 'tfpdef':
- assert tfpdef.children[1] == ":"
- assert len(tfpdef.children) == 3
- annotation = tfpdef.children[2]
- return annotation
- else:
- return None
-
- def _tfpdef(self):
- """
- tfpdef: see grammar.txt.
- """
- offset = int(self.children[0] in ('*', '**'))
- return self.children[offset]
-
- @property
- def name(self):
- """
- The `Name` leaf of the param.
- """
- if self._tfpdef().type == 'tfpdef':
- return self._tfpdef().children[0]
- else:
- return self._tfpdef()
-
- @property
- def position_index(self):
- """
- Property for the positional index of a parameter.
- """
- index = self.parent.children.index(self)
- try:
- keyword_only_index = self.parent.children.index('*')
- if index > keyword_only_index:
- # Skip the ` *, `
- index -= 2
- except ValueError:
- pass
- return index - 1
-
- def get_parent_function(self):
- """
- Returns the function/lambda of a parameter.
- """
- return search_ancestor(self, 'funcdef', 'lambdef')
-
- def get_code(self, normalized=False, include_prefix=True, include_comma=True):
- """
- Like all the other get_code functions, but adds the parameter
- `include_comma`.
-
- :param include_comma bool: If enabled includes the comma in the string output.
- """
- if include_comma:
- return super(Param, self).get_code(normalized, include_prefix)
-
- children = self.children
- if children[-1] == ',':
- children = children[:-1]
- return self._get_code_for_children(
- children,
- normalized=False,
- include_prefix=include_prefix
- )
-
- def __repr__(self):
- default = '' if self.default is None else '=%s' % self.default.get_code()
- return '<%s: %s>' % (type(self).__name__, str(self._tfpdef()) + default)
-
-
-class CompFor(PythonBaseNode):
- type = 'comp_for'
- __slots__ = ()
-
- def get_defined_names(self):
- """
- Returns a list of `Name` that the comprehension defines.
- """ - return _defined_names(self.children[1]) diff --git a/jedi/parser/token.py b/jedi/parser/token.py deleted file mode 100644 index 0cb846da..00000000 --- a/jedi/parser/token.py +++ /dev/null @@ -1,90 +0,0 @@ -from __future__ import absolute_import - -from jedi._compatibility import is_py3, is_py35 -from token import * - - -COMMENT = N_TOKENS -tok_name[COMMENT] = 'COMMENT' -N_TOKENS += 1 - -NL = N_TOKENS -tok_name[NL] = 'NL' -N_TOKENS += 1 - -if is_py3: - BACKQUOTE = N_TOKENS - tok_name[BACKQUOTE] = 'BACKQUOTE' - N_TOKENS += 1 -else: - RARROW = N_TOKENS - tok_name[RARROW] = 'RARROW' - N_TOKENS += 1 - ELLIPSIS = N_TOKENS - tok_name[ELLIPSIS] = 'ELLIPSIS' - N_TOKENS += 1 - -if not is_py35: - ATEQUAL = N_TOKENS - tok_name[ATEQUAL] = 'ATEQUAL' - N_TOKENS += 1 - - - -# Map from operator to number (since tokenize doesn't do this) - -opmap_raw = """\ -( LPAR -) RPAR -[ LSQB -] RSQB -: COLON -, COMMA -; SEMI -+ PLUS -- MINUS -* STAR -/ SLASH -| VBAR -& AMPER -< LESS -> GREATER -= EQUAL -. DOT -% PERCENT -` BACKQUOTE -{ LBRACE -} RBRACE -@ AT -== EQEQUAL -!= NOTEQUAL -<> NOTEQUAL -<= LESSEQUAL ->= GREATEREQUAL -~ TILDE -^ CIRCUMFLEX -<< LEFTSHIFT ->> RIGHTSHIFT -** DOUBLESTAR -+= PLUSEQUAL --= MINEQUAL -*= STAREQUAL -/= SLASHEQUAL -%= PERCENTEQUAL -&= AMPEREQUAL -|= VBAREQUAL -@= ATEQUAL -^= CIRCUMFLEXEQUAL -<<= LEFTSHIFTEQUAL ->>= RIGHTSHIFTEQUAL -**= DOUBLESTAREQUAL -// DOUBLESLASH -//= DOUBLESLASHEQUAL --> RARROW -... ELLIPSIS -""" - -opmap = {} -for line in opmap_raw.splitlines(): - op, name = line.split() - opmap[op] = globals()[name] diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py deleted file mode 100644 index ddbb15fc..00000000 --- a/jedi/parser/tokenize.py +++ /dev/null @@ -1,369 +0,0 @@ -# -*- coding: utf-8 -*- -""" -This tokenizer has been copied from the ``tokenize.py`` standard library -tokenizer. The reason was simple: The standard library tokenizer fails -if the indentation is not right. The fast parser of jedi however requires -"wrong" indentation. - -Basically this is a stripped down version of the standard library module, so -you can read the documentation there. Additionally we included some speed and -memory optimizations here. -""" -from __future__ import absolute_import - -import string -import re -from collections import namedtuple -import itertools as _itertools - -from jedi.parser.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, opmap, - NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT) -from jedi._compatibility import is_py3, py_version, u -from jedi.common import splitlines - - -cookie_re = re.compile("coding[:=]\s*([-\w.]+)") - - -if is_py3: - # Python 3 has str.isidentifier() to check if a char is a valid identifier - is_identifier = str.isidentifier -else: - namechars = string.ascii_letters + '_' - is_identifier = lambda s: s in namechars - - -COMMENT = N_TOKENS -tok_name[COMMENT] = 'COMMENT' - - -def group(*choices, **kwargs): - capture = kwargs.pop('capture', False) # Python 2, arrghhhhh :( - assert not kwargs - - start = '(' - if not capture: - start += '?:' - return start + '|'.join(choices) + ')' - -def any(*choices): - return group(*choices) + '*' - -def maybe(*choices): - return group(*choices) + '?' - -# Note: we use unicode matching for names ("\w") but ascii matching for -# number literals. 
diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py
deleted file mode 100644
index ddbb15fc..00000000
--- a/jedi/parser/tokenize.py
+++ /dev/null
@@ -1,369 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-This tokenizer has been copied from the ``tokenize.py`` standard library
-tokenizer. The reason is simple: the standard library tokenizer fails as soon
-as the indentation is not right. The fast parser of jedi, however, requires
-tokenizing such "wrong" indentation.
-
-Basically this is a stripped-down version of the standard library module, so
-you can read the documentation there. Additionally, we included some speed
-and memory optimizations here.
-"""
-from __future__ import absolute_import
-
-import string
-import re
-from collections import namedtuple
-import itertools as _itertools
-
-from jedi.parser.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER,
-                               opmap, NAME, OP, ERRORTOKEN, NEWLINE, INDENT,
-                               DEDENT)
-from jedi._compatibility import is_py3, py_version, u
-from jedi.common import splitlines
-
-
-cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")
-
-
-if is_py3:
-    # Python 3 has str.isidentifier() to check if a char is a valid identifier
-    is_identifier = str.isidentifier
-else:
-    namechars = string.ascii_letters + '_'
-    is_identifier = lambda s: s in namechars
-
-
-COMMENT = N_TOKENS
-tok_name[COMMENT] = 'COMMENT'
-
-
-def group(*choices, **kwargs):
-    capture = kwargs.pop('capture', False)  # Python 2, arrghhhhh :(
-    assert not kwargs
-
-    start = '('
-    if not capture:
-        start += '?:'
-    return start + '|'.join(choices) + ')'
-
-def any(*choices):
-    return group(*choices) + '*'
-
-def maybe(*choices):
-    return group(*choices) + '?'
-
-# Note: we use unicode matching for names ("\w") but ascii matching for
-# number literals.
-Whitespace = r'[ \f\t]*'
-Comment = r'#[^\r\n]*'
-Name = r'\w+'
-
-if py_version >= 36:
-    Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
-    Binnumber = r'0[bB](?:_?[01])+'
-    Octnumber = r'0[oO](?:_?[0-7])+'
-    Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
-    Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
-    Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
-    Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
-                       r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
-    Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
-    Floatnumber = group(Pointfloat, Expfloat)
-    Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
-else:
-    Hexnumber = r'0[xX][0-9a-fA-F]+'
-    Binnumber = r'0[bB][01]+'
-    if is_py3:
-        Octnumber = r'0[oO][0-7]+'
-    else:
-        Octnumber = '0[0-7]+'
-    Decnumber = r'(?:0+|[1-9][0-9]*)'
-    Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
-    Exponent = r'[eE][-+]?[0-9]+'
-    Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
-    Expfloat = r'[0-9]+' + Exponent
-    Floatnumber = group(Pointfloat, Expfloat)
-    Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
-Number = group(Imagnumber, Floatnumber, Intnumber)
-
-# Return the empty string, plus all of the valid string prefixes.
-def _all_string_prefixes():
-    # The valid string prefixes. Only contains the lowercase versions,
-    # and doesn't contain any permutations (includes 'fr', but not
-    # 'rf'). The various permutations will be generated.
-    _valid_string_prefixes = ['b', 'r', 'u', 'br']
-    if py_version >= 36:
-        _valid_string_prefixes += ['f', 'fr']
-    if py_version <= 27:
-        # TODO this is actually not 100% valid. ur is valid in Python 2.7,
-        # while ru is not.
-        _valid_string_prefixes.append('ur')
-
-    # if we add binary f-strings, add: ['fb', 'fbr']
-    result = set([''])
-    for prefix in _valid_string_prefixes:
-        for t in _itertools.permutations(prefix):
-            # create a list with upper and lower versions of each
-            # character
-            for u in _itertools.product(*[(c, c.upper()) for c in t]):
-                result.add(''.join(u))
-    return result
-
-def _compile(expr):
-    return re.compile(expr, re.UNICODE)
-
-# Note that since _all_string_prefixes includes the empty string,
-# StringPrefix can be the empty string (making it optional).
-StringPrefix = group(*_all_string_prefixes())
-
-# Tail end of ' string.
-Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
-# Tail end of " string.
-Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
-# Tail end of ''' string.
-Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
-# Tail end of """ string.
-Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
-Triple = group(StringPrefix + "'''", StringPrefix + '"""')
-
-# Because of leftmost-then-longest match semantics, be sure to put the
-# longest operators first (e.g., if = came before ==, == would get
-# recognized as two instances of =).
-Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
-                 r"//=?", r"->",
-                 r"[+\-*/%&@|^=<>]=?",
-                 r"~")
-
-Bracket = '[][(){}]'
-Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
-Funny = group(Operator, Bracket, Special)
-
-PlainToken = group(Number, Funny, Name, capture=True)
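To see what the group()/maybe() combinators produce, here is a self-contained
sketch that rebuilds the 3.6-style integer pattern and exercises the
underscore rules; it mirrors the helpers above but imports nothing from jedi:

    import re

    def group(*choices, capture=False):
        start = '(' if capture else '(?:'
        return start + '|'.join(choices) + ')'

    def maybe(*choices):
        return group(*choices) + '?'

    Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
    Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
    Intnumber = group(Hexnumber, Decnumber)

    print(re.fullmatch(Intnumber, '0x_ff') is not None)  # True
    print(re.fullmatch(Intnumber, '1_000') is not None)  # True
    print(re.fullmatch(Intnumber, '1__0') is not None)   # False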
-
-# First (or only) line of ' or " string.
-ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
-                group("'", r'\\\r?\n'),
-                StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
-                group('"', r'\\\r?\n'))
-PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
-PseudoToken = group(Whitespace, capture=True) + \
-    group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
-
-# For a given string prefix plus quotes, endpats maps it to a regex
-# to match the remainder of that string. _prefix can be empty, for
-# a normal single or triple quoted string (with no prefix).
-endpats = {}
-for _prefix in _all_string_prefixes():
-    endpats[_prefix + "'"] = _compile(Single)
-    endpats[_prefix + '"'] = _compile(Double)
-    endpats[_prefix + "'''"] = _compile(Single3)
-    endpats[_prefix + '"""'] = _compile(Double3)
-
-# A set of all of the single and triple quoted string prefixes,
-# including the opening quotes.
-single_quoted = set()
-triple_quoted = set()
-for t in _all_string_prefixes():
-    for p in (t + '"', t + "'"):
-        single_quoted.add(p)
-    for p in (t + '"""', t + "'''"):
-        triple_quoted.add(p)
-
-
-# TODO add with?
-ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
-                       'finally', 'while', 'return')
-pseudo_token_compiled = _compile(PseudoToken)
-
-
-class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
-    def __repr__(self):
-        return ('TokenInfo(type=%s, string=%r, start=%r, prefix=%r)' %
-                self._replace(type=self.get_type_name()))
-
-    def get_type_name(self, exact=True):
-        if exact:
-            typ = self.exact_type
-        else:
-            typ = self.type
-        return tok_name[typ]
-
-    @property
-    def exact_type(self):
-        if self.type == OP and self.string in opmap:
-            return opmap[self.string]
-        else:
-            return self.type
-
-    @property
-    def end_pos(self):
-        lines = splitlines(self.string)
-        if len(lines) > 1:
-            return self.start_pos[0] + len(lines) - 1, 0
-        else:
-            return self.start_pos[0], self.start_pos[1] + len(self.string)
-
-
-def source_tokens(source, use_exact_op_types=False):
-    """Generate tokens from the source code (string)."""
-    lines = splitlines(source, keepends=True)
-    return generate_tokens(lines, use_exact_op_types)
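The prefix concept is worth a quick illustration. Below is a sketch that
derives prefixes from the stdlib tokenizer (the jedi version above computes
them directly): everything between two tokens is attached to the following
token, so joining prefix + string over all tokens reproduces the source
exactly.

    import io
    import tokenize

    code = "x = 1  # answer\ny  =  2\n"
    lines = code.splitlines(keepends=True)

    def absolute(pos):
        # Convert a (row, column) position into a flat string offset.
        row, col = pos
        return sum(len(l) for l in lines[:row - 1]) + col

    prev_end = 0
    parts = []
    for tok in tokenize.generate_tokens(io.StringIO(code).readline):
        start = absolute(tok.start)
        parts.append(code[prev_end:start] + tok.string)  # prefix + value
        prev_end = absolute(tok.end)

    print(''.join(parts) == code)  # True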
-def generate_tokens(lines, use_exact_op_types=False):
-    """
-    A heavily modified Python standard library tokenizer.
-
-    In addition to the default information, it also yields the prefix of each
-    token. This idea comes from lib2to3. The prefix contains all information
-    that is irrelevant to the parser, like newlines in parentheses or
-    comments.
-    """
-    paren_level = 0  # count parentheses
-    indents = [0]
-    max = 0
-    numchars = '0123456789'
-    contstr = ''
-    contline = None
-    # We start with a newline. This makes an indent at the first position
-    # possible. It's not valid Python, but still better than an INDENT in the
-    # second line (and not in the first). This makes quite a few things in
-    # Jedi's fast parser possible.
-    new_line = True
-    prefix = ''  # Should never be required, but here for safety
-    additional_prefix = ''
-    for lnum, line in enumerate(lines, 1):  # loop over lines in stream
-        pos, max = 0, len(line)
-
-        if contstr:  # continued string
-            endmatch = endprog.match(line)
-            if endmatch:
-                pos = endmatch.end(0)
-                yield TokenInfo(STRING, contstr + line[:pos], contstr_start, prefix)
-                contstr = ''
-                contline = None
-            else:
-                contstr = contstr + line
-                contline = contline + line
-                continue
-
-        while pos < max:
-            pseudomatch = pseudo_token_compiled.match(line, pos)
-            if not pseudomatch:  # no valid token: the rest of the line is an error
-                txt = line[pos:]
-                if txt.endswith('\n'):
-                    new_line = True
-                yield TokenInfo(ERRORTOKEN, txt, (lnum, pos), prefix)
-                break
-
-            prefix = additional_prefix + pseudomatch.group(1)
-            additional_prefix = ''
-            start, pos = pseudomatch.span(2)
-            spos = (lnum, start)
-            token = pseudomatch.group(2)
-            initial = token[0]
-
-            if new_line and initial not in '\r\n#':
-                new_line = False
-                if paren_level == 0:
-                    i = 0
-                    while line[i] == '\f':  # skip form feeds at line start
-                        i += 1
-                        start -= 1
-                    if start > indents[-1]:
-                        yield TokenInfo(INDENT, '', spos, '')
-                        indents.append(start)
-                    while start < indents[-1]:
-                        yield TokenInfo(DEDENT, '', spos, '')
-                        indents.pop()
-
-            if (initial in numchars or  # ordinary number
-                    (initial == '.' and token != '.' and token != '...')):
-                yield TokenInfo(NUMBER, token, spos, prefix)
-            elif initial in '\r\n':
-                if not new_line and paren_level == 0:
-                    yield TokenInfo(NEWLINE, token, spos, prefix)
-                else:
-                    additional_prefix = prefix + token
-                new_line = True
-            elif initial == '#':  # Comments
-                assert not token.endswith("\n")
-                additional_prefix = prefix + token
-            elif token in triple_quoted:
-                endprog = endpats[token]
-                endmatch = endprog.match(line, pos)
-                if endmatch:  # all on one line
-                    pos = endmatch.end(0)
-                    token = line[start:pos]
-                    yield TokenInfo(STRING, token, spos, prefix)
-                else:
-                    contstr_start = (lnum, start)  # multiple lines
-                    contstr = line[start:]
-                    contline = line
-                    break
-            elif initial in single_quoted or \
-                    token[:2] in single_quoted or \
-                    token[:3] in single_quoted:
-                if token[-1] == '\n':  # continued string
-                    contstr_start = lnum, start
-                    endprog = (endpats.get(initial) or endpats.get(token[1])
-                               or endpats.get(token[2]))
-                    contstr = line[start:]
-                    contline = line
-                    break
-                else:  # ordinary string
-                    yield TokenInfo(STRING, token, spos, prefix)
-            elif is_identifier(initial):  # ordinary name
-                if token in ALWAYS_BREAK_TOKENS:
-                    paren_level = 0
-                    while True:
-                        indent = indents.pop()
-                        if indent > start:
-                            yield TokenInfo(DEDENT, '', spos, '')
-                        else:
-                            indents.append(indent)
-                            break
-                yield TokenInfo(NAME, token, spos, prefix)
-            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'):  # continued stmt
-                additional_prefix += prefix + line[start:]
-                break
-            else:
-                if token in '([{':
-                    paren_level += 1
-                elif token in ')]}':
-                    paren_level -= 1
-
-                try:
-                    # This check is needed in any case to check if it's a valid
-                    # operator or just some random unicode character.
-                    exact_type = opmap[token]
-                except KeyError:
-                    exact_type = typ = ERRORTOKEN
-                else:
-                    if use_exact_op_types:
-                        typ = exact_type
-                    else:
-                        typ = OP
-                yield TokenInfo(typ, token, spos, prefix)
-
-    if contstr:
-        yield TokenInfo(ERRORTOKEN, contstr, contstr_start, prefix)
-        if contstr.endswith('\n'):
-            new_line = True
-
-    end_pos = lnum, max
-    # As the last position we just take the maximally possible position, the
-    # end of the last line.
-    for indent in indents[1:]:
-        yield TokenInfo(DEDENT, '', end_pos, '')
-    yield TokenInfo(ENDMARKER, '', end_pos, additional_prefix)
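The INDENT/DEDENT bookkeeping in the loop above boils down to a stack of open
indentation widths. A simplified standalone sketch (real input comes from the
regex machinery, not a list of widths):

    def indent_events(line_widths):
        indents = [0]
        for width in line_widths:
            if width > indents[-1]:
                indents.append(width)   # line starts deeper: open a level
                yield 'INDENT'
            while width < indents[-1]:
                indents.pop()           # one DEDENT per abandoned level
                yield 'DEDENT'

    print(list(indent_events([0, 4, 8, 0])))
    # ['INDENT', 'INDENT', 'DEDENT', 'DEDENT']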
-
-
-if __name__ == "__main__":
-    import sys
-    if len(sys.argv) >= 2:
-        path = sys.argv[1]
-        with open(path) as f:
-            code = u(f.read())
-    else:
-        code = u(sys.stdin.read())
-    for token in source_tokens(code, use_exact_op_types=True):
-        print(token)
diff --git a/jedi/parser/tree.py b/jedi/parser/tree.py
deleted file mode 100644
index 690698aa..00000000
--- a/jedi/parser/tree.py
+++ /dev/null
@@ -1,328 +0,0 @@
-from abc import abstractmethod, abstractproperty
-from jedi._compatibility import utf8_repr, encoding, is_py3
-
-
-def search_ancestor(node, *node_types):
-    """
-    Recursively looks at the parents of a node and checks if the type names
-    match.
-
-    :param node: The node that is looked at.
-    :param node_types: One or more type name strings that are searched for.
-    """
-    while True:
-        node = node.parent
-        if node is None or node.type in node_types:
-            return node
-
-
-class NodeOrLeaf(object):
-    """
-    The base class for nodes and leaves.
-    """
-    __slots__ = ()
-
-    def get_root_node(self):
-        """
-        Returns the root node of a parser tree. The returned node doesn't
-        have a parent node like all the other nodes/leaves.
-        """
-        scope = self
-        while scope.parent is not None:
-            scope = scope.parent
-        return scope
-
-    def get_next_sibling(self):
-        """
-        The node immediately following the invocant in their parent's
-        children list. If the invocant does not have a next sibling, it is
-        `None`.
-        """
-        # Can't use index(); we need to test by identity
-        for i, child in enumerate(self.parent.children):
-            if child is self:
-                try:
-                    return self.parent.children[i + 1]
-                except IndexError:
-                    return None
-
-    def get_previous_sibling(self):
-        """
-        The node/leaf immediately preceding the invocant in their parent's
-        children list. If the invocant does not have a previous sibling, it
-        is `None`.
-        """
-        # Can't use index(); we need to test by identity
-        for i, child in enumerate(self.parent.children):
-            if child is self:
-                if i == 0:
-                    return None
-                return self.parent.children[i - 1]
-
-    def get_previous_leaf(self):
-        """
-        Returns the previous leaf in the parser tree.
-        Returns `None` if it's the first element in the parser tree.
-        """
-        node = self
-        while True:
-            c = node.parent.children
-            i = c.index(node)
-            if i == 0:
-                node = node.parent
-                if node.parent is None:
-                    return None
-            else:
-                node = c[i - 1]
-                break
-
-        while True:
-            try:
-                node = node.children[-1]
-            except AttributeError:  # A Leaf doesn't have children.
-                return node
-
-    def get_next_leaf(self):
-        """
-        Returns the next leaf in the parser tree.
-        Returns `None` if it's the last element in the parser tree.
-        """
-        node = self
-        while True:
-            c = node.parent.children
-            i = c.index(node)
-            if i == len(c) - 1:
-                node = node.parent
-                if node.parent is None:
-                    return None
-            else:
-                node = c[i + 1]
-                break
-
-        while True:
-            try:
-                node = node.children[0]
-            except AttributeError:  # A Leaf doesn't have children.
-                return node
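search_ancestor is simple enough to demonstrate on a toy parent-linked tree;
the ToyNode class here is a stand-in, not one of jedi's node types:

    class ToyNode:
        def __init__(self, type, parent=None):
            self.type = type
            self.parent = parent

    def search_ancestor(node, *node_types):
        # Walk up the parent chain until a matching type or the root.
        while True:
            node = node.parent
            if node is None or node.type in node_types:
                return node

    module = ToyNode('file_input')
    funcdef = ToyNode('funcdef', parent=module)
    name = ToyNode('name', parent=funcdef)

    print(search_ancestor(name, 'funcdef', 'lambdef').type)  # 'funcdef'
    print(search_ancestor(module, 'funcdef'))                # None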
-    @abstractproperty
-    def start_pos(self):
-        """
-        Returns the starting position of this node or leaf (the prefix is
-        not included) as a tuple, e.g. `(3, 4)`.
-
-        :return tuple of int: (line, column)
-        """
-
-    @abstractproperty
-    def end_pos(self):
-        """
-        Returns the end position of this node or leaf as a tuple, e.g.
-        `(3, 4)`.
-
-        :return tuple of int: (line, column)
-        """
-
-    @abstractmethod
-    def get_start_pos_of_prefix(self):
-        """
-        Returns the start_pos of the prefix. This basically means it returns
-        the end_pos of the previous leaf. The `get_start_pos_of_prefix()` of
-        the leaf `+` in `2 + 1` would be `(1, 1)`, while its start_pos is
-        `(1, 2)`.
-
-        :return tuple of int: (line, column)
-        """
-
-    @abstractmethod
-    def get_first_leaf(self):
-        """
-        Returns the first leaf of a node or the node itself if it's a leaf.
-        """
-
-    @abstractmethod
-    def get_last_leaf(self):
-        """
-        Returns the last leaf of a node or the node itself if it's a leaf.
-        """
-
-    @abstractmethod
-    def get_code(self, normalized=False, include_prefix=True):
-        """
-        Returns the code that was the input of the parser.
-
-        If a normalizer is given, the returned code will be normalized and
-        will not be equal to the input.
-
-        :param include_prefix: If `False`, removes the prefix (whitespace
-            and comments) of e.g. a statement.
-        :param normalized: Deprecated. Please don't use. Will be replaced
-            with something more powerful.
-        """
-
-
-class Leaf(NodeOrLeaf):
-    __slots__ = ('value', 'parent', 'line', 'indent', 'prefix')
-
-    def __init__(self, value, start_pos, prefix=''):
-        self.value = value
-        self.start_pos = start_pos
-        self.prefix = prefix
-        self.parent = None
-
-    @property
-    def start_pos(self):
-        return self.line, self.indent
-
-    @start_pos.setter
-    def start_pos(self, value):
-        self.line = value[0]
-        self.indent = value[1]
-
-    def get_start_pos_of_prefix(self):
-        previous_leaf = self.get_previous_leaf()
-        if previous_leaf is None:
-            return self.line - self.prefix.count('\n'), 0  # It's the first leaf.
-        return previous_leaf.end_pos
-
-    def get_first_leaf(self):
-        return self
-
-    def get_last_leaf(self):
-        return self
-
-    def get_code(self, normalized=False, include_prefix=True):
-        if normalized:
-            return self.value
-        if include_prefix:
-            return self.prefix + self.value
-        else:
-            return self.value
-
-    @property
-    def end_pos(self):
-        lines = self.value.split('\n')
-        end_pos_line = self.line + len(lines) - 1
-        # Check for multiline token
-        if self.line == end_pos_line:
-            end_pos_indent = self.indent + len(lines[-1])
-        else:
-            end_pos_indent = len(lines[-1])
-        return end_pos_line, end_pos_indent
-
-    @utf8_repr
-    def __repr__(self):
-        return "<%s: %s start=%s>" % (type(self).__name__, self.value, self.start_pos)
-
-
-class BaseNode(NodeOrLeaf):
-    """
-    The super class for all nodes.
-
-    If you create custom nodes, you will probably want to inherit from this
-    ``BaseNode``.
-    """
-    __slots__ = ('children', 'parent')
-    type = None
-
-    def __init__(self, children):
-        for c in children:
-            c.parent = self
-        self.children = children
-        self.parent = None
-
-    @property
-    def start_pos(self):
-        return self.children[0].start_pos
-
-    def get_start_pos_of_prefix(self):
-        return self.children[0].get_start_pos_of_prefix()
-
-    @property
-    def end_pos(self):
-        return self.children[-1].end_pos
-
-    def _get_code_for_children(self, children, normalized, include_prefix):
-        # TODO implement normalized (depending on context).
-        if include_prefix:
-            return "".join(c.get_code(normalized) for c in children)
-        else:
-            first = children[0].get_code(include_prefix=False)
-            return first + "".join(c.get_code(normalized) for c in children[1:])
-
-    def get_code(self, normalized=False, include_prefix=True):
-        return self._get_code_for_children(self.children, normalized, include_prefix)
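The end_pos arithmetic in Leaf deserves a quick standalone illustration:
single-line leaves widen the column, multiline leaves (e.g. triple-quoted
strings) restart it after the last line:

    def end_pos(line, column, value):
        lines = value.split('\n')
        end_line = line + len(lines) - 1
        if line == end_line:
            return end_line, column + len(lines[-1])  # single-line token
        return end_line, len(lines[-1])               # column restarts

    print(end_pos(1, 4, 'foo'))           # (1, 7)
    print(end_pos(1, 0, '"""a\nbc"""'))   # (2, 5)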
-    def get_leaf_for_position(self, position, include_prefixes=False):
-        def binary_search(lower, upper):
-            if lower == upper:
-                element = self.children[lower]
-                if not include_prefixes and position < element.start_pos:
-                    # We're on a prefix.
-                    return None
-                # In case we have prefixes, a leaf always matches.
-                try:
-                    return element.get_leaf_for_position(position, include_prefixes)
-                except AttributeError:
-                    return element
-
-            index = (lower + upper) // 2
-            element = self.children[index]
-            if position <= element.end_pos:
-                return binary_search(lower, index)
-            else:
-                return binary_search(index + 1, upper)
-
-        if not ((1, 0) <= position <= self.children[-1].end_pos):
-            raise ValueError('Please provide a position that exists within this node.')
-        return binary_search(0, len(self.children) - 1)
-
-    def get_first_leaf(self):
-        return self.children[0].get_first_leaf()
-
-    def get_last_leaf(self):
-        return self.children[-1].get_last_leaf()
-
-    @utf8_repr
-    def __repr__(self):
-        code = self.get_code().replace('\n', ' ').strip()
-        if not is_py3:
-            code = code.encode(encoding, 'replace')
-        return "<%s: %s@%s,%s>" % \
-            (type(self).__name__, code, self.start_pos[0], self.start_pos[1])
-
-
-class Node(BaseNode):
-    """Concrete implementation for interior nodes."""
-    __slots__ = ('type',)
-
-    def __init__(self, type, children):
-        super(Node, self).__init__(children)
-        self.type = type
-
-    def __repr__(self):
-        return "%s(%s, %r)" % (self.__class__.__name__, self.type, self.children)
-
-
-class ErrorNode(BaseNode):
-    """
-    A node that contains valid nodes/leaves that are followed by a token
-    that was invalid. This basically means that the leaf after this node is
-    where Python would mark a syntax error.
-    """
-    __slots__ = ()
-    type = 'error_node'
-
-
-class ErrorLeaf(Leaf):
-    """
-    A leaf that is either completely invalid in a language (like `$` in
-    Python) or is invalid at that position. Like the star in `1 +* 1`.
-    """
-    __slots__ = ('original_type',)
-    type = 'error_leaf'
-
-    def __init__(self, original_type, value, start_pos, prefix=''):
-        super(ErrorLeaf, self).__init__(value, start_pos, prefix)
-        self.original_type = original_type
-
-    def __repr__(self):
-        return "<%s: %s:%s, %s>" % \
-            (type(self).__name__, self.original_type, repr(self.value), self.start_pos)
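Finally, the bisection in get_leaf_for_position relies only on children being
ordered by position. A standalone sketch over plain (line, column) tuples,
with a list of child end positions standing in for real nodes:

    def leaf_index_for_position(end_positions, position, lower=0, upper=None):
        if upper is None:
            upper = len(end_positions) - 1
        if lower == upper:
            return lower
        index = (lower + upper) // 2
        if position <= end_positions[index]:
            # Position falls inside or before this child: search left half.
            return leaf_index_for_position(end_positions, position, lower, index)
        return leaf_index_for_position(end_positions, position, index + 1, upper)

    # Three children ending at columns 3, 7 and 12 of line 1:
    ends = [(1, 3), (1, 7), (1, 12)]
    print(leaf_index_for_position(ends, (1, 5)))   # 1
    print(leaf_index_for_position(ends, (1, 12)))  # 2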