mirror of https://github.com/davidhalter/parso.git (synced 2025-12-07 21:34:32 +08:00)
Add an initial version of the code from Jedi.
Still includes imports that have to be removed.
8  parso/__init__.py  Normal file
@@ -0,0 +1,8 @@
from parso.parser import ParserSyntaxError
from parso.pgen2.pgen import generate_grammar
from parso import python


def parse(grammar, code):
    raise NotImplementedError
    Parser(grammar, code)
147  parso/cache.py  Normal file
@@ -0,0 +1,147 @@
import time
import os
import sys
import hashlib
import gc
import shutil
import pickle
import platform
import errno

from jedi import settings
from jedi import debug
from jedi._compatibility import FileNotFoundError


_PICKLE_VERSION = 30
"""
Version number (integer) for file system cache.

Increment this number when there are any incompatible changes in
the parser tree classes. For example, the following changes
are regarded as incompatible.

- A class name is changed.
- A class is moved to another module.
- A __slot__ of a class is changed.
"""

_VERSION_TAG = '%s-%s%s-%s' % (
    platform.python_implementation(),
    sys.version_info[0],
    sys.version_info[1],
    _PICKLE_VERSION
)
"""
Short name for distinguish Python implementations and versions.

It's like `sys.implementation.cache_tag` but for Python < 3.3
we generate something similar. See:
http://docs.python.org/3/library/sys.html#sys.implementation
"""

# for fast_parser, should not be deleted
parser_cache = {}


class _NodeCacheItem(object):
    def __init__(self, node, lines, change_time=None):
        self.node = node
        self.lines = lines
        if change_time is None:
            change_time = time.time()
        self.change_time = change_time


def load_module(grammar, path):
    """
    Returns a module or None, if it fails.
    """
    try:
        p_time = os.path.getmtime(path)
    except FileNotFoundError:
        return None

    try:
        # TODO Add grammar sha256
        module_cache_item = parser_cache[path]
        if p_time <= module_cache_item.change_time:
            return module_cache_item.node
    except KeyError:
        if not settings.use_filesystem_cache:
            return None

        return _load_from_file_system(grammar, path, p_time)


def _load_from_file_system(grammar, path, p_time):
    cache_path = _get_hashed_path(grammar, path)
    try:
        try:
            if p_time > os.path.getmtime(cache_path):
                # Cache is outdated
                return None
        except OSError as e:
            if e.errno == errno.ENOENT:
                # In Python 2 instead of an IOError here we get an OSError.
                raise FileNotFoundError
            else:
                raise

        with open(cache_path, 'rb') as f:
            gc.disable()
            try:
                module_cache_item = pickle.load(f)
            finally:
                gc.enable()
    except FileNotFoundError:
        return None
    else:
        parser_cache[path] = module_cache_item
        debug.dbg('pickle loaded: %s', path)
        return module_cache_item.node


def save_module(grammar, path, module, lines, pickling=True):
    try:
        p_time = None if path is None else os.path.getmtime(path)
    except OSError:
        p_time = None
        pickling = False

    item = _NodeCacheItem(module, lines, p_time)
    parser_cache[path] = item
    if settings.use_filesystem_cache and pickling and path is not None:
        _save_to_file_system(grammar, path, item)


def _save_to_file_system(grammar, path, item):
    with open(_get_hashed_path(grammar, path), 'wb') as f:
        pickle.dump(item, f, pickle.HIGHEST_PROTOCOL)


def remove_old_modules(self):
    """
    # TODO Might want to use such a function to clean up the cache (if it's
    # too old). We could potentially also scan for old files in the
    # directory and delete those.
    """


def clear_cache(self):
    shutil.rmtree(settings.cache_directory)
    parser_cache.clear()


def _get_hashed_path(grammar, path):
    file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest()
    directory = _get_cache_directory_path()
    return os.path.join(directory, '%s-%s.pkl' % (grammar.sha256, file_hash))


def _get_cache_directory_path():
    directory = os.path.join(settings.cache_directory, _VERSION_TAG)
    if not os.path.exists(directory):
        os.makedirs(directory)
    return directory
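
The module above implements a two-level cache: `parser_cache` holds `_NodeCacheItem` objects in memory, while `_save_to_file_system`/`_load_from_file_system` pickle them under a per-grammar, per-file hash. A rough usage sketch, not part of the commit (the `grammar` object with a `sha256` attribute and the `parse_file_somehow` helper are assumptions for illustration):

    from parso.cache import load_module, save_module

    # Try the in-memory/pickle cache first, reparse only on a miss.
    module = load_module(grammar, '/tmp/example.py')      # None on a cache miss
    if module is None:
        # Placeholder for the real parser call in this WIP commit.
        module = parse_file_somehow(grammar, '/tmp/example.py')
        with open('/tmp/example.py') as f:
            lines = f.readlines()
        # Stores the node in memory and, if enabled in settings, on disk.
        save_module(grammar, '/tmp/example.py', module, lines)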
77  parso/parser.py  Normal file
@@ -0,0 +1,77 @@
"""
The ``Parser`` tries to convert the available Python code in an easy to read
format, something like an abstract syntax tree. The classes who represent this
tree, are sitting in the :mod:`jedi.parser.tree` module.

The Python module ``tokenize`` is a very important part in the ``Parser``,
because it splits the code into different words (tokens).  Sometimes it looks a
bit messy. Sorry for that! You might ask now: "Why didn't you use the ``ast``
module for this? Well, ``ast`` does a very good job understanding proper Python
code, but fails to work as soon as there's a single line of broken code.

There's one important optimization that needs to be known: Statements are not
being parsed completely. ``Statement`` is just a representation of the tokens
within the statement. This lowers memory usage and cpu time and reduces the
complexity of the ``Parser`` (there's another parser sitting inside
``Statement``, which produces ``Array`` and ``Call``).
"""
from parso import tree
from parso.pgen2.parse import PgenParser


class ParserSyntaxError(Exception):
    """
    Contains error information about the parser tree.

    May be raised as an exception.
    """
    def __init__(self, message, position):
        self.message = message
        self.position = position


class BaseParser(object):
    node_map = {}
    default_node = tree.Node

    leaf_map = {
    }
    default_leaf = tree.Leaf

    def __init__(self, grammar, start_symbol='file_input', error_recovery=False):
        self._grammar = grammar
        self._start_symbol = start_symbol
        self._error_recovery = error_recovery

    def parse(self, tokens):
        start_number = self._grammar.symbol2number[self._start_symbol]
        self.pgen_parser = PgenParser(
            self._grammar, self.convert_node, self.convert_leaf,
            self.error_recovery, start_number
        )

        node = self.pgen_parser.parse(tokens)
        # The stack is empty now, we don't need it anymore.
        del self.pgen_parser
        return node

    def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix,
                       add_token_callback):
        if self._error_recovery:
            raise NotImplementedError("Error Recovery is not implemented")
        else:
            raise ParserSyntaxError('SyntaxError: invalid syntax', start_pos)

    def convert_node(self, grammar, type_, children):
        # TODO REMOVE symbol, we don't want type here.
        symbol = grammar.number2symbol[type_]
        try:
            return self.node_map[symbol](children)
        except KeyError:
            return self.default_node(symbol, children)

    def convert_leaf(self, grammar, type_, value, prefix, start_pos):
        try:
            return self.leaf_map[type_](value, start_pos, prefix)
        except KeyError:
            return self.default_leaf(value, start_pos, prefix)
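
`BaseParser` is meant to be subclassed: `node_map` and `leaf_map` translate grammar symbol names and token types into tree classes, with `default_node`/`default_leaf` as fallbacks. A minimal sketch of such a subclass, not part of the commit (the `Function` class and the commented usage lines are illustrative assumptions):

    from parso.parser import BaseParser

    class Function(object):
        # Stand-in node class; node_map entries are called with the children list.
        def __init__(self, children):
            self.children = children

    class MyParser(BaseParser):
        # 'funcdef' is a grammar symbol name; anything unmapped falls back to
        # default_node (for symbols) or default_leaf (for tokens).
        node_map = {'funcdef': Function}

    # tokens would come from a tokenizer yielding (type, value, start_pos, prefix):
    # parser = MyParser(grammar, error_recovery=False)
    # module_node = parser.parse(tokens)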
8  parso/pgen2/__init__.py  Normal file
@@ -0,0 +1,8 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Copyright 2014 David Halter. Integration into Jedi.
# Modifications are dual-licensed: MIT and PSF.
127  parso/pgen2/grammar.py  Normal file
@@ -0,0 +1,127 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2014 David Halter. Integration into Jedi.
# Modifications are dual-licensed: MIT and PSF.

"""This module defines the data structures used to represent a grammar.

These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.

There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.

"""

import pickle
import hashlib


class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    """

    def __init__(self, bnf_text):
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        self.start = 256
        self.sha256 = hashlib.sha256(bnf_text.encode("utf-8")).hexdigest()

    def dump(self, filename):
        """Dump the grammar tables to a pickle file."""
        with open(filename, "wb") as f:
            pickle.dump(self.__dict__, f, 2)

    def load(self, filename):
        """Load the grammar tables from a pickle file."""
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def copy(self):
        """
        Copy the grammar.
        """
        new = self.__class__()
        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
                          "tokens", "symbol2label"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start", self.start)
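
The class docstring above describes the table layout abstractly; as a concrete, hand-written illustration of the shapes involved (real tables are produced by the pgen module, never written by hand, and the numbers here are made up):

    symbol2number = {'file_input': 256, 'simple_stmt': 257}
    number2symbol = {256: 'file_input', 257: 'simple_stmt'}   # the inverse mapping
    labels = [(0, 'EMPTY'), (1, None), (257, None)]           # (token/symbol number, keyword or None)
    states = [[[(1, 1), (2, 0)], [(0, 1)]]]                   # DFA -> state -> list of (label, next state) arcs
    dfas = {256: (states[0], {1: 1, 2: 1})}                   # symbol number -> (DFA, first set as a dict of 1s)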
217  parso/pgen2/parse.py  Normal file
@@ -0,0 +1,217 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2014 David Halter. Integration into Jedi.
# Modifications are dual-licensed: MIT and PSF.

"""
Parser engine for the grammar tables generated by pgen.

The grammar table must be loaded first.

See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.
"""

from parso import tokenize


class InternalParseError(Exception):
    """
    Exception to signal the parser is stuck and error recovery didn't help.
    Basically this shouldn't happen. It's a sign that something is really
    wrong.
    """

    def __init__(self, msg, type, value, start_pos):
        Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
                           (msg, tokenize.tok_name[type], value, start_pos))
        self.msg = msg
        self.type = type
        self.value = value
        self.start_pos = start_pos


def token_to_ilabel(grammar, type_, value):
    # Map from token to label
    if type_ == tokenize.NAME:
        # Check for reserved words (keywords)
        try:
            return grammar.keywords[value]
        except KeyError:
            pass

    try:
        return grammar.tokens[type_]
    except KeyError:
        return None


class PgenParser(object):
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter])  # create instance
    p.setup([start])                  # prepare for parsing
    <for each input token>:
        if p.addtoken(...):           # parse a token
            break
    root = p.rootnode                 # root of abstract syntax tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable.  When a syntax error occurs, error_recovery()
    is called.  There is no error recovery; the parser cannot be used
    after a syntax error was reported (but it can be reinitialized by
    calling setup()).

    """

    def __init__(self, grammar, convert_node, convert_leaf, error_recovery, start):
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes.  If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree.  If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted.  The syntax tree is converted from the bottom
        up.

        A concrete syntax tree node is a (type, nodes) tuple, where
        type is the node type (a token or symbol number) and nodes
        is a list of children for symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.

        """
        self.grammar = grammar
        self.convert_node = convert_node
        self.convert_leaf = convert_leaf

        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, children),
        # where children is a list of nodes or None
        newnode = (start, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack = [stackentry]
        self.rootnode = None
        self.error_recovery = error_recovery

    def parse(self, tokens):
        for type_, value, start_pos, prefix in tokens:
            if self.addtoken(type_, value, start_pos, prefix):
                break
        else:
            # We never broke out -- EOF is too soon -- Unfinished statement.
            # However, the error recovery might have added the token again, if
            # the stack is empty, we're fine.
            if self.stack:
                raise InternalParseError("incomplete input", type_, value, start_pos)
        return self.rootnode

    def addtoken(self, type_, value, start_pos, prefix):
        """Add a token; return True if this is the end of the program."""
        ilabel = token_to_ilabel(self.grammar, type_, value)

        # Loop until the token is shifted; may raise exceptions
        _gram = self.grammar
        _labels = _gram.labels
        _push = self._push
        _pop = self._pop
        _shift = self._shift
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t, v = _labels[i]
                if ilabel == i:
                    # Look it up in the list of labels
                    assert t < 256
                    # Shift a token; we're done with it
                    _shift(type_, value, newstate, prefix, start_pos)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        _pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False
                elif t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = _gram.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        _push(t, itsdfa, newstate)
                        break  # To continue the outer while loop
            else:
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    _pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise InternalParseError("too much input", type_, value, start_pos)
                else:
                    self.error_recovery(self.grammar, self.stack, arcs, type_,
                                        value, start_pos, prefix, self.addtoken)
                    break

    def _shift(self, type_, value, newstate, prefix, start_pos):
        """Shift a token.  (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode = self.convert_leaf(self.grammar, type_, value, prefix, start_pos)
        node[-1].append(newnode)
        self.stack[-1] = (dfa, newstate, node)

    def _push(self, type_, newdfa, newstate):
        """Push a nonterminal.  (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode = (type_, [])
        self.stack[-1] = (dfa, newstate, node)
        self.stack.append((newdfa, 0, newnode))

    def _pop(self):
        """Pop a nonterminal.  (Internal)"""
        popdfa, popstate, (type_, children) = self.stack.pop()
        # If there's exactly one child, return that child instead of creating a
        # new node.  We still create expr_stmt and file_input though, because a
        # lot of Jedi depends on its logic.
        if len(children) == 1:
            newnode = children[0]
        else:
            newnode = self.convert_node(self.grammar, type_, children)

        try:
            # Equal to:
            # dfa, state, node = self.stack[-1]
            # symbol, children = node
            self.stack[-1][2][1].append(newnode)
        except IndexError:
            # Stack is empty, set the rootnode.
            self.rootnode = newnode
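
Note that the class docstring's `setup()`-based usage is inherited from lib2to3 and does not match this version: here the constructor already sets up the stack, and `parse()` drives `addtoken()` itself. A sketch of the equivalent manual loop, not part of the commit (the `grammar`, converter callbacks, `error_recovery`, `start_number`, and `tokens` names are assumptions):

    # Hypothetical manual driving of the engine; BaseParser.parse() does this for you.
    p = PgenParser(grammar, convert_node, convert_leaf, error_recovery, start_number)
    for type_, value, start_pos, prefix in tokens:
        if p.addtoken(type_, value, start_pos, prefix):
            break          # the start symbol was reduced; parsing is complete
    root = p.rootnode      # converted tree, built bottom-up by _pop()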
394  parso/pgen2/pgen.py  Normal file
@@ -0,0 +1,394 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2014 David Halter. Integration into Jedi.
# Modifications are dual-licensed: MIT and PSF.

from parso.pgen2 import grammar
from parso import token
from parso import tokenize


class ParserGenerator(object):
    def __init__(self, bnf_text):
        self._bnf_text = bnf_text
        self.generator = tokenize.source_tokens(bnf_text)
        self.gettoken()  # Initialize lookahead
        self.dfas, self.startsymbol = self.parse()
        self.first = {}  # map from symbol name to set of tokens
        self.addfirstsets()

    def make_grammar(self):
        c = grammar.Grammar(self._bnf_text)
        names = list(self.dfas.keys())
        names.sort()
        names.remove(self.startsymbol)
        names.insert(0, self.startsymbol)
        for name in names:
            i = 256 + len(c.symbol2number)
            c.symbol2number[name] = i
            c.number2symbol[i] = name
        for name in names:
            dfa = self.dfas[name]
            states = []
            for state in dfa:
                arcs = []
                for label, next in state.arcs.items():
                    arcs.append((self.make_label(c, label), dfa.index(next)))
                if state.isfinal:
                    arcs.append((0, dfa.index(state)))
                states.append(arcs)
            c.states.append(states)
            c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
        c.start = c.symbol2number[self.startsymbol]
        return c

    def make_first(self, c, name):
        rawfirst = self.first[name]
        first = {}
        for label in rawfirst:
            ilabel = self.make_label(c, label)
            ##assert ilabel not in first # XXX failed on <> ... !=
            first[ilabel] = 1
        return first

    def make_label(self, c, label):
        # XXX Maybe this should be a method on a subclass of converter?
        ilabel = len(c.labels)
        if label[0].isalpha():
            # Either a symbol name or a named token
            if label in c.symbol2number:
                # A symbol name (a non-terminal)
                if label in c.symbol2label:
                    return c.symbol2label[label]
                else:
                    c.labels.append((c.symbol2number[label], None))
                    c.symbol2label[label] = ilabel
                    return ilabel
            else:
                # A named token (NAME, NUMBER, STRING)
                itoken = getattr(token, label, None)
                assert isinstance(itoken, int), label
                assert itoken in token.tok_name, label
                if itoken in c.tokens:
                    return c.tokens[itoken]
                else:
                    c.labels.append((itoken, None))
                    c.tokens[itoken] = ilabel
                    return ilabel
        else:
            # Either a keyword or an operator
            assert label[0] in ('"', "'"), label
            value = eval(label)
            if value[0].isalpha():
                # A keyword
                if value in c.keywords:
                    return c.keywords[value]
                else:
                    c.labels.append((token.NAME, value))
                    c.keywords[value] = ilabel
                    return ilabel
            else:
                # An operator (any non-numeric token)
                itoken = token.opmap[value]  # Fails if unknown token
                if itoken in c.tokens:
                    return c.tokens[itoken]
                else:
                    c.labels.append((itoken, None))
                    c.tokens[itoken] = ilabel
                    return ilabel

    def addfirstsets(self):
        names = list(self.dfas.keys())
        names.sort()
        for name in names:
            if name not in self.first:
                self.calcfirst(name)
            #print name, self.first[name].keys()

    def calcfirst(self, name):
        dfa = self.dfas[name]
        self.first[name] = None  # dummy to detect left recursion
        state = dfa[0]
        totalset = {}
        overlapcheck = {}
        for label, next in state.arcs.items():
            if label in self.dfas:
                if label in self.first:
                    fset = self.first[label]
                    if fset is None:
                        raise ValueError("recursion for rule %r" % name)
                else:
                    self.calcfirst(label)
                    fset = self.first[label]
                totalset.update(fset)
                overlapcheck[label] = fset
            else:
                totalset[label] = 1
                overlapcheck[label] = {label: 1}
        inverse = {}
        for label, itsfirst in overlapcheck.items():
            for symbol in itsfirst:
                if symbol in inverse:
                    raise ValueError("rule %s is ambiguous; %s is in the"
                                     " first sets of %s as well as %s" %
                                     (name, symbol, label, inverse[symbol]))
                inverse[symbol] = label
        self.first[name] = totalset

    def parse(self):
        dfas = {}
        startsymbol = None
        # MSTART: (NEWLINE | RULE)* ENDMARKER
        while self.type != token.ENDMARKER:
            while self.type == token.NEWLINE:
                self.gettoken()
            # RULE: NAME ':' RHS NEWLINE
            name = self.expect(token.NAME)
            self.expect(token.OP, ":")
            a, z = self.parse_rhs()
            self.expect(token.NEWLINE)
            #self.dump_nfa(name, a, z)
            dfa = self.make_dfa(a, z)
            #self.dump_dfa(name, dfa)
            # oldlen = len(dfa)
            self.simplify_dfa(dfa)
            # newlen = len(dfa)
            dfas[name] = dfa
            #print name, oldlen, newlen
            if startsymbol is None:
                startsymbol = name
        return dfas, startsymbol

    def make_dfa(self, start, finish):
        # To turn an NFA into a DFA, we define the states of the DFA
        # to correspond to *sets* of states of the NFA.  Then do some
        # state reduction.  Let's represent sets as dicts with 1 for
        # values.
        assert isinstance(start, NFAState)
        assert isinstance(finish, NFAState)

        def closure(state):
            base = {}
            addclosure(state, base)
            return base

        def addclosure(state, base):
            assert isinstance(state, NFAState)
            if state in base:
                return
            base[state] = 1
            for label, next in state.arcs:
                if label is None:
                    addclosure(next, base)

        states = [DFAState(closure(start), finish)]
        for state in states:  # NB states grows while we're iterating
            arcs = {}
            for nfastate in state.nfaset:
                for label, next in nfastate.arcs:
                    if label is not None:
                        addclosure(next, arcs.setdefault(label, {}))
            for label, nfaset in arcs.items():
                for st in states:
                    if st.nfaset == nfaset:
                        break
                else:
                    st = DFAState(nfaset, finish)
                    states.append(st)
                state.addarc(st, label)
        return states  # List of DFAState instances; first one is start

    def dump_nfa(self, name, start, finish):
        print("Dump of NFA for", name)
        todo = [start]
        for i, state in enumerate(todo):
            print("  State", i, state is finish and "(final)" or "")
            for label, next in state.arcs:
                if next in todo:
                    j = todo.index(next)
                else:
                    j = len(todo)
                    todo.append(next)
                if label is None:
                    print("    -> %d" % j)
                else:
                    print("    %s -> %d" % (label, j))

    def dump_dfa(self, name, dfa):
        print("Dump of DFA for", name)
        for i, state in enumerate(dfa):
            print("  State", i, state.isfinal and "(final)" or "")
            for label, next in state.arcs.items():
                print("    %s -> %d" % (label, dfa.index(next)))

    def simplify_dfa(self, dfa):
        # This is not theoretically optimal, but works well enough.
        # Algorithm: repeatedly look for two states that have the same
        # set of arcs (same labels pointing to the same nodes) and
        # unify them, until things stop changing.

        # dfa is a list of DFAState instances
        changes = True
        while changes:
            changes = False
            for i, state_i in enumerate(dfa):
                for j in range(i + 1, len(dfa)):
                    state_j = dfa[j]
                    if state_i == state_j:
                        #print "  unify", i, j
                        del dfa[j]
                        for state in dfa:
                            state.unifystate(state_j, state_i)
                        changes = True
                        break

    def parse_rhs(self):
        # RHS: ALT ('|' ALT)*
        a, z = self.parse_alt()
        if self.value != "|":
            return a, z
        else:
            aa = NFAState()
            zz = NFAState()
            aa.addarc(a)
            z.addarc(zz)
            while self.value == "|":
                self.gettoken()
                a, z = self.parse_alt()
                aa.addarc(a)
                z.addarc(zz)
            return aa, zz

    def parse_alt(self):
        # ALT: ITEM+
        a, b = self.parse_item()
        while (self.value in ("(", "[") or
               self.type in (token.NAME, token.STRING)):
            c, d = self.parse_item()
            b.addarc(c)
            b = d
        return a, b

    def parse_item(self):
        # ITEM: '[' RHS ']' | ATOM ['+' | '*']
        if self.value == "[":
            self.gettoken()
            a, z = self.parse_rhs()
            self.expect(token.OP, "]")
            a.addarc(z)
            return a, z
        else:
            a, z = self.parse_atom()
            value = self.value
            if value not in ("+", "*"):
                return a, z
            self.gettoken()
            z.addarc(a)
            if value == "+":
                return a, z
            else:
                return a, a

    def parse_atom(self):
        # ATOM: '(' RHS ')' | NAME | STRING
        if self.value == "(":
            self.gettoken()
            a, z = self.parse_rhs()
            self.expect(token.OP, ")")
            return a, z
        elif self.type in (token.NAME, token.STRING):
            a = NFAState()
            z = NFAState()
            a.addarc(z, self.value)
            self.gettoken()
            return a, z
        else:
            self.raise_error("expected (...) or NAME or STRING, got %s/%s",
                             self.type, self.value)

    def expect(self, type, value=None):
        if self.type != type or (value is not None and self.value != value):
            self.raise_error("expected %s/%s, got %s/%s",
                             type, value, self.type, self.value)
        value = self.value
        self.gettoken()
        return value

    def gettoken(self):
        tup = next(self.generator)
        while tup[0] in (token.COMMENT, token.NL):
            tup = next(self.generator)
        self.type, self.value, self.begin, prefix = tup
        #print tokenize.tok_name[self.type], repr(self.value)

    def raise_error(self, msg, *args):
        if args:
            try:
                msg = msg % args
            except:
                msg = " ".join([msg] + list(map(str, args)))
        line = open(self.filename).readlines()[self.begin[0]]
        raise SyntaxError(msg, (self.filename, self.begin[0],
                                self.begin[1], line))


class NFAState(object):
    def __init__(self):
        self.arcs = []  # list of (label, NFAState) pairs

    def addarc(self, next, label=None):
        assert label is None or isinstance(label, str)
        assert isinstance(next, NFAState)
        self.arcs.append((label, next))


class DFAState(object):
    def __init__(self, nfaset, final):
        assert isinstance(nfaset, dict)
        assert isinstance(next(iter(nfaset)), NFAState)
        assert isinstance(final, NFAState)
        self.nfaset = nfaset
        self.isfinal = final in nfaset
        self.arcs = {}  # map from label to DFAState

    def addarc(self, next, label):
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next, DFAState)
        self.arcs[label] = next

    def unifystate(self, old, new):
        for label, next in self.arcs.items():
            if next is old:
                self.arcs[label] = new

    def __eq__(self, other):
        # Equality test -- ignore the nfaset instance variable
        assert isinstance(other, DFAState)
        if self.isfinal != other.isfinal:
            return False
        # Can't just return self.arcs == other.arcs, because that
        # would invoke this method recursively, with cycles...
        if len(self.arcs) != len(other.arcs):
            return False
        for label, next in self.arcs.items():
            if next is not other.arcs.get(label):
                return False
        return True

    __hash__ = None  # For Py3 compatibility.


def generate_grammar(bnf_text):
    """
    ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
    at-least-once repetition, [] for optional parts, | for alternatives and ()
    for grouping).

    It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
    own parser.
    """
    p = ParserGenerator(bnf_text)
    return p.make_grammar()
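
`generate_grammar` ties the pieces together: tokenize the BNF text, build one NFA per rule, convert each to a simplified DFA, and emit a `Grammar` with the tables described in grammar.py. A tiny sketch, not part of the commit (the two-rule grammar text is made up for illustration, and it assumes the tokenizer imported above can handle it):

    from parso.pgen2.pgen import generate_grammar

    # Made-up miniature grammar in the pgen dialect, not a file shipped by parso.
    bnf_text = (
        "file_input: (NEWLINE | stmt)* ENDMARKER\n"
        "stmt: NAME '=' NUMBER NEWLINE\n"
    )
    g = generate_grammar(bnf_text)
    print(g.start, g.symbol2number)   # e.g. 256 and {'file_input': 256, 'stmt': 257}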
124  parso/python/__init__.py  Normal file
@@ -0,0 +1,124 @@
"""
Parsers for Python
"""
import os

from jedi import settings
from jedi.common import splitlines, source_to_unicode
from jedi._compatibility import FileNotFoundError
from parso.pgen2.pgen import generate_grammar
from parso.python.parser import Parser, _remove_last_newline
from parso.python.diff import DiffParser
from parso.tokenize import generate_tokens
from parso.cache import parser_cache, load_module, save_module


_loaded_grammars = {}


def load_grammar(version=None):
    """
    Loads a Python grammar. The default version is always the latest.

    If you need support for a specific version, please use e.g.
    `version='3.3'`.
    """
    if version is None:
        version = '3.6'

    if version in ('3.2', '3.3'):
        version = '3.4'
    elif version == '2.6':
        version = '2.7'

    file = 'grammar' + version + '.txt'

    global _loaded_grammars
    path = os.path.join(os.path.dirname(__file__), file)
    try:
        return _loaded_grammars[path]
    except KeyError:
        try:
            with open(path) as f:
                bnf_text = f.read()
            grammar = generate_grammar(bnf_text)
            return _loaded_grammars.setdefault(path, grammar)
        except FileNotFoundError:
            # Just load the default if the file does not exist.
            return load_grammar()


def parse(code=None, path=None, grammar=None, error_recovery=True,
          start_symbol='file_input', cache=False, diff_cache=False):
    """
    If you want to parse a Python file you want to start here, most likely.

    If you need finer grained control over the parsed instance, there will be
    other ways to access it.

    :param code: A unicode string that contains Python code.
    :param path: The path to the file you want to open. Only needed for caching.
    :param grammar: A Python grammar file, created with load_grammar. You may
        not specify it. In that case it's the current Python version.
    :param error_recovery: If enabled, any code will be returned. If it is
        invalid, it will be returned as an error node. If disabled, you will
        get a ParseError when encountering syntax errors in your code.
    :param start_symbol: The grammar symbol that you want to parse. Only
        allowed to be used when error_recovery is disabled.

    :return: A syntax tree node. Typically the module.
    """
    if code is None and path is None:
        raise TypeError("Please provide either code or a path.")

    if grammar is None:
        grammar = load_grammar()

    if cache and not code and path is not None:
        # In this case we do actual caching. We just try to load it.
        module_node = load_module(grammar, path)
        if module_node is not None:
            return module_node

    if code is None:
        with open(path, 'rb') as f:
            code = source_to_unicode(f.read())

    if diff_cache and settings.fast_parser:
        try:
            module_cache_item = parser_cache[path]
        except KeyError:
            pass
        else:
            lines = splitlines(code, keepends=True)
            module_node = module_cache_item.node
            old_lines = module_cache_item.lines
            if old_lines == lines:
                save_module(grammar, path, module_node, lines, pickling=False)
                return module_node

            new_node = DiffParser(grammar, module_node).update(
                old_lines=old_lines,
                new_lines=lines
            )
            save_module(grammar, path, new_node, lines, pickling=cache)
            return new_node

    added_newline = not code.endswith('\n')
    lines = tokenize_lines = splitlines(code, keepends=True)
    if added_newline:
        code += '\n'
        tokenize_lines = list(tokenize_lines)
        tokenize_lines[-1] += '\n'
        tokenize_lines.append('')

    tokens = generate_tokens(tokenize_lines, use_exact_op_types=True)

    p = Parser(grammar, error_recovery=error_recovery, start_symbol=start_symbol)
    root_node = p.parse(tokens=tokens)
    if added_newline:
        _remove_last_newline(root_node)

    if cache or diff_cache:
        save_module(grammar, path, root_node, lines, pickling=cache)
    return root_node
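
This module is the intended public entry point. A short usage sketch of `parse()` as defined above, not part of the commit (it assumes the grammar files and the Jedi helpers it still imports are available):

    from parso.python import parse, load_grammar

    # Parse a code string directly; the result is the root module node.
    module = parse("def foo(x):\n    return x + 1\n")

    # An explicit grammar version can be requested; unknown versions fall back
    # as shown in load_grammar() above.
    grammar = load_grammar(version='3.4')
    module = parse("foo(1)\n", grammar=grammar, error_recovery=False)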
603
parso/python/diff.py
Normal file
603
parso/python/diff.py
Normal file
@@ -0,0 +1,603 @@
|
|||||||
|
"""
|
||||||
|
Basically a contains parser that is faster, because it tries to parse only
|
||||||
|
parts and if anything changes, it only reparses the changed parts.
|
||||||
|
|
||||||
|
It works with a simple diff in the beginning and will try to reuse old parser
|
||||||
|
fragments.
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
import difflib
|
||||||
|
from collections import namedtuple
|
||||||
|
|
||||||
|
from jedi.common import splitlines
|
||||||
|
from jedi import debug
|
||||||
|
from parso.python.parser import Parser, _remove_last_newline
|
||||||
|
from parso.python.tree import EndMarker
|
||||||
|
from parso.tokenize import (generate_tokens, NEWLINE, TokenInfo,
|
||||||
|
ENDMARKER, INDENT, DEDENT)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_last_line(node_or_leaf):
|
||||||
|
last_leaf = node_or_leaf.get_last_leaf()
|
||||||
|
if _ends_with_newline(last_leaf):
|
||||||
|
return last_leaf.start_pos[0]
|
||||||
|
else:
|
||||||
|
return last_leaf.end_pos[0]
|
||||||
|
|
||||||
|
|
||||||
|
def _ends_with_newline(leaf, suffix=''):
|
||||||
|
if leaf.type == 'error_leaf':
|
||||||
|
typ = leaf.original_type
|
||||||
|
else:
|
||||||
|
typ = leaf.type
|
||||||
|
|
||||||
|
return typ == 'newline' or suffix.endswith('\n')
|
||||||
|
|
||||||
|
|
||||||
|
def _flows_finished(grammar, stack):
|
||||||
|
"""
|
||||||
|
if, while, for and try might not be finished, because another part might
|
||||||
|
still be parsed.
|
||||||
|
"""
|
||||||
|
for dfa, newstate, (symbol_number, nodes) in stack:
|
||||||
|
if grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt',
|
||||||
|
'for_stmt', 'try_stmt'):
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def suite_or_file_input_is_valid(grammar, stack):
|
||||||
|
if not _flows_finished(grammar, stack):
|
||||||
|
return False
|
||||||
|
|
||||||
|
for dfa, newstate, (symbol_number, nodes) in reversed(stack):
|
||||||
|
if grammar.number2symbol[symbol_number] == 'suite':
|
||||||
|
# If only newline is in the suite, the suite is not valid, yet.
|
||||||
|
return len(nodes) > 1
|
||||||
|
# Not reaching a suite means that we're dealing with file_input levels
|
||||||
|
# where there's no need for a valid statement in it. It can also be empty.
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def _is_flow_node(node):
|
||||||
|
try:
|
||||||
|
value = node.children[0].value
|
||||||
|
except AttributeError:
|
||||||
|
return False
|
||||||
|
return value in ('if', 'for', 'while', 'try')
|
||||||
|
|
||||||
|
|
||||||
|
class _PositionUpdatingFinished(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _update_positions(nodes, line_offset, last_leaf):
|
||||||
|
for node in nodes:
|
||||||
|
try:
|
||||||
|
children = node.children
|
||||||
|
except AttributeError:
|
||||||
|
# Is a leaf
|
||||||
|
node.line += line_offset
|
||||||
|
if node is last_leaf:
|
||||||
|
raise _PositionUpdatingFinished
|
||||||
|
else:
|
||||||
|
_update_positions(children, line_offset, last_leaf)
|
||||||
|
|
||||||
|
|
||||||
|
class DiffParser(object):
|
||||||
|
"""
|
||||||
|
An advanced form of parsing a file faster. Unfortunately comes with huge
|
||||||
|
side effects. It changes the given module.
|
||||||
|
"""
|
||||||
|
def __init__(self, grammar, module):
|
||||||
|
self._grammar = grammar
|
||||||
|
self._module = module
|
||||||
|
|
||||||
|
def _reset(self):
|
||||||
|
self._copy_count = 0
|
||||||
|
self._parser_count = 0
|
||||||
|
|
||||||
|
self._nodes_stack = _NodesStack(self._module)
|
||||||
|
|
||||||
|
def update(self, old_lines, new_lines):
|
||||||
|
'''
|
||||||
|
The algorithm works as follows:
|
||||||
|
|
||||||
|
Equal:
|
||||||
|
- Assure that the start is a newline, otherwise parse until we get
|
||||||
|
one.
|
||||||
|
- Copy from parsed_until_line + 1 to max(i2 + 1)
|
||||||
|
- Make sure that the indentation is correct (e.g. add DEDENT)
|
||||||
|
- Add old and change positions
|
||||||
|
Insert:
|
||||||
|
- Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
|
||||||
|
much more.
|
||||||
|
|
||||||
|
Returns the new module node.
|
||||||
|
'''
|
||||||
|
debug.speed('diff parser start')
|
||||||
|
# Reset the used names cache so they get regenerated.
|
||||||
|
self._module._used_names = None
|
||||||
|
|
||||||
|
self._parser_lines_new = new_lines
|
||||||
|
self._added_newline = False
|
||||||
|
if new_lines[-1] != '':
|
||||||
|
# The Python grammar needs a newline at the end of a file, but for
|
||||||
|
# everything else we keep working with new_lines here.
|
||||||
|
self._parser_lines_new = list(new_lines)
|
||||||
|
self._parser_lines_new[-1] += '\n'
|
||||||
|
self._parser_lines_new.append('')
|
||||||
|
self._added_newline = True
|
||||||
|
|
||||||
|
self._reset()
|
||||||
|
|
||||||
|
line_length = len(new_lines)
|
||||||
|
sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
|
||||||
|
opcodes = sm.get_opcodes()
|
||||||
|
debug.speed('diff parser calculated')
|
||||||
|
debug.dbg('diff: line_lengths old: %s, new: %s' % (len(old_lines), line_length))
|
||||||
|
|
||||||
|
for operation, i1, i2, j1, j2 in opcodes:
|
||||||
|
debug.dbg('diff %s old[%s:%s] new[%s:%s]',
|
||||||
|
operation, i1 + 1, i2, j1 + 1, j2)
|
||||||
|
|
||||||
|
if j2 == line_length + int(self._added_newline):
|
||||||
|
# The empty part after the last newline is not relevant.
|
||||||
|
j2 -= 1
|
||||||
|
|
||||||
|
if operation == 'equal':
|
||||||
|
line_offset = j1 - i1
|
||||||
|
self._copy_from_old_parser(line_offset, i2, j2)
|
||||||
|
elif operation == 'replace':
|
||||||
|
self._parse(until_line=j2)
|
||||||
|
elif operation == 'insert':
|
||||||
|
self._parse(until_line=j2)
|
||||||
|
else:
|
||||||
|
assert operation == 'delete'
|
||||||
|
|
||||||
|
# With this action all change will finally be applied and we have a
|
||||||
|
# changed module.
|
||||||
|
self._nodes_stack.close()
|
||||||
|
|
||||||
|
if self._added_newline:
|
||||||
|
_remove_last_newline(self._module)
|
||||||
|
|
||||||
|
# Good for debugging.
|
||||||
|
if debug.debug_function:
|
||||||
|
self._enabled_debugging(old_lines, new_lines)
|
||||||
|
last_pos = self._module.end_pos[0]
|
||||||
|
if last_pos != line_length:
|
||||||
|
current_lines = splitlines(self._module.get_code(), keepends=True)
|
||||||
|
diff = difflib.unified_diff(current_lines, new_lines)
|
||||||
|
raise Exception(
|
||||||
|
"There's an issue (%s != %s) with the diff parser. Please report:\n%s"
|
||||||
|
% (last_pos, line_length, ''.join(diff))
|
||||||
|
)
|
||||||
|
|
||||||
|
debug.speed('diff parser end')
|
||||||
|
return self._module
|
||||||
|
|
||||||
|
def _enabled_debugging(self, old_lines, lines_new):
|
||||||
|
if self._module.get_code() != ''.join(lines_new):
|
||||||
|
debug.warning('parser issue:\n%s\n%s', ''.join(old_lines),
|
||||||
|
''.join(lines_new))
|
||||||
|
|
||||||
|
def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
|
||||||
|
copied_nodes = [None]
|
||||||
|
|
||||||
|
last_until_line = -1
|
||||||
|
while until_line_new > self._nodes_stack.parsed_until_line:
|
||||||
|
parsed_until_line_old = self._nodes_stack.parsed_until_line - line_offset
|
||||||
|
line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
|
||||||
|
if line_stmt is None:
|
||||||
|
# Parse 1 line at least. We don't need more, because we just
|
||||||
|
# want to get into a state where the old parser has statements
|
||||||
|
# again that can be copied (e.g. not lines within parentheses).
|
||||||
|
self._parse(self._nodes_stack.parsed_until_line + 1)
|
||||||
|
elif not copied_nodes:
|
||||||
|
# We have copied as much as possible (but definitely not too
|
||||||
|
# much). Therefore we just parse the rest.
|
||||||
|
# We might not reach the end, because there's a statement
|
||||||
|
# that is not finished.
|
||||||
|
self._parse(until_line_new)
|
||||||
|
else:
|
||||||
|
p_children = line_stmt.parent.children
|
||||||
|
index = p_children.index(line_stmt)
|
||||||
|
|
||||||
|
copied_nodes = self._nodes_stack.copy_nodes(
|
||||||
|
p_children[index:],
|
||||||
|
until_line_old,
|
||||||
|
line_offset
|
||||||
|
)
|
||||||
|
# Match all the nodes that are in the wanted range.
|
||||||
|
if copied_nodes:
|
||||||
|
self._copy_count += 1
|
||||||
|
|
||||||
|
from_ = copied_nodes[0].get_start_pos_of_prefix()[0] + line_offset
|
||||||
|
to = self._nodes_stack.parsed_until_line
|
||||||
|
|
||||||
|
debug.dbg('diff actually copy %s to %s', from_, to)
|
||||||
|
# Since there are potential bugs that might loop here endlessly, we
|
||||||
|
# just stop here.
|
||||||
|
assert last_until_line != self._nodes_stack.parsed_until_line \
|
||||||
|
or not copied_nodes, last_until_line
|
||||||
|
last_until_line = self._nodes_stack.parsed_until_line
|
||||||
|
|
||||||
|
def _get_old_line_stmt(self, old_line):
|
||||||
|
leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)
|
||||||
|
|
||||||
|
if _ends_with_newline(leaf):
|
||||||
|
leaf = leaf.get_next_leaf()
|
||||||
|
if leaf.get_start_pos_of_prefix()[0] == old_line:
|
||||||
|
node = leaf
|
||||||
|
while node.parent.type not in ('file_input', 'suite'):
|
||||||
|
node = node.parent
|
||||||
|
return node
|
||||||
|
# Must be on the same line. Otherwise we need to parse that bit.
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _get_before_insertion_node(self):
|
||||||
|
if self._nodes_stack.is_empty():
|
||||||
|
return None
|
||||||
|
|
||||||
|
line = self._nodes_stack.parsed_until_line + 1
|
||||||
|
node = self._new_module.get_last_leaf()
|
||||||
|
while True:
|
||||||
|
parent = node.parent
|
||||||
|
if parent.type in ('suite', 'file_input'):
|
||||||
|
assert node.end_pos[0] <= line
|
||||||
|
assert node.end_pos[1] == 0 or '\n' in self._prefix
|
||||||
|
return node
|
||||||
|
node = parent
|
||||||
|
|
||||||
|
def _parse(self, until_line):
|
||||||
|
"""
|
||||||
|
Parses at least until the given line, but might just parse more until a
|
||||||
|
valid state is reached.
|
||||||
|
"""
|
||||||
|
last_until_line = 0
|
||||||
|
while until_line > self._nodes_stack.parsed_until_line:
|
||||||
|
node = self._try_parse_part(until_line)
|
||||||
|
nodes = self._get_children_nodes(node)
|
||||||
|
#self._insert_nodes(nodes)
|
||||||
|
|
||||||
|
self._nodes_stack.add_parsed_nodes(nodes)
|
||||||
|
debug.dbg(
|
||||||
|
'parse part %s to %s (to %s in parser)',
|
||||||
|
nodes[0].get_start_pos_of_prefix()[0],
|
||||||
|
self._nodes_stack.parsed_until_line,
|
||||||
|
node.end_pos[0] - 1
|
||||||
|
)
|
||||||
|
# Since the tokenizer sometimes has bugs, we cannot be sure that
|
||||||
|
# this loop terminates. Therefore assert that there's always a
|
||||||
|
# change.
|
||||||
|
assert last_until_line != self._nodes_stack.parsed_until_line, last_until_line
|
||||||
|
last_until_line = self._nodes_stack.parsed_until_line
|
||||||
|
|
||||||
|
def _get_children_nodes(self, node):
|
||||||
|
nodes = node.children
|
||||||
|
first_element = nodes[0]
|
||||||
|
# TODO this looks very strange...
|
||||||
|
if first_element.type == 'error_leaf' and \
|
||||||
|
first_element.original_type == 'indent':
|
||||||
|
assert False, str(nodes)
|
||||||
|
|
||||||
|
return nodes
|
||||||
|
|
||||||
|
def _try_parse_part(self, until_line):
|
||||||
|
"""
|
||||||
|
Sets up a normal parser that uses a spezialized tokenizer to only parse
|
||||||
|
until a certain position (or a bit longer if the statement hasn't
|
||||||
|
ended.
|
||||||
|
"""
|
||||||
|
self._parser_count += 1
|
||||||
|
# TODO speed up, shouldn't copy the whole list all the time.
|
||||||
|
# memoryview?
|
||||||
|
parsed_until_line = self._nodes_stack.parsed_until_line
|
||||||
|
lines_after = self._parser_lines_new[parsed_until_line:]
|
||||||
|
#print('parse_content', parsed_until_line, lines_after, until_line)
|
||||||
|
tokens = self._diff_tokenize(
|
||||||
|
lines_after,
|
||||||
|
until_line,
|
||||||
|
line_offset=parsed_until_line
|
||||||
|
)
|
||||||
|
self._active_parser = Parser(
|
||||||
|
self._grammar,
|
||||||
|
error_recovery=True
|
||||||
|
)
|
||||||
|
return self._active_parser.parse(tokens=tokens)
|
||||||
|
|
||||||
|
    def _diff_tokenize(self, lines, until_line, line_offset=0):
        is_first_token = True
        omitted_first_indent = False
        indents = []
        tokens = generate_tokens(lines, use_exact_op_types=True)
        stack = self._active_parser.pgen_parser.stack
        for typ, string, start_pos, prefix in tokens:
            start_pos = start_pos[0] + line_offset, start_pos[1]
            if typ == INDENT:
                indents.append(start_pos[1])
                if is_first_token:
                    omitted_first_indent = True
                    # We want to get rid of indents that are only here because
                    # we only parse part of the file. These indents would only
                    # get parsed as error leafs, which doesn't make any sense.
                    is_first_token = False
                    continue
            is_first_token = False

            if typ == DEDENT:
                indents.pop()
                if omitted_first_indent and not indents:
                    # We are done here; the only thing that can come now is an
                    # endmarker or another dedented code block.
                    typ, string, start_pos, prefix = next(tokens)
                    if '\n' in prefix:
                        prefix = re.sub(r'(?<=\n)[^\n]+$', '', prefix)
                    else:
                        prefix = ''
                    yield TokenInfo(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
                    break
            elif typ == NEWLINE and start_pos[0] >= until_line:
                yield TokenInfo(typ, string, start_pos, prefix)
                # Check if the parser is actually in a valid suite state.
                if suite_or_file_input_is_valid(self._grammar, stack):
                    start_pos = start_pos[0] + 1, 0
                    while len(indents) > int(omitted_first_indent):
                        indents.pop()
                        yield TokenInfo(DEDENT, '', start_pos, '')

                    yield TokenInfo(ENDMARKER, '', start_pos, '')
                    break
                else:
                    continue

            yield TokenInfo(typ, string, start_pos, prefix)
class _NodesStackNode(object):
    ChildrenGroup = namedtuple('ChildrenGroup', 'children line_offset last_line_offset_leaf')

    def __init__(self, tree_node, parent=None):
        self.tree_node = tree_node
        self.children_groups = []
        self.parent = parent

    def close(self):
        children = []
        for children_part, line_offset, last_line_offset_leaf in self.children_groups:
            if line_offset != 0:
                try:
                    _update_positions(
                        children_part, line_offset, last_line_offset_leaf)
                except _PositionUpdatingFinished:
                    pass
            children += children_part
        self.tree_node.children = children
        # Reset the parents
        for node in children:
            node.parent = self.tree_node

    def add(self, children, line_offset=0, last_line_offset_leaf=None):
        group = self.ChildrenGroup(children, line_offset, last_line_offset_leaf)
        self.children_groups.append(group)

    def get_last_line(self, suffix):
        line = 0
        if self.children_groups:
            children_group = self.children_groups[-1]
            last_leaf = children_group.children[-1].get_last_leaf()
            line = last_leaf.end_pos[0]

            # Calculate the line offsets
            offset = children_group.line_offset
            if offset:
                # In case the line_offset is not applied to this specific leaf,
                # just ignore it.
                if last_leaf.line <= children_group.last_line_offset_leaf.line:
                    line += children_group.line_offset

            # Newlines end on the next line, which means that they would cover
            # the next line. That line is not fully parsed at this point.
            if _ends_with_newline(last_leaf, suffix):
                line -= 1
        line += suffix.count('\n')
        return line
class _NodesStack(object):
    endmarker_type = 'endmarker'

    def __init__(self, module):
        # Top of stack
        self._tos = self._base_node = _NodesStackNode(module)
        self._module = module
        self._last_prefix = ''
        self.prefix = ''

    def is_empty(self):
        return not self._base_node.children

    @property
    def parsed_until_line(self):
        return self._tos.get_last_line(self.prefix)

    def _get_insertion_node(self, indentation_node):
        indentation = indentation_node.start_pos[1]

        # find insertion node
        node = self._tos
        while True:
            tree_node = node.tree_node
            if tree_node.type == 'suite':
                # A suite starts with NEWLINE, ...
                node_indentation = tree_node.children[1].start_pos[1]

                if indentation >= node_indentation:  # Not a Dedent
                    # We might be at the most outer layer: modules. We
                    # don't want to depend on the first statement
                    # having the right indentation.
                    return node

            elif tree_node.type == 'file_input':
                return node

            node = self._close_tos()

    def _close_tos(self):
        self._tos.close()
        self._tos = self._tos.parent
        return self._tos

    def add_parsed_nodes(self, tree_nodes):
        tree_nodes = self._remove_endmarker(tree_nodes)
        if not tree_nodes:
            return

        assert tree_nodes[0].type != 'newline'

        node = self._get_insertion_node(tree_nodes[0])
        assert node.tree_node.type in ('suite', 'file_input')
        node.add(tree_nodes)
        self._update_tos(tree_nodes[-1])

    def _remove_endmarker(self, tree_nodes):
        """
        Helps cleaning up the tree nodes that get inserted.
        """
        last_leaf = tree_nodes[-1].get_last_leaf()
        is_endmarker = last_leaf.type == self.endmarker_type
        self._last_prefix = ''
        if is_endmarker:
            try:
                separation = last_leaf.prefix.rindex('\n')
            except ValueError:
                pass
            else:
                # Remove the whitespace part of the prefix after a newline.
                # That is not relevant if parentheses were opened. Always parse
                # until the end of a line.
                last_leaf.prefix, self._last_prefix = \
                    last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]

        first_leaf = tree_nodes[0].get_first_leaf()
        first_leaf.prefix = self.prefix + first_leaf.prefix
        self.prefix = ''

        if is_endmarker:
            self.prefix = last_leaf.prefix

            tree_nodes = tree_nodes[:-1]

        return tree_nodes
    def copy_nodes(self, tree_nodes, until_line, line_offset):
        """
        Copies tree nodes from the old parser tree.

        Returns the tree nodes that were copied.
        """
        tos = self._get_insertion_node(tree_nodes[0])

        new_nodes, self._tos = self._copy_nodes(tos, tree_nodes, until_line, line_offset)
        return new_nodes

    def _copy_nodes(self, tos, nodes, until_line, line_offset):
        new_nodes = []

        new_tos = tos
        for node in nodes:
            if node.type == 'endmarker':
                # Endmarkers just distort all the checks below. Remove them.
                break

            if node.start_pos[0] > until_line:
                break
            # TODO this check might take a bit of time for large files. We
            # might want to change this to do more intelligent guessing or
            # binary search.
            if _get_last_line(node) > until_line:
                # We can split up functions and classes later.
                if node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite':
                    new_nodes.append(node)
                break

            new_nodes.append(node)

        if not new_nodes:
            return [], tos

        last_node = new_nodes[-1]
        line_offset_index = -1
        if last_node.type in ('classdef', 'funcdef'):
            suite = last_node.children[-1]
            if suite.type == 'suite':
                suite_tos = _NodesStackNode(suite)
                # Don't need to pass line_offset here, it's already done by the
                # parent.
                suite_nodes, recursive_tos = self._copy_nodes(
                    suite_tos, suite.children, until_line, line_offset)
                if len(suite_nodes) < 2:
                    # A suite with only a newline is not valid.
                    new_nodes.pop()
                else:
                    suite_tos.parent = tos
                    new_tos = recursive_tos
                    line_offset_index = -2

        elif (new_nodes[-1].type in ('error_leaf', 'error_node') or
              _is_flow_node(new_nodes[-1])):
            # Error leafs/nodes don't have a defined start/end. Error
            # nodes might not end with a newline (e.g. if there's an
            # open `(`). Therefore ignore all of them unless they are
            # succeeded by a valid parser state.
            # If we copy flows at the end, they might be continued
            # after the copy limit (in the new parser).
            # In this while loop we try to remove until we find a newline.
            new_nodes.pop()
            while new_nodes:
                last_node = new_nodes[-1]
                if last_node.get_last_leaf().type == 'newline':
                    break
                new_nodes.pop()

        if new_nodes:
            try:
                last_line_offset_leaf = new_nodes[line_offset_index].get_last_leaf()
            except IndexError:
                line_offset = 0
                # In this case we don't have to calculate an offset, because
                # there are no children to be managed.
                last_line_offset_leaf = None
            tos.add(new_nodes, line_offset, last_line_offset_leaf)
        return new_nodes, new_tos

    def _update_tos(self, tree_node):
        if tree_node.type in ('suite', 'file_input'):
            self._tos = _NodesStackNode(tree_node, self._tos)
            self._tos.add(list(tree_node.children))
            self._update_tos(tree_node.children[-1])
        elif tree_node.type in ('classdef', 'funcdef'):
            self._update_tos(tree_node.children[-1])

    def close(self):
        while self._tos is not None:
            self._close_tos()

        # Add an endmarker.
        try:
            last_leaf = self._module.get_last_leaf()
            end_pos = list(last_leaf.end_pos)
        except IndexError:
            end_pos = [1, 0]
        lines = splitlines(self.prefix)
        assert len(lines) > 0
        if len(lines) == 1:
            end_pos[1] += len(lines[0])
        else:
            end_pos[0] += len(lines) - 1
            end_pos[1] = len(lines[-1])

        endmarker = EndMarker('', tuple(end_pos), self.prefix + self._last_prefix)
        endmarker.parent = self._module
        self._module.children.append(endmarker)
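To make the interplay of the pieces above easier to follow, here is a minimal, hypothetical driver sketch for _NodesStack: partial parse results are appended with add_parsed_nodes(), parsed_until_line reports how far the rebuilt tree currently reaches, and close() finalizes the module with an endmarker. The names `module` and `parse_part()` are placeholders standing in for the surrounding diff-parser machinery and are not part of this commit.

# Hypothetical usage sketch; `module` and `parse_part()` are placeholders.
stack = _NodesStack(module)
until_line = 20
while stack.parsed_until_line < until_line:
    # children of a partial parse, e.g. what _try_parse_part() produces
    nodes = parse_part(stack.parsed_until_line)
    stack.add_parsed_nodes(nodes)
stack.close()  # closes open suites and appends an EndMarker to `module`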
152
parso/python/grammar2.7.txt
Normal file
@@ -0,0 +1,152 @@
|
|||||||
|
# Grammar for 2to3. This grammar supports Python 2.x and 3.x.
|
||||||
|
|
||||||
|
# Note: Changing the grammar specified in this file will most likely
|
||||||
|
# require corresponding changes in the parser module
|
||||||
|
# (../Modules/parsermodule.c). If you can't make the changes to
|
||||||
|
# that module yourself, please co-ordinate the required changes
|
||||||
|
# with someone who can; ask around on python-dev for help. Fred
|
||||||
|
# Drake <fdrake@acm.org> will probably be listening there.
|
||||||
|
|
||||||
|
# NOTE WELL: You should also follow all the steps listed in PEP 306,
|
||||||
|
# "How to Change Python's Grammar"
|
||||||
|
|
||||||
|
|
||||||
|
# Start symbols for the grammar:
|
||||||
|
# file_input is a module or sequence of commands read from an input file;
|
||||||
|
# single_input is a single interactive statement;
|
||||||
|
# eval_input is the input for the eval() and input() functions.
|
||||||
|
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||||
|
file_input: (NEWLINE | stmt)* ENDMARKER
|
||||||
|
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||||
|
eval_input: testlist NEWLINE* ENDMARKER
|
||||||
|
|
||||||
|
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
||||||
|
decorators: decorator+
|
||||||
|
decorated: decorators (classdef | funcdef)
|
||||||
|
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
||||||
|
parameters: '(' [typedargslist] ')'
|
||||||
|
typedargslist: ((tfpdef ['=' test] ',')*
|
||||||
|
('*' [tname] (',' tname ['=' test])* [',' '**' tname] | '**' tname)
|
||||||
|
| tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
|
||||||
|
tname: NAME [':' test]
|
||||||
|
tfpdef: tname | '(' tfplist ')'
|
||||||
|
tfplist: tfpdef (',' tfpdef)* [',']
|
||||||
|
varargslist: ((vfpdef ['=' test] ',')*
|
||||||
|
('*' [vname] (',' vname ['=' test])* [',' '**' vname] | '**' vname)
|
||||||
|
| vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
|
||||||
|
vname: NAME
|
||||||
|
vfpdef: vname | '(' vfplist ')'
|
||||||
|
vfplist: vfpdef (',' vfpdef)* [',']
|
||||||
|
|
||||||
|
stmt: simple_stmt | compound_stmt
|
||||||
|
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
||||||
|
small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
|
||||||
|
import_stmt | global_stmt | exec_stmt | assert_stmt)
|
||||||
|
expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
|
||||||
|
('=' (yield_expr|testlist_star_expr))*)
|
||||||
|
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
|
||||||
|
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
||||||
|
'<<=' | '>>=' | '**=' | '//=')
|
||||||
|
# For normal assignments, additional restrictions enforced by the interpreter
|
||||||
|
print_stmt: 'print' ( [ test (',' test)* [','] ] |
|
||||||
|
'>>' test [ (',' test)+ [','] ] )
|
||||||
|
del_stmt: 'del' exprlist
|
||||||
|
pass_stmt: 'pass'
|
||||||
|
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||||
|
break_stmt: 'break'
|
||||||
|
continue_stmt: 'continue'
|
||||||
|
return_stmt: 'return' [testlist]
|
||||||
|
yield_stmt: yield_expr
|
||||||
|
raise_stmt: 'raise' [test [',' test [',' test]]]
|
||||||
|
import_stmt: import_name | import_from
|
||||||
|
import_name: 'import' dotted_as_names
|
||||||
|
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
|
||||||
|
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
|
||||||
|
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
||||||
|
import_as_name: NAME ['as' NAME]
|
||||||
|
dotted_as_name: dotted_name ['as' NAME]
|
||||||
|
import_as_names: import_as_name (',' import_as_name)* [',']
|
||||||
|
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
||||||
|
dotted_name: NAME ('.' NAME)*
|
||||||
|
global_stmt: 'global' NAME (',' NAME)*
|
||||||
|
exec_stmt: 'exec' expr ['in' test [',' test]]
|
||||||
|
assert_stmt: 'assert' test [',' test]
|
||||||
|
|
||||||
|
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
|
||||||
|
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
|
||||||
|
while_stmt: 'while' test ':' suite ['else' ':' suite]
|
||||||
|
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
|
||||||
|
try_stmt: ('try' ':' suite
|
||||||
|
((except_clause ':' suite)+
|
||||||
|
['else' ':' suite]
|
||||||
|
['finally' ':' suite] |
|
||||||
|
'finally' ':' suite))
|
||||||
|
with_stmt: 'with' with_item (',' with_item)* ':' suite
|
||||||
|
with_item: test ['as' expr]
|
||||||
|
with_var: 'as' expr
|
||||||
|
# NB compile.c makes sure that the default except clause is last
|
||||||
|
except_clause: 'except' [test [(',' | 'as') test]]
|
||||||
|
# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows
|
||||||
|
# classes and functions to be empty, which is beneficial for autocompletion.
|
||||||
|
suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
|
||||||
|
|
||||||
|
# Backward compatibility cruft to support:
|
||||||
|
# [ x for x in lambda: True, lambda: False if x() ]
|
||||||
|
# even while also allowing:
|
||||||
|
# lambda x: 5 if x else 2
|
||||||
|
# (But not a mix of the two)
|
||||||
|
testlist_safe: old_test [(',' old_test)+ [',']]
|
||||||
|
old_test: or_test | old_lambdef
|
||||||
|
old_lambdef: 'lambda' [varargslist] ':' old_test
|
||||||
|
|
||||||
|
test: or_test ['if' or_test 'else' test] | lambdef
|
||||||
|
or_test: and_test ('or' and_test)*
|
||||||
|
and_test: not_test ('and' not_test)*
|
||||||
|
not_test: 'not' not_test | comparison
|
||||||
|
comparison: expr (comp_op expr)*
|
||||||
|
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
||||||
|
star_expr: '*' expr
|
||||||
|
expr: xor_expr ('|' xor_expr)*
|
||||||
|
xor_expr: and_expr ('^' and_expr)*
|
||||||
|
and_expr: shift_expr ('&' shift_expr)*
|
||||||
|
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
||||||
|
arith_expr: term (('+'|'-') term)*
|
||||||
|
term: factor (('*'|'/'|'%'|'//') factor)*
|
||||||
|
factor: ('+'|'-'|'~') factor | power
|
||||||
|
power: atom trailer* ['**' factor]
|
||||||
|
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||||
|
'[' [testlist_comp] ']' |
|
||||||
|
'{' [dictorsetmaker] '}' |
|
||||||
|
'`' testlist1 '`' |
|
||||||
|
NAME | NUMBER | STRING+ | '.' '.' '.')
|
||||||
|
# Modification by David Halter, remove `testlist_gexp` and `listmaker`
|
||||||
|
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||||
|
lambdef: 'lambda' [varargslist] ':' test
|
||||||
|
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||||
|
subscriptlist: subscript (',' subscript)* [',']
|
||||||
|
subscript: test | [test] ':' [test] [sliceop]
|
||||||
|
sliceop: ':' [test]
|
||||||
|
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
|
||||||
|
testlist: test (',' test)* [',']
|
||||||
|
# Modification by David Halter, dictsetmaker -> dictorsetmaker (so that it's
|
||||||
|
# the same as in the 3.4 grammar).
|
||||||
|
dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
|
||||||
|
(test (comp_for | (',' test)* [','])) )
|
||||||
|
|
||||||
|
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
|
||||||
|
|
||||||
|
arglist: (argument ',')* (argument [',']
|
||||||
|
|'*' test (',' argument)* [',' '**' test]
|
||||||
|
|'**' test)
|
||||||
|
argument: test [comp_for] | test '=' test # Really [keyword '='] test
|
||||||
|
|
||||||
|
comp_iter: comp_for | comp_if
|
||||||
|
comp_for: 'for' exprlist 'in' testlist_safe [comp_iter]
|
||||||
|
comp_if: 'if' old_test [comp_iter]
|
||||||
|
|
||||||
|
testlist1: test (',' test)*
|
||||||
|
|
||||||
|
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||||
|
encoding_decl: NAME
|
||||||
|
|
||||||
|
yield_expr: 'yield' [testlist]
|
||||||
135
parso/python/grammar3.4.txt
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
# Grammar for Python
|
||||||
|
|
||||||
|
# Note: Changing the grammar specified in this file will most likely
|
||||||
|
# require corresponding changes in the parser module
|
||||||
|
# (../Modules/parsermodule.c). If you can't make the changes to
|
||||||
|
# that module yourself, please co-ordinate the required changes
|
||||||
|
# with someone who can; ask around on python-dev for help. Fred
|
||||||
|
# Drake <fdrake@acm.org> will probably be listening there.
|
||||||
|
|
||||||
|
# NOTE WELL: You should also follow all the steps listed in PEP 306,
|
||||||
|
# "How to Change Python's Grammar"
|
||||||
|
|
||||||
|
# Start symbols for the grammar:
|
||||||
|
# single_input is a single interactive statement;
|
||||||
|
# file_input is a module or sequence of commands read from an input file;
|
||||||
|
# eval_input is the input for the eval() functions.
|
||||||
|
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||||
|
file_input: (NEWLINE | stmt)* ENDMARKER
|
||||||
|
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||||
|
eval_input: testlist NEWLINE* ENDMARKER
|
||||||
|
|
||||||
|
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
||||||
|
decorators: decorator+
|
||||||
|
decorated: decorators (classdef | funcdef)
|
||||||
|
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
||||||
|
parameters: '(' [typedargslist] ')'
|
||||||
|
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [','
|
||||||
|
['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
|
||||||
|
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
|
||||||
|
tfpdef: NAME [':' test]
|
||||||
|
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
|
||||||
|
['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
|
||||||
|
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
|
||||||
|
vfpdef: NAME
|
||||||
|
|
||||||
|
stmt: simple_stmt | compound_stmt
|
||||||
|
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
||||||
|
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
|
||||||
|
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
|
||||||
|
expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
|
||||||
|
('=' (yield_expr|testlist_star_expr))*)
|
||||||
|
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
|
||||||
|
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
||||||
|
'<<=' | '>>=' | '**=' | '//=')
|
||||||
|
# For normal assignments, additional restrictions enforced by the interpreter
|
||||||
|
del_stmt: 'del' exprlist
|
||||||
|
pass_stmt: 'pass'
|
||||||
|
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||||
|
break_stmt: 'break'
|
||||||
|
continue_stmt: 'continue'
|
||||||
|
return_stmt: 'return' [testlist]
|
||||||
|
yield_stmt: yield_expr
|
||||||
|
raise_stmt: 'raise' [test ['from' test]]
|
||||||
|
import_stmt: import_name | import_from
|
||||||
|
import_name: 'import' dotted_as_names
|
||||||
|
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
|
||||||
|
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
|
||||||
|
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
||||||
|
import_as_name: NAME ['as' NAME]
|
||||||
|
dotted_as_name: dotted_name ['as' NAME]
|
||||||
|
import_as_names: import_as_name (',' import_as_name)* [',']
|
||||||
|
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
||||||
|
dotted_name: NAME ('.' NAME)*
|
||||||
|
global_stmt: 'global' NAME (',' NAME)*
|
||||||
|
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
|
||||||
|
assert_stmt: 'assert' test [',' test]
|
||||||
|
|
||||||
|
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
|
||||||
|
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
|
||||||
|
while_stmt: 'while' test ':' suite ['else' ':' suite]
|
||||||
|
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
|
||||||
|
try_stmt: ('try' ':' suite
|
||||||
|
((except_clause ':' suite)+
|
||||||
|
['else' ':' suite]
|
||||||
|
['finally' ':' suite] |
|
||||||
|
'finally' ':' suite))
|
||||||
|
with_stmt: 'with' with_item (',' with_item)* ':' suite
|
||||||
|
with_item: test ['as' expr]
|
||||||
|
# NB compile.c makes sure that the default except clause is last
|
||||||
|
except_clause: 'except' [test ['as' NAME]]
|
||||||
|
# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows
|
||||||
|
# classes and functions to be empty, which is beneficial for autocompletion.
|
||||||
|
suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
|
||||||
|
|
||||||
|
test: or_test ['if' or_test 'else' test] | lambdef
|
||||||
|
test_nocond: or_test | lambdef_nocond
|
||||||
|
lambdef: 'lambda' [varargslist] ':' test
|
||||||
|
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
|
||||||
|
or_test: and_test ('or' and_test)*
|
||||||
|
and_test: not_test ('and' not_test)*
|
||||||
|
not_test: 'not' not_test | comparison
|
||||||
|
comparison: expr (comp_op expr)*
|
||||||
|
# <> isn't actually a valid comparison operator in Python. It's here for the
|
||||||
|
# sake of a __future__ import described in PEP 401
|
||||||
|
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
||||||
|
star_expr: '*' expr
|
||||||
|
expr: xor_expr ('|' xor_expr)*
|
||||||
|
xor_expr: and_expr ('^' and_expr)*
|
||||||
|
and_expr: shift_expr ('&' shift_expr)*
|
||||||
|
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
||||||
|
arith_expr: term (('+'|'-') term)*
|
||||||
|
term: factor (('*'|'/'|'%'|'//') factor)*
|
||||||
|
factor: ('+'|'-'|'~') factor | power
|
||||||
|
power: atom trailer* ['**' factor]
|
||||||
|
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||||
|
'[' [testlist_comp] ']' |
|
||||||
|
'{' [dictorsetmaker] '}' |
|
||||||
|
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
||||||
|
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||||
|
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||||
|
subscriptlist: subscript (',' subscript)* [',']
|
||||||
|
subscript: test | [test] ':' [test] [sliceop]
|
||||||
|
sliceop: ':' [test]
|
||||||
|
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
|
||||||
|
testlist: test (',' test)* [',']
|
||||||
|
dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
|
||||||
|
(test (comp_for | (',' test)* [','])) )
|
||||||
|
|
||||||
|
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
|
||||||
|
|
||||||
|
arglist: (argument ',')* (argument [',']
|
||||||
|
|'*' test (',' argument)* [',' '**' test]
|
||||||
|
|'**' test)
|
||||||
|
# The reason that keywords are test nodes instead of NAME is that using NAME
|
||||||
|
# results in an ambiguity. ast.c makes sure it's a NAME.
|
||||||
|
argument: test [comp_for] | test '=' test # Really [keyword '='] test
|
||||||
|
comp_iter: comp_for | comp_if
|
||||||
|
comp_for: 'for' exprlist 'in' or_test [comp_iter]
|
||||||
|
comp_if: 'if' test_nocond [comp_iter]
|
||||||
|
|
||||||
|
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||||
|
encoding_decl: NAME
|
||||||
|
|
||||||
|
yield_expr: 'yield' [yield_arg]
|
||||||
|
yield_arg: 'from' test | testlist
|
||||||
154
parso/python/grammar3.5.txt
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
# Grammar for Python
|
||||||
|
|
||||||
|
# Note: Changing the grammar specified in this file will most likely
|
||||||
|
# require corresponding changes in the parser module
|
||||||
|
# (../Modules/parsermodule.c). If you can't make the changes to
|
||||||
|
# that module yourself, please co-ordinate the required changes
|
||||||
|
# with someone who can; ask around on python-dev for help. Fred
|
||||||
|
# Drake <fdrake@acm.org> will probably be listening there.
|
||||||
|
|
||||||
|
# NOTE WELL: You should also follow all the steps listed at
|
||||||
|
# https://docs.python.org/devguide/grammar.html
|
||||||
|
|
||||||
|
# Start symbols for the grammar:
|
||||||
|
# single_input is a single interactive statement;
|
||||||
|
# file_input is a module or sequence of commands read from an input file;
|
||||||
|
# eval_input is the input for the eval() functions.
|
||||||
|
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||||
|
file_input: (NEWLINE | stmt)* ENDMARKER
|
||||||
|
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||||
|
eval_input: testlist NEWLINE* ENDMARKER
|
||||||
|
|
||||||
|
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
||||||
|
decorators: decorator+
|
||||||
|
decorated: decorators (classdef | funcdef | async_funcdef)
|
||||||
|
|
||||||
|
# NOTE: Reinoud Elhorst, using ASYNC/AWAIT keywords instead of tokens
|
||||||
|
# skipping python3.5 compatibility, in favour of 3.7 solution
|
||||||
|
async_funcdef: 'async' funcdef
|
||||||
|
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
||||||
|
|
||||||
|
parameters: '(' [typedargslist] ')'
|
||||||
|
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [','
|
||||||
|
['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
|
||||||
|
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
|
||||||
|
tfpdef: NAME [':' test]
|
||||||
|
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
|
||||||
|
['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
|
||||||
|
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
|
||||||
|
vfpdef: NAME
|
||||||
|
|
||||||
|
stmt: simple_stmt | compound_stmt
|
||||||
|
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
||||||
|
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
|
||||||
|
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
|
||||||
|
expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
|
||||||
|
('=' (yield_expr|testlist_star_expr))*)
|
||||||
|
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
|
||||||
|
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
||||||
|
'<<=' | '>>=' | '**=' | '//=')
|
||||||
|
# For normal assignments, additional restrictions enforced by the interpreter
|
||||||
|
del_stmt: 'del' exprlist
|
||||||
|
pass_stmt: 'pass'
|
||||||
|
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||||
|
break_stmt: 'break'
|
||||||
|
continue_stmt: 'continue'
|
||||||
|
return_stmt: 'return' [testlist]
|
||||||
|
yield_stmt: yield_expr
|
||||||
|
raise_stmt: 'raise' [test ['from' test]]
|
||||||
|
import_stmt: import_name | import_from
|
||||||
|
import_name: 'import' dotted_as_names
|
||||||
|
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
|
||||||
|
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
|
||||||
|
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
||||||
|
import_as_name: NAME ['as' NAME]
|
||||||
|
dotted_as_name: dotted_name ['as' NAME]
|
||||||
|
import_as_names: import_as_name (',' import_as_name)* [',']
|
||||||
|
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
||||||
|
dotted_name: NAME ('.' NAME)*
|
||||||
|
global_stmt: 'global' NAME (',' NAME)*
|
||||||
|
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
|
||||||
|
assert_stmt: 'assert' test [',' test]
|
||||||
|
|
||||||
|
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
|
||||||
|
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
|
||||||
|
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
|
||||||
|
while_stmt: 'while' test ':' suite ['else' ':' suite]
|
||||||
|
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
|
||||||
|
try_stmt: ('try' ':' suite
|
||||||
|
((except_clause ':' suite)+
|
||||||
|
['else' ':' suite]
|
||||||
|
['finally' ':' suite] |
|
||||||
|
'finally' ':' suite))
|
||||||
|
with_stmt: 'with' with_item (',' with_item)* ':' suite
|
||||||
|
with_item: test ['as' expr]
|
||||||
|
# NB compile.c makes sure that the default except clause is last
|
||||||
|
except_clause: 'except' [test ['as' NAME]]
|
||||||
|
# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows
|
||||||
|
# classes and functions to be empty, which is beneficial for autocompletion.
|
||||||
|
suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
|
||||||
|
|
||||||
|
test: or_test ['if' or_test 'else' test] | lambdef
|
||||||
|
test_nocond: or_test | lambdef_nocond
|
||||||
|
lambdef: 'lambda' [varargslist] ':' test
|
||||||
|
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
|
||||||
|
or_test: and_test ('or' and_test)*
|
||||||
|
and_test: not_test ('and' not_test)*
|
||||||
|
not_test: 'not' not_test | comparison
|
||||||
|
comparison: expr (comp_op expr)*
|
||||||
|
# <> isn't actually a valid comparison operator in Python. It's here for the
|
||||||
|
# sake of a __future__ import described in PEP 401 (which really works :-)
|
||||||
|
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
||||||
|
star_expr: '*' expr
|
||||||
|
expr: xor_expr ('|' xor_expr)*
|
||||||
|
xor_expr: and_expr ('^' and_expr)*
|
||||||
|
and_expr: shift_expr ('&' shift_expr)*
|
||||||
|
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
||||||
|
arith_expr: term (('+'|'-') term)*
|
||||||
|
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
|
||||||
|
factor: ('+'|'-'|'~') factor | power
|
||||||
|
power: atom_expr ['**' factor]
|
||||||
|
atom_expr: ['await'] atom trailer*
|
||||||
|
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||||
|
'[' [testlist_comp] ']' |
|
||||||
|
'{' [dictorsetmaker] '}' |
|
||||||
|
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
||||||
|
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||||
|
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||||
|
subscriptlist: subscript (',' subscript)* [',']
|
||||||
|
subscript: test | [test] ':' [test] [sliceop]
|
||||||
|
sliceop: ':' [test]
|
||||||
|
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
|
||||||
|
testlist: test (',' test)* [',']
|
||||||
|
dictorsetmaker: ( ((test ':' test | '**' expr)
|
||||||
|
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
|
||||||
|
((test | star_expr)
|
||||||
|
(comp_for | (',' (test | star_expr))* [','])) )
|
||||||
|
|
||||||
|
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
|
||||||
|
|
||||||
|
arglist: argument (',' argument)* [',']
|
||||||
|
|
||||||
|
# The reason that keywords are test nodes instead of NAME is that using NAME
|
||||||
|
# results in an ambiguity. ast.c makes sure it's a NAME.
|
||||||
|
# "test '=' test" is really "keyword '=' test", but we have no such token.
|
||||||
|
# These need to be in a single rule to avoid grammar that is ambiguous
|
||||||
|
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
|
||||||
|
# we explicitly match '*' here, too, to give it proper precedence.
|
||||||
|
# Illegal combinations and orderings are blocked in ast.c:
|
||||||
|
# multiple (test comp_for) arguments are blocked; keyword unpackings
|
||||||
|
# that precede iterable unpackings are blocked; etc.
|
||||||
|
argument: ( test [comp_for] |
|
||||||
|
test '=' test |
|
||||||
|
'**' test |
|
||||||
|
'*' test )
|
||||||
|
|
||||||
|
comp_iter: comp_for | comp_if
|
||||||
|
comp_for: 'for' exprlist 'in' or_test [comp_iter]
|
||||||
|
comp_if: 'if' test_nocond [comp_iter]
|
||||||
|
|
||||||
|
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||||
|
encoding_decl: NAME
|
||||||
|
|
||||||
|
yield_expr: 'yield' [yield_arg]
|
||||||
|
yield_arg: 'from' test | testlist
|
||||||
161
parso/python/grammar3.6.txt
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
# Grammar for Python
|
||||||
|
|
||||||
|
# Note: Changing the grammar specified in this file will most likely
|
||||||
|
# require corresponding changes in the parser module
|
||||||
|
# (../Modules/parsermodule.c). If you can't make the changes to
|
||||||
|
# that module yourself, please co-ordinate the required changes
|
||||||
|
# with someone who can; ask around on python-dev for help. Fred
|
||||||
|
# Drake <fdrake@acm.org> will probably be listening there.
|
||||||
|
|
||||||
|
# NOTE WELL: You should also follow all the steps listed at
|
||||||
|
# https://docs.python.org/devguide/grammar.html
|
||||||
|
|
||||||
|
# Start symbols for the grammar:
|
||||||
|
# file_input is a module or sequence of commands read from an input file;
|
||||||
|
# single_input is a single interactive statement;
|
||||||
|
# eval_input is the input for the eval() functions.
|
||||||
|
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||||
|
file_input: (NEWLINE | stmt)* ENDMARKER
|
||||||
|
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||||
|
eval_input: testlist NEWLINE* ENDMARKER
|
||||||
|
|
||||||
|
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
||||||
|
decorators: decorator+
|
||||||
|
decorated: decorators (classdef | funcdef | async_funcdef)
|
||||||
|
|
||||||
|
# NOTE: Francisco Souza/Reinoud Elhorst, using ASYNC/'await' keywords instead of
|
||||||
|
# skipping python3.5+ compatibility, in favour of 3.7 solution
|
||||||
|
async_funcdef: 'async' funcdef
|
||||||
|
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
||||||
|
|
||||||
|
parameters: '(' [typedargslist] ')'
|
||||||
|
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
|
||||||
|
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||||
|
| '**' tfpdef [',']]]
|
||||||
|
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||||
|
| '**' tfpdef [','])
|
||||||
|
tfpdef: NAME [':' test]
|
||||||
|
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
|
||||||
|
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||||
|
| '**' vfpdef [',']]]
|
||||||
|
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||||
|
| '**' vfpdef [',']
|
||||||
|
)
|
||||||
|
vfpdef: NAME
|
||||||
|
|
||||||
|
stmt: simple_stmt | compound_stmt
|
||||||
|
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
||||||
|
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
|
||||||
|
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
|
||||||
|
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
|
||||||
|
('=' (yield_expr|testlist_star_expr))*)
|
||||||
|
annassign: ':' test ['=' test]
|
||||||
|
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
|
||||||
|
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
||||||
|
'<<=' | '>>=' | '**=' | '//=')
|
||||||
|
# For normal and annotated assignments, additional restrictions enforced by the interpreter
|
||||||
|
del_stmt: 'del' exprlist
|
||||||
|
pass_stmt: 'pass'
|
||||||
|
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||||
|
break_stmt: 'break'
|
||||||
|
continue_stmt: 'continue'
|
||||||
|
return_stmt: 'return' [testlist]
|
||||||
|
yield_stmt: yield_expr
|
||||||
|
raise_stmt: 'raise' [test ['from' test]]
|
||||||
|
import_stmt: import_name | import_from
|
||||||
|
import_name: 'import' dotted_as_names
|
||||||
|
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
|
||||||
|
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
|
||||||
|
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
||||||
|
import_as_name: NAME ['as' NAME]
|
||||||
|
dotted_as_name: dotted_name ['as' NAME]
|
||||||
|
import_as_names: import_as_name (',' import_as_name)* [',']
|
||||||
|
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
||||||
|
dotted_name: NAME ('.' NAME)*
|
||||||
|
global_stmt: 'global' NAME (',' NAME)*
|
||||||
|
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
|
||||||
|
assert_stmt: 'assert' test [',' test]
|
||||||
|
|
||||||
|
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
|
||||||
|
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
|
||||||
|
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
|
||||||
|
while_stmt: 'while' test ':' suite ['else' ':' suite]
|
||||||
|
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
|
||||||
|
try_stmt: ('try' ':' suite
|
||||||
|
((except_clause ':' suite)+
|
||||||
|
['else' ':' suite]
|
||||||
|
['finally' ':' suite] |
|
||||||
|
'finally' ':' suite))
|
||||||
|
with_stmt: 'with' with_item (',' with_item)* ':' suite
|
||||||
|
with_item: test ['as' expr]
|
||||||
|
# NB compile.c makes sure that the default except clause is last
|
||||||
|
except_clause: 'except' [test ['as' NAME]]
|
||||||
|
# Edit by Francisco Souza/David Halter: The stmt is now optional. This reflects
|
||||||
|
# how Jedi allows classes and functions to be empty, which is beneficial for
|
||||||
|
# autocompletion.
|
||||||
|
suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
|
||||||
|
|
||||||
|
test: or_test ['if' or_test 'else' test] | lambdef
|
||||||
|
test_nocond: or_test | lambdef_nocond
|
||||||
|
lambdef: 'lambda' [varargslist] ':' test
|
||||||
|
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
|
||||||
|
or_test: and_test ('or' and_test)*
|
||||||
|
and_test: not_test ('and' not_test)*
|
||||||
|
not_test: 'not' not_test | comparison
|
||||||
|
comparison: expr (comp_op expr)*
|
||||||
|
# <> isn't actually a valid comparison operator in Python. It's here for the
|
||||||
|
# sake of a __future__ import described in PEP 401 (which really works :-)
|
||||||
|
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
||||||
|
star_expr: '*' expr
|
||||||
|
expr: xor_expr ('|' xor_expr)*
|
||||||
|
xor_expr: and_expr ('^' and_expr)*
|
||||||
|
and_expr: shift_expr ('&' shift_expr)*
|
||||||
|
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
||||||
|
arith_expr: term (('+'|'-') term)*
|
||||||
|
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
|
||||||
|
factor: ('+'|'-'|'~') factor | power
|
||||||
|
power: atom_expr ['**' factor]
|
||||||
|
atom_expr: ['await'] atom trailer*
|
||||||
|
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||||
|
'[' [testlist_comp] ']' |
|
||||||
|
'{' [dictorsetmaker] '}' |
|
||||||
|
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
||||||
|
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||||
|
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||||
|
subscriptlist: subscript (',' subscript)* [',']
|
||||||
|
subscript: test | [test] ':' [test] [sliceop]
|
||||||
|
sliceop: ':' [test]
|
||||||
|
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
|
||||||
|
testlist: test (',' test)* [',']
|
||||||
|
dictorsetmaker: ( ((test ':' test | '**' expr)
|
||||||
|
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
|
||||||
|
((test | star_expr)
|
||||||
|
(comp_for | (',' (test | star_expr))* [','])) )
|
||||||
|
|
||||||
|
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
|
||||||
|
|
||||||
|
arglist: argument (',' argument)* [',']
|
||||||
|
|
||||||
|
# The reason that keywords are test nodes instead of NAME is that using NAME
|
||||||
|
# results in an ambiguity. ast.c makes sure it's a NAME.
|
||||||
|
# "test '=' test" is really "keyword '=' test", but we have no such token.
|
||||||
|
# These need to be in a single rule to avoid grammar that is ambiguous
|
||||||
|
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
|
||||||
|
# we explicitly match '*' here, too, to give it proper precedence.
|
||||||
|
# Illegal combinations and orderings are blocked in ast.c:
|
||||||
|
# multiple (test comp_for) arguments are blocked; keyword unpackings
|
||||||
|
# that precede iterable unpackings are blocked; etc.
|
||||||
|
argument: ( test [comp_for] |
|
||||||
|
test '=' test |
|
||||||
|
'**' test |
|
||||||
|
'*' test )
|
||||||
|
|
||||||
|
comp_iter: comp_for | comp_if
|
||||||
|
comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter]
|
||||||
|
comp_if: 'if' test_nocond [comp_iter]
|
||||||
|
|
||||||
|
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||||
|
encoding_decl: NAME
|
||||||
|
|
||||||
|
yield_expr: 'yield' [yield_arg]
|
||||||
|
yield_arg: 'from' test | testlist
|
||||||
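For readers comparing these four grammar files against the grammars shipped with CPython (including lib2to3's), the one deliberate local change, called out in the "Edit by ... Halter" comments above, is the suite rule, where stmt+ becomes stmt*:

# CPython's grammars:
#   suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
# These files (so that empty class/function bodies still reduce to a suite,
# which helps autocompletion on half-typed code):
#   suite: simple_stmt | NEWLINE INDENT stmt* DEDENT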
232
parso/python/parser.py
Normal file
@@ -0,0 +1,232 @@
|
|||||||
|
from parso.python import tree
|
||||||
|
from parso import tokenize
|
||||||
|
from parso.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
|
||||||
|
STRING, tok_name)
|
||||||
|
from parso.parser import BaseParser
|
||||||
|
from jedi.common import splitlines
|
||||||
|
|
||||||
|
|
||||||
|
class Parser(BaseParser):
|
||||||
|
"""
|
||||||
|
This class is used to parse a Python file; it then divides it into a
class structure of different scopes.
|
||||||
|
|
||||||
|
:param grammar: The grammar object of pgen2. Loaded by load_grammar.
|
||||||
|
"""
|
||||||
|
|
||||||
|
node_map = {
|
||||||
|
'expr_stmt': tree.ExprStmt,
|
||||||
|
'classdef': tree.Class,
|
||||||
|
'funcdef': tree.Function,
|
||||||
|
'file_input': tree.Module,
|
||||||
|
'import_name': tree.ImportName,
|
||||||
|
'import_from': tree.ImportFrom,
|
||||||
|
'break_stmt': tree.KeywordStatement,
|
||||||
|
'continue_stmt': tree.KeywordStatement,
|
||||||
|
'return_stmt': tree.ReturnStmt,
|
||||||
|
'raise_stmt': tree.KeywordStatement,
|
||||||
|
'yield_expr': tree.YieldExpr,
|
||||||
|
'del_stmt': tree.KeywordStatement,
|
||||||
|
'pass_stmt': tree.KeywordStatement,
|
||||||
|
'global_stmt': tree.GlobalStmt,
|
||||||
|
'nonlocal_stmt': tree.KeywordStatement,
|
||||||
|
'print_stmt': tree.KeywordStatement,
|
||||||
|
'assert_stmt': tree.AssertStmt,
|
||||||
|
'if_stmt': tree.IfStmt,
|
||||||
|
'with_stmt': tree.WithStmt,
|
||||||
|
'for_stmt': tree.ForStmt,
|
||||||
|
'while_stmt': tree.WhileStmt,
|
||||||
|
'try_stmt': tree.TryStmt,
|
||||||
|
'comp_for': tree.CompFor,
|
||||||
|
'decorator': tree.Decorator,
|
||||||
|
'lambdef': tree.Lambda,
|
||||||
|
'old_lambdef': tree.Lambda,
|
||||||
|
'lambdef_nocond': tree.Lambda,
|
||||||
|
}
|
||||||
|
default_node = tree.PythonNode
|
||||||
|
|
||||||
|
def __init__(self, grammar, error_recovery=True, start_symbol='file_input'):
|
||||||
|
super(Parser, self).__init__(grammar, start_symbol, error_recovery=error_recovery)
|
||||||
|
|
||||||
|
self.syntax_errors = []
|
||||||
|
self._omit_dedent_list = []
|
||||||
|
self._indent_counter = 0
|
||||||
|
|
||||||
|
# TODO do print absolute import detection here.
|
||||||
|
# try:
|
||||||
|
# del python_grammar_no_print_statement.keywords["print"]
|
||||||
|
# except KeyError:
|
||||||
|
# pass # Doesn't exist in the Python 3 grammar.
|
||||||
|
|
||||||
|
# if self.options["print_function"]:
|
||||||
|
# python_grammar = pygram.python_grammar_no_print_statement
|
||||||
|
# else:
|
||||||
|
|
||||||
|
def parse(self, tokens):
|
||||||
|
if self._error_recovery:
|
||||||
|
if self._start_symbol != 'file_input':
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
tokens = self._recovery_tokenize(tokens)
|
||||||
|
|
||||||
|
node = super(Parser, self).parse(tokens)
|
||||||
|
|
||||||
|
if self._start_symbol == 'file_input' != node.type:
|
||||||
|
# If there's only one statement, we get back a non-module. That's
|
||||||
|
# not what we want; we want a module, so we add it here:
|
||||||
|
node = self.convert_node(
|
||||||
|
self._grammar,
|
||||||
|
self._grammar.symbol2number['file_input'],
|
||||||
|
[node]
|
||||||
|
)
|
||||||
|
|
||||||
|
return node
|
||||||
|
|
||||||
|
def convert_node(self, grammar, type, children):
|
||||||
|
"""
|
||||||
|
Convert raw node information to a PythonBaseNode instance.
|
||||||
|
|
||||||
|
This is passed to the parser driver which calls it whenever a reduction of a
|
||||||
|
grammar rule produces a new complete node, so that the tree is built
|
||||||
|
strictly bottom-up.
|
||||||
|
"""
|
||||||
|
# TODO REMOVE symbol, we don't want type here.
|
||||||
|
symbol = grammar.number2symbol[type]
|
||||||
|
try:
|
||||||
|
return self.node_map[symbol](children)
|
||||||
|
except KeyError:
|
||||||
|
if symbol == 'suite':
|
||||||
|
# We don't want the INDENT/DEDENT in our parser tree. Those
|
||||||
|
# leaves are just cancer. They are virtual leaves and not real
|
||||||
|
# ones and therefore have pseudo start/end positions and no
|
||||||
|
# prefixes. Just ignore them.
|
||||||
|
children = [children[0]] + children[2:-1]
|
||||||
|
return self.default_node(symbol, children)
|
||||||
|
|
||||||
|
def convert_leaf(self, grammar, type, value, prefix, start_pos):
|
||||||
|
# print('leaf', repr(value), token.tok_name[type])
|
||||||
|
if type == tokenize.NAME:
|
||||||
|
if value in grammar.keywords:
|
||||||
|
return tree.Keyword(value, start_pos, prefix)
|
||||||
|
else:
|
||||||
|
return tree.Name(value, start_pos, prefix)
|
||||||
|
elif type == STRING:
|
||||||
|
return tree.String(value, start_pos, prefix)
|
||||||
|
elif type == NUMBER:
|
||||||
|
return tree.Number(value, start_pos, prefix)
|
||||||
|
elif type == NEWLINE:
|
||||||
|
return tree.Newline(value, start_pos, prefix)
|
||||||
|
elif type == ENDMARKER:
|
||||||
|
return tree.EndMarker(value, start_pos, prefix)
|
||||||
|
else:
|
||||||
|
return tree.Operator(value, start_pos, prefix)
|
||||||
|
|
||||||
|
def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix,
|
||||||
|
add_token_callback):
|
||||||
|
"""
|
||||||
|
This parser is written in a dynamic way, meaning that this parser
|
||||||
|
allows using different grammars (even non-Python). However, error
|
||||||
|
recovery is purely written for Python.
|
||||||
|
"""
|
||||||
|
if not self._error_recovery:
|
||||||
|
return super(Parser, self).error_recovery(
|
||||||
|
grammar, stack, arcs, typ, value, start_pos, prefix,
|
||||||
|
add_token_callback)
|
||||||
|
|
||||||
|
def current_suite(stack):
|
||||||
|
# For now just discard everything that is not a suite or
|
||||||
|
# file_input, if we detect an error.
|
||||||
|
for index, (dfa, state, (type_, nodes)) in reversed(list(enumerate(stack))):
|
||||||
|
# `suite` can sometimes be only simple_stmt, not stmt.
|
||||||
|
symbol = grammar.number2symbol[type_]
|
||||||
|
if symbol == 'file_input':
|
||||||
|
break
|
||||||
|
elif symbol == 'suite' and len(nodes) > 1:
|
||||||
|
# suites without an indent in them get discarded.
|
||||||
|
break
|
||||||
|
return index, symbol, nodes
|
||||||
|
|
||||||
|
index, symbol, nodes = current_suite(stack)
|
||||||
|
|
||||||
|
# print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
|
||||||
|
if self._stack_removal(grammar, stack, arcs, index + 1, value, start_pos):
|
||||||
|
add_token_callback(typ, value, start_pos, prefix)
|
||||||
|
else:
|
||||||
|
if typ == INDENT:
|
||||||
|
# For every deleted INDENT we have to delete a DEDENT as well.
|
||||||
|
# Otherwise the parser will get into trouble and DEDENT too early.
|
||||||
|
self._omit_dedent_list.append(self._indent_counter)
|
||||||
|
else:
|
||||||
|
error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
|
||||||
|
stack[-1][2][1].append(error_leaf)
|
||||||
|
|
||||||
|
def _stack_removal(self, grammar, stack, arcs, start_index, value, start_pos):
|
||||||
|
failed_stack = []
|
||||||
|
found = False
|
||||||
|
all_nodes = []
|
||||||
|
for dfa, state, (typ, nodes) in stack[start_index:]:
|
||||||
|
if nodes:
|
||||||
|
found = True
|
||||||
|
if found:
|
||||||
|
symbol = grammar.number2symbol[typ]
|
||||||
|
failed_stack.append((symbol, nodes))
|
||||||
|
all_nodes += nodes
|
||||||
|
if failed_stack:
|
||||||
|
stack[start_index - 1][2][1].append(tree.PythonErrorNode(all_nodes))
|
||||||
|
|
||||||
|
stack[start_index:] = []
|
||||||
|
return failed_stack
|
||||||
|
|
||||||
|
def _recovery_tokenize(self, tokens):
|
||||||
|
for typ, value, start_pos, prefix in tokens:
|
||||||
|
# print(tokenize.tok_name[typ], repr(value), start_pos, repr(prefix))
|
||||||
|
if typ == DEDENT:
|
||||||
|
# We need to count indents, because if we just omit any DEDENT,
|
||||||
|
# we might omit them in the wrong place.
|
||||||
|
o = self._omit_dedent_list
|
||||||
|
if o and o[-1] == self._indent_counter:
|
||||||
|
o.pop()
|
||||||
|
continue
|
||||||
|
|
||||||
|
self._indent_counter -= 1
|
||||||
|
elif typ == INDENT:
|
||||||
|
self._indent_counter += 1
|
||||||
|
|
||||||
|
yield typ, value, start_pos, prefix
|
||||||
|
|
||||||
|
|
||||||
|
def _remove_last_newline(node):
|
||||||
|
endmarker = node.children[-1]
|
||||||
|
# The newline is either in the endmarker as a prefix or the previous
|
||||||
|
# leaf as a newline token.
|
||||||
|
prefix = endmarker.prefix
|
||||||
|
leaf = endmarker.get_previous_leaf()
|
||||||
|
if prefix:
|
||||||
|
text = prefix
|
||||||
|
else:
|
||||||
|
if leaf is None:
|
||||||
|
raise ValueError("You're trying to remove a newline from an empty module.")
|
||||||
|
|
||||||
|
text = leaf.value
|
||||||
|
|
||||||
|
if not text.endswith('\n'):
|
||||||
|
raise ValueError("There's no newline at the end, cannot remove it.")
|
||||||
|
|
||||||
|
text = text[:-1]
|
||||||
|
if prefix:
|
||||||
|
endmarker.prefix = text
|
||||||
|
|
||||||
|
if leaf is None:
|
||||||
|
end_pos = (1, 0)
|
||||||
|
else:
|
||||||
|
end_pos = leaf.end_pos
|
||||||
|
|
||||||
|
lines = splitlines(text, keepends=True)
|
||||||
|
if len(lines) == 1:
|
||||||
|
end_pos = end_pos[0], end_pos[1] + len(lines[0])
|
||||||
|
else:
|
||||||
|
end_pos = end_pos[0] + len(lines) - 1, len(lines[-1])
|
||||||
|
endmarker.start_pos = end_pos
|
||||||
|
else:
|
||||||
|
leaf.value = text
|
||||||
|
endmarker.start_pos = leaf.end_pos
|
||||||
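As a quick illustration of the node_map dispatch used by Parser.convert_node above: the grammar symbol name selects a specialized tree class, and any symbol without an entry falls back to default_node. This sketch assumes the module imports at this commit (which still pull in jedi) resolve; the assertions only touch attributes defined in this file.

from parso.python.parser import Parser
from parso.python import tree

# 'funcdef' reductions are wrapped in tree.Function nodes ...
assert Parser.node_map['funcdef'] is tree.Function
# ... while symbols without a special class become generic PythonNodes.
assert Parser.node_map.get('arith_expr', Parser.default_node) is tree.PythonNode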
1045
parso/python/tree.py
Normal file
File diff suppressed because it is too large
90
parso/token.py
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
from jedi._compatibility import is_py3, is_py35
|
||||||
|
from token import *
|
||||||
|
|
||||||
|
|
||||||
|
COMMENT = N_TOKENS
|
||||||
|
tok_name[COMMENT] = 'COMMENT'
|
||||||
|
N_TOKENS += 1
|
||||||
|
|
||||||
|
NL = N_TOKENS
|
||||||
|
tok_name[NL] = 'NL'
|
||||||
|
N_TOKENS += 1
|
||||||
|
|
||||||
|
if is_py3:
|
||||||
|
BACKQUOTE = N_TOKENS
|
||||||
|
tok_name[BACKQUOTE] = 'BACKQUOTE'
|
||||||
|
N_TOKENS += 1
|
||||||
|
else:
|
||||||
|
RARROW = N_TOKENS
|
||||||
|
tok_name[RARROW] = 'RARROW'
|
||||||
|
N_TOKENS += 1
|
||||||
|
ELLIPSIS = N_TOKENS
|
||||||
|
tok_name[ELLIPSIS] = 'ELLIPSIS'
|
||||||
|
N_TOKENS += 1
|
||||||
|
|
||||||
|
if not is_py35:
|
||||||
|
ATEQUAL = N_TOKENS
|
||||||
|
tok_name[ATEQUAL] = 'ATEQUAL'
|
||||||
|
N_TOKENS += 1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Map from operator to number (since tokenize doesn't do this)
|
||||||
|
|
||||||
|
opmap_raw = """\
|
||||||
|
( LPAR
|
||||||
|
) RPAR
|
||||||
|
[ LSQB
|
||||||
|
] RSQB
|
||||||
|
: COLON
|
||||||
|
, COMMA
|
||||||
|
; SEMI
|
||||||
|
+ PLUS
|
||||||
|
- MINUS
|
||||||
|
* STAR
|
||||||
|
/ SLASH
|
||||||
|
| VBAR
|
||||||
|
& AMPER
|
||||||
|
< LESS
|
||||||
|
> GREATER
|
||||||
|
= EQUAL
|
||||||
|
. DOT
|
||||||
|
% PERCENT
|
||||||
|
` BACKQUOTE
|
||||||
|
{ LBRACE
|
||||||
|
} RBRACE
|
||||||
|
@ AT
|
||||||
|
== EQEQUAL
|
||||||
|
!= NOTEQUAL
|
||||||
|
<> NOTEQUAL
|
||||||
|
<= LESSEQUAL
|
||||||
|
>= GREATEREQUAL
|
||||||
|
~ TILDE
|
||||||
|
^ CIRCUMFLEX
|
||||||
|
<< LEFTSHIFT
|
||||||
|
>> RIGHTSHIFT
|
||||||
|
** DOUBLESTAR
|
||||||
|
+= PLUSEQUAL
|
||||||
|
-= MINEQUAL
|
||||||
|
*= STAREQUAL
|
||||||
|
/= SLASHEQUAL
|
||||||
|
%= PERCENTEQUAL
|
||||||
|
&= AMPEREQUAL
|
||||||
|
|= VBAREQUAL
|
||||||
|
@= ATEQUAL
|
||||||
|
^= CIRCUMFLEXEQUAL
|
||||||
|
<<= LEFTSHIFTEQUAL
|
||||||
|
>>= RIGHTSHIFTEQUAL
|
||||||
|
**= DOUBLESTAREQUAL
|
||||||
|
// DOUBLESLASH
|
||||||
|
//= DOUBLESLASHEQUAL
|
||||||
|
-> RARROW
|
||||||
|
... ELLIPSIS
|
||||||
|
"""
|
||||||
|
|
||||||
|
opmap = {}
|
||||||
|
for line in opmap_raw.splitlines():
|
||||||
|
op, name = line.split()
|
||||||
|
opmap[op] = globals()[name]
|
||||||
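A small sanity check of the operator table built above, using only names defined in this module (opmap maps an operator string to its numeric token type, tok_name maps the number back to its name); it assumes the jedi compatibility import at the top of the file resolves.

from parso.token import opmap, tok_name

assert tok_name[opmap['==']] == 'EQEQUAL'
assert tok_name[opmap['->']] == 'RARROW'
print(len(opmap))  # one entry per line of opmap_raw above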
369
parso/tokenize.py
Normal file
@@ -0,0 +1,369 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
This tokenizer has been copied from the ``tokenize.py`` standard library
|
||||||
|
tokenizer. The reason was simple: The standard library tokenizer fails
|
||||||
|
if the indentation is not right. The fast parser of jedi, however, requires
|
||||||
|
"wrong" indentation.
|
||||||
|
|
||||||
|
Basically this is a stripped down version of the standard library module, so
|
||||||
|
you can read the documentation there. Additionally we included some speed and
|
||||||
|
memory optimizations here.
|
||||||
|
"""
|
||||||
|
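For orientation, a hedged usage sketch of this module as the diff parser above already calls it (generate_tokens(lines, use_exact_op_types=True)); the function name, its arguments and the 4-tuple shape are taken from those call sites, everything else is illustrative.

# Illustrative only; mirrors the call in _diff_tokenize() above.
from parso import tokenize
from parso.token import tok_name

lines = ['def f():\n', '    return 1\n']
for typ, string, start_pos, prefix in tokenize.generate_tokens(lines, use_exact_op_types=True):
    print(tok_name[typ], repr(string), start_pos)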
from __future__ import absolute_import
|
||||||
|
|
||||||
|
import string
|
||||||
|
import re
|
||||||
|
from collections import namedtuple
|
||||||
|
import itertools as _itertools
|
||||||
|
|
||||||
|
from parso.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, opmap,
|
||||||
|
NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT)
|
||||||
|
from jedi._compatibility import is_py3, py_version, u
|
||||||
|
from jedi.common import splitlines
|
||||||
|
|
||||||
|
|
||||||
|
cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
|
||||||
|
|
||||||
|
|
||||||
|
if is_py3:
|
||||||
|
# Python 3 has str.isidentifier() to check if a char is a valid identifier
|
||||||
|
is_identifier = str.isidentifier
|
||||||
|
else:
|
||||||
|
namechars = string.ascii_letters + '_'
|
||||||
|
is_identifier = lambda s: s in namechars
|
||||||
|
|
||||||
|
|
||||||
|
COMMENT = N_TOKENS
|
||||||
|
tok_name[COMMENT] = 'COMMENT'
|
||||||
|
|
||||||
|
|
||||||
|
def group(*choices, **kwargs):
|
||||||
|
capture = kwargs.pop('capture', False) # Python 2, arrghhhhh :(
|
||||||
|
assert not kwargs
|
||||||
|
|
||||||
|
start = '('
|
||||||
|
if not capture:
|
||||||
|
start += '?:'
|
||||||
|
return start + '|'.join(choices) + ')'
|
||||||
|
|
||||||
|
def any(*choices):
|
||||||
|
return group(*choices) + '*'
|
||||||
|
|
||||||
|
def maybe(*choices):
|
||||||
|
return group(*choices) + '?'
|
||||||
|
|
||||||
|
# Note: we use unicode matching for names ("\w") but ascii matching for
|
||||||
|
# number literals.
|
||||||
|
Whitespace = r'[ \f\t]*'
|
||||||
|
Comment = r'#[^\r\n]*'
|
||||||
|
Name = r'\w+'
|
||||||
|
|
||||||
|
if py_version >= 36:
|
||||||
|
Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
|
||||||
|
Binnumber = r'0[bB](?:_?[01])+'
|
||||||
|
Octnumber = r'0[oO](?:_?[0-7])+'
|
||||||
|
Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
|
||||||
|
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
|
||||||
|
Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
|
||||||
|
Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
|
||||||
|
r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
|
||||||
|
Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
|
||||||
|
Floatnumber = group(Pointfloat, Expfloat)
|
||||||
|
Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
|
||||||
|
else:
|
||||||
|
Hexnumber = r'0[xX][0-9a-fA-F]+'
|
||||||
|
Binnumber = r'0[bB][01]+'
|
||||||
|
if is_py3:
|
||||||
|
Octnumber = r'0[oO][0-7]+'
|
||||||
|
else:
|
||||||
|
Octnumber = '0[0-7]+'
|
||||||
|
Decnumber = r'(?:0+|[1-9][0-9]*)'
|
||||||
|
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
|
||||||
|
Exponent = r'[eE][-+]?[0-9]+'
|
||||||
|
Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
|
||||||
|
Expfloat = r'[0-9]+' + Exponent
|
||||||
|
Floatnumber = group(Pointfloat, Expfloat)
|
||||||
|
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
|
||||||
|
Number = group(Imagnumber, Floatnumber, Intnumber)
|
||||||
|
|
||||||
|
# Return the empty string, plus all of the valid string prefixes.
def _all_string_prefixes():
    # The valid string prefixes. Only contain the lower case versions,
    # and don't contain any permutations (include 'fr', but not
    # 'rf'). The various permutations will be generated.
    _valid_string_prefixes = ['b', 'r', 'u', 'br']
    if py_version >= 36:
        _valid_string_prefixes += ['f', 'fr']
    if py_version <= 27:
        # TODO this is actually not 100% valid. ur is valid in Python 2.7,
        # while ru is not.
        _valid_string_prefixes.append('ur')

    # if we add binary f-strings, add: ['fb', 'fbr']
    result = set([''])
    for prefix in _valid_string_prefixes:
        for t in _itertools.permutations(prefix):
            # create a list with upper and lower versions of each
            # character
            for u in _itertools.product(*[(c, c.upper()) for c in t]):
                result.add(''.join(u))
    return result


def _compile(expr):
    return re.compile(expr, re.UNICODE)


# Note that since _all_string_prefixes includes the empty string,
# StringPrefix can be the empty string (making it optional).
StringPrefix = group(*_all_string_prefixes())

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group(StringPrefix + "'''", StringPrefix + '"""')

# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
                 r"//=?", r"->",
                 r"[+\-*/%&@|^=<>]=?",
                 r"~")

Bracket = '[][(){}]'
Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
Funny = group(Operator, Bracket, Special)

PlainToken = group(Number, Funny, Name, capture=True)

# First (or only) line of ' or " string.
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                group("'", r'\\\r?\n'),
                StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
PseudoToken = group(Whitespace, capture=True) + \
    group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)

# For a given string prefix plus quotes, endpats maps it to a regex
# to match the remainder of that string. _prefix can be empty, for
# a normal single or triple quoted string (with no prefix).
endpats = {}
for _prefix in _all_string_prefixes():
    endpats[_prefix + "'"] = _compile(Single)
    endpats[_prefix + '"'] = _compile(Double)
    endpats[_prefix + "'''"] = _compile(Single3)
    endpats[_prefix + '"""'] = _compile(Double3)

# A set of all of the single and triple quoted string prefixes,
# including the opening quotes.
single_quoted = set()
triple_quoted = set()
for t in _all_string_prefixes():
    for p in (t + '"', t + "'"):
        single_quoted.add(p)
    for p in (t + '"""', t + "'''"):
        triple_quoted.add(p)


# TODO add with?
ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
                       'finally', 'while', 'return')
pseudo_token_compiled = _compile(PseudoToken)

class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
    def __repr__(self):
        return ('TokenInfo(type=%s, string=%r, start=%r, prefix=%r)' %
                self._replace(type=self.get_type_name()))

    def get_type_name(self, exact=True):
        if exact:
            typ = self.exact_type
        else:
            typ = self.type
        return tok_name[typ]

    @property
    def exact_type(self):
        if self.type == OP and self.string in opmap:
            return opmap[self.string]
        else:
            return self.type

    @property
    def end_pos(self):
        lines = splitlines(self.string)
        if len(lines) > 1:
            return self.start_pos[0] + len(lines) - 1, 0
        else:
            return self.start_pos[0], self.start_pos[1] + len(self.string)


def source_tokens(source, use_exact_op_types=False):
    """Generate tokens from the source code (string)."""
    lines = splitlines(source, keepends=True)
    return generate_tokens(lines, use_exact_op_types)

def generate_tokens(lines, use_exact_op_types=False):
    """
    A heavily modified Python standard library tokenizer.

    In addition to the default information, this also yields the prefix of
    each token. This idea comes from lib2to3. The prefix contains all
    information that is irrelevant for the parser, like newlines in
    parentheses or comments.
    """
    paren_level = 0  # count parentheses
    indents = [0]
    max = 0
    numchars = '0123456789'
    contstr = ''
    contline = None
    # We start with a newline. This makes indent at the first position
    # possible. It's not valid Python, but still better than an INDENT in the
    # second line (and not in the first). This makes quite a few things in
    # Jedi's fast parser possible.
    new_line = True
    prefix = ''  # Should never be required, but here for safety
    additional_prefix = ''
    for lnum, line in enumerate(lines, 1):  # loop over lines in stream
        pos, max = 0, len(line)

        if contstr:                                         # continued string
            endmatch = endprog.match(line)
            if endmatch:
                pos = endmatch.end(0)
                yield TokenInfo(STRING, contstr + line[:pos], contstr_start, prefix)
                contstr = ''
                contline = None
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        while pos < max:
            pseudomatch = pseudo_token_compiled.match(line, pos)
            if not pseudomatch:                             # scan for tokens
                txt = line[pos:]
                if txt.endswith('\n'):
                    new_line = True
                yield TokenInfo(ERRORTOKEN, txt, (lnum, pos), prefix)
                break

            prefix = additional_prefix + pseudomatch.group(1)
            additional_prefix = ''
            start, pos = pseudomatch.span(2)
            spos = (lnum, start)
            token = pseudomatch.group(2)
            initial = token[0]

            if new_line and initial not in '\r\n#':
                new_line = False
                if paren_level == 0:
                    i = 0
                    while line[i] == '\f':
                        i += 1
                        start -= 1
                    if start > indents[-1]:
                        yield TokenInfo(INDENT, '', spos, '')
                        indents.append(start)
                    while start < indents[-1]:
                        yield TokenInfo(DEDENT, '', spos, '')
                        indents.pop()

            if (initial in numchars or                      # ordinary number
                    (initial == '.' and token != '.' and token != '...')):
                yield TokenInfo(NUMBER, token, spos, prefix)
            elif initial in '\r\n':
                if not new_line and paren_level == 0:
                    yield TokenInfo(NEWLINE, token, spos, prefix)
                else:
                    additional_prefix = prefix + token
                new_line = True
            elif initial == '#':  # Comments
                assert not token.endswith("\n")
                additional_prefix = prefix + token
            elif token in triple_quoted:
                endprog = endpats[token]
                endmatch = endprog.match(line, pos)
                if endmatch:                                # all on one line
                    pos = endmatch.end(0)
                    token = line[start:pos]
                    yield TokenInfo(STRING, token, spos, prefix)
                else:
                    contstr_start = (lnum, start)           # multiple lines
                    contstr = line[start:]
                    contline = line
                    break
            elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                if token[-1] == '\n':                       # continued string
                    contstr_start = lnum, start
                    endprog = (endpats.get(initial) or endpats.get(token[1])
                               or endpats.get(token[2]))
                    contstr = line[start:]
                    contline = line
                    break
                else:                                       # ordinary string
                    yield TokenInfo(STRING, token, spos, prefix)
            elif is_identifier(initial):                    # ordinary name
                if token in ALWAYS_BREAK_TOKENS:
                    paren_level = 0
                    while True:
                        indent = indents.pop()
                        if indent > start:
                            yield TokenInfo(DEDENT, '', spos, '')
                        else:
                            indents.append(indent)
                            break
                yield TokenInfo(NAME, token, spos, prefix)
            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'):  # continued stmt
                additional_prefix += prefix + line[start:]
                break
            else:
                if token in '([{':
                    paren_level += 1
                elif token in ')]}':
                    paren_level -= 1

                try:
                    # This check is needed in any case to check if it's a valid
                    # operator or just some random unicode character.
                    exact_type = opmap[token]
                except KeyError:
                    exact_type = typ = ERRORTOKEN
                if use_exact_op_types:
                    typ = exact_type
                else:
                    typ = OP
                yield TokenInfo(typ, token, spos, prefix)

    if contstr:
        yield TokenInfo(ERRORTOKEN, contstr, contstr_start, prefix)
        if contstr.endswith('\n'):
            new_line = True

    end_pos = lnum, max
    # As the last position we just take the maximally possible position. We
    # remove -1 for the last new line.
    for indent in indents[1:]:
        yield TokenInfo(DEDENT, '', end_pos, '')
    yield TokenInfo(ENDMARKER, '', end_pos, additional_prefix)


if __name__ == "__main__":
    import sys
    if len(sys.argv) >= 2:
        path = sys.argv[1]
        with open(path) as f:
            code = u(f.read())
    else:
        code = u(sys.stdin.read())
    for token in source_tokens(code, use_exact_op_types=True):
        print(token)

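To make the prefix idea concrete, here is a short usage sketch. It is an illustration added here rather than part of the committed file, and it assumes the `jedi` imports this initial commit still relies on are available so that `parso.tokenize` can be imported:

# Illustration only: print every token of a small snippet, including prefixes.
from parso.tokenize import source_tokens

code = "x = 1\nif x:  # comment\n    x += 1\n"
for token in source_tokens(code, use_exact_op_types=True):
    # Each TokenInfo carries type, string, start_pos and the prefix
    # (whitespace and comments) that preceded the token.
    print(token)
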
328
parso/tree.py
Normal file
@@ -0,0 +1,328 @@
from abc import abstractmethod, abstractproperty
from parso._compatibility import utf8_repr, encoding, is_py3


def search_ancestor(node, *node_types):
    """
    Recursively looks at the parents of a node and checks if the type names
    match.

    :param node: The node that is looked at.
    :param node_types: A tuple or a string of type names that are
        searched for.
    """
    while True:
        node = node.parent
        if node is None or node.type in node_types:
            return node


class NodeOrLeaf(object):
    """
    The base class for nodes and leaves.
    """
    __slots__ = ()

    def get_root_node(self):
        """
        Returns the root node of a parser tree. The returned node doesn't have
        a parent node like all the other nodes/leaves.
        """
        scope = self
        while scope.parent is not None:
            scope = scope.parent
        return scope

    def get_next_sibling(self):
        """
        The node immediately following the invocant in their parent's children
        list. If the invocant does not have a next sibling, it is None.
        """
        # Can't use index(); we need to test by identity
        for i, child in enumerate(self.parent.children):
            if child is self:
                try:
                    return self.parent.children[i + 1]
                except IndexError:
                    return None

    def get_previous_sibling(self):
        """
        The node/leaf immediately preceding the invocant in their parent's
        children list. If the invocant does not have a previous sibling, it is
        None.
        """
        # Can't use index(); we need to test by identity
        for i, child in enumerate(self.parent.children):
            if child is self:
                if i == 0:
                    return None
                return self.parent.children[i - 1]

    def get_previous_leaf(self):
        """
        Returns the previous leaf in the parser tree.
        Returns `None` if it's the first element in the parser tree.
        """
        node = self
        while True:
            c = node.parent.children
            i = c.index(node)
            if i == 0:
                node = node.parent
                if node.parent is None:
                    return None
            else:
                node = c[i - 1]
                break

        while True:
            try:
                node = node.children[-1]
            except AttributeError:  # A Leaf doesn't have children.
                return node

    def get_next_leaf(self):
        """
        Returns the next leaf in the parser tree.
        Returns `None` if it's the last element in the parser tree.
        """
        node = self
        while True:
            c = node.parent.children
            i = c.index(node)
            if i == len(c) - 1:
                node = node.parent
                if node.parent is None:
                    return None
            else:
                node = c[i + 1]
                break

        while True:
            try:
                node = node.children[0]
            except AttributeError:  # A Leaf doesn't have children.
                return node

    @abstractproperty
    def start_pos(self):
        """
        Returns the starting position of the prefix as a tuple, e.g. `(3, 4)`.

        :return tuple of int: (line, column)
        """

    @abstractproperty
    def end_pos(self):
        """
        Returns the end position of the prefix as a tuple, e.g. `(3, 4)`.

        :return tuple of int: (line, column)
        """

    @abstractmethod
    def get_start_pos_of_prefix(self):
        """
        Returns the start_pos of the prefix. This means basically it returns
        the end_pos of the previous leaf. The `get_start_pos_of_prefix()` of
        the leaf `+` in `2 + 1` would be `(1, 1)`, while its start_pos is
        `(1, 2)`.

        :return tuple of int: (line, column)
        """

    @abstractmethod
    def get_first_leaf(self):
        """
        Returns the first leaf of a node or the node itself if it is a leaf.
        """

    @abstractmethod
    def get_last_leaf(self):
        """
        Returns the last leaf of a node or the node itself if it is a leaf.
        """

    @abstractmethod
    def get_code(self, normalized=False, include_prefix=True):
        """
        Returns the code that was the input of the parser.

        If a normalizer is given, the returned code will be normalized and will
        not be equal to the input.

        :param include_prefix: If ``False``, removes the prefix (whitespace and
            comments) of e.g. a statement.
        :param normalized: Deprecated. Please don't use. Will be replaced with
            something more powerful.
        """


class Leaf(NodeOrLeaf):
    __slots__ = ('value', 'parent', 'line', 'indent', 'prefix')

    def __init__(self, value, start_pos, prefix=''):
        self.value = value
        self.start_pos = start_pos
        self.prefix = prefix
        self.parent = None

    @property
    def start_pos(self):
        return self.line, self.indent

    @start_pos.setter
    def start_pos(self, value):
        self.line = value[0]
        self.indent = value[1]

    def get_start_pos_of_prefix(self):
        previous_leaf = self.get_previous_leaf()
        if previous_leaf is None:
            return self.line - self.prefix.count('\n'), 0  # It's the first leaf.
        return previous_leaf.end_pos

    def get_first_leaf(self):
        return self

    def get_last_leaf(self):
        return self

    def get_code(self, normalized=False, include_prefix=True):
        if normalized:
            return self.value
        if include_prefix:
            return self.prefix + self.value
        else:
            return self.value

    @property
    def end_pos(self):
        lines = self.value.split('\n')
        end_pos_line = self.line + len(lines) - 1
        # Check for multiline token
        if self.line == end_pos_line:
            end_pos_indent = self.indent + len(lines[-1])
        else:
            end_pos_indent = len(lines[-1])
        return end_pos_line, end_pos_indent

    @utf8_repr
    def __repr__(self):
        return "<%s: %s start=%s>" % (type(self).__name__, self.value, self.start_pos)


class BaseNode(NodeOrLeaf):
    """
    The super class for all nodes.

    If you create custom nodes, you will probably want to inherit from this
    ``BaseNode``.
    """
    __slots__ = ('children', 'parent')
    type = None

    def __init__(self, children):
        for c in children:
            c.parent = self
        self.children = children
        self.parent = None

    @property
    def start_pos(self):
        return self.children[0].start_pos

    def get_start_pos_of_prefix(self):
        return self.children[0].get_start_pos_of_prefix()

    @property
    def end_pos(self):
        return self.children[-1].end_pos

    def _get_code_for_children(self, children, normalized, include_prefix):
        # TODO implement normalized (depending on context).
        if include_prefix:
            return "".join(c.get_code(normalized) for c in children)
        else:
            first = children[0].get_code(include_prefix=False)
            return first + "".join(c.get_code(normalized) for c in children[1:])

    def get_code(self, normalized=False, include_prefix=True):
        return self._get_code_for_children(self.children, normalized, include_prefix)

    def get_leaf_for_position(self, position, include_prefixes=False):
        def binary_search(lower, upper):
            if lower == upper:
                element = self.children[lower]
                if not include_prefixes and position < element.start_pos:
                    # We're on a prefix.
                    return None
                # In case we have prefixes, a leaf always matches
                try:
                    return element.get_leaf_for_position(position, include_prefixes)
                except AttributeError:
                    return element

            index = int((lower + upper) / 2)
            element = self.children[index]
            if position <= element.end_pos:
                return binary_search(lower, index)
            else:
                return binary_search(index + 1, upper)

        if not ((1, 0) <= position <= self.children[-1].end_pos):
            raise ValueError('Please provide a position that exists within this node.')
        return binary_search(0, len(self.children) - 1)

    def get_first_leaf(self):
        return self.children[0].get_first_leaf()

    def get_last_leaf(self):
        return self.children[-1].get_last_leaf()

    @utf8_repr
    def __repr__(self):
        code = self.get_code().replace('\n', ' ').strip()
        if not is_py3:
            code = code.encode(encoding, 'replace')
        return "<%s: %s@%s,%s>" % \
            (type(self).__name__, code, self.start_pos[0], self.start_pos[1])


class Node(BaseNode):
    """Concrete implementation for interior nodes."""
    __slots__ = ('type',)

    def __init__(self, type, children):
        super(Node, self).__init__(children)
        self.type = type

    def __repr__(self):
        return "%s(%s, %r)" % (self.__class__.__name__, self.type, self.children)


class ErrorNode(BaseNode):
    """
    A node that contains valid nodes/leaves that are followed by a token that
    was invalid. This basically means that the leaf after this node is where
    Python would mark a syntax error.
    """
    __slots__ = ()
    type = 'error_node'


class ErrorLeaf(Leaf):
    """
    A leaf that is either completely invalid in a language (like `$` in Python)
    or is invalid at that position, like the star in `1 +* 1`.
    """
    __slots__ = ('original_type',)
    type = 'error_leaf'

    def __init__(self, original_type, value, start_pos, prefix=''):
        super(ErrorLeaf, self).__init__(value, start_pos, prefix)
        self.original_type = original_type

    def __repr__(self):
        return "<%s: %s:%s, %s>" % \
            (type(self).__name__, self.original_type, repr(self.value), self.start_pos)

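A small hand-built example may help to show how the tree API above fits together. It is an illustration added here, not part of the committed file; the node type name 'arith_expr' is only an assumed label for the sketch:

# Illustration only: build a tiny tree for the expression `2 + 1` by hand.
from parso.tree import Leaf, Node

two = Leaf('2', (1, 0))
plus = Leaf('+', (1, 2), prefix=' ')
one = Leaf('1', (1, 4), prefix=' ')
expr = Node('arith_expr', [two, plus, one])

print(expr.get_code())                            # '2 + 1'
print(plus.get_previous_leaf().value)             # '2'
print(plus.start_pos)                             # (1, 2)
print(plus.get_start_pos_of_prefix())             # (1, 1), the end of the previous leaf
print(expr.get_leaf_for_position((1, 4)) is one)  # True
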