Mirror of https://github.com/davidhalter/jedi.git (synced 2025-12-08 06:44:46 +08:00)
Add a new parser, check if pgen2 would work. (already modified outside this repository)
jedi/parser/grammar.txt (new file, 158 lines)
@@ -0,0 +1,158 @@
# Grammar for 2to3. This grammar supports Python 2.x and 3.x.
|
||||||
|
|
||||||
|
# Note: Changing the grammar specified in this file will most likely
|
||||||
|
# require corresponding changes in the parser module
|
||||||
|
# (../Modules/parsermodule.c). If you can't make the changes to
|
||||||
|
# that module yourself, please co-ordinate the required changes
|
||||||
|
# with someone who can; ask around on python-dev for help. Fred
|
||||||
|
# Drake <fdrake@acm.org> will probably be listening there.
|
||||||
|
|
||||||
|
# NOTE WELL: You should also follow all the steps listed in PEP 306,
|
||||||
|
# "How to Change Python's Grammar"
|
||||||
|
|
||||||
|
# Commands for Kees Blom's railroad program
|
||||||
|
#diagram:token NAME
|
||||||
|
#diagram:token NUMBER
|
||||||
|
#diagram:token STRING
|
||||||
|
#diagram:token NEWLINE
|
||||||
|
#diagram:token ENDMARKER
|
||||||
|
#diagram:token INDENT
|
||||||
|
#diagram:output\input python.bla
|
||||||
|
#diagram:token DEDENT
|
||||||
|
#diagram:output\textwidth 20.04cm\oddsidemargin 0.0cm\evensidemargin 0.0cm
|
||||||
|
#diagram:rules
|
||||||
|
|
||||||
|
# Start symbols for the grammar:
|
||||||
|
# file_input is a module or sequence of commands read from an input file;
|
||||||
|
# single_input is a single interactive statement;
|
||||||
|
# eval_input is the input for the eval() and input() functions.
|
||||||
|
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||||
|
file_input: (NEWLINE | stmt)* ENDMARKER
|
||||||
|
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||||
|
eval_input: testlist NEWLINE* ENDMARKER
|
||||||
|
|
||||||
|
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
||||||
|
decorators: decorator+
|
||||||
|
decorated: decorators (classdef | funcdef)
|
||||||
|
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
||||||
|
parameters: '(' [typedargslist] ')'
|
||||||
|
typedargslist: ((tfpdef ['=' test] ',')*
|
||||||
|
('*' [tname] (',' tname ['=' test])* [',' '**' tname] | '**' tname)
|
||||||
|
| tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
|
||||||
|
tname: NAME [':' test]
|
||||||
|
tfpdef: tname | '(' tfplist ')'
|
||||||
|
tfplist: tfpdef (',' tfpdef)* [',']
|
||||||
|
varargslist: ((vfpdef ['=' test] ',')*
|
||||||
|
('*' [vname] (',' vname ['=' test])* [',' '**' vname] | '**' vname)
|
||||||
|
| vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
|
||||||
|
vname: NAME
|
||||||
|
vfpdef: vname | '(' vfplist ')'
|
||||||
|
vfplist: vfpdef (',' vfpdef)* [',']
|
||||||
|
|
||||||
|
stmt: simple_stmt | compound_stmt
|
||||||
|
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
||||||
|
small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
|
||||||
|
import_stmt | global_stmt | exec_stmt | assert_stmt)
|
||||||
|
expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
|
||||||
|
('=' (yield_expr|testlist_star_expr))*)
|
||||||
|
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
|
||||||
|
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
||||||
|
'<<=' | '>>=' | '**=' | '//=')
|
||||||
|
# For normal assignments, additional restrictions enforced by the interpreter
|
||||||
|
print_stmt: 'print' ( [ test (',' test)* [','] ] |
|
||||||
|
'>>' test [ (',' test)+ [','] ] )
|
||||||
|
del_stmt: 'del' exprlist
|
||||||
|
pass_stmt: 'pass'
|
||||||
|
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||||
|
break_stmt: 'break'
|
||||||
|
continue_stmt: 'continue'
|
||||||
|
return_stmt: 'return' [testlist]
|
||||||
|
yield_stmt: yield_expr
|
||||||
|
raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]]
|
||||||
|
import_stmt: import_name | import_from
|
||||||
|
import_name: 'import' dotted_as_names
|
||||||
|
import_from: ('from' ('.'* dotted_name | '.'+)
|
||||||
|
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
||||||
|
import_as_name: NAME ['as' NAME]
|
||||||
|
dotted_as_name: dotted_name ['as' NAME]
|
||||||
|
import_as_names: import_as_name (',' import_as_name)* [',']
|
||||||
|
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
||||||
|
dotted_name: NAME ('.' NAME)*
|
||||||
|
global_stmt: ('global' | 'nonlocal') NAME (',' NAME)*
|
||||||
|
exec_stmt: 'exec' expr ['in' test [',' test]]
|
||||||
|
assert_stmt: 'assert' test [',' test]
|
||||||
|
|
||||||
|
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
|
||||||
|
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
|
||||||
|
while_stmt: 'while' test ':' suite ['else' ':' suite]
|
||||||
|
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
|
||||||
|
try_stmt: ('try' ':' suite
|
||||||
|
((except_clause ':' suite)+
|
||||||
|
['else' ':' suite]
|
||||||
|
['finally' ':' suite] |
|
||||||
|
'finally' ':' suite))
|
||||||
|
with_stmt: 'with' with_item (',' with_item)* ':' suite
|
||||||
|
with_item: test ['as' expr]
|
||||||
|
with_var: 'as' expr
|
||||||
|
# NB compile.c makes sure that the default except clause is last
|
||||||
|
except_clause: 'except' [test [(',' | 'as') test]]
|
||||||
|
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
|
||||||
|
|
||||||
|
# Backward compatibility cruft to support:
|
||||||
|
# [ x for x in lambda: True, lambda: False if x() ]
|
||||||
|
# even while also allowing:
|
||||||
|
# lambda x: 5 if x else 2
|
||||||
|
# (But not a mix of the two)
|
||||||
|
testlist_safe: old_test [(',' old_test)+ [',']]
|
||||||
|
old_test: or_test | old_lambdef
|
||||||
|
old_lambdef: 'lambda' [varargslist] ':' old_test
|
||||||
|
|
||||||
|
test: or_test ['if' or_test 'else' test] | lambdef
|
||||||
|
or_test: and_test ('or' and_test)*
|
||||||
|
and_test: not_test ('and' not_test)*
|
||||||
|
not_test: 'not' not_test | comparison
|
||||||
|
comparison: expr (comp_op expr)*
|
||||||
|
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
||||||
|
star_expr: '*' expr
|
||||||
|
expr: xor_expr ('|' xor_expr)*
|
||||||
|
xor_expr: and_expr ('^' and_expr)*
|
||||||
|
and_expr: shift_expr ('&' shift_expr)*
|
||||||
|
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
||||||
|
arith_expr: term (('+'|'-') term)*
|
||||||
|
term: factor (('*'|'/'|'%'|'//') factor)*
|
||||||
|
factor: ('+'|'-'|'~') factor | power
|
||||||
|
power: atom trailer* ['**' factor]
|
||||||
|
atom: ('(' [yield_expr|testlist_gexp] ')' |
|
||||||
|
'[' [listmaker] ']' |
|
||||||
|
'{' [dictsetmaker] '}' |
|
||||||
|
'`' testlist1 '`' |
|
||||||
|
NAME | NUMBER | STRING+ | '.' '.' '.')
|
||||||
|
listmaker: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||||
|
testlist_gexp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||||
|
lambdef: 'lambda' [varargslist] ':' test
|
||||||
|
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||||
|
subscriptlist: subscript (',' subscript)* [',']
|
||||||
|
subscript: test | [test] ':' [test] [sliceop]
|
||||||
|
sliceop: ':' [test]
|
||||||
|
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
|
||||||
|
testlist: test (',' test)* [',']
|
||||||
|
dictsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
|
||||||
|
(test (comp_for | (',' test)* [','])) )
|
||||||
|
|
||||||
|
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
|
||||||
|
|
||||||
|
arglist: (argument ',')* (argument [',']
|
||||||
|
|'*' test (',' argument)* [',' '**' test]
|
||||||
|
|'**' test)
|
||||||
|
argument: test [comp_for] | test '=' test # Really [keyword '='] test
|
||||||
|
|
||||||
|
comp_iter: comp_for | comp_if
|
||||||
|
comp_for: 'for' exprlist 'in' testlist_safe [comp_iter]
|
||||||
|
comp_if: 'if' old_test [comp_iter]
|
||||||
|
|
||||||
|
testlist1: test (',' test)*
|
||||||
|
|
||||||
|
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||||
|
encoding_decl: NAME
|
||||||
|
|
||||||
|
yield_expr: 'yield' [testlist]
|
||||||
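The notation above is plain pgen EBNF: each rule is NAME followed by its alternatives, with [] for optional parts, * and + for repetition, | for choice, and quoted strings for keywords and operators. A hedged sketch of turning this file into parser tables with the pgen2 package added in this commit (the import path and the file location are assumptions, not part of the diff):

    from jedi.parser.pgen2 import pgen

    g = pgen.generate_grammar("grammar.txt")   # the file shown above, in the working directory
    print(g.symbol2number["file_input"])       # rule numbers start at 256
    print(len(g.dfas), "rules compiled to DFAs")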
jedi/parser/pgen2/__init__.py (new file, 114 lines)
@@ -0,0 +1,114 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||||
|
# Licensed to PSF under a Contributor Agreement.
|
||||||
|
|
||||||
|
# Modifications:
|
||||||
|
# Copyright 2006 Google, Inc. All Rights Reserved.
|
||||||
|
# Licensed to PSF under a Contributor Agreement.
|
||||||
|
|
||||||
|
__all__ = ["Driver", "load_grammar"]
|
||||||
|
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import logging
|
||||||
|
import io
|
||||||
|
|
||||||
|
from . import pgen
|
||||||
|
from . import grammar
|
||||||
|
from . import parse
|
||||||
|
from . import token
|
||||||
|
from . import tokenize
|
||||||
|
|
||||||
|
|
||||||
|
class Driver(object):
|
||||||
|
def __init__(self, grammar, convert=None, logger=None):
|
||||||
|
self.grammar = grammar
|
||||||
|
if logger is None:
|
||||||
|
logger = logging.getLogger()
|
||||||
|
self.logger = logger
|
||||||
|
self.convert = convert
|
||||||
|
|
||||||
|
def parse_tokens(self, tokens):
|
||||||
|
"""Parse a series of tokens and return the syntax tree."""
|
||||||
|
# XXX Move the prefix computation into a wrapper around tokenize.
|
||||||
|
p = parse.Parser(self.grammar, self.convert)
|
||||||
|
lineno = 1
|
||||||
|
column = 0
|
||||||
|
type = value = start = end = line_text = None
|
||||||
|
prefix = ""
|
||||||
|
for quintuple in tokens:
|
||||||
|
type, value, start, end, line_text = quintuple
|
||||||
|
if start != (lineno, column):
|
||||||
|
assert (lineno, column) <= start, ((lineno, column), start)
|
||||||
|
s_lineno, s_column = start
|
||||||
|
if lineno < s_lineno:
|
||||||
|
prefix += "\n" * (s_lineno - lineno)
|
||||||
|
lineno = s_lineno
|
||||||
|
column = 0
|
||||||
|
if column < s_column:
|
||||||
|
prefix += line_text[column:s_column]
|
||||||
|
column = s_column
|
||||||
|
if type in (tokenize.COMMENT, tokenize.NL): # NL != NEWLINE
|
||||||
|
prefix += value
|
||||||
|
lineno, column = end
|
||||||
|
if value.endswith("\n"):
|
||||||
|
lineno += 1
|
||||||
|
column = 0
|
||||||
|
continue
|
||||||
|
if type == token.OP:
|
||||||
|
type = grammar.opmap[value]
|
||||||
|
#self.logger.debug("%s %r (prefix=%r)", token.tok_name[type], value, prefix)
|
||||||
|
if p.addtoken(type, value, (prefix, start)):
|
||||||
|
break
|
||||||
|
prefix = ""
|
||||||
|
lineno, column = end
|
||||||
|
if value.endswith("\n"):
|
||||||
|
lineno += 1
|
||||||
|
column = 0
|
||||||
|
else:
|
||||||
|
# We never broke out -- EOF is too soon (how can this happen???)
|
||||||
|
raise parse.ParseError("incomplete input",
|
||||||
|
type, value, (prefix, start))
|
||||||
|
return p.rootnode
|
||||||
|
|
||||||
|
def parse_string(self, text):
|
||||||
|
"""Parse a string and return the syntax tree."""
|
||||||
|
tokens = tokenize.generate_tokens(io.StringIO(text).readline)
|
||||||
|
return self.parse_tokens(tokens)
|
||||||
|
|
||||||
|
|
||||||
|
def load_grammar(grammar_path="grammar.txt", pickle_path=None,
|
||||||
|
save=True, force=False, logger=None):
|
||||||
|
"""Load the grammar (maybe from a pickle)."""
|
||||||
|
if logger is None:
|
||||||
|
logger = logging.getLogger()
|
||||||
|
if pickle_path is None:
|
||||||
|
head, tail = os.path.splitext(grammar_path)
|
||||||
|
if tail == ".txt":
|
||||||
|
tail = ""
|
||||||
|
pickle_path = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
|
||||||
|
if force or not _newer(pickle_path, grammar_path):
|
||||||
|
logger.info("Generating grammar tables from %s", grammar_path)
|
||||||
|
g = pgen.generate_grammar(grammar_path)
|
||||||
|
# The pickle files mismatch when built on different architectures,
# so don't save them for now. An alternative solution might be to
# include the multiarch triplet in the file name.
|
||||||
|
if False:
|
||||||
|
logger.info("Writing grammar tables to %s", pickle_path)
|
||||||
|
try:
|
||||||
|
g.dump(pickle_path)
|
||||||
|
except OSError as e:
|
||||||
|
logger.info("Writing failed:" + str(e))
|
||||||
|
else:
|
||||||
|
g = grammar.Grammar()
|
||||||
|
g.load(pickle_path)
|
||||||
|
return g
|
||||||
|
|
||||||
|
|
||||||
|
def _newer(a, b):
|
||||||
|
"""Inquire whether file a was written since file b."""
|
||||||
|
if not os.path.exists(a):
|
||||||
|
return False
|
||||||
|
if not os.path.exists(b):
|
||||||
|
return True
|
||||||
|
return os.path.getmtime(a) >= os.path.getmtime(b)
|
||||||
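A hedged usage sketch for the driver above (editor's example, not taken from this commit; it assumes the package is importable as jedi.parser.pgen2 and that grammar.txt is in the working directory). A convert callback is supplied because the default identity conversion leaves plain tuples, and Parser.pop() assigns a used_names attribute on the root node:

    from jedi.parser.pgen2 import Driver, load_grammar

    class _Node(object):
        # Minimal stand-in for a tree node; lib2to3 would pass pytree.convert here.
        def __init__(self, type, value, children):
            self.type, self.value, self.children = type, value, children

    def _convert(grammar, raw_node):
        typ, value, context, children = raw_node
        return _Node(typ, value, children)

    g = load_grammar("grammar.txt")        # builds the tables; pickling is currently disabled
    d = Driver(g, convert=_convert)
    tree = d.parse_string(u"x = 1\n")      # parse_string wraps io.StringIO, so pass unicode text
    print(g.number2symbol[tree.type])      # -> 'file_input', the grammar's start symbol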
jedi/parser/pgen2/grammar.py (new file, 182 lines)
@@ -0,0 +1,182 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||||
|
# Licensed to PSF under a Contributor Agreement.
|
||||||
|
|
||||||
|
"""This module defines the data structures used to represent a grammar.
|
||||||
|
|
||||||
|
These are a bit arcane because they are derived from the data
|
||||||
|
structures used by Python's 'pgen' parser generator.
|
||||||
|
|
||||||
|
There's also a table here mapping operators to their names in the
|
||||||
|
token module; the Python tokenize module reports all operators as the
|
||||||
|
fallback token code OP, but the parser needs the actual token code.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Python imports
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
# Local imports
|
||||||
|
from . import token
|
||||||
|
|
||||||
|
|
||||||
|
class Grammar(object):
|
||||||
|
"""Pgen parsing tables conversion class.
|
||||||
|
|
||||||
|
Once initialized, this class supplies the grammar tables for the
|
||||||
|
parsing engine implemented by parse.py. The parsing engine
|
||||||
|
accesses the instance variables directly. The class here does not
|
||||||
|
provide initialization of the tables; several subclasses exist to
|
||||||
|
do this (see the conv and pgen modules).
|
||||||
|
|
||||||
|
The load() method reads the tables from a pickle file, which is
|
||||||
|
much faster than the other ways offered by subclasses. The pickle
|
||||||
|
file is written by calling dump() (after loading the grammar
|
||||||
|
tables using a subclass). The report() method prints a readable
|
||||||
|
representation of the tables to stdout, for debugging.
|
||||||
|
|
||||||
|
The instance variables are as follows:
|
||||||
|
|
||||||
|
symbol2number -- a dict mapping symbol names to numbers. Symbol
|
||||||
|
numbers are always 256 or higher, to distinguish
|
||||||
|
them from token numbers, which are between 0 and
|
||||||
|
255 (inclusive).
|
||||||
|
|
||||||
|
number2symbol -- a dict mapping numbers to symbol names;
|
||||||
|
these two are each other's inverse.
|
||||||
|
|
||||||
|
states -- a list of DFAs, where each DFA is a list of
|
||||||
|
states, each state is a list of arcs, and each
|
||||||
|
arc is a (i, j) pair where i is a label and j is
|
||||||
|
a state number. The DFA number is the index into
|
||||||
|
this list. (This name is slightly confusing.)
|
||||||
|
Final states are represented by a special arc of
|
||||||
|
the form (0, j) where j is its own state number.
|
||||||
|
|
||||||
|
dfas -- a dict mapping symbol numbers to (DFA, first)
|
||||||
|
pairs, where DFA is an item from the states list
|
||||||
|
above, and first is a set of tokens that can
|
||||||
|
begin this grammar rule (represented by a dict
|
||||||
|
whose values are always 1).
|
||||||
|
|
||||||
|
labels -- a list of (x, y) pairs where x is either a token
|
||||||
|
number or a symbol number, and y is either None
|
||||||
|
or a string; the strings are keywords. The label
|
||||||
|
number is the index in this list; label numbers
|
||||||
|
are used to mark state transitions (arcs) in the
|
||||||
|
DFAs.
|
||||||
|
|
||||||
|
start -- the number of the grammar's start symbol.
|
||||||
|
|
||||||
|
keywords -- a dict mapping keyword strings to arc labels.
|
||||||
|
|
||||||
|
tokens -- a dict mapping token numbers to arc labels.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.symbol2number = {}
|
||||||
|
self.number2symbol = {}
|
||||||
|
self.states = []
|
||||||
|
self.dfas = {}
|
||||||
|
self.labels = [(0, "EMPTY")]
|
||||||
|
self.keywords = {}
|
||||||
|
self.tokens = {}
|
||||||
|
self.symbol2label = {}
|
||||||
|
self.start = 256
|
||||||
|
|
||||||
|
def dump(self, filename):
|
||||||
|
"""Dump the grammar tables to a pickle file."""
|
||||||
|
with open(filename, "wb") as f:
|
||||||
|
pickle.dump(self.__dict__, f, 2)
|
||||||
|
|
||||||
|
def load(self, filename):
|
||||||
|
"""Load the grammar tables from a pickle file."""
|
||||||
|
with open(filename, "rb") as f:
|
||||||
|
d = pickle.load(f)
|
||||||
|
self.__dict__.update(d)
|
||||||
|
|
||||||
|
def copy(self):
|
||||||
|
"""
|
||||||
|
Copy the grammar.
|
||||||
|
"""
|
||||||
|
new = self.__class__()
|
||||||
|
for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
|
||||||
|
"tokens", "symbol2label"):
|
||||||
|
setattr(new, dict_attr, getattr(self, dict_attr).copy())
|
||||||
|
new.labels = self.labels[:]
|
||||||
|
new.states = self.states[:]
|
||||||
|
new.start = self.start
|
||||||
|
return new
|
||||||
|
|
||||||
|
def report(self):
|
||||||
|
"""Dump the grammar tables to standard output, for debugging."""
|
||||||
|
from pprint import pprint
|
||||||
|
print("s2n")
|
||||||
|
pprint(self.symbol2number)
|
||||||
|
print("n2s")
|
||||||
|
pprint(self.number2symbol)
|
||||||
|
print("states")
|
||||||
|
pprint(self.states)
|
||||||
|
print("dfas")
|
||||||
|
pprint(self.dfas)
|
||||||
|
print("labels")
|
||||||
|
pprint(self.labels)
|
||||||
|
print("start", self.start)
|
||||||
|
|
||||||
|
|
||||||
|
# Map from operator to number (since tokenize doesn't do this)
|
||||||
|
|
||||||
|
opmap_raw = """
|
||||||
|
( LPAR
|
||||||
|
) RPAR
|
||||||
|
[ LSQB
|
||||||
|
] RSQB
|
||||||
|
: COLON
|
||||||
|
, COMMA
|
||||||
|
; SEMI
|
||||||
|
+ PLUS
|
||||||
|
- MINUS
|
||||||
|
* STAR
|
||||||
|
/ SLASH
|
||||||
|
| VBAR
|
||||||
|
& AMPER
|
||||||
|
< LESS
|
||||||
|
> GREATER
|
||||||
|
= EQUAL
|
||||||
|
. DOT
|
||||||
|
% PERCENT
|
||||||
|
` BACKQUOTE
|
||||||
|
{ LBRACE
|
||||||
|
} RBRACE
|
||||||
|
@ AT
|
||||||
|
== EQEQUAL
|
||||||
|
!= NOTEQUAL
|
||||||
|
<> NOTEQUAL
|
||||||
|
<= LESSEQUAL
|
||||||
|
>= GREATEREQUAL
|
||||||
|
~ TILDE
|
||||||
|
^ CIRCUMFLEX
|
||||||
|
<< LEFTSHIFT
|
||||||
|
>> RIGHTSHIFT
|
||||||
|
** DOUBLESTAR
|
||||||
|
+= PLUSEQUAL
|
||||||
|
-= MINEQUAL
|
||||||
|
*= STAREQUAL
|
||||||
|
/= SLASHEQUAL
|
||||||
|
%= PERCENTEQUAL
|
||||||
|
&= AMPEREQUAL
|
||||||
|
|= VBAREQUAL
|
||||||
|
^= CIRCUMFLEXEQUAL
|
||||||
|
<<= LEFTSHIFTEQUAL
|
||||||
|
>>= RIGHTSHIFTEQUAL
|
||||||
|
**= DOUBLESTAREQUAL
|
||||||
|
// DOUBLESLASH
|
||||||
|
//= DOUBLESLASHEQUAL
|
||||||
|
-> RARROW
|
||||||
|
"""
|
||||||
|
|
||||||
|
opmap = {}
|
||||||
|
for line in opmap_raw.splitlines():
|
||||||
|
if line:
|
||||||
|
op, name = line.split()
|
||||||
|
opmap[op] = getattr(token, name)
|
||||||
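A short sketch of the pickle round-trip and the operator map defined above (editor's example; the file names and the pgen call are assumptions, not part of this commit):

    from jedi.parser.pgen2 import grammar, pgen

    g = pgen.generate_grammar("grammar.txt")
    g.dump("grammar.pickle")                  # pickles g.__dict__ with protocol 2
    h = grammar.Grammar()
    h.load("grammar.pickle")                  # restores the same tables
    assert h.symbol2number == g.symbol2number

    print(grammar.opmap["->"])                # token.RARROW; tokenize reports OP, the parser needs this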
jedi/parser/pgen2/parse.py (new file, 190 lines)
@@ -0,0 +1,190 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||||
|
# Licensed to PSF under a Contributor Agreement.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Parser engine for the grammar tables generated by pgen.
|
||||||
|
|
||||||
|
The grammar table must be loaded first.
|
||||||
|
|
||||||
|
See Parser/parser.c in the Python distribution for additional info on
|
||||||
|
how this parsing engine works.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Local imports
|
||||||
|
from . import token
|
||||||
|
|
||||||
|
|
||||||
|
class ParseError(Exception):
|
||||||
|
"""Exception to signal the parser is stuck."""
|
||||||
|
|
||||||
|
def __init__(self, msg, type, value, context):
|
||||||
|
Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
|
||||||
|
(msg, type, value, context))
|
||||||
|
self.msg = msg
|
||||||
|
self.type = type
|
||||||
|
self.value = value
|
||||||
|
self.context = context
|
||||||
|
|
||||||
|
|
||||||
|
class Parser(object):
|
||||||
|
"""Parser engine.
|
||||||
|
|
||||||
|
The proper usage sequence is:
|
||||||
|
|
||||||
|
p = Parser(grammar, [converter]) # create instance
|
||||||
|
p.setup([start]) # prepare for parsing
|
||||||
|
<for each input token>:
|
||||||
|
if p.addtoken(...): # parse a token; may raise ParseError
|
||||||
|
break
|
||||||
|
root = p.rootnode # root of abstract syntax tree
|
||||||
|
|
||||||
|
A Parser instance may be reused by calling setup() repeatedly.
|
||||||
|
|
||||||
|
A Parser instance contains state pertaining to the current token
|
||||||
|
sequence, and should not be used concurrently by different threads
|
||||||
|
to parse separate token sequences.
|
||||||
|
|
||||||
|
See driver.py for how to get input tokens by tokenizing a file or
|
||||||
|
string.
|
||||||
|
|
||||||
|
Parsing is complete when addtoken() returns True; the root of the
|
||||||
|
abstract syntax tree can then be retrieved from the rootnode
|
||||||
|
instance variable. When a syntax error occurs, addtoken() raises
|
||||||
|
the ParseError exception. There is no error recovery; the parser
|
||||||
|
cannot be used after a syntax error was reported (but it can be
|
||||||
|
reinitialized by calling setup()).
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, grammar, convert=None):
|
||||||
|
"""Constructor.
|
||||||
|
|
||||||
|
The grammar argument is a grammar.Grammar instance; see the
|
||||||
|
grammar module for more information.
|
||||||
|
|
||||||
|
The parser is not ready yet for parsing; you must call the
|
||||||
|
setup() method to get it started.
|
||||||
|
|
||||||
|
The optional convert argument is a function mapping concrete
|
||||||
|
syntax tree nodes to abstract syntax tree nodes. If not
|
||||||
|
given, no conversion is done and the syntax tree produced is
|
||||||
|
the concrete syntax tree. If given, it must be a function of
|
||||||
|
two arguments, the first being the grammar (a grammar.Grammar
|
||||||
|
instance), and the second being the concrete syntax tree node
|
||||||
|
to be converted. The syntax tree is converted from the bottom
|
||||||
|
up.
|
||||||
|
|
||||||
|
A concrete syntax tree node is a (type, value, context, nodes)
|
||||||
|
tuple, where type is the node type (a token or symbol number),
|
||||||
|
value is None for symbols and a string for tokens, context is
|
||||||
|
None or an opaque value used for error reporting (typically a
|
||||||
|
(lineno, offset) pair), and nodes is a list of children for
|
||||||
|
symbols, and None for tokens.
|
||||||
|
|
||||||
|
An abstract syntax tree node may be anything; this is entirely
|
||||||
|
up to the converter function.
|
||||||
|
|
||||||
|
"""
|
||||||
|
self.grammar = grammar
|
||||||
|
self.convert = convert or (lambda grammar, node: node)
|
||||||
|
|
||||||
|
# Prepare for parsing.
|
||||||
|
start = self.grammar.start
|
||||||
|
# Each stack entry is a tuple: (dfa, state, node).
|
||||||
|
# A node is a tuple: (type, value, context, children),
|
||||||
|
# where children is a list of nodes or None, and context may be None.
|
||||||
|
newnode = (start, None, None, [])
|
||||||
|
stackentry = (self.grammar.dfas[start], 0, newnode)
|
||||||
|
self.stack = [stackentry]
|
||||||
|
self.rootnode = None
|
||||||
|
self.used_names = set() # Aliased to self.rootnode.used_names in pop()
|
||||||
|
|
||||||
|
def addtoken(self, type, value, context):
|
||||||
|
"""Add a token; return True iff this is the end of the program."""
|
||||||
|
# Map from token to label
|
||||||
|
ilabel = self.classify(type, value, context)
|
||||||
|
# Loop until the token is shifted; may raise exceptions
|
||||||
|
while True:
|
||||||
|
dfa, state, node = self.stack[-1]
|
||||||
|
states, first = dfa
|
||||||
|
arcs = states[state]
|
||||||
|
# Look for a state with this label
|
||||||
|
for i, newstate in arcs:
|
||||||
|
t, v = self.grammar.labels[i]
|
||||||
|
if ilabel == i:
|
||||||
|
# Look it up in the list of labels
|
||||||
|
assert t < 256
|
||||||
|
# Shift a token; we're done with it
|
||||||
|
self.shift(type, value, newstate, context)
|
||||||
|
# Pop while we are in an accept-only state
|
||||||
|
state = newstate
|
||||||
|
while states[state] == [(0, state)]:
|
||||||
|
self.pop()
|
||||||
|
if not self.stack:
|
||||||
|
# Done parsing!
|
||||||
|
return True
|
||||||
|
dfa, state, node = self.stack[-1]
|
||||||
|
states, first = dfa
|
||||||
|
# Done with this token
|
||||||
|
return False
|
||||||
|
elif t >= 256:
|
||||||
|
# See if it's a symbol and if we're in its first set
|
||||||
|
itsdfa = self.grammar.dfas[t]
|
||||||
|
itsstates, itsfirst = itsdfa
|
||||||
|
if ilabel in itsfirst:
|
||||||
|
# Push a symbol
|
||||||
|
self.push(t, self.grammar.dfas[t], newstate, context)
|
||||||
|
break # To continue the outer while loop
|
||||||
|
else:
|
||||||
|
if (0, state) in arcs:
|
||||||
|
# An accepting state, pop it and try something else
|
||||||
|
self.pop()
|
||||||
|
if not self.stack:
|
||||||
|
# Done parsing, but another token is input
|
||||||
|
raise ParseError("too much input",
|
||||||
|
type, value, context)
|
||||||
|
else:
|
||||||
|
# No success finding a transition
|
||||||
|
raise ParseError("bad input", type, value, context)
|
||||||
|
|
||||||
|
def classify(self, type, value, context):
|
||||||
|
"""Turn a token into a label. (Internal)"""
|
||||||
|
if type == token.NAME:
|
||||||
|
# Keep a listing of all used names
|
||||||
|
self.used_names.add(value)
|
||||||
|
# Check for reserved words
|
||||||
|
ilabel = self.grammar.keywords.get(value)
|
||||||
|
if ilabel is not None:
|
||||||
|
return ilabel
|
||||||
|
ilabel = self.grammar.tokens.get(type)
|
||||||
|
if ilabel is None:
|
||||||
|
raise ParseError("bad token", type, value, context)
|
||||||
|
return ilabel
|
||||||
|
|
||||||
|
def shift(self, type, value, newstate, context):
|
||||||
|
"""Shift a token. (Internal)"""
|
||||||
|
dfa, state, node = self.stack[-1]
|
||||||
|
newnode = (type, value, context, None)
|
||||||
|
newnode = self.convert(self.grammar, newnode)
|
||||||
|
if newnode is not None:
|
||||||
|
node[-1].append(newnode)
|
||||||
|
self.stack[-1] = (dfa, newstate, node)
|
||||||
|
|
||||||
|
def push(self, type, newdfa, newstate, context):
|
||||||
|
"""Push a nonterminal. (Internal)"""
|
||||||
|
dfa, state, node = self.stack[-1]
|
||||||
|
newnode = (type, None, context, [])
|
||||||
|
self.stack[-1] = (dfa, newstate, node)
|
||||||
|
self.stack.append((newdfa, 0, newnode))
|
||||||
|
|
||||||
|
def pop(self):
|
||||||
|
"""Pop a nonterminal. (Internal)"""
|
||||||
|
popdfa, popstate, popnode = self.stack.pop()
|
||||||
|
newnode = self.convert(self.grammar, popnode)
|
||||||
|
if newnode is not None:
|
||||||
|
if self.stack:
|
||||||
|
dfa, state, node = self.stack[-1]
|
||||||
|
node[-1].append(newnode)
|
||||||
|
else:
|
||||||
|
self.rootnode = newnode
|
||||||
|
self.rootnode.used_names = self.used_names
|
||||||
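There is no error recovery, so broken input surfaces as a ParseError; a hedged sketch of that behaviour (editor's example; the grammar path and the deliberately broken source are assumptions):

    from jedi.parser.pgen2 import Driver, load_grammar
    from jedi.parser.pgen2.parse import ParseError

    d = Driver(load_grammar("grammar.txt"))   # default convert keeps raw tuples
    try:
        d.parse_string(u"def :\n")            # 'def' must be followed by a NAME
    except ParseError as e:
        print(e.msg, repr(e.value))           # -> bad input ':'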
jedi/parser/pgen2/pgen.py (new file, 387 lines)
@@ -0,0 +1,387 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||||
|
# Licensed to PSF under a Contributor Agreement.
|
||||||
|
|
||||||
|
# Pgen imports
|
||||||
|
from . import grammar, token, tokenize
|
||||||
|
|
||||||
|
|
||||||
|
class ParserGenerator(object):
|
||||||
|
def __init__(self, filename, stream=None):
|
||||||
|
close_stream = None
|
||||||
|
if stream is None:
|
||||||
|
stream = open(filename)
|
||||||
|
close_stream = stream.close
|
||||||
|
self.filename = filename
|
||||||
|
self.stream = stream
|
||||||
|
self.generator = tokenize.generate_tokens(stream.readline)
|
||||||
|
self.gettoken() # Initialize lookahead
|
||||||
|
self.dfas, self.startsymbol = self.parse()
|
||||||
|
if close_stream is not None:
|
||||||
|
close_stream()
|
||||||
|
self.first = {} # map from symbol name to set of tokens
|
||||||
|
self.addfirstsets()
|
||||||
|
|
||||||
|
def make_grammar(self):
|
||||||
|
c = grammar.Grammar()
|
||||||
|
names = list(self.dfas.keys())
|
||||||
|
names.sort()
|
||||||
|
names.remove(self.startsymbol)
|
||||||
|
names.insert(0, self.startsymbol)
|
||||||
|
for name in names:
|
||||||
|
i = 256 + len(c.symbol2number)
|
||||||
|
c.symbol2number[name] = i
|
||||||
|
c.number2symbol[i] = name
|
||||||
|
for name in names:
|
||||||
|
dfa = self.dfas[name]
|
||||||
|
states = []
|
||||||
|
for state in dfa:
|
||||||
|
arcs = []
|
||||||
|
for label, next in state.arcs.items():
|
||||||
|
arcs.append((self.make_label(c, label), dfa.index(next)))
|
||||||
|
if state.isfinal:
|
||||||
|
arcs.append((0, dfa.index(state)))
|
||||||
|
states.append(arcs)
|
||||||
|
c.states.append(states)
|
||||||
|
c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
|
||||||
|
c.start = c.symbol2number[self.startsymbol]
|
||||||
|
return c
|
||||||
|
|
||||||
|
def make_first(self, c, name):
|
||||||
|
rawfirst = self.first[name]
|
||||||
|
first = {}
|
||||||
|
for label in rawfirst:
|
||||||
|
ilabel = self.make_label(c, label)
|
||||||
|
##assert ilabel not in first # XXX failed on <> ... !=
|
||||||
|
first[ilabel] = 1
|
||||||
|
return first
|
||||||
|
|
||||||
|
def make_label(self, c, label):
|
||||||
|
# XXX Maybe this should be a method on a subclass of converter?
|
||||||
|
ilabel = len(c.labels)
|
||||||
|
if label[0].isalpha():
|
||||||
|
# Either a symbol name or a named token
|
||||||
|
if label in c.symbol2number:
|
||||||
|
# A symbol name (a non-terminal)
|
||||||
|
if label in c.symbol2label:
|
||||||
|
return c.symbol2label[label]
|
||||||
|
else:
|
||||||
|
c.labels.append((c.symbol2number[label], None))
|
||||||
|
c.symbol2label[label] = ilabel
|
||||||
|
return ilabel
|
||||||
|
else:
|
||||||
|
# A named token (NAME, NUMBER, STRING)
|
||||||
|
itoken = getattr(token, label, None)
|
||||||
|
assert isinstance(itoken, int), label
|
||||||
|
assert itoken in token.tok_name, label
|
||||||
|
if itoken in c.tokens:
|
||||||
|
return c.tokens[itoken]
|
||||||
|
else:
|
||||||
|
c.labels.append((itoken, None))
|
||||||
|
c.tokens[itoken] = ilabel
|
||||||
|
return ilabel
|
||||||
|
else:
|
||||||
|
# Either a keyword or an operator
|
||||||
|
assert label[0] in ('"', "'"), label
|
||||||
|
value = eval(label)
|
||||||
|
if value[0].isalpha():
|
||||||
|
# A keyword
|
||||||
|
if value in c.keywords:
|
||||||
|
return c.keywords[value]
|
||||||
|
else:
|
||||||
|
c.labels.append((token.NAME, value))
|
||||||
|
c.keywords[value] = ilabel
|
||||||
|
return ilabel
|
||||||
|
else:
|
||||||
|
# An operator (any non-numeric token)
|
||||||
|
itoken = grammar.opmap[value] # Fails if unknown token
|
||||||
|
if itoken in c.tokens:
|
||||||
|
return c.tokens[itoken]
|
||||||
|
else:
|
||||||
|
c.labels.append((itoken, None))
|
||||||
|
c.tokens[itoken] = ilabel
|
||||||
|
return ilabel
|
||||||
|
|
||||||
|
def addfirstsets(self):
|
||||||
|
names = list(self.dfas.keys())
|
||||||
|
names.sort()
|
||||||
|
for name in names:
|
||||||
|
if name not in self.first:
|
||||||
|
self.calcfirst(name)
|
||||||
|
#print name, self.first[name].keys()
|
||||||
|
|
||||||
|
def calcfirst(self, name):
|
||||||
|
dfa = self.dfas[name]
|
||||||
|
self.first[name] = None # dummy to detect left recursion
|
||||||
|
state = dfa[0]
|
||||||
|
totalset = {}
|
||||||
|
overlapcheck = {}
|
||||||
|
for label, next in state.arcs.items():
|
||||||
|
if label in self.dfas:
|
||||||
|
if label in self.first:
|
||||||
|
fset = self.first[label]
|
||||||
|
if fset is None:
|
||||||
|
raise ValueError("recursion for rule %r" % name)
|
||||||
|
else:
|
||||||
|
self.calcfirst(label)
|
||||||
|
fset = self.first[label]
|
||||||
|
totalset.update(fset)
|
||||||
|
overlapcheck[label] = fset
|
||||||
|
else:
|
||||||
|
totalset[label] = 1
|
||||||
|
overlapcheck[label] = {label: 1}
|
||||||
|
inverse = {}
|
||||||
|
for label, itsfirst in overlapcheck.items():
|
||||||
|
for symbol in itsfirst:
|
||||||
|
if symbol in inverse:
|
||||||
|
raise ValueError("rule %s is ambiguous; %s is in the"
|
||||||
|
" first sets of %s as well as %s" %
|
||||||
|
(name, symbol, label, inverse[symbol]))
|
||||||
|
inverse[symbol] = label
|
||||||
|
self.first[name] = totalset
|
||||||
|
|
||||||
|
def parse(self):
|
||||||
|
dfas = {}
|
||||||
|
startsymbol = None
|
||||||
|
# MSTART: (NEWLINE | RULE)* ENDMARKER
|
||||||
|
while self.type != token.ENDMARKER:
|
||||||
|
while self.type == token.NEWLINE:
|
||||||
|
self.gettoken()
|
||||||
|
# RULE: NAME ':' RHS NEWLINE
|
||||||
|
name = self.expect(token.NAME)
|
||||||
|
self.expect(token.OP, ":")
|
||||||
|
a, z = self.parse_rhs()
|
||||||
|
self.expect(token.NEWLINE)
|
||||||
|
#self.dump_nfa(name, a, z)
|
||||||
|
dfa = self.make_dfa(a, z)
|
||||||
|
#self.dump_dfa(name, dfa)
|
||||||
|
# oldlen = len(dfa)
|
||||||
|
self.simplify_dfa(dfa)
|
||||||
|
# newlen = len(dfa)
|
||||||
|
dfas[name] = dfa
|
||||||
|
#print name, oldlen, newlen
|
||||||
|
if startsymbol is None:
|
||||||
|
startsymbol = name
|
||||||
|
return dfas, startsymbol
|
||||||
|
|
||||||
|
def make_dfa(self, start, finish):
|
||||||
|
# To turn an NFA into a DFA, we define the states of the DFA
|
||||||
|
# to correspond to *sets* of states of the NFA. Then do some
|
||||||
|
# state reduction. Let's represent sets as dicts with 1 for
|
||||||
|
# values.
|
||||||
|
assert isinstance(start, NFAState)
|
||||||
|
assert isinstance(finish, NFAState)
|
||||||
|
|
||||||
|
def closure(state):
|
||||||
|
base = {}
|
||||||
|
addclosure(state, base)
|
||||||
|
return base
|
||||||
|
|
||||||
|
def addclosure(state, base):
|
||||||
|
assert isinstance(state, NFAState)
|
||||||
|
if state in base:
|
||||||
|
return
|
||||||
|
base[state] = 1
|
||||||
|
for label, next in state.arcs:
|
||||||
|
if label is None:
|
||||||
|
addclosure(next, base)
|
||||||
|
|
||||||
|
states = [DFAState(closure(start), finish)]
|
||||||
|
for state in states: # NB states grows while we're iterating
|
||||||
|
arcs = {}
|
||||||
|
for nfastate in state.nfaset:
|
||||||
|
for label, next in nfastate.arcs:
|
||||||
|
if label is not None:
|
||||||
|
addclosure(next, arcs.setdefault(label, {}))
|
||||||
|
for label, nfaset in arcs.items():
|
||||||
|
for st in states:
|
||||||
|
if st.nfaset == nfaset:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
st = DFAState(nfaset, finish)
|
||||||
|
states.append(st)
|
||||||
|
state.addarc(st, label)
|
||||||
|
return states # List of DFAState instances; first one is start
|
||||||
|
|
||||||
|
def dump_nfa(self, name, start, finish):
|
||||||
|
print("Dump of NFA for", name)
|
||||||
|
todo = [start]
|
||||||
|
for i, state in enumerate(todo):
|
||||||
|
print(" State", i, state is finish and "(final)" or "")
|
||||||
|
for label, next in state.arcs:
|
||||||
|
if next in todo:
|
||||||
|
j = todo.index(next)
|
||||||
|
else:
|
||||||
|
j = len(todo)
|
||||||
|
todo.append(next)
|
||||||
|
if label is None:
|
||||||
|
print(" -> %d" % j)
|
||||||
|
else:
|
||||||
|
print(" %s -> %d" % (label, j))
|
||||||
|
|
||||||
|
def dump_dfa(self, name, dfa):
|
||||||
|
print("Dump of DFA for", name)
|
||||||
|
for i, state in enumerate(dfa):
|
||||||
|
print(" State", i, state.isfinal and "(final)" or "")
|
||||||
|
for label, next in state.arcs.items():
|
||||||
|
print(" %s -> %d" % (label, dfa.index(next)))
|
||||||
|
|
||||||
|
def simplify_dfa(self, dfa):
|
||||||
|
# This is not theoretically optimal, but works well enough.
|
||||||
|
# Algorithm: repeatedly look for two states that have the same
|
||||||
|
# set of arcs (same labels pointing to the same nodes) and
|
||||||
|
# unify them, until things stop changing.
|
||||||
|
|
||||||
|
# dfa is a list of DFAState instances
|
||||||
|
changes = True
|
||||||
|
while changes:
|
||||||
|
changes = False
|
||||||
|
for i, state_i in enumerate(dfa):
|
||||||
|
for j in range(i + 1, len(dfa)):
|
||||||
|
state_j = dfa[j]
|
||||||
|
if state_i == state_j:
|
||||||
|
#print " unify", i, j
|
||||||
|
del dfa[j]
|
||||||
|
for state in dfa:
|
||||||
|
state.unifystate(state_j, state_i)
|
||||||
|
changes = True
|
||||||
|
break
|
||||||
|
|
||||||
|
def parse_rhs(self):
|
||||||
|
# RHS: ALT ('|' ALT)*
|
||||||
|
a, z = self.parse_alt()
|
||||||
|
if self.value != "|":
|
||||||
|
return a, z
|
||||||
|
else:
|
||||||
|
aa = NFAState()
|
||||||
|
zz = NFAState()
|
||||||
|
aa.addarc(a)
|
||||||
|
z.addarc(zz)
|
||||||
|
while self.value == "|":
|
||||||
|
self.gettoken()
|
||||||
|
a, z = self.parse_alt()
|
||||||
|
aa.addarc(a)
|
||||||
|
z.addarc(zz)
|
||||||
|
return aa, zz
|
||||||
|
|
||||||
|
def parse_alt(self):
|
||||||
|
# ALT: ITEM+
|
||||||
|
a, b = self.parse_item()
|
||||||
|
while (self.value in ("(", "[") or
|
||||||
|
self.type in (token.NAME, token.STRING)):
|
||||||
|
c, d = self.parse_item()
|
||||||
|
b.addarc(c)
|
||||||
|
b = d
|
||||||
|
return a, b
|
||||||
|
|
||||||
|
def parse_item(self):
|
||||||
|
# ITEM: '[' RHS ']' | ATOM ['+' | '*']
|
||||||
|
if self.value == "[":
|
||||||
|
self.gettoken()
|
||||||
|
a, z = self.parse_rhs()
|
||||||
|
self.expect(token.OP, "]")
|
||||||
|
a.addarc(z)
|
||||||
|
return a, z
|
||||||
|
else:
|
||||||
|
a, z = self.parse_atom()
|
||||||
|
value = self.value
|
||||||
|
if value not in ("+", "*"):
|
||||||
|
return a, z
|
||||||
|
self.gettoken()
|
||||||
|
z.addarc(a)
|
||||||
|
if value == "+":
|
||||||
|
return a, z
|
||||||
|
else:
|
||||||
|
return a, a
|
||||||
|
|
||||||
|
def parse_atom(self):
|
||||||
|
# ATOM: '(' RHS ')' | NAME | STRING
|
||||||
|
if self.value == "(":
|
||||||
|
self.gettoken()
|
||||||
|
a, z = self.parse_rhs()
|
||||||
|
self.expect(token.OP, ")")
|
||||||
|
return a, z
|
||||||
|
elif self.type in (token.NAME, token.STRING):
|
||||||
|
a = NFAState()
|
||||||
|
z = NFAState()
|
||||||
|
a.addarc(z, self.value)
|
||||||
|
self.gettoken()
|
||||||
|
return a, z
|
||||||
|
else:
|
||||||
|
self.raise_error("expected (...) or NAME or STRING, got %s/%s",
|
||||||
|
self.type, self.value)
|
||||||
|
|
||||||
|
def expect(self, type, value=None):
|
||||||
|
if self.type != type or (value is not None and self.value != value):
|
||||||
|
self.raise_error("expected %s/%s, got %s/%s",
|
||||||
|
type, value, self.type, self.value)
|
||||||
|
value = self.value
|
||||||
|
self.gettoken()
|
||||||
|
return value
|
||||||
|
|
||||||
|
def gettoken(self):
|
||||||
|
tup = next(self.generator)
|
||||||
|
while tup[0] in (tokenize.COMMENT, tokenize.NL):
|
||||||
|
tup = next(self.generator)
|
||||||
|
self.type, self.value, self.begin, self.end, self.line = tup
|
||||||
|
#print token.tok_name[self.type], repr(self.value)
|
||||||
|
|
||||||
|
def raise_error(self, msg, *args):
|
||||||
|
if args:
|
||||||
|
try:
|
||||||
|
msg = msg % args
|
||||||
|
except:
|
||||||
|
msg = " ".join([msg] + list(map(str, args)))
|
||||||
|
raise SyntaxError(msg, (self.filename, self.end[0],
|
||||||
|
self.end[1], self.line))
|
||||||
|
|
||||||
|
|
||||||
|
class NFAState(object):
|
||||||
|
def __init__(self):
|
||||||
|
self.arcs = [] # list of (label, NFAState) pairs
|
||||||
|
|
||||||
|
def addarc(self, next, label=None):
|
||||||
|
assert label is None or isinstance(label, str)
|
||||||
|
assert isinstance(next, NFAState)
|
||||||
|
self.arcs.append((label, next))
|
||||||
|
|
||||||
|
|
||||||
|
class DFAState(object):
|
||||||
|
def __init__(self, nfaset, final):
|
||||||
|
assert isinstance(nfaset, dict)
|
||||||
|
assert isinstance(next(iter(nfaset)), NFAState)
|
||||||
|
assert isinstance(final, NFAState)
|
||||||
|
self.nfaset = nfaset
|
||||||
|
self.isfinal = final in nfaset
|
||||||
|
self.arcs = {} # map from label to DFAState
|
||||||
|
|
||||||
|
def addarc(self, next, label):
|
||||||
|
assert isinstance(label, str)
|
||||||
|
assert label not in self.arcs
|
||||||
|
assert isinstance(next, DFAState)
|
||||||
|
self.arcs[label] = next
|
||||||
|
|
||||||
|
def unifystate(self, old, new):
|
||||||
|
for label, next in self.arcs.items():
|
||||||
|
if next is old:
|
||||||
|
self.arcs[label] = new
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
# Equality test -- ignore the nfaset instance variable
|
||||||
|
assert isinstance(other, DFAState)
|
||||||
|
if self.isfinal != other.isfinal:
|
||||||
|
return False
|
||||||
|
# Can't just return self.arcs == other.arcs, because that
|
||||||
|
# would invoke this method recursively, with cycles...
|
||||||
|
if len(self.arcs) != len(other.arcs):
|
||||||
|
return False
|
||||||
|
for label, next in self.arcs.items():
|
||||||
|
if next is not other.arcs.get(label):
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
__hash__ = None # For Py3 compatibility.
|
||||||
|
|
||||||
|
|
||||||
|
def generate_grammar(filename="Grammar.txt"):
|
||||||
|
p = ParserGenerator(filename)
|
||||||
|
return p.make_grammar()
|
||||||
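A small sketch of driving the generator above directly (editor's example; generate_grammar() at the end is the convenience wrapper, and the file name is an assumption):

    from jedi.parser.pgen2 import pgen

    p = pgen.ParserGenerator("grammar.txt")
    g = p.make_grammar()
    print(p.startsymbol)                       # 'file_input', the first rule in the file
    print(len(g.states), "DFAs,", len(g.labels), "labels")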
jedi/parser/pgen2/token.py (new executable file, 82 lines)
@@ -0,0 +1,82 @@
#! /usr/bin/python3.4
|
||||||
|
|
||||||
|
"""Token constants (from "token.h")."""
|
||||||
|
|
||||||
|
# Taken from Python (r53757) and modified to include some tokens
|
||||||
|
# originally monkeypatched in by pgen2.tokenize
|
||||||
|
|
||||||
|
#--start constants--
|
||||||
|
ENDMARKER = 0
|
||||||
|
NAME = 1
|
||||||
|
NUMBER = 2
|
||||||
|
STRING = 3
|
||||||
|
NEWLINE = 4
|
||||||
|
INDENT = 5
|
||||||
|
DEDENT = 6
|
||||||
|
LPAR = 7
|
||||||
|
RPAR = 8
|
||||||
|
LSQB = 9
|
||||||
|
RSQB = 10
|
||||||
|
COLON = 11
|
||||||
|
COMMA = 12
|
||||||
|
SEMI = 13
|
||||||
|
PLUS = 14
|
||||||
|
MINUS = 15
|
||||||
|
STAR = 16
|
||||||
|
SLASH = 17
|
||||||
|
VBAR = 18
|
||||||
|
AMPER = 19
|
||||||
|
LESS = 20
|
||||||
|
GREATER = 21
|
||||||
|
EQUAL = 22
|
||||||
|
DOT = 23
|
||||||
|
PERCENT = 24
|
||||||
|
BACKQUOTE = 25
|
||||||
|
LBRACE = 26
|
||||||
|
RBRACE = 27
|
||||||
|
EQEQUAL = 28
|
||||||
|
NOTEQUAL = 29
|
||||||
|
LESSEQUAL = 30
|
||||||
|
GREATEREQUAL = 31
|
||||||
|
TILDE = 32
|
||||||
|
CIRCUMFLEX = 33
|
||||||
|
LEFTSHIFT = 34
|
||||||
|
RIGHTSHIFT = 35
|
||||||
|
DOUBLESTAR = 36
|
||||||
|
PLUSEQUAL = 37
|
||||||
|
MINEQUAL = 38
|
||||||
|
STAREQUAL = 39
|
||||||
|
SLASHEQUAL = 40
|
||||||
|
PERCENTEQUAL = 41
|
||||||
|
AMPEREQUAL = 42
|
||||||
|
VBAREQUAL = 43
|
||||||
|
CIRCUMFLEXEQUAL = 44
|
||||||
|
LEFTSHIFTEQUAL = 45
|
||||||
|
RIGHTSHIFTEQUAL = 46
|
||||||
|
DOUBLESTAREQUAL = 47
|
||||||
|
DOUBLESLASH = 48
|
||||||
|
DOUBLESLASHEQUAL = 49
|
||||||
|
AT = 50
|
||||||
|
OP = 51
|
||||||
|
COMMENT = 52
|
||||||
|
NL = 53
|
||||||
|
RARROW = 54
|
||||||
|
ERRORTOKEN = 55
|
||||||
|
N_TOKENS = 56
|
||||||
|
NT_OFFSET = 256
|
||||||
|
#--end constants--
|
||||||
|
|
||||||
|
tok_name = {}
|
||||||
|
for _name, _value in list(globals().items()):
|
||||||
|
if type(_value) is type(0):
|
||||||
|
tok_name[_value] = _name
|
||||||
|
|
||||||
|
|
||||||
|
def ISTERMINAL(x):
|
||||||
|
return x < NT_OFFSET
|
||||||
|
|
||||||
|
def ISNONTERMINAL(x):
|
||||||
|
return x >= NT_OFFSET
|
||||||
|
|
||||||
|
def ISEOF(x):
|
||||||
|
return x == ENDMARKER
|
||||||
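A tiny sketch of the helpers above (editor's example; the import path is an assumption):

    from jedi.parser.pgen2 import token

    print(token.tok_name[token.RARROW])        # 'RARROW'
    print(token.ISTERMINAL(token.NAME))        # True -- token numbers are below 256
    print(token.ISNONTERMINAL(256))            # True -- symbol numbers start at NT_OFFSET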
jedi/parser/pgen2/tokenize.py (new file, 503 lines)
@@ -0,0 +1,503 @@
# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation.
|
||||||
|
# All rights reserved.
|
||||||
|
|
||||||
|
"""Tokenization help for Python programs.
|
||||||
|
|
||||||
|
generate_tokens(readline) is a generator that breaks a stream of
|
||||||
|
text into Python tokens. It accepts a readline-like method which is called
|
||||||
|
repeatedly to get the next line of input (or "" for EOF). It generates
|
||||||
|
5-tuples with these members:
|
||||||
|
|
||||||
|
the token type (see token.py)
|
||||||
|
the token (a string)
|
||||||
|
the starting (row, column) indices of the token (a 2-tuple of ints)
|
||||||
|
the ending (row, column) indices of the token (a 2-tuple of ints)
|
||||||
|
the original line (string)
|
||||||
|
|
||||||
|
It is designed to match the working of the Python tokenizer exactly, except
|
||||||
|
that it produces COMMENT tokens for comments and gives type OP for all
|
||||||
|
operators.
|
||||||
|
|
||||||
|
Older entry points
|
||||||
|
tokenize_loop(readline, tokeneater)
|
||||||
|
tokenize(readline, tokeneater=printtoken)
|
||||||
|
are the same, except instead of generating tokens, tokeneater is a callback
|
||||||
|
function to which the 5 fields described above are passed as 5 arguments,
|
||||||
|
each time a new token is found."""
|
||||||
|
|
||||||
|
__author__ = 'Ka-Ping Yee <ping@lfw.org>'
|
||||||
|
__credits__ = \
|
||||||
|
'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
|
||||||
|
|
||||||
|
import string, re
|
||||||
|
from codecs import BOM_UTF8, lookup
|
||||||
|
|
||||||
|
from .token import *
|
||||||
|
from . import token
|
||||||
|
|
||||||
|
__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
|
||||||
|
"generate_tokens", "untokenize"]
|
||||||
|
del token
|
||||||
|
|
||||||
|
try:
|
||||||
|
bytes
|
||||||
|
except NameError:
|
||||||
|
# Support bytes type in Python <= 2.5, so 2to3 turns itself into
|
||||||
|
# valid Python 3 code.
|
||||||
|
bytes = str
|
||||||
|
|
||||||
|
def group(*choices): return '(' + '|'.join(choices) + ')'
|
||||||
|
def any(*choices): return group(*choices) + '*'
|
||||||
|
def maybe(*choices): return group(*choices) + '?'
|
||||||
|
|
||||||
|
Whitespace = r'[ \f\t]*'
|
||||||
|
Comment = r'#[^\r\n]*'
|
||||||
|
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
|
||||||
|
Name = r'[a-zA-Z_]\w*'
|
||||||
|
|
||||||
|
Binnumber = r'0[bB][01]*'
|
||||||
|
Hexnumber = r'0[xX][\da-fA-F]*[lL]?'
|
||||||
|
Octnumber = r'0[oO]?[0-7]*[lL]?'
|
||||||
|
Decnumber = r'[1-9]\d*[lL]?'
|
||||||
|
Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
|
||||||
|
Exponent = r'[eE][-+]?\d+'
|
||||||
|
Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
|
||||||
|
Expfloat = r'\d+' + Exponent
|
||||||
|
Floatnumber = group(Pointfloat, Expfloat)
|
||||||
|
Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
|
||||||
|
Number = group(Imagnumber, Floatnumber, Intnumber)
|
||||||
|
|
||||||
|
# Tail end of ' string.
|
||||||
|
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
|
||||||
|
# Tail end of " string.
|
||||||
|
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
|
||||||
|
# Tail end of ''' string.
|
||||||
|
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
|
||||||
|
# Tail end of """ string.
|
||||||
|
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
|
||||||
|
Triple = group("[ubUB]?[rR]?'''", '[ubUB]?[rR]?"""')
|
||||||
|
# Single-line ' or " string.
|
||||||
|
String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
|
||||||
|
r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
|
||||||
|
|
||||||
|
# Because of leftmost-then-longest match semantics, be sure to put the
|
||||||
|
# longest operators first (e.g., if = came before ==, == would get
|
||||||
|
# recognized as two instances of =).
|
||||||
|
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
|
||||||
|
r"//=?", r"->",
|
||||||
|
r"[+\-*/%&|^=<>]=?",
|
||||||
|
r"~")
|
||||||
|
|
||||||
|
Bracket = '[][(){}]'
|
||||||
|
Special = group(r'\r?\n', r'[:;.,`@]')
|
||||||
|
Funny = group(Operator, Bracket, Special)
|
||||||
|
|
||||||
|
PlainToken = group(Number, Funny, String, Name)
|
||||||
|
Token = Ignore + PlainToken
|
||||||
|
|
||||||
|
# First (or only) line of ' or " string.
|
||||||
|
ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
|
||||||
|
group("'", r'\\\r?\n'),
|
||||||
|
r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
|
||||||
|
group('"', r'\\\r?\n'))
|
||||||
|
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
|
||||||
|
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
|
||||||
|
|
||||||
|
tokenprog, pseudoprog, single3prog, double3prog = list(map(
|
||||||
|
re.compile, (Token, PseudoToken, Single3, Double3)))
|
||||||
|
endprogs = {"'": re.compile(Single), '"': re.compile(Double),
|
||||||
|
"'''": single3prog, '"""': double3prog,
|
||||||
|
"r'''": single3prog, 'r"""': double3prog,
|
||||||
|
"u'''": single3prog, 'u"""': double3prog,
|
||||||
|
"b'''": single3prog, 'b"""': double3prog,
|
||||||
|
"ur'''": single3prog, 'ur"""': double3prog,
|
||||||
|
"br'''": single3prog, 'br"""': double3prog,
|
||||||
|
"R'''": single3prog, 'R"""': double3prog,
|
||||||
|
"U'''": single3prog, 'U"""': double3prog,
|
||||||
|
"B'''": single3prog, 'B"""': double3prog,
|
||||||
|
"uR'''": single3prog, 'uR"""': double3prog,
|
||||||
|
"Ur'''": single3prog, 'Ur"""': double3prog,
|
||||||
|
"UR'''": single3prog, 'UR"""': double3prog,
|
||||||
|
"bR'''": single3prog, 'bR"""': double3prog,
|
||||||
|
"Br'''": single3prog, 'Br"""': double3prog,
|
||||||
|
"BR'''": single3prog, 'BR"""': double3prog,
|
||||||
|
'r': None, 'R': None,
|
||||||
|
'u': None, 'U': None,
|
||||||
|
'b': None, 'B': None}
|
||||||
|
|
||||||
|
triple_quoted = {}
|
||||||
|
for t in ("'''", '"""',
|
||||||
|
"r'''", 'r"""', "R'''", 'R"""',
|
||||||
|
"u'''", 'u"""', "U'''", 'U"""',
|
||||||
|
"b'''", 'b"""', "B'''", 'B"""',
|
||||||
|
"ur'''", 'ur"""', "Ur'''", 'Ur"""',
|
||||||
|
"uR'''", 'uR"""', "UR'''", 'UR"""',
|
||||||
|
"br'''", 'br"""', "Br'''", 'Br"""',
|
||||||
|
"bR'''", 'bR"""', "BR'''", 'BR"""',):
|
||||||
|
triple_quoted[t] = t
|
||||||
|
single_quoted = {}
|
||||||
|
for t in ("'", '"',
|
||||||
|
"r'", 'r"', "R'", 'R"',
|
||||||
|
"u'", 'u"', "U'", 'U"',
|
||||||
|
"b'", 'b"', "B'", 'B"',
|
||||||
|
"ur'", 'ur"', "Ur'", 'Ur"',
|
||||||
|
"uR'", 'uR"', "UR'", 'UR"',
|
||||||
|
"br'", 'br"', "Br'", 'Br"',
|
||||||
|
"bR'", 'bR"', "BR'", 'BR"', ):
|
||||||
|
single_quoted[t] = t
|
||||||
|
|
||||||
|
tabsize = 8
|
||||||
|
|
||||||
|
class TokenError(Exception): pass
|
||||||
|
|
||||||
|
class StopTokenizing(Exception): pass
|
||||||
|
|
||||||
|
def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line): # for testing
|
||||||
|
(srow, scol) = xxx_todo_changeme
|
||||||
|
(erow, ecol) = xxx_todo_changeme1
|
||||||
|
print("%d,%d-%d,%d:\t%s\t%s" % \
|
||||||
|
(srow, scol, erow, ecol, tok_name[type], repr(token)))
|
||||||
|
|
||||||
|
def tokenize(readline, tokeneater=printtoken):
|
||||||
|
"""
|
||||||
|
The tokenize() function accepts two parameters: one representing the
|
||||||
|
input stream, and one providing an output mechanism for tokenize().
|
||||||
|
|
||||||
|
The first parameter, readline, must be a callable object which provides
|
||||||
|
the same interface as the readline() method of built-in file objects.
|
||||||
|
Each call to the function should return one line of input as a string.
|
||||||
|
|
||||||
|
The second parameter, tokeneater, must also be a callable object. It is
|
||||||
|
called once for each token, with five arguments, corresponding to the
|
||||||
|
tuples generated by generate_tokens().
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
tokenize_loop(readline, tokeneater)
|
||||||
|
except StopTokenizing:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# backwards compatible interface
|
||||||
|
def tokenize_loop(readline, tokeneater):
|
||||||
|
for token_info in generate_tokens(readline):
|
||||||
|
tokeneater(*token_info)
|
||||||
|
|
||||||
|
class Untokenizer:
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.tokens = []
|
||||||
|
self.prev_row = 1
|
||||||
|
self.prev_col = 0
|
||||||
|
|
||||||
|
def add_whitespace(self, start):
|
||||||
|
row, col = start
|
||||||
|
assert row <= self.prev_row
|
||||||
|
col_offset = col - self.prev_col
|
||||||
|
if col_offset:
|
||||||
|
self.tokens.append(" " * col_offset)
|
||||||
|
|
||||||
|
def untokenize(self, iterable):
|
||||||
|
for t in iterable:
|
||||||
|
if len(t) == 2:
|
||||||
|
self.compat(t, iterable)
|
||||||
|
break
|
||||||
|
tok_type, token, start, end, line = t
|
||||||
|
self.add_whitespace(start)
|
||||||
|
self.tokens.append(token)
|
||||||
|
self.prev_row, self.prev_col = end
|
||||||
|
if tok_type in (NEWLINE, NL):
|
||||||
|
self.prev_row += 1
|
||||||
|
self.prev_col = 0
|
||||||
|
return "".join(self.tokens)
|
||||||
|
|
||||||
|
def compat(self, token, iterable):
|
||||||
|
startline = False
|
||||||
|
indents = []
|
||||||
|
toks_append = self.tokens.append
|
||||||
|
toknum, tokval = token
|
||||||
|
if toknum in (NAME, NUMBER):
|
||||||
|
tokval += ' '
|
||||||
|
if toknum in (NEWLINE, NL):
|
||||||
|
startline = True
|
||||||
|
for tok in iterable:
|
||||||
|
toknum, tokval = tok[:2]
|
||||||
|
|
||||||
|
if toknum in (NAME, NUMBER):
|
||||||
|
tokval += ' '
|
||||||
|
|
||||||
|
if toknum == INDENT:
|
||||||
|
indents.append(tokval)
|
||||||
|
continue
|
||||||
|
elif toknum == DEDENT:
|
||||||
|
indents.pop()
|
||||||
|
continue
|
||||||
|
elif toknum in (NEWLINE, NL):
|
||||||
|
startline = True
|
||||||
|
elif startline and indents:
|
||||||
|
toks_append(indents[-1])
|
||||||
|
startline = False
|
||||||
|
toks_append(tokval)
|
||||||
|
|
||||||
|
cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
|
||||||
|
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
|
||||||
|
|
||||||
|
def _get_normal_name(orig_enc):
|
||||||
|
"""Imitates get_normal_name in tokenizer.c."""
|
||||||
|
# Only care about the first 12 characters.
|
||||||
|
enc = orig_enc[:12].lower().replace("_", "-")
|
||||||
|
if enc == "utf-8" or enc.startswith("utf-8-"):
|
||||||
|
return "utf-8"
|
||||||
|
if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
|
||||||
|
enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
|
||||||
|
return "iso-8859-1"
|
||||||
|
return orig_enc
|
||||||
|
|
||||||
|
def detect_encoding(readline):
|
||||||
|
"""
|
||||||
|
The detect_encoding() function is used to detect the encoding that should
|
||||||
|
be used to decode a Python source file. It requires one argument, readline,
|
||||||
|
in the same way as the tokenize() generator.
|
||||||
|
|
||||||
|
It will call readline a maximum of twice, and return the encoding used
|
||||||
|
(as a string) and a list of any lines (left as bytes) it has read
|
||||||
|
in.
|
||||||
|
|
||||||
|
It detects the encoding from the presence of a utf-8 bom or an encoding
|
||||||
|
cookie as specified in pep-0263. If both a bom and a cookie are present, but
|
||||||
|
disagree, a SyntaxError will be raised. If the encoding cookie is an invalid
|
||||||
|
charset, raise a SyntaxError. Note that if a utf-8 bom is found,
|
||||||
|
'utf-8-sig' is returned.
|
||||||
|
|
||||||
|
If no encoding is specified, then the default of 'utf-8' will be returned.
|
||||||
|
"""
|
||||||
|
bom_found = False
|
||||||
|
encoding = None
|
||||||
|
default = 'utf-8'
|
||||||
|
def read_or_stop():
|
||||||
|
try:
|
||||||
|
return readline()
|
||||||
|
except StopIteration:
|
||||||
|
return bytes()
|
||||||
|
|
||||||
|
def find_cookie(line):
|
||||||
|
try:
|
||||||
|
line_string = line.decode('ascii')
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
return None
|
||||||
|
match = cookie_re.match(line_string)
|
||||||
|
if not match:
|
||||||
|
return None
|
||||||
|
encoding = _get_normal_name(match.group(1))
|
||||||
|
try:
|
||||||
|
codec = lookup(encoding)
|
||||||
|
except LookupError:
|
||||||
|
# This behaviour mimics the Python interpreter
|
||||||
|
raise SyntaxError("unknown encoding: " + encoding)
|
||||||
|
|
||||||
|
if bom_found:
|
||||||
|
if codec.name != 'utf-8':
|
||||||
|
# This behaviour mimics the Python interpreter
|
||||||
|
raise SyntaxError('encoding problem: utf-8')
|
||||||
|
encoding += '-sig'
|
||||||
|
return encoding
|
||||||
|
|
||||||
|
first = read_or_stop()
|
||||||
|
if first.startswith(BOM_UTF8):
|
||||||
|
bom_found = True
|
||||||
|
first = first[3:]
|
||||||
|
default = 'utf-8-sig'
|
||||||
|
if not first:
|
||||||
|
return default, []
|
||||||
|
|
||||||
|
encoding = find_cookie(first)
|
||||||
|
if encoding:
|
||||||
|
return encoding, [first]
|
||||||
|
if not blank_re.match(first):
|
||||||
|
return default, [first]
|
||||||
|
|
||||||
|
second = read_or_stop()
|
||||||
|
if not second:
|
||||||
|
return default, [first]
|
||||||
|
|
||||||
|
encoding = find_cookie(second)
|
||||||
|
if encoding:
|
||||||
|
return encoding, [first, second]
|
||||||
|
|
||||||
|
return default, [first, second]
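
# A hedged example of the above (editor's sketch, not from the original source;
# the helper name is made up): detect_encoding() is fed a readline over bytes.
def _detect_encoding_example():
    import io
    source = b"# -*- coding: latin-1 -*-\nx = 1\n"
    # Returns the normalized codec name plus the raw lines it had to read:
    # ('iso-8859-1', [b'# -*- coding: latin-1 -*-\n'])
    return detect_encoding(io.BytesIO(source).readline)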
|
||||||
|
|
||||||
|
def untokenize(iterable):
    """Transform tokens back into Python source code.

    Each element returned by the iterable must be a token sequence
    with at least two elements, a token number and token value. If
    only two tokens are passed, the resulting output is poor.

    Round-trip invariant for full input:
        Untokenized source will match input source exactly

    Round-trip invariant for limited input:
        # Output text will tokenize back to the input
        t1 = [tok[:2] for tok in generate_tokens(f.readline)]
        newcode = untokenize(t1)
        readline = iter(newcode.splitlines(1)).next
        t2 = [tok[:2] for tok in generate_tokens(readline)]
        assert t1 == t2
    """
    ut = Untokenizer()
    return ut.untokenize(iterable)

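
# Editorial sketch (not part of the original commit): the "limited input"
# round trip from the docstring above, written with Python 3 iteration
# instead of the .next idiom. The helper name is hypothetical.
def _example_roundtrip(readline):
    t1 = [tok[:2] for tok in generate_tokens(readline)]
    newcode = untokenize(t1)
    readline2 = iter(newcode.splitlines(True)).__next__
    t2 = [tok[:2] for tok in generate_tokens(readline2)]
    assert t1 == t2
    return newcode
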
def generate_tokens(readline):
    """
    The generate_tokens() generator requires one argument, readline, which
    must be a callable object which provides the same interface as the
    readline() method of built-in file objects. Each call to the function
    should return one line of input as a string. Alternately, readline
    can be a callable function terminating with StopIteration:
        readline = open(myfile).next    # Example of alternate readline

    The generator produces 5-tuples with these members: the token type; the
    token string; a 2-tuple (srow, scol) of ints specifying the row and
    column where the token begins in the source; a 2-tuple (erow, ecol) of
    ints specifying the row and column where the token ends in the source;
    and the line on which the token was found. The line passed is the
    logical line; continuation lines are included.
    """
    lnum = parenlev = continued = 0
    namechars, numchars = string.ascii_letters + '_', '0123456789'
    contstr, needcont = '', 0
    contline = None
    indents = [0]

    while 1:                                   # loop over lines in stream
        try:
            line = readline()
        except StopIteration:
            line = ''
        lnum = lnum + 1
        pos, max = 0, len(line)

        if contstr:                            # continued string
            if not line:
                raise TokenError("EOF in multi-line string", strstart)
            endmatch = endprog.match(line)
            if endmatch:
                pos = end = endmatch.end(0)
                yield (STRING, contstr + line[:end],
                       strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                yield (ERRORTOKEN, contstr + line,
                       strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ': column = column + 1
                elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize
                elif line[pos] == '\f': column = 0
                else: break
                pos = pos + 1
            if pos == max: break

            if line[pos] in '#\r\n':           # skip comments or blank lines
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    nl_pos = pos + len(comment_token)
                    yield (COMMENT, comment_token,
                           (lnum, pos), (lnum, pos + len(comment_token)), line)
                    yield (NL, line[nl_pos:],
                           (lnum, nl_pos), (lnum, len(line)), line)
                else:
                    yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
                           (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line))
                indents = indents[:-1]
                yield (DEDENT, '', (lnum, pos), (lnum, pos), line)

        else:                                  # continued statement
            if not line:
                raise TokenError("EOF in multi-line statement", (lnum, 0))
            continued = 0

        while pos < max:
            pseudomatch = pseudoprog.match(line, pos)
            if pseudomatch:                                # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                token, initial = line[start:end], line[start]

                if initial in numchars or \
                   (initial == '.' and token != '.'):      # ordinary number
                    yield (NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    newline = NEWLINE
                    if parenlev > 0:
                        newline = NL
                    yield (newline, token, spos, epos, line)
                elif initial == '#':
                    assert not token.endswith("\n")
                    yield (COMMENT, token, spos, epos, line)
                elif token in triple_quoted:
                    endprog = endprogs[token]
                    endmatch = endprog.match(line, pos)
                    if endmatch:                           # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield (STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)           # multiple lines
                        contstr = line[start:]
                        contline = line
                        break
                elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                    if token[-1] == '\n':                  # continued string
                        strstart = (lnum, start)
                        endprog = (endprogs[initial] or endprogs[token[1]] or
                                   endprogs[token[2]])
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                                  # ordinary string
                        yield (STRING, token, spos, epos, line)
                elif initial in namechars:                 # ordinary name
                    yield (NAME, token, spos, epos, line)
                elif initial == '\\':                      # continued stmt
                    # This yield is new; needed for better idempotency:
                    yield (NL, token, spos, (lnum, pos), line)
                    continued = 1
                else:
                    if initial in '([{': parenlev = parenlev + 1
                    elif initial in ')]}': parenlev = parenlev - 1
                    yield (OP, token, spos, epos, line)
            else:
                yield (ERRORTOKEN, line[pos],
                       (lnum, pos), (lnum, pos+1), line)
                pos = pos + 1

    for indent in indents[1:]:                 # pop remaining indent levels
        yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')

if __name__ == '__main__':                     # testing
    import sys
    if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline)
    else: tokenize(sys.stdin.readline)
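
# Editorial usage sketch (not part of the original commit): the shape of the
# 5-tuples yielded by generate_tokens() above. The helper name is
# hypothetical.
def _example_token_stream():
    import io
    readline = io.StringIO('x = 1\n').readline
    # 'x = 1\n' yields a NAME, an OP, a NUMBER, a NEWLINE and an ENDMARKER.
    return [tok[:4] for tok in generate_tokens(readline)]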
456
jedi/parser/pytree.py
Normal file
@@ -0,0 +1,456 @@
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""
Python parse tree definitions.

This is a very concrete parse tree; we need to keep every token and
even the comments and whitespace between tokens.

There's also a pattern matching implementation here.
"""

__author__ = "Guido van Rossum <guido@python.org>"

import sys
import os

from . import pgen2

HUGE = 0x7FFFFFFF  # maximum repeat count, default max

_type_reprs = {}


# The grammar file
_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "grammar.txt")


class Symbols(object):

    def __init__(self, grammar):
        """Initializer.

        Creates an attribute for each grammar symbol (nonterminal),
        whose value is the symbol's type (an int >= 256).
        """
        for name, symbol in grammar.symbol2number.items():
            setattr(self, name, symbol)


python_grammar = pgen2.load_grammar(_GRAMMAR_FILE)

python_symbols = Symbols(python_grammar)

python_grammar_no_print_statement = python_grammar.copy()
del python_grammar_no_print_statement.keywords["print"]


def type_repr(type_num):
    global _type_reprs
    if not _type_reprs:
        # printing tokens is possible but not as useful
        # from .pgen2 import token // token.__dict__.items():
        for name, val in python_symbols.__dict__.items():
            if type(val) == int:
                _type_reprs[val] = name
    return _type_reprs.setdefault(type_num, type_num)


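# Editorial sketch (not part of the original commit): python_symbols carries
# one attribute per nonterminal of grammar.txt, and type_repr() maps the
# numeric type back to that name. The helper name is hypothetical.
def _example_symbol_names():
    funcdef_type = python_symbols.funcdef   # an int >= 256
    assert type_repr(funcdef_type) == 'funcdef'
    return funcdef_type

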
class Base(object):

    """
    Abstract base class for Node and Leaf.

    This provides some default functionality and boilerplate using the
    template pattern.

    A node may be a subnode of at most one parent.
    """

    # Default values for instance variables
    type = None    # int: token number (< 256) or symbol number (>= 256)
    parent = None  # Parent node pointer, or None
    children = ()  # Tuple of subnodes
    was_changed = False
    was_checked = False

    def __new__(cls, *args, **kwds):
        """Constructor that prevents Base from being instantiated."""
        assert cls is not Base, "Cannot instantiate Base"
        return object.__new__(cls)

    def __eq__(self, other):
        """
        Compare two nodes for equality.

        This calls the method _eq().
        """
        if self.__class__ is not other.__class__:
            return NotImplemented
        return self._eq(other)

    __hash__ = None  # For Py3 compatibility.

    def __ne__(self, other):
        """
        Compare two nodes for inequality.

        This calls the method _eq().
        """
        if self.__class__ is not other.__class__:
            return NotImplemented
        return not self._eq(other)

    def _eq(self, other):
        """
        Compare two nodes for equality.

        This is called by __eq__ and __ne__. It is only called if the two nodes
        have the same type. This must be implemented by the concrete subclass.
        Nodes should be considered equal if they have the same structure,
        ignoring the prefix string and other context information.
        """
        raise NotImplementedError

    def clone(self):
        """
        Return a cloned (deep) copy of self.

        This must be implemented by the concrete subclass.
        """
        raise NotImplementedError

    def post_order(self):
        """
        Return a post-order iterator for the tree.

        This must be implemented by the concrete subclass.
        """
        raise NotImplementedError

    def pre_order(self):
        """
        Return a pre-order iterator for the tree.

        This must be implemented by the concrete subclass.
        """
        raise NotImplementedError

    def replace(self, new):
        """Replace this node with a new one in the parent."""
        assert self.parent is not None, str(self)
        assert new is not None
        if not isinstance(new, list):
            new = [new]
        l_children = []
        found = False
        for ch in self.parent.children:
            if ch is self:
                assert not found, (self.parent.children, self, new)
                if new is not None:
                    l_children.extend(new)
                found = True
            else:
                l_children.append(ch)
        assert found, (self.children, self, new)
        self.parent.changed()
        self.parent.children = l_children
        for x in new:
            x.parent = self.parent
        self.parent = None

    def get_lineno(self):
        """Return the line number which generated the invocant node."""
        node = self
        while not isinstance(node, Leaf):
            if not node.children:
                return
            node = node.children[0]
        return node.lineno

    def changed(self):
        if self.parent:
            self.parent.changed()
        self.was_changed = True

    def remove(self):
        """
        Remove the node from the tree. Returns the position of the node in its
        parent's children before it was removed.
        """
        if self.parent:
            for i, node in enumerate(self.parent.children):
                if node is self:
                    self.parent.changed()
                    del self.parent.children[i]
                    self.parent = None
                    return i

    @property
    def next_sibling(self):
        """
        The node immediately following the invocant in their parent's children
        list. If the invocant does not have a next sibling, it is None.
        """
        if self.parent is None:
            return None

        # Can't use index(); we need to test by identity
        for i, child in enumerate(self.parent.children):
            if child is self:
                try:
                    return self.parent.children[i + 1]
                except IndexError:
                    return None

    @property
    def prev_sibling(self):
        """
        The node immediately preceding the invocant in their parent's children
        list. If the invocant does not have a previous sibling, it is None.
        """
        if self.parent is None:
            return None

        # Can't use index(); we need to test by identity
        for i, child in enumerate(self.parent.children):
            if child is self:
                if i == 0:
                    return None
                return self.parent.children[i - 1]

    def leaves(self):
        for child in self.children:
            for leave in child.leaves():
                yield leave

    def depth(self):
        if self.parent is None:
            return 0
        return 1 + self.parent.depth()

    def get_suffix(self):
        """
        Return the string immediately following the invocant node. This is
        effectively equivalent to node.next_sibling.prefix
        """
        next_sib = self.next_sibling
        if next_sib is None:
            return ""
        return next_sib.prefix

    if sys.version_info < (3, 0):
        def __str__(self):
            return unicode(self).encode("ascii")


class Node(Base):
    """Concrete implementation for interior nodes."""

    def __init__(self, type, children,
                 context=None,
                 prefix=None,
                 fixers_applied=None):
        """
        Initializer.

        Takes a type constant (a symbol number >= 256), a sequence of
        child nodes, and an optional context keyword argument.

        As a side effect, the parent pointers of the children are updated.
        """
        assert type >= 256, type
        self.type = type
        self.children = list(children)
        for ch in self.children:
            assert ch.parent is None, repr(ch)
            ch.parent = self
        if prefix is not None:
            self.prefix = prefix
        if fixers_applied:
            self.fixers_applied = fixers_applied[:]
        else:
            self.fixers_applied = None

    def __repr__(self):
        """Return a canonical string representation."""
        return "%s(%s, %r)" % (self.__class__.__name__,
                               type_repr(self.type),
                               self.children)

    def __unicode__(self):
        """
        Return a pretty string representation.

        This reproduces the input source exactly.
        """
        return "".join(map(str, self.children))

    if sys.version_info > (3, 0):
        __str__ = __unicode__

    def _eq(self, other):
        """Compare two nodes for equality."""
        return (self.type, self.children) == (other.type, other.children)

    def clone(self):
        """Return a cloned (deep) copy of self."""
        return Node(self.type, [ch.clone() for ch in self.children],
                    fixers_applied=self.fixers_applied)

    def post_order(self):
        """Return a post-order iterator for the tree."""
        for child in self.children:
            for el in child.post_order():
                yield el
        yield self

    def pre_order(self):
        """Return a pre-order iterator for the tree."""
        yield self
        for child in self.children:
            for el in child.pre_order():
                yield el

    def _prefix_getter(self):
        """
        The whitespace and comments preceding this node in the input.
        """
        if not self.children:
            return ""
        return self.children[0].prefix

    def _prefix_setter(self, prefix):
        if self.children:
            self.children[0].prefix = prefix

    prefix = property(_prefix_getter, _prefix_setter)

    def set_child(self, i, child):
        """
        Equivalent to 'node.children[i] = child'. This method also sets the
        child's parent attribute appropriately.
        """
        child.parent = self
        self.children[i].parent = None
        self.children[i] = child
        self.changed()

    def insert_child(self, i, child):
        """
        Equivalent to 'node.children.insert(i, child)'. This method also sets
        the child's parent attribute appropriately.
        """
        child.parent = self
        self.children.insert(i, child)
        self.changed()

    def append_child(self, child):
        """
        Equivalent to 'node.children.append(child)'. This method also sets the
        child's parent attribute appropriately.
        """
        child.parent = self
        self.children.append(child)
        self.changed()


class Leaf(Base):

    """Concrete implementation for leaf nodes."""

    # Default values for instance variables
    _prefix = ""  # Whitespace and comments preceding this token in the input
    lineno = 0    # Line where this token starts in the input
    column = 0    # Column where this token starts in the input

    def __init__(self, type, value,
                 context=None,
                 prefix=None,
                 fixers_applied=[]):
        """
        Initializer.

        Takes a type constant (a token number < 256), a string value, and an
        optional context keyword argument.
        """
        assert 0 <= type < 256, type
        if context is not None:
            self._prefix, (self.lineno, self.column) = context
        self.type = type
        self.value = value
        if prefix is not None:
            self._prefix = prefix
        self.fixers_applied = fixers_applied[:]

    def __repr__(self):
        """Return a canonical string representation."""
        return "%s(%r, %r)" % (self.__class__.__name__,
                               self.type,
                               self.value)

    def __unicode__(self):
        """
        Return a pretty string representation.

        This reproduces the input source exactly.
        """
        return self.prefix + str(self.value)

    if sys.version_info > (3, 0):
        __str__ = __unicode__

    def _eq(self, other):
        """Compare two nodes for equality."""
        return (self.type, self.value) == (other.type, other.value)

    def clone(self):
        """Return a cloned (deep) copy of self."""
        return Leaf(self.type, self.value,
                    (self.prefix, (self.lineno, self.column)),
                    fixers_applied=self.fixers_applied)

    def leaves(self):
        yield self

    def post_order(self):
        """Return a post-order iterator for the tree."""
        yield self

    def pre_order(self):
        """Return a pre-order iterator for the tree."""
        yield self

    def _prefix_getter(self):
        """
        The whitespace and comments preceding this token in the input.
        """
        return self._prefix

    def _prefix_setter(self, prefix):
        self.changed()
        self._prefix = prefix

    prefix = property(_prefix_getter, _prefix_setter)


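# Editorial sketch (not part of the original commit): assembling a tiny tree
# by hand with the classes above. Token numbers 1, 2 and 22 are assumed to be
# NAME, NUMBER and EQUAL as in pgen2's token module; the helper name is
# hypothetical.
def _example_manual_tree():
    name = Leaf(1, 'x')
    eq = Leaf(22, '=', prefix=' ')
    number = Leaf(2, '1', prefix=' ')
    stmt = Node(python_symbols.expr_stmt, [name, eq, number])
    assert str(stmt) == 'x = 1'
    return stmt

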
def convert(gr, raw_node):
    """
    Convert raw node information to a Node or Leaf instance.

    This is passed to the parser driver which calls it whenever a reduction of a
    grammar rule produces a new complete node, so that the tree is built
    strictly bottom-up.
    """
    #import pdb; pdb.set_trace()
    print(raw_node)
    type, value, context, children = raw_node
    if children or type in gr.number2symbol:
        # If there's exactly one child, return that child instead of
        # creating a new node.
        if len(children) == 1:
            return children[0]
        return Node(type, children, context=context)
    else:
        return Leaf(type, value, context=context)
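
# Editorial note (not part of the original commit): convert() is the
# conversion callback handed to the pgen2 driver; each raw_node is the tuple
# (type, value, context, children) unpacked above. A minimal sketch of that
# wiring, mirroring test/test_new_parser.py below:
#
#     from jedi.parser.pgen2 import Driver
#     d = Driver(python_grammar, convert=convert)
#     tree = d.parse_string('x = 1\n')   # a Node, or a collapsed single child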
27
test/test_new_parser.py
Normal file
@@ -0,0 +1,27 @@
import logging

from jedi.parser import pytree
from jedi.parser.pgen2 import Driver


def test_basic():
    #if self.options["print_function"]:
    #    python_grammar = pygram.python_grammar_no_print_statement
    #else:
    # When this is True, the refactor*() methods will call write_file() for
    # files processed even if they were not changed during refactoring. If
    # and only if the refactor method's write parameter was True.
    logger = logging.getLogger("RefactoringTool")
    d = Driver(pytree.python_grammar, convert=pytree.convert, logger=logger)

    print(d)
    tree = d.parse_string('wblabla* 1\t\n')
    print(repr(tree))
    print(tree)
    #import pdb; pdb.set_trace()
    print(repr(d.parse_string('def x(): pass\n')))
    print()
    x = d.parse_string('\na #pass\n')
    print(repr(x))
    print(x)
    assert False