
Trying to replace the old pgen2 token module with a token module more tightly coupled to the standard library.

Dave Halter
2014-12-16 01:52:15 +01:00
parent eaace104dd
commit d9d3740c92
5 changed files with 112 additions and 89 deletions
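
The approach in a nutshell: instead of pgen2 carrying its own token-number table, jedi now starts from the standard library's token module and layers on the few extras it needs (COMMENT, NL, and a map from operator strings to their specific codes). A minimal sketch of that pattern (not the committed code, which follows below):

    # Reuse the stdlib numbering and append the missing codes after it,
    # so these codes cannot drift out of sync with the standard library.
    from token import *  # NAME, OP, COLON, ..., tok_name, N_TOKENS

    COMMENT = N_TOKENS             # stdlib token defines no COMMENT code
    tok_name[COMMENT] = 'COMMENT'
    N_TOKENS += 1

    # Operator strings resolve to specific codes, since the tokenizer only
    # reports the generic OP for them.
    opmap = {':': COLON, '**=': DOUBLESTAREQUAL}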

jedi/parser/__init__.py

@@ -19,7 +19,7 @@ import os
 from jedi.parser import tree as pt
 from jedi.parser import tokenize
-from jedi.parser.pgen2 import grammar
+from jedi.parser import token
 from jedi.parser.pgen2.pgen import generate_grammar
 from jedi.parser.pgen2.parse import PgenParser
@@ -184,11 +184,11 @@ class Parser(object):
                 arr = self.scope_names_stack[-1].setdefault(name.value, [])
                 arr.append(name)
             return name
-        elif type == tokenize.STRING:
+        elif type == token.STRING:
             return pt.String(value, start_pos, prefix)
-        elif type == tokenize.NUMBER:
+        elif type == token.NUMBER:
             return pt.Number(value, start_pos, prefix)
-        elif type in (tokenize.NEWLINE, tokenize.ENDMARKER):
+        elif type in (token.NEWLINE, token.ENDMARKER):
             return pt.Whitespace(value, start_pos, prefix)
         else:
             return pt.Operator(value, start_pos, prefix)
@@ -228,12 +228,12 @@ class Parser(object):
                     nodes = suite_nodes
                 stack[index]

-        #print('err', tokenize.tok_name[typ], repr(value), start_pos, len(stack), index)
+        #print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
         self._stack_removal(grammar, stack, index + 1, value, start_pos)
         if value in ('import', 'from', 'class', 'def', 'try', 'while', 'return'):
             # Those can always be new statements.
             add_token_callback(typ, value, prefix, start_pos)
-        elif typ == tokenize.DEDENT:
+        elif typ == token.DEDENT:
             if symbol == 'suite':
                 # If a function or anything else contains a suite that is
                 # "empty" (just NEWLINE/INDENT), we remove it. If it's not
@@ -282,7 +282,7 @@
     def _tokenize(self, tokenizer):
         """
         while first_pos[1] <= self._scope.start_pos[1] \
-                and (token_type == tokenize.NAME or tok_str in ('(', '['))\
+                and (token_type == token.NAME or tok_str in ('(', '['))\
                 and self._scope != self.module:
             self._scope.end_pos = first_pos
             self._scope = self._scope.parent
@@ -292,8 +292,8 @@
         """
         for typ, value, start_pos, prefix in tokenizer:
-            if typ == tokenize.OP:
-                typ = grammar.opmap[value]
+            if typ == token.OP:
+                typ = token.opmap[value]
             yield typ, value, prefix, start_pos

     def __repr__(self):
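
The `_tokenize` remap is the consumer side of the change: the tokenizer labels every operator with the generic OP code, while the pgen tables are keyed by the exact operator code. As a stand-alone sketch (same logic as the diff above, names from the new module):

    from jedi.parser import token

    def remap_ops(tokens):
        # Swap the generic OP code for the specific one, e.g. ':' -> COLON,
        # so the parser can find the token in its grammar tables.
        for typ, value, start_pos, prefix in tokens:
            if typ == token.OP:
                typ = token.opmap[value]
            yield typ, value, prefix, start_pos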

jedi/parser/pgen2/grammar.py

@@ -19,9 +19,6 @@ fallback token code OP, but the parser needs the actual token code.
 # Python imports
 import pickle

-# Local imports
-from . import token
-

 class Grammar(object):
     """Pgen parsing tables conversion class.
@@ -126,62 +123,3 @@ class Grammar(object):
         print("labels")
         pprint(self.labels)
         print("start", self.start)
-
-
-# Map from operator to number (since tokenize doesn't do this)
-opmap_raw = """
-( LPAR
-) RPAR
-[ LSQB
-] RSQB
-: COLON
-, COMMA
-; SEMI
-+ PLUS
-- MINUS
-* STAR
-/ SLASH
-| VBAR
-& AMPER
-< LESS
-> GREATER
-= EQUAL
-. DOT
-% PERCENT
-` BACKQUOTE
-{ LBRACE
-} RBRACE
-@ AT
-== EQEQUAL
-!= NOTEQUAL
-<> NOTEQUAL
-<= LESSEQUAL
->= GREATEREQUAL
-~ TILDE
-^ CIRCUMFLEX
-<< LEFTSHIFT
->> RIGHTSHIFT
-** DOUBLESTAR
-+= PLUSEQUAL
--= MINEQUAL
-*= STAREQUAL
-/= SLASHEQUAL
-%= PERCENTEQUAL
-&= AMPEREQUAL
-|= VBAREQUAL
-^= CIRCUMFLEXEQUAL
-<<= LEFTSHIFTEQUAL
->>= RIGHTSHIFTEQUAL
-**= DOUBLESTAREQUAL
-// DOUBLESLASH
-//= DOUBLESLASHEQUAL
--> RARROW
-... ELLIPSIS
-"""
-
-opmap = {}
-for line in opmap_raw.splitlines():
-    if line:
-        op, name = line.split()
-        opmap[op] = getattr(token, name)

jedi/parser/pgen2/pgen.py

@@ -6,7 +6,9 @@
 # Modifications are dual-licensed: MIT and PSF.

 # Pgen imports
-from . import grammar, tokenize
+from . import grammar
+from jedi.parser import token
+from jedi.parser import tokenize

 class ParserGenerator(object):
@@ -74,9 +76,9 @@ class ParserGenerator(object):
                 return ilabel
         else:
             # A named token (NAME, NUMBER, STRING)
-            itoken = getattr(tokenize, label, None)
+            itoken = getattr(token, label, None)
             assert isinstance(itoken, int), label
-            assert itoken in tokenize.tok_name, label
+            assert itoken in token.tok_name, label
             if itoken in c.tokens:
                 return c.tokens[itoken]
             else:
@@ -92,12 +94,12 @@ class ParserGenerator(object):
                 if value in c.keywords:
                     return c.keywords[value]
                 else:
-                    c.labels.append((tokenize.NAME, value))
+                    c.labels.append((token.NAME, value))
                     c.keywords[value] = ilabel
                     return ilabel
             else:
                 # An operator (any non-numeric token)
-                itoken = grammar.opmap[value] # Fails if unknown token
+                itoken = token.opmap[value] # Fails if unknown token
                 if itoken in c.tokens:
                     return c.tokens[itoken]
                 else:
@@ -147,14 +149,14 @@ class ParserGenerator(object):
         dfas = {}
         startsymbol = None
         # MSTART: (NEWLINE | RULE)* ENDMARKER
-        while self.type != tokenize.ENDMARKER:
-            while self.type == tokenize.NEWLINE:
+        while self.type != token.ENDMARKER:
+            while self.type == token.NEWLINE:
                 self.gettoken()
             # RULE: NAME ':' RHS NEWLINE
-            name = self.expect(tokenize.NAME)
-            self.expect(tokenize.OP, ":")
+            name = self.expect(token.NAME)
+            self.expect(token.OP, ":")
             a, z = self.parse_rhs()
-            self.expect(tokenize.NEWLINE)
+            self.expect(token.NEWLINE)
             #self.dump_nfa(name, a, z)
             dfa = self.make_dfa(a, z)
             #self.dump_dfa(name, dfa)
@@ -271,7 +273,7 @@
         # ALT: ITEM+
         a, b = self.parse_item()
         while (self.value in ("(", "[") or
-               self.type in (tokenize.NAME, tokenize.STRING)):
+               self.type in (token.NAME, token.STRING)):
             c, d = self.parse_item()
             b.addarc(c)
             b = d
@@ -282,7 +284,7 @@
         if self.value == "[":
             self.gettoken()
             a, z = self.parse_rhs()
-            self.expect(tokenize.OP, "]")
+            self.expect(token.OP, "]")
             a.addarc(z)
             return a, z
         else:
@@ -302,9 +304,9 @@
         if self.value == "(":
             self.gettoken()
             a, z = self.parse_rhs()
-            self.expect(tokenize.OP, ")")
+            self.expect(token.OP, ")")
             return a, z
-        elif self.type in (tokenize.NAME, tokenize.STRING):
+        elif self.type in (token.NAME, token.STRING):
             a = NFAState()
             z = NFAState()
             a.addarc(z, self.value)
@@ -324,9 +326,9 @@
     def gettoken(self):
         tup = next(self.generator)
-        while tup[0] in (tokenize.COMMENT, tokenize.NL):
+        while tup[0] in (token.COMMENT, token.NL):
             tup = next(self.generator)
-        self.type, self.value, self.begin, self.end, self.line = tup
+        self.type, self.value, self.begin, prefix = tup
         #print tokenize.tok_name[self.type], repr(self.value)

     def raise_error(self, msg, *args):
def raise_error(self, msg, *args):
@@ -335,8 +337,9 @@
             msg = msg % args
         except:
             msg = " ".join([msg] + list(map(str, args)))
-        raise SyntaxError(msg, (self.filename, self.end[0],
-                                self.end[1], self.line))
+        line = open(self.filename).readlines()[self.begin[0]]
+        raise SyntaxError(msg, (self.filename, self.begin[0],
+                                self.begin[1], line))

 class NFAState(object):
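
A note on the raise_error change above: the new four-item token tuples no longer carry the physical source line, so the offending line is re-read from the grammar file instead. The tuple handed to SyntaxError follows the standard (filename, lineno, offset, text) convention, which the exception exposes as attributes; a quick illustration with made-up values:

    # SyntaxError unpacks (filename, lineno, offset, text) into attributes.
    err = SyntaxError("expected ':'", ("grammar.txt", 3, 8, "funcdef NAME parameters\n"))
    print(err.filename, err.lineno, err.offset)  # grammar.txt 3 8
    print(err.text)                              # funcdef NAME parameters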

jedi/parser/token.py (new file, 79 lines added)

@@ -0,0 +1,79 @@
+from __future__ import absolute_import
+
+from jedi._compatibility import is_py3
+
+from token import *
+
+
+COMMENT = N_TOKENS
+tok_name[COMMENT] = 'COMMENT'
+N_TOKENS += 1
+
+NL = N_TOKENS
+tok_name[NL] = 'NL'
+N_TOKENS += 1
+
+if is_py3:
+    BACKQUOTE = N_TOKENS
+    tok_name[BACKQUOTE] = 'BACKQUOTE'
+    N_TOKENS += 1
+
+
+# Map from operator to number (since tokenize doesn't do this)
+opmap_raw = """
+( LPAR
+) RPAR
+[ LSQB
+] RSQB
+: COLON
+, COMMA
+; SEMI
++ PLUS
+- MINUS
+* STAR
+/ SLASH
+| VBAR
+& AMPER
+< LESS
+> GREATER
+= EQUAL
+. DOT
+% PERCENT
+` BACKQUOTE
+{ LBRACE
+} RBRACE
+@ AT
+== EQEQUAL
+!= NOTEQUAL
+<> NOTEQUAL
+<= LESSEQUAL
+>= GREATEREQUAL
+~ TILDE
+^ CIRCUMFLEX
+<< LEFTSHIFT
+>> RIGHTSHIFT
+** DOUBLESTAR
++= PLUSEQUAL
+-= MINEQUAL
+*= STAREQUAL
+/= SLASHEQUAL
+%= PERCENTEQUAL
+&= AMPEREQUAL
+|= VBAREQUAL
+^= CIRCUMFLEXEQUAL
+<<= LEFTSHIFTEQUAL
+>>= RIGHTSHIFTEQUAL
+**= DOUBLESTAREQUAL
+// DOUBLESLASH
+//= DOUBLESLASHEQUAL
+-> RARROW
+... ELLIPSIS
+"""
+
+opmap = {}
+for line in opmap_raw.splitlines():
+    if line:
+        op, name = line.split()
+        if is_py3 and name == 'BACKQUOTE':
+            continue
+        opmap[op] = globals()[name]
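
The net effect of the new module, as a short usage sketch (assuming jedi.parser.token imports cleanly):

    from jedi.parser import token

    # Operator strings resolve to the same numbers the stdlib assigns...
    assert token.opmap[':'] == token.COLON
    assert token.opmap['**='] == token.DOUBLESTAREQUAL
    # ...while the appended codes sit just past the stdlib range.
    print(token.tok_name[token.COMMENT], token.tok_name[token.NL])

Later CPython versions adopted the same idea in the stdlib itself as token.EXACT_TOKEN_TYPES (3.8+).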

jedi/parser/tokenize.py

@@ -16,6 +16,9 @@ import re
 from io import StringIO
 from token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, NAME, OP,
                    ERRORTOKEN, NEWLINE, INDENT, DEDENT)
+import token

 from jedi._compatibility import is_py3

 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")