Make some parsing work.

This commit is contained in:
Dave Halter
2017-08-24 01:39:17 +02:00
parent 033e880408
commit 3c3e7f5317
2 changed files with 35 additions and 13 deletions
+6 -3
View File
@@ -199,21 +199,24 @@ class PythonGrammar(Grammar):
class PythonFStringGrammar(Grammar):
    """
    Grammar for f-string contents.

    It is built from ``fstring.GRAMMAR`` and uses the tokenizer, parser and
    token namespace provided by the ``fstring`` module.
    """
    _token_namespace = fstring.TokenNamespace

    def __init__(self):
        super(PythonFStringGrammar, self).__init__(
            text=fstring.GRAMMAR,
            tokenizer=fstring.tokenize,
            parser=fstring.Parser
        )

    def parse(self, code, **kwargs):
        """Parse ``code``; keyword arguments are forwarded to ``_parse``."""
        return self._parse(code, **kwargs)

    def _parse(self, code, error_recovery=True, start_pos=(1, 0)):
        """
        Tokenize ``code`` and run the parser over the resulting tokens.

        :param code: The text handed to the tokenizer.
        :param error_recovery: Passed through to the parser constructor.
        :param start_pos: Passed to the tokenizer as ``start_pos``.
        :return: Whatever the parser's ``parse`` returns for the tokens.
        """
        # The tokenizer receives the code itself together with the requested
        # start position.
        tokens = self._tokenizer(code, start_pos=start_pos)
        p = self._parser(
            self._pgen_grammar,
            error_recovery=error_recovery,
            # The start rule is defined next to the grammar text it belongs to.
            start_symbol=fstring.START_SYMBOL,
        )
        return p.parse(tokens=tokens)
+29 -10
View File
@@ -3,12 +3,13 @@ import re
from parso.utils import PythonVersionInfo from parso.utils import PythonVersionInfo
from parso.python.tokenize import Token from parso.python.tokenize import Token
from parso.python import token from parso.python import token
from parso import parser
version36 = PythonVersionInfo(3, 6) version36 = PythonVersionInfo(3, 6)
class TokenNamespace: class TokenNamespace:
LBRACE = token.LBRACE, LBRACE = token.LBRACE
RBRACE = token.RBRACE RBRACE = token.RBRACE
ENDMARKER = token.ENDMARKER ENDMARKER = token.ENDMARKER
ERRORTOKEN = token.ERRORTOKEN ERRORTOKEN = token.ERRORTOKEN
@@ -25,18 +26,20 @@ class TokenNamespace:
return cls.RBRACE return cls.RBRACE
elif string == '!': elif string == '!':
return cls.EXCLAMATION_MARK return cls.EXCLAMATION_MARK
elif string == ':':
return cls.COLON
return getattr(cls, string) return getattr(cls, string)
# Name of the grammar rule that parsing starts from.
START_SYMBOL = 'fstring'
GRAMMAR = """
fstring: expression* ENDMARKER
expression: '{' PYTHON_EXPR [ '!' CONVERSION ] [ ':' expression* ] '}'
"""

# Text in front of the next single brace; doubled braces ('{{' and '}}')
# count as part of that text.
_prefix = r'((?:[^{}]+|\}\}|\{\{)*)'
# The prefix followed by an opening brace, a closing brace or the end of input.
_expr = _prefix + r'(\{|\}|$)'
# Group 1 is the run of characters up to the next bracket, brace, colon, quote
# or exclamation mark; group 2 is that following character (empty at the end).
_in_expr = r'([^{}\[\]:"\'!]*)(.?)'
# There's only one conversion character allowed. But the rules have to be
# checked later anyway, so allow more here. This makes error recovery nicer.
_conversion = r'([^={}:]+)(.?)'
@@ -46,13 +49,17 @@ _compiled_in_expr = re.compile(_in_expr)
_compiled_conversion = re.compile(_conversion) _compiled_conversion = re.compile(_conversion)
def tokenize(*args, **kwargs):
    """
    Yield the tokens produced by ``_tokenize`` unchanged.

    All positional and keyword arguments are forwarded to ``_tokenize``.
    Nothing is written to stdout; the tokens are only passed along.
    """
    # A plain loop is used instead of ``yield from`` to stay in line with the
    # Python-2-compatible ``super(Class, self)`` calls used alongside this code.
    for t in _tokenize(*args, **kwargs):
        yield t
def _tokenize(code, start_pos=(1, 0)):
def tok(value, type=None, prefix=''): def tok(value, type=None, prefix=''):
if type is None: if type is None:
type = TokenNamespace.generate_token_id(found) type = TokenNamespace.generate_token_id(value)
line = column=1
return Token(type, value, (line, column), prefix) return Token(type, value, (line, column), prefix)
code = ''
start = 0 start = 0
while True: while True:
match = _compiled_expr.match(code, start) match = _compiled_expr.match(code, start)
@@ -74,7 +81,8 @@ def tokenize(code, start_pos=(1, 0)):
curly_count = 0 curly_count = 0
while True: while True:
expr_match = _compiled_in_expr.match(code, start) expr_match = _compiled_in_expr.match(code, start)
expression += expr_match.group(0) print(start, expr_match.group(1), expr_match.groups())
expression += expr_match.group(1)
found = expr_match.group(2) found = expr_match.group(2)
start = expr_match.end() start = expr_match.end()
@@ -120,7 +128,7 @@ def tokenize(code, start_pos=(1, 0)):
conversion_match = _compiled_conversion.match(code, start) conversion_match = _compiled_conversion.match(code, start)
found = conversion_match.group(2) found = conversion_match.group(2)
start = conversion_match.end() start = conversion_match.end()
yield tok(conversion_match.group(1)) yield tok(conversion_match.group(1), type=TokenNamespace.CONVERSION)
if found: if found:
yield tok(found) yield tok(found)
@@ -128,3 +136,14 @@ def tokenize(code, start_pos=(1, 0)):
# basically new tokens. # basically new tokens.
yield tok('', type=TokenNamespace.ENDMARKER, prefix=prefix) yield tok('', type=TokenNamespace.ENDMARKER, prefix=prefix)
class Parser(parser.BaseParser):
    """
    Parser that wraps a bare leaf result in an ``'fstring'`` node.
    """
    def parse(self, tokens):
        """
        Parse ``tokens`` with the base parser and normalise the result.

        If the base parser returns an instance of ``default_leaf``, it is
        wrapped in a ``default_node`` named ``'fstring'``; any other result
        is returned untouched.
        """
        result = super(Parser, self).parse(tokens)
        if not isinstance(result, self.default_leaf):
            return result
        # A lone leaf is the endmarker: without curly braces we get back a
        # non-module, but callers always want an fstring.
        return self.default_node('fstring', [result])