mirror of
https://github.com/davidhalter/parso.git
synced 2026-05-19 23:10:16 +08:00
Make some parsing work.
This commit is contained in:
+6
-3
@@ -199,21 +199,24 @@ class PythonGrammar(Grammar):
|
|||||||
|
|
||||||
|
|
||||||
class PythonFStringGrammar(Grammar):
|
class PythonFStringGrammar(Grammar):
|
||||||
|
_token_namespace = fstring.TokenNamespace
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(PythonFStringGrammar, self).__init__(
|
super(PythonFStringGrammar, self).__init__(
|
||||||
text=fstring.GRAMMAR,
|
text=fstring.GRAMMAR,
|
||||||
tokenizer=fstring.tokenize
|
tokenizer=fstring.tokenize,
|
||||||
|
parser=fstring.Parser
|
||||||
)
|
)
|
||||||
|
|
||||||
def parse(self, code, **kwargs):
|
def parse(self, code, **kwargs):
|
||||||
return self._parse(code, **kwargs)
|
return self._parse(code, **kwargs)
|
||||||
|
|
||||||
def _parse(self, code, error_recovery=True, start_pos=(1, 0)):
|
def _parse(self, code, error_recovery=True, start_pos=(1, 0)):
|
||||||
tokens = self._tokenizer(lines)
|
tokens = self._tokenizer(code, start_pos=start_pos)
|
||||||
p = self._parser(
|
p = self._parser(
|
||||||
self._pgen_grammar,
|
self._pgen_grammar,
|
||||||
error_recovery=error_recovery,
|
error_recovery=error_recovery,
|
||||||
start_symbol=start_symbol
|
start_symbol=fstring.START_SYMBOL,
|
||||||
)
|
)
|
||||||
return p.parse(tokens=tokens)
|
return p.parse(tokens=tokens)
|
||||||
|
|
||||||
|
|||||||
+29
-10
@@ -3,12 +3,13 @@ import re
|
|||||||
from parso.utils import PythonVersionInfo
|
from parso.utils import PythonVersionInfo
|
||||||
from parso.python.tokenize import Token
|
from parso.python.tokenize import Token
|
||||||
from parso.python import token
|
from parso.python import token
|
||||||
|
from parso import parser
|
||||||
|
|
||||||
version36 = PythonVersionInfo(3, 6)
|
version36 = PythonVersionInfo(3, 6)
|
||||||
|
|
||||||
|
|
||||||
class TokenNamespace:
|
class TokenNamespace:
|
||||||
LBRACE = token.LBRACE,
|
LBRACE = token.LBRACE
|
||||||
RBRACE = token.RBRACE
|
RBRACE = token.RBRACE
|
||||||
ENDMARKER = token.ENDMARKER
|
ENDMARKER = token.ENDMARKER
|
||||||
ERRORTOKEN = token.ERRORTOKEN
|
ERRORTOKEN = token.ERRORTOKEN
|
||||||
@@ -25,18 +26,20 @@ class TokenNamespace:
|
|||||||
return cls.RBRACE
|
return cls.RBRACE
|
||||||
elif string == '!':
|
elif string == '!':
|
||||||
return cls.EXCLAMATION_MARK
|
return cls.EXCLAMATION_MARK
|
||||||
|
elif string == ':':
|
||||||
|
return cls.COLON
|
||||||
return getattr(cls, string)
|
return getattr(cls, string)
|
||||||
|
|
||||||
|
|
||||||
|
START_SYMBOL = 'fstring'
|
||||||
GRAMMAR = """
|
GRAMMAR = """
|
||||||
fstring: expressions ENDMARKER
|
fstring: expression* ENDMARKER
|
||||||
expressions: expression*
|
expression: '{' PYTHON_EXPR [ '!' CONVERSION ] [ ':' expression* ] '}'
|
||||||
expression: '{' PYTHON_EXPR [ '!' CONVERSION ] [ ':' expressions ] '}'
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_prefix = r'((?:[^{}]+|\}\}|\{\{)*)'
|
_prefix = r'((?:[^{}]+|\}\}|\{\{)*)'
|
||||||
_expr = _prefix + r'(\{|\}|$)'
|
_expr = _prefix + r'(\{|\}|$)'
|
||||||
_in_expr = r'[^{}\[\]:"\'!]*(.?)'
|
_in_expr = r'([^{}\[\]:"\'!]*)(.?)'
|
||||||
# There's only one conversion character allowed. But the rules have to be
|
# There's only one conversion character allowed. But the rules have to be
|
||||||
# checked later anyway, so allow more here. This makes error recovery nicer.
|
# checked later anyway, so allow more here. This makes error recovery nicer.
|
||||||
_conversion = r'([^={}:]+)(.?)'
|
_conversion = r'([^={}:]+)(.?)'
|
||||||
@@ -46,13 +49,17 @@ _compiled_in_expr = re.compile(_in_expr)
|
|||||||
_compiled_conversion = re.compile(_conversion)
|
_compiled_conversion = re.compile(_conversion)
|
||||||
|
|
||||||
|
|
||||||
def tokenize(code, start_pos=(1, 0)):
|
def tokenize(*args, **kwargs):
|
||||||
|
for t in _tokenize(*args, **kwargs):
|
||||||
|
print(t)
|
||||||
|
yield t
|
||||||
|
def _tokenize(code, start_pos=(1, 0)):
|
||||||
def tok(value, type=None, prefix=''):
|
def tok(value, type=None, prefix=''):
|
||||||
if type is None:
|
if type is None:
|
||||||
type = TokenNamespace.generate_token_id(found)
|
type = TokenNamespace.generate_token_id(value)
|
||||||
|
line = column=1
|
||||||
return Token(type, value, (line, column), prefix)
|
return Token(type, value, (line, column), prefix)
|
||||||
|
|
||||||
code = ''
|
|
||||||
start = 0
|
start = 0
|
||||||
while True:
|
while True:
|
||||||
match = _compiled_expr.match(code, start)
|
match = _compiled_expr.match(code, start)
|
||||||
@@ -74,7 +81,8 @@ def tokenize(code, start_pos=(1, 0)):
|
|||||||
curly_count = 0
|
curly_count = 0
|
||||||
while True:
|
while True:
|
||||||
expr_match = _compiled_in_expr.match(code, start)
|
expr_match = _compiled_in_expr.match(code, start)
|
||||||
expression += expr_match.group(0)
|
print(start, expr_match.group(1), expr_match.groups())
|
||||||
|
expression += expr_match.group(1)
|
||||||
found = expr_match.group(2)
|
found = expr_match.group(2)
|
||||||
start = expr_match.end()
|
start = expr_match.end()
|
||||||
|
|
||||||
@@ -120,7 +128,7 @@ def tokenize(code, start_pos=(1, 0)):
|
|||||||
conversion_match = _compiled_conversion.match(code, start)
|
conversion_match = _compiled_conversion.match(code, start)
|
||||||
found = conversion_match.group(2)
|
found = conversion_match.group(2)
|
||||||
start = conversion_match.end()
|
start = conversion_match.end()
|
||||||
yield tok(conversion_match.group(1))
|
yield tok(conversion_match.group(1), type=TokenNamespace.CONVERSION)
|
||||||
if found:
|
if found:
|
||||||
yield tok(found)
|
yield tok(found)
|
||||||
|
|
||||||
@@ -128,3 +136,14 @@ def tokenize(code, start_pos=(1, 0)):
|
|||||||
# basically new tokens.
|
# basically new tokens.
|
||||||
|
|
||||||
yield tok('', type=TokenNamespace.ENDMARKER, prefix=prefix)
|
yield tok('', type=TokenNamespace.ENDMARKER, prefix=prefix)
|
||||||
|
|
||||||
|
|
||||||
|
class Parser(parser.BaseParser):
|
||||||
|
def parse(self, tokens):
|
||||||
|
node = super(Parser, self).parse(tokens)
|
||||||
|
if isinstance(node, self.default_leaf): # Is an endmarker.
|
||||||
|
# If there's no curly braces we get back a non-module. We always
|
||||||
|
# want an fstring.
|
||||||
|
node = self.default_node('fstring', [node])
|
||||||
|
|
||||||
|
return node
|
||||||
|
|||||||
Reference in New Issue
Block a user