mirror of
https://github.com/davidhalter/parso.git
synced 2025-12-30 08:04:40 +08:00
A first iteration of an fstring tokenizer/grammar.
parso/python/fstring.py (new file, 130 lines)
@@ -0,0 +1,130 @@
import re

from parso.utils import PythonVersionInfo
from parso.python.tokenize import Token
from parso.python import token


version36 = PythonVersionInfo(3, 6)


class TokenNamespace:
    LBRACE = token.LBRACE
    RBRACE = token.RBRACE
    ENDMARKER = token.ENDMARKER
    ERRORTOKEN = token.ERRORTOKEN
    COLON = token.COLON
    CONVERSION = 100
    PYTHON_EXPR = 101
    EXCLAMATION_MARK = 102
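
    # CONVERSION, PYTHON_EXPR and EXCLAMATION_MARK have no counterpart in
    # parso.python.token; the ids 100-102 are ad-hoc and simply chosen to
    # stay clear of the predefined token ids.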

    @classmethod
    def generate_token_id(cls, string):
        if string == '{':
            return cls.LBRACE
        elif string == '}':
            return cls.RBRACE
        elif string == '!':
            return cls.EXCLAMATION_MARK
        elif string == ':':
            return cls.COLON
        # Otherwise the string is a token name such as 'ENDMARKER'.
        return getattr(cls, string)


GRAMMAR = """
fstring: expressions ENDMARKER
expressions: expression*
expression: '{' PYTHON_EXPR [ '!' CONVERSION ] [ ':' expressions ] '}'
"""

_prefix = r'((?:[^{}]+|\}\}|\{\{)*)'
_expr = _prefix + r'(\{|\}|$)'
# Two groups: the expression text and the single character (if any) that
# stopped the scan.
_in_expr = r'([^{}\[\]:"\'!]*)(.?)'
|
||||
# There's only one conversion character allowed. But the rules have to be
|
||||
# checked later anyway, so allow more here. This makes error recovery nicer.
|
||||
_conversion = r'([^={}:]+)(.?)'
|
||||
|
||||
_compiled_expr = re.compile(_expr)
|
||||
_compiled_in_expr = re.compile(_in_expr)
|
||||
_compiled_conversion = re.compile(_conversion)
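
# Worked example: _compiled_expr matched against 'hi {name}!' captures the
# literal prefix 'hi ' in group(1) (doubled braces {{ / }} stay in the
# prefix) and the brace '{' in group(2).  _compiled_in_expr then consumes
# expression text up to the next brace, bracket, colon, quote or '!'.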


def tokenize(code, start_pos=(1, 0)):
    def tok(value, type=None, prefix=''):
        if type is None:
            # Derive the token type from the character that was just found.
            type = TokenNamespace.generate_token_id(found)
        return Token(type, value, (line, column), prefix)

    # Positions are not advanced per token yet; every token is reported at
    # start_pos for now.
    line, column = start_pos
    start = 0
    while True:
        match = _compiled_expr.match(code, start)
        prefix = match.group(1)
        found = match.group(2)
        start = match.end()
        if not found:
            # We're at the end.
            break
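
        # `prefix` is the literal f-string text before the brace; it gets
        # attached to the brace token rather than emitted on its own.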
        if found == '}':
            yield tok(found, prefix=prefix)
        else:
            assert found == '{'
            yield tok(found, prefix=prefix)

            expression = ''
            squared_count = 0
            curly_count = 0
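
            # Scan the expression, tracking bracket and brace depth so that
            # colons in subscripts or dict literals (e.g. the slice colon in
            # '{d[1:2]}') do not end the expression.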
            while True:
                expr_match = _compiled_in_expr.match(code, start)
                expression += expr_match.group(1)
                found = expr_match.group(2)
                start = expr_match.end()

                if found == '{':
                    curly_count += 1
                    expression += found
                elif found == '}' and curly_count > 0:
                    curly_count -= 1
                    expression += found
                elif found == '[':
                    squared_count += 1
                    expression += found
                elif found == ']':
                    # Use a max function here, because the Python code might
                    # just have syntax errors.
                    squared_count = max(0, squared_count - 1)
                    expression += found
                elif found == ':' and (squared_count or curly_count):
                    expression += found
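                # Skip over strings wholesale, so that a colon or brace
                # inside quotes (e.g. '{d["a:b"]}') cannot terminate the
                # expression: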
|
||||
elif found in ('"', "'"):
|
||||
expression += found
|
||||
search = found
|
||||
if len(code) > start + 1 and \
|
||||
code[start] == found == code[start+1]:
|
||||
search *= 3
|
||||
start += 2
|
||||
|
||||
index = code.find(search)
|
||||
if index == -1:
|
||||
index = len(code)
|
||||
expression += code[start:index]
|
||||
start = index
                elif found == '!' and len(code) > start and code[start] == '=':
                    # This is a Python `!=` and not a conversion.
                    expression += found
                else:
                    # A top-level '}', ':', '!' or the end of the code
                    # finishes the expression.
                    yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
                    if found:
                        yield tok(found)
                    break
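
            # A '!' at expression level starts a conversion such as !r, !s
            # or !a; its exact rules are checked later (see the note by
            # _conversion above).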
            if found == '!':
                conversion_match = _compiled_conversion.match(code, start)
                found = conversion_match.group(2)
                start = conversion_match.end()
                yield tok(conversion_match.group(1),
                          type=TokenNamespace.CONVERSION)
                if found:
                    yield tok(found)

            # We don't need to handle everything after ':', because that is
            # basically new tokens.

    yield tok('', type=TokenNamespace.ENDMARKER, prefix=prefix)
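
A minimal usage sketch, not part of the commit: the input to tokenize() is
the body of an f-string (without the f and the quotes), assuming parso's
Token namedtuple with the fields (type, string, start_pos, prefix):

    from parso.python.fstring import tokenize

    for t in tokenize('hello {name!r:>10}'):
        print(t.type, repr(t.string), repr(t.prefix))

    # Roughly: LBRACE '{' with prefix 'hello ', PYTHON_EXPR 'name',
    # EXCLAMATION_MARK '!', CONVERSION 'r', COLON ':', RBRACE '}' with
    # prefix '>10', and ENDMARKER ''.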