1
0
forked from VimPlug/jedi

clean up tokenize

This commit is contained in:
Dave Halter
2014-02-25 17:17:21 +01:00
parent 761c28ef00
commit 40be00826e

View File

@@ -16,7 +16,7 @@ from io import StringIO
from token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, NAME, OP, from token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, NAME, OP,
ERRORTOKEN, NEWLINE) ERRORTOKEN, NEWLINE)
from jedi._compatibility import u, unicode from jedi._compatibility import u
cookie_re = re.compile("coding[:=]\s*([-\w.]+)") cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
@@ -117,58 +117,58 @@ def maybe(*choices):
# Note: we use unicode matching for names ("\w") but ascii matching for # Note: we use unicode matching for names ("\w") but ascii matching for
# number literals. # number literals.
Whitespace = r'[ \f\t]*' whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*' comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) ignore = whitespace + any(r'\\\r?\n' + whitespace) + maybe(comment)
Name = r'\w+' name = r'\w+'
Hexnumber = r'0[xX][0-9a-fA-F]+' hex_number = r'0[xX][0-9a-fA-F]+'
Binnumber = r'0[bB][01]+' bin_number = r'0[bB][01]+'
Octnumber = r'0[oO][0-7]+' oct_number = r'0[oO][0-7]+'
Decnumber = r'(?:0+|[1-9][0-9]*)' dec_number = r'(?:0+|[1-9][0-9]*)'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) int_number = group(hex_number, bin_number, oct_number, dec_number)
Exponent = r'[eE][-+]?[0-9]+' exponent = r'[eE][-+]?[0-9]+'
Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent) point_float = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(exponent)
Expfloat = r'[0-9]+' + Exponent Expfloat = r'[0-9]+' + exponent
Floatnumber = group(Pointfloat, Expfloat) float_number = group(point_float, Expfloat)
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]') imag_number = group(r'[0-9]+[jJ]', float_number + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber) number = group(imag_number, float_number, int_number)
# Tail end of ' string. # Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'" single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string. # Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"' double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string. # Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string. # Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group("[bB]?[rR]?'''", '[bB]?[rR]?"""') triple = group("[bB]?[rR]?'''", '[bB]?[rR]?"""')
# Single-line ' or " string. # Single-line ' or " string.
String = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'", string = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"') r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
# Because of leftmost-then-longest match semantics, be sure to put the # Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get # longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =). # recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=", operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
r"//=?", r"->", r"//=?", r"->",
r"[+\-*/%&|^=<>]=?", r"[+\-*/%&|^=<>]=?",
r"~") r"~")
Bracket = '[][(){}]' bracket = '[][(){}]'
Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]') special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
Funny = group(Operator, Bracket, Special) funny = group(operator, bracket, special)
PlainToken = group(Number, Funny, String, Name) plain_token = group(number, funny, string, name)
token = Ignore + PlainToken token = ignore + plain_token
# First (or only) line of ' or " string. # First (or only) line of ' or " string.
ContStr = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" + cont_str = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
group("'", r'\\\r?\n'), group("'", r'\\\r?\n'),
r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' + r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
group('"', r'\\\r?\n')) group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n', Comment, Triple) pseudo_extras = group(r'\\\r?\n', comment, triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) pseudo_token = whitespace + group(pseudo_extras, number, funny, cont_str, name)
def _compile(expr): def _compile(expr):
@@ -176,8 +176,8 @@ def _compile(expr):
tokenprog, pseudoprog, single3prog, double3prog = map( tokenprog, pseudoprog, single3prog, double3prog = map(
_compile, (token, PseudoToken, Single3, Double3)) _compile, (token, pseudo_token, single3, double3))
endprogs = {"'": _compile(Single), '"': _compile(Double), endprogs = {"'": _compile(single), '"': _compile(double),
"'''": single3prog, '"""': double3prog, "'''": single3prog, '"""': double3prog,
"r'''": single3prog, 'r"""': double3prog, "r'''": single3prog, 'r"""': double3prog,
"b'''": single3prog, 'b"""': double3prog, "b'''": single3prog, 'b"""': double3prog,