
tokenizer py27 support

David Halter
2013-04-20 09:21:22 +04:30
parent fa53579950
commit 06da7ddeec


@@ -27,12 +27,15 @@ operators. Additionally, all token lists start with an ENCODING token
 which tells you which encoding was used to decode the bytes stream.
 """
+import string
 import re
 from token import *
 from codecs import lookup, BOM_UTF8
 import collections
 
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
+namechars = string.ascii_letters + '_'
+
 COMMENT = N_TOKENS
 tok_name[COMMENT] = 'COMMENT'
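
Side note, not part of the diff: unlike Python 2's locale-dependent string.letters, string.ascii_letters is a fixed 52-character constant on both 2.x and 3.x, so the namechars set introduced here behaves the same under any locale. A quick check:

    import string

    # ascii_letters is the same constant on 2.7 and 3.x, regardless of locale
    assert string.ascii_letters == ('abcdefghijklmnopqrstuvwxyz'
                                    'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    assert len(string.ascii_letters + '_') == 53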
@@ -388,7 +391,7 @@ def _tokenize(readline, encoding):
                     break
                 else:                                  # ordinary string
                     yield TokenInfo(STRING, token, spos, epos, line)
-                elif initial.isidentifier():           # ordinary name
+                elif initial in namechars:             # ordinary name
                     yield TokenInfo(NAME, token, spos, epos, line)
                 elif initial == '\\':                  # continued stmt
                     continued = 1
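
For context, not part of the diff: str.isidentifier() exists only on Python 3 strings, so calling it would raise AttributeError under 2.7; testing the first character against the explicit namechars set works on both. A minimal sketch of the substitution (stdlib only, no jedi imports):

    import string

    namechars = string.ascii_letters + '_'

    # The replacement check is valid on 2.7 and 3.x alike:
    assert 'a' in namechars
    assert '_' in namechars
    assert '9' not in namechars

    # Caveat: on Python 3, 'µ'.isidentifier() is True, but 'µ' in namechars
    # is False, so the ASCII test is slightly stricter for non-ASCII names.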
@@ -400,9 +403,15 @@ def _tokenize(readline, encoding):
                     yield TokenInfo(OP, token, spos, epos, line)
             else:
                 yield TokenInfo(ERRORTOKEN, line[pos],
-                                (lnum, pos), (lnum, pos+1), line)
+                                (lnum, pos), (lnum, pos + 1), line)
                 pos += 1
 
     for indent in indents[1:]:                 # pop remaining indent levels
         yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
     yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
+
+
+# An undocumented, backwards compatible, API for all the places in the standard
+# library that expect to be able to use tokenize with strings
+def generate_tokens(readline):
+    return _tokenize(readline, None)
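
A usage sketch for the re-added entry point (the source string below is hypothetical; the readline callable comes from io.StringIO, the same pattern the standard library uses to drive generate_tokens). Because generate_tokens() passes encoding=None to _tokenize, no ENCODING token is emitted and the input is consumed as text rather than a bytes stream:

    import io

    source = u"x = 1\n"
    # Each tok is a TokenInfo tuple: (type, string, start, end, line)
    for tok in generate_tokens(io.StringIO(source).readline):
        print(tok)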