1
0
forked from VimPlug/jedi

tokenizer removed unused functions

This commit is contained in:
David Halter
2013-04-20 09:23:27 +04:30
parent 06da7ddeec
commit 6b2619a844

View File

@@ -148,119 +148,6 @@ tabsize = 8
class TokenError(Exception): pass
class StopTokenizing(Exception): pass
def _get_normal_name(orig_enc):
"""Imitates get_normal_name in tokenizer.c."""
# Only care about the first 12 characters.
enc = orig_enc[:12].lower().replace("_", "-")
if enc == "utf-8" or enc.startswith("utf-8-"):
return "utf-8"
if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
return "iso-8859-1"
return orig_enc
def detect_encoding(readline):
"""
The detect_encoding() function is used to detect the encoding that should
be used to decode a Python source file. It requires one argment, readline,
in the same way as the tokenize() generator.
It will call readline a maximum of twice, and return the encoding used
(as a string) and a list of any lines (left as bytes) it has read in.
It detects the encoding from the presence of a utf-8 bom or an encoding
cookie as specified in pep-0263. If both a bom and a cookie are present,
but disagree, a SyntaxError will be raised. If the encoding cookie is an
invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
'utf-8-sig' is returned.
If no encoding is specified, then the default of 'utf-8' will be returned.
"""
bom_found = False
encoding = None
default = 'utf-8'
def read_or_stop():
try:
return readline()
except StopIteration:
return b''
def find_cookie(line):
try:
line_string = line.decode('ascii')
except UnicodeDecodeError:
return None
matches = cookie_re.findall(line_string)
if not matches:
return None
encoding = _get_normal_name(matches[0])
try:
codec = lookup(encoding)
except LookupError:
# This behaviour mimics the Python interpreter
raise SyntaxError("unknown encoding: " + encoding)
if bom_found:
if codec.name != 'utf-8':
# This behaviour mimics the Python interpreter
raise SyntaxError('encoding problem: utf-8')
encoding += '-sig'
return encoding
first = read_or_stop()
if first.startswith(BOM_UTF8):
bom_found = True
first = first[3:]
default = 'utf-8-sig'
if not first:
return default, []
encoding = find_cookie(first)
if encoding:
return encoding, [first]
second = read_or_stop()
if not second:
return default, [first]
encoding = find_cookie(second)
if encoding:
return encoding, [first, second]
return default, [first, second]
def tokenize(readline):
"""
The tokenize() generator requires one argment, readline, which
must be a callable object which provides the same interface as the
readline() method of built-in file objects. Each call to the function
should return one line of input as bytes. Alternately, readline
can be a callable function terminating with StopIteration:
readline = open(myfile, 'rb').__next__ # Example of alternate readline
The generator produces 5-tuples with these members: the token type; the
token string; a 2-tuple (srow, scol) of ints specifying the row and
column where the token begins in the source; a 2-tuple (erow, ecol) of
ints specifying the row and column where the token ends in the source;
and the line on which the token was found. The line passed is the
logical line; continuation lines are included.
The first token sequence will always be an ENCODING token
which tells you which encoding was used to decode the bytes stream.
"""
# This import is here to avoid problems when the itertools module is not
# built yet and tokenize is imported.
from itertools import chain, repeat
encoding, consumed = detect_encoding(readline)
rl_gen = iter(readline, b"")
empty = repeat(b"")
return _tokenize(chain(consumed, rl_gen, empty).__next__, encoding)
def _tokenize(readline, encoding):
lnum = parenlev = continued = 0