forked from VimPlug/jedi
tokenizer removed unused functions
@@ -148,119 +148,6 @@ tabsize = 8
 
 class TokenError(Exception): pass
 
-class StopTokenizing(Exception): pass
-
-
-def _get_normal_name(orig_enc):
-    """Imitates get_normal_name in tokenizer.c."""
-    # Only care about the first 12 characters.
-    enc = orig_enc[:12].lower().replace("_", "-")
-    if enc == "utf-8" or enc.startswith("utf-8-"):
-        return "utf-8"
-    if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
-       enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
-        return "iso-8859-1"
-    return orig_enc
-
-
-def detect_encoding(readline):
-    """
-    The detect_encoding() function is used to detect the encoding that should
-    be used to decode a Python source file. It requires one argment, readline,
-    in the same way as the tokenize() generator.
-
-    It will call readline a maximum of twice, and return the encoding used
-    (as a string) and a list of any lines (left as bytes) it has read in.
-
-    It detects the encoding from the presence of a utf-8 bom or an encoding
-    cookie as specified in pep-0263. If both a bom and a cookie are present,
-    but disagree, a SyntaxError will be raised. If the encoding cookie is an
-    invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
-    'utf-8-sig' is returned.
-
-    If no encoding is specified, then the default of 'utf-8' will be returned.
-    """
-    bom_found = False
-    encoding = None
-    default = 'utf-8'
-    def read_or_stop():
-        try:
-            return readline()
-        except StopIteration:
-            return b''
-
-    def find_cookie(line):
-        try:
-            line_string = line.decode('ascii')
-        except UnicodeDecodeError:
-            return None
-
-        matches = cookie_re.findall(line_string)
-        if not matches:
-            return None
-        encoding = _get_normal_name(matches[0])
-        try:
-            codec = lookup(encoding)
-        except LookupError:
-            # This behaviour mimics the Python interpreter
-            raise SyntaxError("unknown encoding: " + encoding)
-
-        if bom_found:
-            if codec.name != 'utf-8':
-                # This behaviour mimics the Python interpreter
-                raise SyntaxError('encoding problem: utf-8')
-            encoding += '-sig'
-        return encoding
-
-    first = read_or_stop()
-    if first.startswith(BOM_UTF8):
-        bom_found = True
-        first = first[3:]
-        default = 'utf-8-sig'
-    if not first:
-        return default, []
-
-    encoding = find_cookie(first)
-    if encoding:
-        return encoding, [first]
-
-    second = read_or_stop()
-    if not second:
-        return default, [first]
-
-    encoding = find_cookie(second)
-    if encoding:
-        return encoding, [first, second]
-
-    return default, [first, second]
-
-
-def tokenize(readline):
-    """
-    The tokenize() generator requires one argment, readline, which
-    must be a callable object which provides the same interface as the
-    readline() method of built-in file objects. Each call to the function
-    should return one line of input as bytes. Alternately, readline
-    can be a callable function terminating with StopIteration:
-        readline = open(myfile, 'rb').__next__  # Example of alternate readline
-
-    The generator produces 5-tuples with these members: the token type; the
-    token string; a 2-tuple (srow, scol) of ints specifying the row and
-    column where the token begins in the source; a 2-tuple (erow, ecol) of
-    ints specifying the row and column where the token ends in the source;
-    and the line on which the token was found. The line passed is the
-    logical line; continuation lines are included.
-
-    The first token sequence will always be an ENCODING token
-    which tells you which encoding was used to decode the bytes stream.
-    """
-    # This import is here to avoid problems when the itertools module is not
-    # built yet and tokenize is imported.
-    from itertools import chain, repeat
-    encoding, consumed = detect_encoding(readline)
-    rl_gen = iter(readline, b"")
-    empty = repeat(b"")
-    return _tokenize(chain(consumed, rl_gen, empty).__next__, encoding)
-
-
 def _tokenize(readline, encoding):
     lnum = parenlev = continued = 0
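For reference, the deleted functions mirror the encoding-detection machinery in CPython's `tokenize` module (the docstring itself points at `tokenizer.c` and PEP 263), so the readline-based API they implemented can still be illustrated with the standard library. Below is a minimal sketch using the stdlib `tokenize` module rather than the removed vendored copy, with an in-memory `io.BytesIO` buffer standing in for a source file; the sample source string is purely illustrative.

```python
import io
import tokenize

# Illustrative source with a PEP 263 coding cookie on the first line.
source = b"# -*- coding: latin-1 -*-\nx = 1\n"

# detect_encoding() reads at most two lines and returns the encoding name
# plus the raw byte lines it consumed; 'latin-1' is normalized to
# 'iso-8859-1', matching the _get_normal_name() helper removed here.
encoding, consumed = tokenize.detect_encoding(io.BytesIO(source).readline)
print(encoding)   # 'iso-8859-1'
print(consumed)   # [b'# -*- coding: latin-1 -*-\n']

# tokenize() takes the same readline callable and yields token tuples;
# the first token is always ENCODING, as the removed docstring describes.
for tok in tokenize.tokenize(io.BytesIO(source).readline):
    print(tok.type, repr(tok.string), tok.start, tok.end)
```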