From 6b2619a844c56cd56c927d5be503ec2efd04c6b7 Mon Sep 17 00:00:00 2001
From: David Halter
Date: Sat, 20 Apr 2013 09:23:27 +0430
Subject: [PATCH] tokenizer removed unused functions

---
 jedi/tokenizer.py | 113 ----------------------------------------------
 1 file changed, 113 deletions(-)

diff --git a/jedi/tokenizer.py b/jedi/tokenizer.py
index 989678a6..c5ff5e2f 100644
--- a/jedi/tokenizer.py
+++ b/jedi/tokenizer.py
@@ -148,119 +148,6 @@ tabsize = 8
 
 class TokenError(Exception): pass
 
-class StopTokenizing(Exception): pass
-
-
-def _get_normal_name(orig_enc):
-    """Imitates get_normal_name in tokenizer.c."""
-    # Only care about the first 12 characters.
-    enc = orig_enc[:12].lower().replace("_", "-")
-    if enc == "utf-8" or enc.startswith("utf-8-"):
-        return "utf-8"
-    if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
-       enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
-        return "iso-8859-1"
-    return orig_enc
-
-def detect_encoding(readline):
-    """
-    The detect_encoding() function is used to detect the encoding that should
-    be used to decode a Python source file. It requires one argment, readline,
-    in the same way as the tokenize() generator.
-
-    It will call readline a maximum of twice, and return the encoding used
-    (as a string) and a list of any lines (left as bytes) it has read in.
-
-    It detects the encoding from the presence of a utf-8 bom or an encoding
-    cookie as specified in pep-0263. If both a bom and a cookie are present,
-    but disagree, a SyntaxError will be raised. If the encoding cookie is an
-    invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
-    'utf-8-sig' is returned.
-
-    If no encoding is specified, then the default of 'utf-8' will be returned.
-    """
-    bom_found = False
-    encoding = None
-    default = 'utf-8'
-    def read_or_stop():
-        try:
-            return readline()
-        except StopIteration:
-            return b''
-
-    def find_cookie(line):
-        try:
-            line_string = line.decode('ascii')
-        except UnicodeDecodeError:
-            return None
-
-        matches = cookie_re.findall(line_string)
-        if not matches:
-            return None
-        encoding = _get_normal_name(matches[0])
-        try:
-            codec = lookup(encoding)
-        except LookupError:
-            # This behaviour mimics the Python interpreter
-            raise SyntaxError("unknown encoding: " + encoding)
-
-        if bom_found:
-            if codec.name != 'utf-8':
-                # This behaviour mimics the Python interpreter
-                raise SyntaxError('encoding problem: utf-8')
-            encoding += '-sig'
-        return encoding
-
-    first = read_or_stop()
-    if first.startswith(BOM_UTF8):
-        bom_found = True
-        first = first[3:]
-        default = 'utf-8-sig'
-    if not first:
-        return default, []
-
-    encoding = find_cookie(first)
-    if encoding:
-        return encoding, [first]
-
-    second = read_or_stop()
-    if not second:
-        return default, [first]
-
-    encoding = find_cookie(second)
-    if encoding:
-        return encoding, [first, second]
-
-    return default, [first, second]
-
-
-def tokenize(readline):
-    """
-    The tokenize() generator requires one argment, readline, which
-    must be a callable object which provides the same interface as the
-    readline() method of built-in file objects. Each call to the function
-    should return one line of input as bytes.  Alternately, readline
-    can be a callable function terminating with StopIteration:
-        readline = open(myfile, 'rb').__next__  # Example of alternate readline
-
-    The generator produces 5-tuples with these members: the token type; the
-    token string; a 2-tuple (srow, scol) of ints specifying the row and
-    column where the token begins in the source; a 2-tuple (erow, ecol) of
-    ints specifying the row and column where the token ends in the source;
-    and the line on which the token was found. The line passed is the
-    logical line; continuation lines are included.
-
-    The first token sequence will always be an ENCODING token
-    which tells you which encoding was used to decode the bytes stream.
-    """
-    # This import is here to avoid problems when the itertools module is not
-    # built yet and tokenize is imported.
-    from itertools import chain, repeat
-    encoding, consumed = detect_encoding(readline)
-    rl_gen = iter(readline, b"")
-    empty = repeat(b"")
-    return _tokenize(chain(consumed, rl_gen, empty).__next__, encoding)
-
 def _tokenize(readline, encoding):
     lnum = parenlev = continued = 0
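Note: the deleted _get_normal_name()/detect_encoding() pair closely mirrors the standard
library's tokenize module (the comments themselves say they imitate tokenizer.c and the
interpreter), so the same encoding detection remains available there. A minimal sketch,
assuming callers fall back to the stdlib; the 'example.py' path is only an illustration:

    import tokenize

    with open('example.py', 'rb') as f:
        # Reads at most two lines and returns the encoding found in a UTF-8 BOM
        # or a PEP 263 coding cookie (default 'utf-8'), plus the raw bytes lines
        # it consumed while looking.
        encoding, consumed = tokenize.detect_encoding(f.readline)

    print(encoding)  # e.g. 'utf-8' or 'utf-8-sig'
    print(consumed)  # list of bytes lines read so far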
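Likewise, the 5-tuple stream described in the removed tokenize() docstring matches what
the stdlib generator produces. A small sketch under that assumption; the source string is
only an illustration:

    import io
    import tokenize

    readline = io.BytesIO(b"answer = 42\n").readline
    for tok_type, tok_string, start, end, line in tokenize.tokenize(readline):
        # Each token unpacks to (type, string, (srow, scol), (erow, ecol), logical line);
        # the first token emitted is always ENCODING.
        print(tokenize.tok_name[tok_type], repr(tok_string), start, end)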