
tokenizer: removed encoding

David Halter
2013-04-20 09:28:32 +04:30
parent 6b2619a844
commit 0d385563a5


@@ -4,27 +4,8 @@ tokenizer. The reason was simple: The standard library tokenizer fails
 if the indentation is not right. The fast parser of jedi however requires
 "wrong" indentation.
 
-Tokenization help for Python programs.
-++++++++++++++++++++++++++++++++++++++
-
-tokenize(readline) is a generator that breaks a stream of bytes into
-Python tokens.  It decodes the bytes according to PEP-0263 for
-determining source file encoding.
-
-It accepts a readline-like method which is called repeatedly to get the
-next line of input (or b"" for EOF).  It generates 5-tuples with these
-members:
-
-    the token type (see token.py)
-    the token (a string)
-    the starting (row, column) indices of the token (a 2-tuple of ints)
-    the ending (row, column) indices of the token (a 2-tuple of ints)
-    the original line (string)
-
-It is designed to match the working of the Python tokenizer exactly, except
-that it produces COMMENT tokens for comments and gives type OP for all
-operators.  Additionally, all token lists start with an ENCODING token
-which tells you which encoding was used to decode the bytes stream.
+Basically this is a stripped down version of the standard library module, so
+you can read the documentation there.
 """
 import string
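The replacement docstring defers to the standard library documentation, and the token shape is unchanged: the generator still yields the 5-tuples the removed text described (type, string, start, end, source line). A minimal usage sketch; the import path below is an assumption for illustration, not the actual location in jedi's tree:

    import io
    from jedi import tokenizer  # assumed import path, for illustration only

    source = "def f():\n    return 1\n"
    readline = io.StringIO(source).readline  # text lines; no byte decoding here
    for tok_type, tok_str, start, end, line in tokenizer.generate_tokens(readline):
        # Each token is a 5-tuple: type, string, (row, col) start, (row, col) end, line.
        print(tok_type, repr(tok_str), start, end)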
@@ -149,26 +130,19 @@ tabsize = 8
 
 class TokenError(Exception): pass
 
-def _tokenize(readline, encoding):
+def generate_tokens(readline):
     lnum = parenlev = continued = 0
     numchars = '0123456789'
     contstr, needcont = '', 0
     contline = None
     indents = [0]
 
-    if encoding is not None:
-        if encoding == "utf-8-sig":
-            # BOM will already have been stripped.
-            encoding = "utf-8"
-        yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
-
     while True:                # loop over lines in stream
         try:
             line = readline()
         except StopIteration:
             line = b''
 
-        if encoding is not None:
-            line = line.decode(encoding)
         lnum += 1
         pos, max = 0, len(line)
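The signature change shifts decoding to the caller: the generator now receives already-decoded text and no longer yields a leading ENCODING token. A sketch of the behavioral difference, assuming generate_tokens from this module is in scope:

    import io

    # Before this commit (stdlib-style): bytes in, ENCODING token yielded first.
    #     tokens = _tokenize(io.BytesIO(b"x = 1\n").readline, "utf-8")
    # After: text in, and the first token is already a real source token.
    tokens = generate_tokens(io.StringIO("x = 1\n").readline)
    first = next(tokens)
    print(first[0])  # NAME (for 'x'), not ENCODING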
@@ -296,9 +270,3 @@ def _tokenize(readline, encoding):
     for indent in indents[1:]:                 # pop remaining indent levels
         yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
     yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
-
-
-# An undocumented, backwards compatible, API for all the places in the standard
-# library that expect to be able to use tokenize with strings
-def generate_tokens(readline):
-    return _tokenize(readline, None)
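With the encoding parameter gone there is nothing left for the old generate_tokens wrapper to adapt, so the generator takes that name directly. The end-of-stream handling in this hunk pops any still-open indentation before the end marker; a sketch of the expected tail of the token stream, assuming stdlib-compatible behavior as the new docstring suggests:

    import io
    from token import tok_name

    toks = list(generate_tokens(io.StringIO("if x:\n    y\n").readline))
    # The indented block is closed at EOF, so the stream should end with
    # NEWLINE, DEDENT, ENDMARKER.
    print([tok_name[t[0]] for t in toks][-3:])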