small refactorings of the tokenizer

Dave Halter
2014-02-21 17:45:56 +01:00
parent efba0cd80c
commit d9bf9be35e


@@ -158,9 +158,8 @@ def generate_tokens(readline, line_offset=0):
     contstr = ''
     contline = None
     while True:    # loop over lines in stream
-        try:
-            line = readline()
-        except StopIteration:
+        line = readline()  # readline returns empty if it's finished. See StringIO
+        if not line:
             if contstr:
                 yield TokenInfo(ERRORTOKEN, contstr, strstart, (lnum, pos))
             break
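
The dropped try/except only helps when readline raises StopIteration (for example a readline built from iter(lines).__next__); plain file objects and io.StringIO signal end of input by returning an empty string, which is exactly what the new `if not line:` check relies on. A minimal sketch of that behaviour, not part of this commit:

    from io import StringIO

    source = StringIO("x = 1\n")
    assert source.readline() == "x = 1\n"
    assert source.readline() == ""   # EOF: empty string, no StopIteration
    assert source.readline() == ""   # and it stays empty on further calls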
@@ -179,12 +178,15 @@ def generate_tokens(readline, line_offset=0):
                 contstr = contstr + line
                 contline = contline + line
                 continue
         elif pos == max:
             break  # Don't really understand why this must be here.
         while pos < max:
             pseudomatch = pseudoprog.match(line, pos)
-            if pseudomatch:                                # scan for tokens
+            if not pseudomatch:                            # scan for tokens
+                yield TokenInfo(ERRORTOKEN, line[pos],
+                                (lnum, pos), (lnum, pos + 1))
+                pos += 1
+                continue
             start, end = pseudomatch.span(1)
             spos, epos, pos = (lnum, start), (lnum, end), end
             token, initial = line[start:end], line[start]
@@ -227,10 +229,6 @@ def generate_tokens(readline, line_offset=0):
                 continue
             else:
                 yield TokenInfo(OP, token, spos, epos)
-            else:
-                yield TokenInfo(ERRORTOKEN, line[pos],
-                                (lnum, pos), (lnum, pos + 1))
-                pos += 1
     yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0))
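
The last two hunks are one refactoring: the pseudomatch test is inverted into an early guard that yields ERRORTOKEN and continues, so the trailing `else:` branch at the end of the token loop can be dropped. A simplified sketch of the resulting loop shape, with the token classification collapsed into a single OP case (pseudoprog, TokenInfo and the token constants come from the surrounding module, not from this sketch):

    def _scan_tokens(line, lnum, pseudoprog, TokenInfo, ERRORTOKEN, OP):
        # Simplified: the real loop also classifies NUMBER, STRING, NAME, NEWLINE, ...
        pos, max_pos = 0, len(line)
        while pos < max_pos:
            pseudomatch = pseudoprog.match(line, pos)
            if not pseudomatch:
                # Guard clause replaces the old trailing else: an unmatched
                # character becomes an ERRORTOKEN and scanning moves on by one.
                yield TokenInfo(ERRORTOKEN, line[pos], (lnum, pos), (lnum, pos + 1))
                pos += 1
                continue
            start, end = pseudomatch.span(1)
            spos, epos, pos = (lnum, start), (lnum, end), end
            token = line[start:end]
            yield TokenInfo(OP, token, spos, epos)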