From d9bf9be35e8ea62360ef35cafd774fdc6ee4c5f8 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 21 Feb 2014 17:45:56 +0100 Subject: [PATCH] small refactorings of the tokenizer --- jedi/parser/tokenize.py | 98 ++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 50 deletions(-) diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py index f3cb9c4f..40c63123 100644 --- a/jedi/parser/tokenize.py +++ b/jedi/parser/tokenize.py @@ -158,9 +158,8 @@ def generate_tokens(readline, line_offset=0): contstr = '' contline = None while True: # loop over lines in stream - try: - line = readline() - except StopIteration: + line = readline() # readline returns empty if it's finished. See StringIO + if not line: if contstr: yield TokenInfo(ERRORTOKEN, contstr, strstart, (lnum, pos)) break @@ -168,7 +167,7 @@ def generate_tokens(readline, line_offset=0): lnum += 1 pos, max = 0, len(line) - if contstr: # continued string + if contstr: # continued string endmatch = endprog.match(line) if endmatch: pos = end = endmatch.end(0) @@ -179,58 +178,57 @@ def generate_tokens(readline, line_offset=0): contstr = contstr + line contline = contline + line continue - elif pos == max: - break # Don't really understand why this must be here. while pos < max: pseudomatch = pseudoprog.match(line, pos) - if pseudomatch: # scan for tokens - start, end = pseudomatch.span(1) - spos, epos, pos = (lnum, start), (lnum, end), end - token, initial = line[start:end], line[start] - - if (initial in numchars or # ordinary number - (initial == '.' and token != '.' and token != '...')): - yield TokenInfo(NUMBER, token, spos, epos) - elif initial in '\r\n': - yield TokenInfo(NEWLINE, token, spos, epos) - elif initial == '#': - assert not token.endswith("\n") - yield TokenInfo(COMMENT, token, spos, epos) - elif token in triple_quoted: - endprog = endprogs[token] - endmatch = endprog.match(line, pos) - if endmatch: # all on one line - pos = endmatch.end(0) - token = line[start:pos] - yield TokenInfo(STRING, token, spos, (lnum, pos)) - else: - strstart = (lnum, start) # multiple lines - contstr = line[start:] - contline = line - break - elif initial in single_quoted or \ - token[:2] in single_quoted or \ - token[:3] in single_quoted: - if token[-1] == '\n': # continued string - strstart = (lnum, start) - endprog = (endprogs[initial] or endprogs[token[1]] or - endprogs[token[2]]) - contstr = line[start:] - contline = line - break - else: # ordinary string - yield TokenInfo(STRING, token, spos, epos) - elif initial in namechars: # ordinary name - yield TokenInfo(NAME, token, spos, epos) - elif initial == '\\' and line[start:] == '\\\n': # continued stmt - continue - else: - yield TokenInfo(OP, token, spos, epos) - else: + if not pseudomatch: # scan for tokens yield TokenInfo(ERRORTOKEN, line[pos], (lnum, pos), (lnum, pos + 1)) pos += 1 + continue + + start, end = pseudomatch.span(1) + spos, epos, pos = (lnum, start), (lnum, end), end + token, initial = line[start:end], line[start] + + if (initial in numchars or # ordinary number + (initial == '.' and token != '.' and token != '...')): + yield TokenInfo(NUMBER, token, spos, epos) + elif initial in '\r\n': + yield TokenInfo(NEWLINE, token, spos, epos) + elif initial == '#': + assert not token.endswith("\n") + yield TokenInfo(COMMENT, token, spos, epos) + elif token in triple_quoted: + endprog = endprogs[token] + endmatch = endprog.match(line, pos) + if endmatch: # all on one line + pos = endmatch.end(0) + token = line[start:pos] + yield TokenInfo(STRING, token, spos, (lnum, pos)) + else: + strstart = (lnum, start) # multiple lines + contstr = line[start:] + contline = line + break + elif initial in single_quoted or \ + token[:2] in single_quoted or \ + token[:3] in single_quoted: + if token[-1] == '\n': # continued string + strstart = (lnum, start) + endprog = (endprogs[initial] or endprogs[token[1]] or + endprogs[token[2]]) + contstr = line[start:] + contline = line + break + else: # ordinary string + yield TokenInfo(STRING, token, spos, epos) + elif initial in namechars: # ordinary name + yield TokenInfo(NAME, token, spos, epos) + elif initial == '\\' and line[start:] == '\\\n': # continued stmt + continue + else: + yield TokenInfo(OP, token, spos, epos) yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0))