forked from VimPlug/jedi

Make the tokenizer a generator.

Dave Halter
2017-03-09 18:53:09 +01:00
parent 989e4bac89
commit c7a74e6d1c
4 changed files with 41 additions and 52 deletions
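
The one-line summary amounts to an input-contract change: generate_tokens() now consumes an iterable of source lines instead of a readline-style callable, while source_tokens() stays the string-based entry point. A minimal usage sketch of the post-commit interface; the code string is illustrative, and the 4-tuple fields come from the TokenInfo namedtuple visible in the diff below:

    from jedi.parser import tokenize

    code = 'x = 1\nprint(x)\n'
    # source_tokens() splits the code into lines itself and returns a
    # generator of TokenInfo(type, string, start_pos, prefix) tuples.
    for typ, string, start_pos, prefix in tokenize.source_tokens(code):
        print(typ, repr(string), start_pos, repr(prefix))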


@@ -356,8 +356,7 @@ class DiffParser(object):
         is_first_token = True
         omitted_first_indent = False
         indents = []
-        l = iter(lines)
-        tokens = generate_tokens(lambda: next(l, ''), use_exact_op_types=True)
+        tokens = generate_tokens(lines, use_exact_op_types=True)
         stack = self._active_parser.pgen_parser.stack
         for typ, string, start_pos, prefix in tokens:
             start_pos = start_pos[0] + line_offset, start_pos[1]
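
The two deleted lines above are an adapter that makes a list of lines behave like a file: next(l, '') returns the empty string once the iterator is exhausted, imitating readline() at EOF. A standalone illustration of the idiom (variable names are illustrative):

    it = iter(['x = 1\n', 'print(x)\n'])
    readline = lambda: next(it, '')

    readline()  # 'x = 1\n'
    readline()  # 'print(x)\n'
    readline()  # '', the default stands in for EOF, like file.readline()

Passing the lines directly makes this shim, and its sentinel value, unnecessary.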


@@ -12,18 +12,13 @@ from jedi.parser import tokenize
 class ParserGenerator(object):
-    def __init__(self, filename, stream=None):
-        close_stream = None
-        if stream is None:
-            stream = open(filename)
-            close_stream = stream.close
+    def __init__(self, filename):
+        with open(filename) as f:
+            code = f.read()
         self.filename = filename
-        self.stream = stream
-        self.generator = tokenize.generate_tokens(stream.readline)
+        self.generator = tokenize.source_tokens(code)
         self.gettoken()  # Initialize lookahead
         self.dfas, self.startsymbol = self.parse()
-        if close_stream is not None:
-            close_stream()
         self.first = {}  # map from symbol name to set of tokens
         self.addfirstsets()
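
Since source_tokens() takes the code as a plain string, the constructor can read the grammar file eagerly and no longer has to keep a stream on the instance or track whether it owns it. A simplified sketch of the two resource-handling shapes, not the actual class:

    def read_grammar_old(filename, stream=None):
        close_stream = None
        if stream is None:
            stream = open(filename)
            close_stream = stream.close
        code = stream.read()
        if close_stream is not None:
            close_stream()  # never runs if an exception is raised first
        return code

    def read_grammar_new(filename):
        with open(filename) as f:  # closed even if read() raises
            return f.read()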


@@ -14,7 +14,6 @@ from __future__ import absolute_import
 import string
 import re
 from collections import namedtuple
-from io import StringIO
 import itertools as _itertools
 from jedi.parser.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, opmap,
@@ -207,12 +206,11 @@ class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
 def source_tokens(source, use_exact_op_types=False):
     """Generate tokens from the source code (string)."""
     source = source
-    readline = StringIO(source).readline
-    return generate_tokens(readline, use_exact_op_types)
+    lines = splitlines(source, keepends=True)
+    return generate_tokens(lines, use_exact_op_types)


-def generate_tokens(readline, use_exact_op_types=False):
+def generate_tokens(lines, use_exact_op_types=False):
     """
     A heavily modified Python standard library tokenizer.
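
Keeping the line endings is essential here: the tokenizer reconstructs exact positions and token prefixes, so each newline must stay attached to its line. jedi uses its own splitlines helper whose precise semantics this diff does not show; the builtin str.splitlines(keepends=True) illustrates the behavior assumed:

    source = 'def f():\n    return 1\n'
    print(source.splitlines(keepends=True))
    # ['def f():\n', '    return 1\n']
    print(source.splitlines())
    # ['def f():', '    return 1']  (endings lost; NEWLINE tokens
    # and prefixes could not be recovered)
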
@@ -222,7 +220,6 @@ def generate_tokens(readline, use_exact_op_types=False):
"""
paren_level = 0 # count parentheses
indents = [0]
lnum = 0
max = 0
numchars = '0123456789'
contstr = ''
@@ -234,16 +231,7 @@ def generate_tokens(readline, use_exact_op_types=False):
     new_line = True
     prefix = ''  # Should never be required, but here for safety
     additional_prefix = ''
-    while True:  # loop over lines in stream
-        line = readline()  # readline returns empty when finished. See StringIO
-        if not line:
-            if contstr:
-                yield TokenInfo(ERRORTOKEN, contstr, contstr_start, prefix)
-                if contstr.endswith('\n'):
-                    new_line = True
-            break
-
-        lnum += 1
+    for lnum, line in enumerate(lines, 1):  # loop over lines in stream
         pos, max = 0, len(line)

         if contstr:  # continued string
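
Driving the loop with enumerate(lines, 1) replaces both the EOF sentinel and the manual counter (the lnum = 0 initialization deleted in the previous hunk), and yields the 1-based line numbers that the position tuples expect. The same transformation in a generic sketch:

    lines = ['a = 1\n', 'b = 2\n']

    # Old shape: callable, sentinel, manual counter.
    lnum = 0
    it = iter(lines)
    while True:
        line = next(it, '')
        if not line:  # '' means EOF
            break
        lnum += 1

    # New shape: the iterable drives the loop, numbering starts at 1.
    for lnum, line in enumerate(lines, 1):
        pass
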
@@ -359,10 +347,12 @@ def generate_tokens(readline, use_exact_op_types=False):
                 typ = OP
             yield TokenInfo(typ, token, spos, prefix)

-    if new_line or additional_prefix[-1:] == '\n':
-        end_pos = lnum + 1, 0
-    else:
-        end_pos = lnum, max
+    if contstr:
+        yield TokenInfo(ERRORTOKEN, contstr, contstr_start, prefix)
+        if contstr.endswith('\n'):
+            new_line = True
+
+    end_pos = lnum, max
     # As the last position we just take the maximally possible position. We
     # remove -1 for the last new line.
     for indent in indents[1:]:
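
With EOF now signaled by the for loop simply ending, the pending-state flush moves below it: a still-open string literal is emitted as an ERRORTOKEN, and the tail cut off by this hunk unwinds the remaining indentation levels before the final ENDMARKER. A quick way to observe that path, assuming the module layout used above:

    from jedi.parser import tokenize

    # The string literal is never closed, so after the line loop the
    # leftover contstr is flushed as an ERRORTOKEN before ENDMARKER.
    for typ, string, start_pos, prefix in tokenize.source_tokens('x = "abc'):
        print(typ, repr(string), start_pos)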