
Small tokenizer changes & tokens now have a prefix attribute instead of preceding_whitespace.

Dave Halter
2014-11-27 01:10:20 +01:00
parent 02cb1fef95
commit c7862925f5
2 changed files with 31 additions and 31 deletions
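
In practical terms, the attribute pair (string, _preceding_whitespace) becomes (value, prefix), and the __init__ keyword argument whitespace becomes the positional prefix. A minimal before/after sketch of the API change (illustrative only, using the names from the diff below):

    # Before this commit:
    tok = Token(NAME, 'foo', (3, 4), whitespace='  ')
    tok.string                   # 'foo'
    tok._preceding_whitespace    # '  ' (private attribute)

    # After this commit:
    tok = Token(NAME, 'foo', (3, 4), '  ')
    tok.value                    # 'foo'
    tok.prefix                   # '  ' (now public)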


@@ -37,7 +37,7 @@ tok_name[COMMENT] = 'COMMENT'
 class Token(object):
     """
     The token object is an efficient representation of the structure
-    (type, token, (start_pos_line, start_pos_col, preceding_whitespace)). It has indexer
+    (type, token, (start_pos_line, start_pos_col, prefix)). It has indexer
     methods that maintain compatibility to existing code that expects the above
     structure.
@@ -51,28 +51,27 @@ class Token(object):
     <Token: ('NAME', 'foo', (3, 4, ''))>
     >>> a.start_pos
     (3, 4)
-    >>> a.string
+    >>> a.value
     'foo'
     >>> a._start_pos_col
     4
-    >>> Token(1, u("😷"), (1 ,1, '')).string + "p" == u("😷p")
+    >>> Token(1, u("😷"), (1 ,1, '')).value + "p" == u("😷p")
     True
     """
-    __slots__ = ("type", "string", "_start_pos_line", "_start_pos_col",
-                 "_preceding_whitespace")
+    __slots__ = ("type", "value", "_start_pos_line", "_start_pos_col",
+                 "prefix")
 
-    def __init__(self, type, string, start_pos, whitespace=''):
+    def __init__(self, type, value, start_pos, prefix=''):
         self.type = type
-        self.string = string
+        self.value = value
         self._start_pos_line = start_pos[0]
         self._start_pos_col = start_pos[1]
-        self._preceding_whitespace = whitespace
+        self.prefix = prefix
 
     def __repr__(self):
         typ = tok_name[self.type]
-        content = typ, self.string,\
-            (self._start_pos_line, self._start_pos_col,
-             self._preceding_whitespace)
+        content = typ, self.value,\
+            (self._start_pos_line, self._start_pos_col, self.prefix)
         return "<%s: %s>" % (type(self).__name__, content)
 
     @property
@@ -83,8 +82,8 @@ class Token(object):
     def end_pos(self):
         """Returns end position respecting multiline tokens."""
         end_pos_line = self._start_pos_line
-        lines = self.string.split('\n')
-        if self.string.endswith('\n'):
+        lines = self.value.split('\n')
+        if self.value.endswith('\n'):
             lines = lines[:-1]
             lines[-1] += '\n'
         end_pos_line += len(lines) - 1
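
The multiline arithmetic above can be checked directly. A short doctest-style sketch (assuming the Token class is importable; the first argument is any token-type integer, as in the class docstring):

    t = Token(1, "'''a\nb'''", (1, 0))
    # The value splits into ["'''a", "b'''"], so the token ends one
    # line below where it started:
    assert t.end_pos[0] == 2

    t = Token(1, '\n', (1, 5))
    # A trailing newline is folded back into the last piece, so a
    # NEWLINE token still ends on the line it started on:
    assert t.end_pos[0] == 1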
@@ -98,16 +97,17 @@ class Token(object):
     # Make cache footprint smaller for faster unpickling
     def __getstate__(self):
-        return (self.type, self.string,
+        return (self.type, self.value,
                 self._start_pos_line, self._start_pos_col,
-                self._preceding_whitespace)
+                self.prefix)
 
+    # TODO DELETE this is not needed anymore, I guess. It should not get pickled.
     def __setstate__(self, state):
         self.type = state[0]
-        self.string = state[1]
+        self.value = state[1]
         self._start_pos_line = state[2]
         self._start_pos_col = state[3]
-        self._preceding_whitespace = state[4]
+        self.prefix = state[4]
 
 
 def group(*choices):
@@ -228,12 +228,12 @@ def generate_tokens(readline, line_offset=0):
     numchars = '0123456789'
     contstr = ''
     contline = None
-    ws = ''  # Should never be required, but here for safety
+    prefix = ''  # Should never be required, but here for safety
     while True:  # loop over lines in stream
         line = readline()  # readline returns empty when finished. See StringIO
         if not line:
             if contstr:
-                yield Token(ERRORTOKEN, contstr, contstr_start, whitespace=ws)
+                yield Token(ERRORTOKEN, contstr, contstr_start, prefix)
             break
 
         lnum += 1
@@ -244,7 +244,7 @@ def generate_tokens(readline, line_offset=0):
         if endmatch:
             pos = endmatch.end(0)
             yield Token(STRING, contstr + line[:pos],
-                        contstr_start, whitespace=ws)
+                        contstr_start, prefix)
             contstr = ''
             contline = None
         else:
@@ -264,26 +264,26 @@ def generate_tokens(readline, line_offset=0):
                     pos += 1
                     continue
 
-                ws = pseudomatch.group(1)
+                prefix = pseudomatch.group(1)
                 start, pos = pseudomatch.span(2)
                 spos = (lnum, start)
                 token, initial = line[start:pos], line[start]
 
                 if (initial in numchars or  # ordinary number
                         (initial == '.' and token != '.' and token != '...')):
-                    yield Token(NUMBER, token, spos, whitespace=ws)
+                    yield Token(NUMBER, token, spos, prefix)
                 elif initial in '\r\n':
-                    yield Token(NEWLINE, token, spos, whitespace=ws)
+                    yield Token(NEWLINE, token, spos, prefix)
                 elif initial == '#':
                     assert not token.endswith("\n")
-                    yield Token(COMMENT, token, spos, whitespace=ws)
+                    yield Token(COMMENT, token, spos, prefix)
                 elif token in triple_quoted:
                     endprog = endprogs[token]
                     endmatch = endprog.match(line, pos)
                     if endmatch:  # all on one line
                         pos = endmatch.end(0)
                         token = line[start:pos]
-                        yield Token(STRING, token, spos, whitespace=ws)
+                        yield Token(STRING, token, spos, prefix)
                     else:
                         contstr_start = (lnum, start)  # multiple lines
                         contstr = line[start:]
@@ -300,12 +300,12 @@ def generate_tokens(readline, line_offset=0):
                         contline = line
                         break
                     else:  # ordinary string
-                        yield Token(STRING, token, spos, whitespace=ws)
+                        yield Token(STRING, token, spos, prefix)
                 elif initial in namechars:  # ordinary name
-                    yield Token(NAME, token, spos, whitespace=ws)
+                    yield Token(NAME, token, spos, prefix)
                 elif initial == '\\' and line[start:] == '\\\n':  # continued stmt
                     continue
                 else:
-                    yield Token(OP, token, spos, whitespace=ws)
+                    yield Token(OP, token, spos, prefix)
 
-    yield Token(ENDMARKER, '', (lnum, 0))
+    yield Token(ENDMARKER, '', (lnum, 0), prefix)
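
With these changes every token in the stream carries its leading whitespace. A minimal usage sketch (the module's import path is not shown in this view, so tokenize_module is a stand-in name):

    from io import StringIO
    import tokenize_module  # stand-in for the module in this diff

    code = "x = 1\n"
    for tok in tokenize_module.generate_tokens(StringIO(code).readline):
        # For this input, the OP token '=' arrives with prefix ' ', the
        # space that separated it from 'x' in the source line.
        print(repr(tok.prefix), repr(tok.value), tok.start_pos)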


@@ -73,7 +73,7 @@ class UserContext(object):
         is_first = True
         for tok in gen:
             tok_type = tok.type
-            tok_str = tok.string
+            tok_str = tok.value
             end = tok.end_pos
             self._column_temp = self._line_length - end[1]
             if is_first:
@@ -115,7 +115,7 @@ class UserContext(object):
                 else:
                     if tok_str == '-':
                         next_tok = next(gen)
-                        if next_tok.string == 'e':
+                        if next_tok.value == 'e':
                             gen.push_back(next_tok)
                         else:
                             break
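
The loop above calls gen.push_back() to un-read a token it only peeked at. A minimal sketch of a push-back iterator along those lines (jedi's actual helper may differ in detail):

    class PushBackIterator(object):
        def __init__(self, iterator):
            self.pushes = []
            self.iterator = iterator

        def push_back(self, value):
            self.pushes.append(value)

        def __iter__(self):
            return self

        def __next__(self):
            # Serve pushed-back values first, newest first, then fall
            # through to the wrapped iterator.
            if self.pushes:
                return self.pushes.pop()
            return next(self.iterator)

        next = __next__  # Python 2 compatibility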