forked from VimPlug/jedi
Small tokenizer changes & tokens now have a prefix attribute instead of preceeding_whitespace.
This commit is contained in:
@@ -37,7 +37,7 @@ tok_name[COMMENT] = 'COMMENT'
|
|||||||
class Token(object):
|
class Token(object):
|
||||||
"""
|
"""
|
||||||
The token object is an efficient representation of the structure
|
The token object is an efficient representation of the structure
|
||||||
(type, token, (start_pos_line, start_pos_col, preceding_whitespace)). It has indexer
|
(type, token, (start_pos_line, start_pos_col, prefix)). It has indexer
|
||||||
methods that maintain compatibility to existing code that expects the above
|
methods that maintain compatibility to existing code that expects the above
|
||||||
structure.
|
structure.
|
||||||
|
|
||||||
@@ -51,28 +51,27 @@ class Token(object):
|
|||||||
<Token: ('NAME', 'foo', (3, 4, ''))>
|
<Token: ('NAME', 'foo', (3, 4, ''))>
|
||||||
>>> a.start_pos
|
>>> a.start_pos
|
||||||
(3, 4)
|
(3, 4)
|
||||||
>>> a.string
|
>>> a.value
|
||||||
'foo'
|
'foo'
|
||||||
>>> a._start_pos_col
|
>>> a._start_pos_col
|
||||||
4
|
4
|
||||||
>>> Token(1, u("😷"), (1 ,1, '')).string + "p" == u("😷p")
|
>>> Token(1, u("😷"), (1 ,1, '')).value + "p" == u("😷p")
|
||||||
True
|
True
|
||||||
"""
|
"""
|
||||||
__slots__ = ("type", "string", "_start_pos_line", "_start_pos_col",
|
__slots__ = ("type", "value", "_start_pos_line", "_start_pos_col",
|
||||||
"_preceding_whitespace")
|
"prefix")
|
||||||
|
|
||||||
def __init__(self, type, string, start_pos, whitespace=''):
|
def __init__(self, type, value, start_pos, prefix=''):
|
||||||
self.type = type
|
self.type = type
|
||||||
self.string = string
|
self.value = value
|
||||||
self._start_pos_line = start_pos[0]
|
self._start_pos_line = start_pos[0]
|
||||||
self._start_pos_col = start_pos[1]
|
self._start_pos_col = start_pos[1]
|
||||||
self._preceding_whitespace = whitespace
|
self.prefix = whitespace
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
typ = tok_name[self.type]
|
typ = tok_name[self.type]
|
||||||
content = typ, self.string,\
|
content = typ, self.value,\
|
||||||
(self._start_pos_line, self._start_pos_col,
|
(self._start_pos_line, self._start_pos_col, self.prefix)
|
||||||
self._preceding_whitespace)
|
|
||||||
return "<%s: %s>" % (type(self).__name__, content)
|
return "<%s: %s>" % (type(self).__name__, content)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -83,8 +82,8 @@ class Token(object):
|
|||||||
def end_pos(self):
|
def end_pos(self):
|
||||||
"""Returns end position respecting multiline tokens."""
|
"""Returns end position respecting multiline tokens."""
|
||||||
end_pos_line = self._start_pos_line
|
end_pos_line = self._start_pos_line
|
||||||
lines = self.string.split('\n')
|
lines = self.value.split('\n')
|
||||||
if self.string.endswith('\n'):
|
if self.value.endswith('\n'):
|
||||||
lines = lines[:-1]
|
lines = lines[:-1]
|
||||||
lines[-1] += '\n'
|
lines[-1] += '\n'
|
||||||
end_pos_line += len(lines) - 1
|
end_pos_line += len(lines) - 1
|
||||||
@@ -98,16 +97,17 @@ class Token(object):
|
|||||||
|
|
||||||
# Make cache footprint smaller for faster unpickling
|
# Make cache footprint smaller for faster unpickling
|
||||||
def __getstate__(self):
|
def __getstate__(self):
|
||||||
return (self.type, self.string,
|
return (self.type, self.value,
|
||||||
self._start_pos_line, self._start_pos_col,
|
self._start_pos_line, self._start_pos_col,
|
||||||
self._preceding_whitespace)
|
self.prefix)
|
||||||
|
|
||||||
|
# TODO DELETE this is not needed anymore, I guess. It should not get pickled.
|
||||||
def __setstate__(self, state):
|
def __setstate__(self, state):
|
||||||
self.type = state[0]
|
self.type = state[0]
|
||||||
self.string = state[1]
|
self.value = state[1]
|
||||||
self._start_pos_line = state[2]
|
self._start_pos_line = state[2]
|
||||||
self._start_pos_col = state[3]
|
self._start_pos_col = state[3]
|
||||||
self._preceding_whitespace = state[4]
|
self.prefix = state[4]
|
||||||
|
|
||||||
|
|
||||||
def group(*choices):
|
def group(*choices):
|
||||||
@@ -228,12 +228,12 @@ def generate_tokens(readline, line_offset=0):
|
|||||||
numchars = '0123456789'
|
numchars = '0123456789'
|
||||||
contstr = ''
|
contstr = ''
|
||||||
contline = None
|
contline = None
|
||||||
ws = '' # Should never be required, but here for safety
|
prefix = '' # Should never be required, but here for safety
|
||||||
while True: # loop over lines in stream
|
while True: # loop over lines in stream
|
||||||
line = readline() # readline returns empty when finished. See StringIO
|
line = readline() # readline returns empty when finished. See StringIO
|
||||||
if not line:
|
if not line:
|
||||||
if contstr:
|
if contstr:
|
||||||
yield Token(ERRORTOKEN, contstr, contstr_start, whitespace=ws)
|
yield Token(ERRORTOKEN, contstr, contstr_start, prefix)
|
||||||
break
|
break
|
||||||
|
|
||||||
lnum += 1
|
lnum += 1
|
||||||
@@ -244,7 +244,7 @@ def generate_tokens(readline, line_offset=0):
|
|||||||
if endmatch:
|
if endmatch:
|
||||||
pos = endmatch.end(0)
|
pos = endmatch.end(0)
|
||||||
yield Token(STRING, contstr + line[:pos],
|
yield Token(STRING, contstr + line[:pos],
|
||||||
contstr_start, whitespace=ws)
|
contstr_start, prefix)
|
||||||
contstr = ''
|
contstr = ''
|
||||||
contline = None
|
contline = None
|
||||||
else:
|
else:
|
||||||
@@ -264,26 +264,26 @@ def generate_tokens(readline, line_offset=0):
|
|||||||
pos += 1
|
pos += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ws = pseudomatch.group(1)
|
prefix = pseudomatch.group(1)
|
||||||
start, pos = pseudomatch.span(2)
|
start, pos = pseudomatch.span(2)
|
||||||
spos = (lnum, start)
|
spos = (lnum, start)
|
||||||
token, initial = line[start:pos], line[start]
|
token, initial = line[start:pos], line[start]
|
||||||
|
|
||||||
if (initial in numchars or # ordinary number
|
if (initial in numchars or # ordinary number
|
||||||
(initial == '.' and token != '.' and token != '...')):
|
(initial == '.' and token != '.' and token != '...')):
|
||||||
yield Token(NUMBER, token, spos, whitespace=ws)
|
yield Token(NUMBER, token, spos, prefix)
|
||||||
elif initial in '\r\n':
|
elif initial in '\r\n':
|
||||||
yield Token(NEWLINE, token, spos, whitespace=ws)
|
yield Token(NEWLINE, token, spos, prefix)
|
||||||
elif initial == '#':
|
elif initial == '#':
|
||||||
assert not token.endswith("\n")
|
assert not token.endswith("\n")
|
||||||
yield Token(COMMENT, token, spos, whitespace=ws)
|
yield Token(COMMENT, token, spos, prefix)
|
||||||
elif token in triple_quoted:
|
elif token in triple_quoted:
|
||||||
endprog = endprogs[token]
|
endprog = endprogs[token]
|
||||||
endmatch = endprog.match(line, pos)
|
endmatch = endprog.match(line, pos)
|
||||||
if endmatch: # all on one line
|
if endmatch: # all on one line
|
||||||
pos = endmatch.end(0)
|
pos = endmatch.end(0)
|
||||||
token = line[start:pos]
|
token = line[start:pos]
|
||||||
yield Token(STRING, token, spos, whitespace=ws)
|
yield Token(STRING, token, spos, prefix)
|
||||||
else:
|
else:
|
||||||
contstr_start = (lnum, start) # multiple lines
|
contstr_start = (lnum, start) # multiple lines
|
||||||
contstr = line[start:]
|
contstr = line[start:]
|
||||||
@@ -300,12 +300,12 @@ def generate_tokens(readline, line_offset=0):
|
|||||||
contline = line
|
contline = line
|
||||||
break
|
break
|
||||||
else: # ordinary string
|
else: # ordinary string
|
||||||
yield Token(STRING, token, spos, whitespace=ws)
|
yield Token(STRING, token, spos, prefix)
|
||||||
elif initial in namechars: # ordinary name
|
elif initial in namechars: # ordinary name
|
||||||
yield Token(NAME, token, spos, whitespace=ws)
|
yield Token(NAME, token, spos, prefix)
|
||||||
elif initial == '\\' and line[start:] == '\\\n': # continued stmt
|
elif initial == '\\' and line[start:] == '\\\n': # continued stmt
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
yield Token(OP, token, spos, whitespace=ws)
|
yield Token(OP, token, spos, prefix)
|
||||||
|
|
||||||
yield Token(ENDMARKER, '', (lnum, 0))
|
yield Token(ENDMARKER, '', (lnum, 0), prefix)
|
||||||
|
|||||||
@@ -73,7 +73,7 @@ class UserContext(object):
|
|||||||
is_first = True
|
is_first = True
|
||||||
for tok in gen:
|
for tok in gen:
|
||||||
tok_type = tok.type
|
tok_type = tok.type
|
||||||
tok_str = tok.string
|
tok_str = tok.value
|
||||||
end = tok.end_pos
|
end = tok.end_pos
|
||||||
self._column_temp = self._line_length - end[1]
|
self._column_temp = self._line_length - end[1]
|
||||||
if is_first:
|
if is_first:
|
||||||
@@ -115,7 +115,7 @@ class UserContext(object):
|
|||||||
else:
|
else:
|
||||||
if tok_str == '-':
|
if tok_str == '-':
|
||||||
next_tok = next(gen)
|
next_tok = next(gen)
|
||||||
if next_tok.string == 'e':
|
if next_tok.value == 'e':
|
||||||
gen.push_back(next_tok)
|
gen.push_back(next_tok)
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
|||||||
Reference in New Issue
Block a user