Commit in davidhalter/jedi (mirror of https://github.com/davidhalter/jedi.git):

The tokenizer now includes all newlines and comments in its prefix.
@@ -5,6 +5,7 @@ the interesting information about completion and goto operations.
 """
 import warnings
 from itertools import chain
+import re
 
 from jedi._compatibility import next, unicode, use_metaclass
 from jedi import settings
@@ -579,7 +580,10 @@ class Definition(use_metaclass(CachedMetaClass, BaseDefinition)):
             d = d.get_code()
         finally:
             first_leaf.prefix = old
-        return d.replace('\n', '').replace('\r', '')
+        # Delete comments:
+        d = re.sub('#[^\n]+\n', ' ', d)
+        # Delete multi spaces/newlines
+        return re.sub('\s+', ' ', d).strip()
 
     @property
     def desc_with_module(self):
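With comments and newlines now kept in token prefixes, get_code() can return multi-line text, so the description is scrubbed with the two re.sub calls above. A minimal runnable sketch of that cleanup (the sample string is made up for illustration):

    import re

    d = "def f(a,  # the first argument\n      b):\n"
    d = re.sub(r'#[^\n]+\n', ' ', d)    # delete comments
    d = re.sub(r'\s+', ' ', d).strip()  # collapse multi spaces/newlines
    print(d)  # -> 'def f(a, b):'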
@@ -14,8 +14,8 @@ from __future__ import absolute_import
 import string
 import re
 from io import StringIO
-from token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, NAME, OP,
-                   ERRORTOKEN, NEWLINE, INDENT, DEDENT)
+from jedi.parser.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER,
+                               NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT)
 
 
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
@@ -147,8 +147,11 @@ def source_tokens(source, line_offset=0):
 
 def generate_tokens(readline, line_offset=0):
     """
-    The original stdlib Python version with minor modifications.
-    Modified to not care about dedents.
+    A heavily modified Python standard library tokenizer.
+
+    Additionally to the default information, yields also the prefix of each
+    token. This idea comes from lib2to3. The prefix contains all information
+    that is irrelevant for the parser like newlines in parentheses or comments.
     """
     paren_level = 0  # count parentheses
     indents = [0]
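The new docstring describes the lib2to3-style prefix model. A hand-built sketch of the invariant it aims for (these token tuples are illustrative, not actual jedi output): concatenating prefix + value over the whole stream reproduces the source, because everything the parser ignores survives in some token's prefix.

    # Illustrative (type, value, prefix) stream for "x = 1  # answer\ny = 2\n":
    tokens = [
        ('NAME',    'x',  ''),
        ('OP',      '=',  ' '),
        ('NUMBER',  '1',  ' '),
        ('NEWLINE', '\n', '  # answer'),  # the comment lives in the prefix
        ('NAME',    'y',  ''),
        ('OP',      '=',  ' '),
        ('NUMBER',  '2',  ' '),
        ('NEWLINE', '\n', ''),
    ]
    source = ''.join(prefix + value for _, value, prefix in tokens)
    assert source == 'x = 1  # answer\ny = 2\n'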
@@ -158,6 +161,7 @@ def generate_tokens(readline, line_offset=0):
     contline = None
     new_line = False
     prefix = ''  # Should never be required, but here for safety
+    additional_prefix = ''
     while True:  # loop over lines in stream
         line = readline()  # readline returns empty when finished. See StringIO
         if not line:
@@ -192,7 +196,8 @@ def generate_tokens(readline, line_offset=0):
                 pos += 1
                 continue
 
-            prefix = pseudomatch.group(1)
+            prefix = pseudomatch.group(1) + additional_prefix
+            additional_prefix = ''
             start, pos = pseudomatch.span(2)
             spos = (lnum, start)
             token, initial = line[start:pos], line[start]
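This is the consuming side of the new buffer: anything stashed in additional_prefix by earlier iterations is glued onto the next emitted token's prefix, and the buffer is cleared. A small sketch of just that hand-off (the helper name and values are made up):

    def attach(whitespace, additional_prefix):
        # Mimics: prefix = pseudomatch.group(1) + additional_prefix,
        # followed by the buffer reset.
        return whitespace + additional_prefix, ''

    prefix, additional_prefix = attach('    ', '# note\n')
    print(repr(prefix))  # '    # note\n'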
@@ -213,10 +218,12 @@ def generate_tokens(readline, line_offset=0):
             elif initial in '\r\n':
                 if not new_line and paren_level == 0:
                     yield NEWLINE, token, spos, prefix
+                else:
+                    additional_prefix = prefix + token
                 new_line = True
-            elif initial == '#':
+            elif initial == '#':  # Comments
                 assert not token.endswith("\n")
-                #yield Token(COMMENT, token, spos, prefix)
+                additional_prefix = prefix + token
             elif token in triple_quoted:
                 endprog = endprogs[token]
                 endmatch = endprog.match(line, pos)
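And this is the producing side: a newline inside parentheses is no longer dropped, and a comment no longer vanishes; both are buffered for the next token. A runnable simulation of the branch taken for the newline in "f(1,\n2)" (simplified from the diff, with the tokenizer state inlined):

    paren_level = 1                      # inside "f("
    new_line = False
    additional_prefix = ''

    prefix, token = '', '\n'             # the newline after "1,"
    if not new_line and paren_level == 0:
        pass                             # top level: would yield NEWLINE
    else:
        additional_prefix = prefix + token   # inside parens: buffer it

    # Next token, the literal 2 on the following line:
    prefix = '' + additional_prefix      # pseudomatch.group(1) is empty here
    additional_prefix = ''
    print(repr(prefix))                  # '\n' -- the newline survived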
@@ -59,7 +59,7 @@ class UserContext(object):
            first_line = (tok_str.splitlines() or [''])[0]
            column -= len(first_line)
            # Reverse the token again, so that it is in normal order again.
-           yield typ, tok_str[::-1], (self._line_temp, column), prefix
+           yield typ, tok_str[::-1], (self._line_temp, column), prefix[::-1]
 
    def _calc_path_until_cursor(self, start_pos):
        """
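UserContext tokenizes the text before the cursor in reverse, so everything it yields was matched against reversed source and must be flipped back; now that prefixes carry comments and newlines, they need the same flip. A tiny sketch with illustrative values:

    tok_str = 'eman'    # 'name', as matched in the reversed source
    prefix = '\n ba# '  # ' #ab \n', reversed
    print(tok_str[::-1], repr(prefix[::-1]))  # name ' #ab \n'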
@@ -122,7 +122,7 @@ class UserContext(object):
                break
 
            start_cursor = tok_start_pos
-           string = tok_str + string
+           string = tok_str + prefix + string
            last_type = tok_type
 
        # Don't need whitespace around a statement.
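Finally, _calc_path_until_cursor rebuilds the expression leading up to the cursor by prepending one backwards token at a time, and it now keeps each token's prefix so spacing, comments, and newlines survive in the rebuilt path. A sketch of the assembly (the pairs are illustrative, in the cursor-first order the reverse tokenizer yields them, each prefix being the text that follows its token in normal order):

    string = ''
    for tok_str, prefix in [('b', ''), ('.', ' '), ('a', '')]:
        string = tok_str + prefix + string   # as in the diff
    print(string)  # 'a. b'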