Fix more issues in the fast parser.

This commit is contained in:
Dave Halter
2015-01-29 15:38:38 +01:00
parent 0a537c05c4
commit a221eee02c
3 changed files with 50 additions and 20 deletions

View File

@@ -298,6 +298,7 @@ class FastParser(use_metaclass(CachedFastParser)):
# For testing purposes: It is important that the number of parsers used
# can be minimized. With this variable we can test it.
self.number_parsers_used = 0
self.number_of_splits = 0
self.module.reset_caches()
try:
self._parse(source)
@@ -315,6 +316,7 @@ class FastParser(use_metaclass(CachedFastParser)):
def gen_part():
text = '\n'.join(current_lines)
del current_lines[:]
self.number_of_splits += 1
return text
# Split only new lines. Distinction between \r\n is the tokenizer's
@@ -594,17 +596,11 @@ class FastTokenizer(object):
#self._parser_indent += 1 # new scope: must be higher
#self._new_indent = True
if value != '@':
if self._first_stmt and not self._new_indent:
self._parser_indent = indent
self._first_stmt = False
# Ignore closing parentheses, because they are all
# irrelevant for the indentation.
if value in '([{' and value:
self._parentheses_level += 1
elif value in ')]}' and value:
# Ignore closing parentheses, because they are all
# irrelevant for the indentation.
self._parentheses_level = max(self._parentheses_level - 1, 0)
return current
@@ -612,7 +608,10 @@ class FastTokenizer(object):
if self._first_stmt:
# Continue like nothing has happened, because we want to enter
# the first class/function.
self._first_stmt = False
if self.current[1] != '@':
#if self._first_stmt and not self._new_indent:
#self._parser_indent = indent
self._first_stmt = False
return self.current
else:
self._closed = True
@@ -627,7 +626,10 @@ class FastTokenizer(object):
elif not self._returned_endmarker:
self._returned_endmarker = True
# We're using the current prefix for the endmarker to not lose any
# information.
return ENDMARKER, '', start_pos, self.current[3]
# information. However, we only care about "lost" lines here: the
# prefix of the current line (its indent) will always be included in
# the current line, so everything after the last newline is dropped.
prefix = re.sub('[^\n]+$', '', self.current[3])
return ENDMARKER, '', start_pos, prefix
else:
raise StopIteration

View File

@@ -154,7 +154,11 @@ def generate_tokens(readline, line_offset=0):
numchars = '0123456789'
contstr = ''
contline = None
new_line = False
# We start with a newline. This makes indent at the first position
# possible. It's not valid Python, but still better than an INDENT in the
# second line (and not in the first). This makes quite a few things in
# Jedi's fast parser possible.
new_line = True
prefix = '' # Should never be required, but here for safety
additional_prefix = ''
while True: # loop over lines in stream

View File

@@ -59,13 +59,17 @@ def test_carriage_return_splitting():
assert [n.value for lst in p.module.names_dict.values() for n in lst] == ['Foo']
def check_fp(src, number_parsers_used):
def check_fp(src, number_parsers_used, number_of_splits=None):
if number_of_splits is None:
number_of_splits = number_parsers_used
p = FastParser(load_grammar(), u(src))
cache.save_parser(None, None, p, pickling=False)
# TODO Don't change get_code, the whole thing should be the same.
# -> Need to refactor the parser first, though.
assert src == p.module.get_code()
assert p.number_of_splits == number_of_splits
assert p.number_parsers_used == number_parsers_used
return p.module
@@ -77,18 +81,18 @@ def test_change_and_undo():
# Parse the function and a.
check_fp(func_before + 'a', 2)
# Parse just b.
check_fp(func_before + 'b', 1)
check_fp(func_before + 'b', 1, 2)
# b has changed to a again, so parse that.
check_fp(func_before + 'a', 1)
check_fp(func_before + 'a', 1, 2)
# Same as before, no parsers should be used.
check_fp(func_before + 'a', 0)
check_fp(func_before + 'a', 0, 2)
# Getting rid of an old parser: Still no parsers used.
check_fp('a', 0)
check_fp('a', 0, 1)
# Now the file has completely changed and we need to parse.
check_fp('b', 1)
check_fp('b', 1, 1)
# And again.
check_fp('a', 1)
check_fp('a', 1, 1)
def test_positions():
@@ -100,7 +104,7 @@ def test_positions():
assert m.start_pos == (1, 0)
assert m.end_pos == (3, 1)
m = check_fp('a', 0)
m = check_fp('a', 0, 1)
assert m.start_pos == (1, 0)
assert m.end_pos == (1, 1)
@@ -156,6 +160,26 @@ def test_func_with_if():
check_fp(src, 1)
# Fast-parser test: the source is a class whose only method carries a
# decorator. check_fp is called with number_parsers_used=2; because
# number_of_splits is left at its default, check_fp also expects 2 splits.
def test_decorator():
src = dedent("""\
class Decorator():
@memoize
def dec(self, a):
return a
""")
check_fp(src, 2)
# Fast-parser test: the source is a function (memoize) that defines and
# returns a nested function (wrapper). check_fp is called with
# number_parsers_used=3; number_of_splits is left at its default, so
# check_fp expects the same number of splits.
def test_nested_funcs():
src = dedent("""\
def memoize(func):
def wrapper(*args, **kwargs):
return func(*args, **kwargs)
return wrapper
""")
check_fp(src, 3)
def test_incomplete_function():
source = '''return ImportErr'''