Remove the line_offset calculation. We can now also remove it from tokenize. With the position_modifier we have enough tools to change a position; we don't need to do that in tokenize.py.

Dave Halter
2015-01-29 17:57:01 +01:00
parent a3cdec819e
commit 413da3b790
3 changed files with 32 additions and 12 deletions
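
A minimal runnable sketch of the idea behind the commit (FastTokenizer and position_modifier are names from the diff below; the toy classes and helper here are illustrative only): tokenize every chunk as if it started at line 1, then shift the whole sub-tree in one place instead of baking the offset into tokenization.

    class PositionModifier:
        def __init__(self):
            self.line = 0  # lines to add to every position in one sub-tree

    def tokenize_chunk(chunk):
        # Stand-in for FastTokenizer(chunk, 0): positions are always
        # relative to the chunk itself, starting at line 1.
        for line_nr, line in enumerate(chunk.splitlines(), 1):
            yield (line_nr, 0), line

    chunks = ["def a():\n    pass\n", "def b():\n    pass\n"]
    line_offset = 0
    for chunk in chunks:
        # One modifier per chunk shifts all of its positions at once.
        modifier = PositionModifier()
        modifier.line = line_offset
        for (line, col), text in tokenize_chunk(chunk):
            absolute = (line + modifier.line, col)
            print(absolute, repr(text))
        line_offset += chunk.count('\n')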

View File

@@ -307,22 +307,27 @@ class Parser(object):
             if typ == token.OP:
                 typ = token.opmap[value]
-            #print(token.tok_name[typ], repr(value), start_pos)
+            print(token.tok_name[typ], repr(value), start_pos, repr(prefix))
             yield typ, value, prefix, start_pos

     def __repr__(self):
         return "<%s: %s>" % (type(self).__name__, self.module)

     def remove_last_newline(self):
+        """
+        In all of this we need to work with _start_pos, because if we worked
+        with start_pos, we would need to check the position_modifier as well
+        (which is accounted for in the start_pos property).
+        """
         endmarker = self.module.children[-1]
         # The newline is either in the endmarker as a prefix or the previous
         # leaf as a newline token.
+        print('REMOVE', endmarker.start_pos)
         if endmarker.prefix.endswith('\n'):
             endmarker.prefix = endmarker.prefix[:-1]
             last_line = re.sub('.*\n', '', endmarker.prefix)
-            endmarker.start_pos = endmarker.start_pos[0] - 1, len(last_line)
+            endmarker._start_pos = endmarker._start_pos[0] - 1, len(last_line)
         else:
+            print(self.error_statement_stacks)
             try:
                 newline = endmarker.get_previous()
             except IndexError:
@@ -334,12 +339,12 @@ class Parser(object):
                 else:
                     assert newline.value == '\n'
                     newline.value = ''
-                    if self._last_failed_start_pos > newline.start_pos:
+                    if self._last_failed_start_pos > newline._start_pos:
                         # It may be the case that there was a syntax error in a
                         # function. In that case error correction removes the
                         # right newline. So we use the previously assigned
                         # _last_failed_start_pos variable to account for that.
-                        endmarker.start_pos = self._last_failed_start_pos
+                        endmarker._start_pos = self._last_failed_start_pos
                     else:
-                        endmarker.start_pos = newline.start_pos
+                        endmarker._start_pos = newline._start_pos
                 break
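
A minimal sketch of the relationship the new docstring describes, assuming simplified classes (position_modifier, _start_pos and start_pos are names from this commit; everything else is illustrative):

    class PositionModifier:
        # Per-parser shift; the fast parser moves a whole sub-tree by
        # changing this single value.
        def __init__(self):
            self.line = 0

    class Leaf:
        def __init__(self, position_modifier, start_pos):
            self.position_modifier = position_modifier
            self._start_pos = start_pos  # raw (line, column) from the tokenizer

        @property
        def start_pos(self):
            # The public position adds the modifier, which is why
            # remove_last_newline compares and assigns _start_pos directly:
            # going through start_pos would apply the shift twice.
            line, column = self._start_pos
            return line + self.position_modifier.line, column

    leaf = Leaf(PositionModifier(), (1, 0))
    leaf.position_modifier.line = 10
    assert leaf.start_pos == (11, 0) and leaf._start_pos == (1, 0)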

View File

@@ -240,7 +240,7 @@ class ParserNode(object):
         # Changing the line offsets is very important, because if they don't
         # fit, all the start_pos values will be wrong.
         m = node.parser.module
-        node.parser.position_modifier.line = line_offset + 1 - m.start_pos[0]
+        node.parser.position_modifier.line = line_offset
         self._fast_module.modules.append(m)

         node.parent = self
@@ -401,8 +401,7 @@ class FastParser(use_metaclass(CachedFastParser)):
         for code_part in self._split_parts(source):
             if not is_first:
-                print('OFF', line_offset + 2,
-                      self.current_node.parser.module.end_pos[0])
+                print('OFF', line_offset + 1, self.current_node.parser.module.end_pos)
                 #import pdb; pdb.set_trace()
             if is_first or line_offset + 1 == self.current_node.parser.module.end_pos[0]:
                 print(repr(code_part))
@@ -483,15 +482,16 @@ class FastParser(use_metaclass(CachedFastParser)):
                     nodes.remove(node)
                     break
         else:
-            tokenizer = FastTokenizer(parser_code, line_offset)
+            tokenizer = FastTokenizer(parser_code, 0)
             self.number_parsers_used += 1
             p = Parser(self._grammar, parser_code, self.module_path, tokenizer=tokenizer)
             #p.module.parent = self.module  # With the new parser this is not
             # necessary anymore?
             node = ParserNode(self.module)

-        end = p.module.end_pos[0]
+        end = line_offset + p.module.end_pos[0]
-        print('\nACTUALLY PARSING', p.module.end_pos, repr(source), len(self._lines))
+        print('\nACTUALLY PARSING', p.module.end_pos, repr(source),
+              len(self._lines), line_offset)
         if not (len(self._lines) == end):
             # We don't keep the last line, except if were done. A newline
             # ends on the next line, which is part of the next parser. But
@@ -510,6 +510,7 @@ class FastTokenizer(object):
     Breaks when certain conditions are met, i.e. a new function or class opens.
     """
     def __init__(self, source, line_offset=0):
+        # TODO remove the whole line_offset stuff, it's not used anymore.
        self.source = source
        self._gen = source_tokens(source, line_offset)
        self._closed = False
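
Since the sub-parsers now report chunk-relative positions, absolute line numbers are computed at the call site, as in the new `end = line_offset + p.module.end_pos[0]`. A worked example with made-up numbers:

    # Suppose the current chunk starts right after absolute line 10,
    # i.e. line_offset == 10, and the chunk spans 5 lines, so the
    # sub-parser's module.end_pos is the chunk-relative (5, 0).
    line_offset = 10
    end_pos = (5, 0)
    end = line_offset + end_pos[0]
    assert end == 15  # the absolute end line
    # Setting position_modifier.line = 10 shifts every start_pos in
    # that sub-tree by the same amount, keeping both views consistent.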

View File

@@ -180,6 +180,20 @@ def test_nested_funcs():
     check_fp(src, 3)


+def test_func_with_for_and_comment():
+    # The first newline is important, leave it.
+    src = dedent("""\
+    def func():
+        pass
+    for a in [1]:
+        # COMMENT
+        a""")
+    check_fp(src, 2, 3)
+    check_fp('a\n' + src, 1, 3)
+
+
 def test_incomplete_function():
     source = '''return ImportErr'''