Fix issues with error correction / newline correction.

2026-05-17 13:59:37 +08:00 · 2015-01-27 12:24:54 +01:00
parent 62e45aa42b
commit 5e8f8f7a8d
4 changed files with 40 additions and 13 deletions
@@ -109,6 +109,7 @@ class Parser(object):
        self.global_names = []
        self._omit_dedent = 0
        self._last_failed_start_pos = (0, 0)
        # TODO do print absolute import detection here.
        #try:
@@ -294,6 +295,8 @@ class Parser(object):
            err = ErrorStatement(failed_stack, value, start_pos)
            self.error_statement_stacks.append(err)
        self._last_failed_start_pos = start_pos
        stack[start_index:] = []
    def _tokenize(self, tokenizer):
@@ -319,6 +322,7 @@ class Parser(object):
            last_line = re.sub('.*\n', '', endmarker.prefix)
            endmarker.start_pos = endmarker.start_pos[0] - 1, len(last_line)
        else:
            print(self.error_statement_stacks)
            try:
                newline = endmarker.get_previous()
            except IndexError:
@@ -330,5 +334,12 @@ class Parser(object):
                else:
                    assert newline.value == '\n'
                    newline.value = ''
-                    endmarker.start_pos = newline.start_pos
+                    if self._last_failed_start_pos > newline.start_pos:
                        # It may be the case that there was a syntax error in a
                        # function. In that case error correction removes the
                        # right newline. So we use the previously assigned
                        # _last_failed_start_pos variable to account for that.
                        endmarker.start_pos = self._last_failed_start_pos
                    else:
                        endmarker.start_pos = newline.start_pos
                    break
@@ -156,6 +156,17 @@ class Leaf(Base):
    def start_pos(self):
        return self._start_pos[0] + self.position_modifier.line, self._start_pos[1]
    @start_pos.setter
    def start_pos(self, value):
        """
        Set the start position from a ``(line, column)`` tuple.

        The ``start_pos`` getter adds ``position_modifier.line`` to the stored
        line, so that offset is subtracted here. Without the subtraction, a
        position that was just assigned would read back shifted by the
        modifier's line offset.
        """
        self._start_pos = value[0] - self.position_modifier.line, value[1]
    @property
    def end_pos(self):
        """
        The ``(line, column)`` position right after this leaf's value.

        The line is the stored start line plus ``position_modifier.line``;
        the column is the stored start column plus the length of ``value``.
        """
        stored_line, stored_column = self._start_pos[0], self._start_pos[1]
        line = stored_line + self.position_modifier.line
        column = stored_column + len(self.value)
        return line, column
    def get_previous(self):
        """
        Returns the previous leaf in the parser tree.
@@ -178,17 +189,6 @@ class Leaf(Base):
            except AttributeError:  # A Leaf doesn't have children.
                return node
    @start_pos.setter
    def start_pos(self, value):
        """
        Set the start position from a ``(line, column)`` tuple.

        The ``start_pos`` getter adds ``position_modifier.line`` to the stored
        line, so that offset is subtracted here. Without the subtraction, a
        position that was just assigned would read back shifted by the
        modifier's line offset.
        """
        self._start_pos = value[0] - self.position_modifier.line, value[1]
    @property
    def end_pos(self):
        """
        The ``(line, column)`` position right after this leaf's value.

        The line is the stored start line plus ``position_modifier.line``;
        the column is the stored start column plus the length of ``value``.
        """
        stored_line, stored_column = self._start_pos[0], self._start_pos[1]
        line = stored_line + self.position_modifier.line
        column = stored_column + len(self.value)
        return line, column
    def get_code(self):
        """
        Return the source text of this leaf: its prefix followed directly by
        its value.
        """
        leading, token = self.prefix, self.value
        return leading + token
@@ -287,7 +287,7 @@ class UserContextParser(object):
                print(scope, scope.subscopes)
                for s in scope.subscopes + list(reversed(scope.flows)):
                    if isinstance(s, (pr.Scope, pr.Flow)):
-                        print(s, self._position, s.end_pos, s.children,
+                        print(s, self._position, 'X', s.start_pos, s.end_pos, s.children,
                                s.children[-2])
                        if s.start_pos <= self._position <= s.end_pos:
                            if isinstance(s, pr.Flow):
@@ -163,3 +163,19 @@ def test_newline_positions():
    new_line = endmarker.get_previous()
    assert new_line.start_pos == (1, 1)
    assert new_line.end_pos == (2, 0)
 def test_end_pos_error_correction():
    """
    Source code without a trailing newline is given one, because the Python
    grammar needs it; that newline is removed again afterwards. The end_pos
    must still be right, even when something breaks in the parser and error
    correction kicks in.
    """
    source = 'def x():\n .'
    module = Parser(load_grammar(), source).module
    funcdef = module.children[0]
    assert funcdef.type == 'funcdef'
    # The function's end_pos is not exactly correct, but that is ok, because
    # it doesn't make a difference at all. What matters here is that the
    # module's end_pos is correct!
    assert funcdef.end_pos == (3, 0)
    assert module.end_pos == (2, 2)