Rewrite the fuzz diff parser to cache errors (so we can re-run those)

2026-03-18 04:22:44 +08:00 · 2019-01-05 14:05:19 +01:00
parent c1846dd082
commit 5e6d5dec59
2 changed files with 119 additions and 45 deletions
--- a/test/fuzz_diff_parser.py
+++ b/test/fuzz_diff_parser.py
@@ -1,12 +1,11 @@
 """
 Usage:
-  fuzz_diff_parser.py [--pdb|--ipdb] [-l] [-n=<nr>] [-x=<nr>] [--record=<file>] random [<path>]
-  fuzz_diff_parser.py [--pdb|--ipdb] [-l] [--record=<file>] redo
+  fuzz_diff_parser.py [--pdb|--ipdb] [-l] [-n=<nr>] [-x=<nr>] random [<path>]
+  fuzz_diff_parser.py [--pdb|--ipdb] [-l] redo
  fuzz_diff_parser.py -h | --help

 Options:
  -h --help              Show this screen
-  --record=<file>        Exceptions are recorded in here [default: record.json]
  -n, --maxtries=<nr>    Maximum of random tries [default: 100]
  -x, --changes=<nr>     Amount of changes to be done to a file per try [default: 2]
  -l, --logging          Prints all the logs
@@ -19,6 +18,7 @@ import logging
 import sys
 import os
 import random
+import pickle

 from docopt import docopt

@@ -36,56 +36,118 @@ def find_python_files_in_tree(file_path):
                yield os.path.join(root, name)


-def generate_line_modification(code, change_count):
-    def random_line(include_end=False):
-        return random.randint(0, len(lines) - (not include_end))
+class LineDeletion:
+    def __init__(self, line_nr):
+        self.line_nr = line_nr

-    lines = split_lines(code, keepends=True)
-    for _ in range(change_count):
-        if not lines:
-            break
+    def apply(self, code_lines):
+        del code_lines[self.line_nr]

-        if random.choice([False, True]):
-            # Deletion
-            del lines[random_line()]
-        else:
-            # Copy / Insertion
-            lines.insert(
+
+class LineCopy:
+    def __init__(self, copy_line, insertion_line):
+        self._copy_line = copy_line
+        self._insertion_line = insertion_line
+
+    def apply(self, code_lines):
+        code_lines.insert(
+            self._insertion_line,
+            # Use some line from the file. This doesn't feel totally
+            # random, but for the diff parser it will feel like it.
+            code_lines[self._copy_line]
+        )
+
+
+class FileModification:
+    @classmethod
+    def generate(cls, code_lines, change_count):
+        return cls(list(cls._generate_line_modifications(code_lines, change_count)))
+
+    @staticmethod
+    def _generate_line_modifications(lines, change_count):
+        def random_line(include_end=False):
+            return random.randint(0, len(lines) - (not include_end))
+
+        lines = list(lines)
+        for _ in range(change_count):
+            if not lines:
+                break
+
+            if random.choice([False, True]):
+                l = LineDeletion(random_line())
+            else:
+                # Copy / Insertion
                # Make it possible to insert into the first and the last line
-                random_line(include_end=True),
-                # Use some line from the file. This doesn't feel totally
-                # random, but for the diff parser it will feel like it.
-                lines[random_line()]
-            )
-    return ''.join(lines)
+                l = LineCopy(random_line(), random_line(include_end=True))
+            l.apply(lines)
+            yield l
+
+    def __init__(self, modification_list):
+        self._modification_list = modification_list
+
+    def _apply(self, code_lines):
+        changed_lines = list(code_lines)
+        for modification in self._modification_list:
+            modification.apply(changed_lines)
+        return ''.join(changed_lines)
+
+    def run(self, grammar, code_lines):
+        code = ''.join(code_lines)
+        modified_code = self._apply(code_lines)
+
+        grammar.parse(code, diff_cache=True)
+        grammar.parse(modified_code, diff_cache=True)
+        # Also check if it's possible to "revert" the changes.
+        grammar.parse(code, diff_cache=True)


-def run(path, maxtries, debugger, change_count):
-    grammar = parso.load_grammar()
-    print("Checking %s" % path)
-    with open(path) as f:
-        code = f.read()
-    try:
-        for _ in range(maxtries):
-            grammar.parse(code, diff_cache=True)
-            code2 = generate_line_modification(code, change_count)
-            grammar.parse(code2, diff_cache=True)
-            print('.', end='')
-            sys.stdout.flush()
-        print()
-    except Exception:
-        print("Issue in file: %s" % path)
-        if debugger:
-            einfo = sys.exc_info()
-            pdb = __import__(debugger)
-            pdb.post_mortem(einfo[2])
-        raise
+class FileTests:
+    def __init__(self, file_path, test_count, change_count):
+        self._path = file_path
+        with open(file_path) as f:
+            code = f.read()
+        self._code_lines = split_lines(code, keepends=True)
+        self._test_count = test_count
+        self._change_count = change_count
+
+        # Every generated FileModification is kept here, so that a pickled
+        # FileTests object can replay exactly the same changes in `redo`.
+        self._file_modifications = []
+
+    def _run(self, grammar, file_modifications, debugger):
+        try:
+            print("Checking %s" % self._path)
+            for fm in file_modifications:
+                fm.run(grammar, self._code_lines)
+                print('.', end='')
+                sys.stdout.flush()
+            print()
+        except Exception:
+            print("Issue in file: %s" % self._path)
+            # Offer the post-mortem debugger first; re-raise afterwards.
+            if debugger:
+                einfo = sys.exc_info()
+                pdb = __import__(debugger)
+                pdb.post_mortem(einfo[2])
+            raise
+
+    def redo(self, grammar, debugger):
+        self._run(grammar, self._file_modifications, debugger)
+
+    def run(self, grammar, debugger):
+        def iterate():
+            for _ in range(self._test_count):
+                fm = FileModification.generate(self._code_lines, self._change_count)
+                self._file_modifications.append(fm)
+                yield fm
+
+        self._run(grammar, iterate(), debugger)


 def main(arguments):
    debugger = 'pdb' if arguments['--pdb'] else \
               'ipdb' if arguments['--ipdb'] else None
-    record = arguments['--record']
+    redo_file = os.path.join(os.path.dirname(__file__), 'fuzz-redo.pickle')

    if arguments['--logging']:
        root = logging.getLogger()
@@ -95,15 +157,26 @@ def main(arguments):
        ch.setLevel(logging.DEBUG)
        root.addHandler(ch)

+    grammar = parso.load_grammar()
    parso.python.diff.DEBUG_DIFF_PARSER = True
    if arguments['redo']:
-        raise NotImplementedError("This has not yet been implemented")
+        with open(redo_file, 'rb') as f:
+            file_tests_obj = pickle.load(f)
+        file_tests_obj.redo(grammar, debugger)
    elif arguments['random']:
        # A random file is used to do diff parser checks if no file is given.
        # This helps us to find errors in a lot of different files.
        file_path_generator = find_python_files_in_tree(arguments['<path>'] or '.')
        path = next(file_path_generator)
-        run(path, int(arguments['--maxtries']), debugger, int(arguments['--changes']))
+        file_tests_obj = FileTests(
+            path, int(arguments['--maxtries']), int(arguments['--changes'])
+        )
+        try:
+            file_tests_obj.run(grammar, debugger)
+        except Exception:
+            with open(redo_file, 'wb') as f:
+                pickle.dump(file_tests_obj, f)
+            raise
    else:
        raise NotImplementedError('Command is not implemented')