Feature: The diff parser fuzzer is now able to use random Python fragments

This hopefully leads to the fuzzer finding more and faster issues in the diff parser.
2025-12-06 12:54:29 +08:00 · 2019-01-13 16:00:36 +01:00
parent eaee2b9ca0
commit f80d9de7a0
1 changed files with 27 additions and 7 deletions
--- a/test/fuzz_diff_parser.py
+++ b/test/fuzz_diff_parser.py
@@ -34,6 +34,18 @@ from docopt import docopt
 import parso
 from parso.utils import split_lines

+_latest_grammar = parso.load_grammar(version='3.8')
+_python_reserved_strings = tuple(
+    # Keywords are ususally only interesting in combination with spaces after
+    # them. We don't put a space before keywords, to avoid indentation errors.
+    s + (' ' if s.isalpha() else '')
+    for s in _latest_grammar._pgen_grammar.reserved_syntax_strings.keys()
+)
+_random_python_fragments = _python_reserved_strings + (
+    ' ', '\t', '\n', '\r', '\f', 'f"', 'F"""', "fr'", "RF'''", '"', '"""', "'",
+    "'''", ';', ' some_random_word ', '\\'
+)
+

 def find_python_files_in_tree(file_path):
    if not os.path.isdir(file_path):
@@ -51,7 +63,7 @@ class LineReplacement:
        self._new_line = new_line

    def apply(self, code_lines):
-        #print(repr(self._new_line))
+        # print(repr(self._new_line))
        code_lines[self._line_nr] = self._new_line


@@ -92,23 +104,31 @@ class FileModification:
            if not lines:
                break

-            rand = random.randint(1, 3)
+            rand = random.randint(1, 4)
            if rand == 1:
                l = LineDeletion(random_line())
            elif rand == 2:
                # Copy / Insertion
                # Make it possible to insert into the first and the last line
                l = LineCopy(random_line(), random_line(include_end=True))
-            elif rand == 3:
+            elif rand in (3, 4):
                # Modify a line in some weird random ways.
                line_nr = random_line()
                line = lines[line_nr]
                column = random.randint(0, len(line))
+                random_string = ''
+                for _ in range(random.randint(1, 7)):
+                    if rand == 3:
                        # The lower characters cause way more issues.
                        unicode_range = 0x1f if random.randint(0, 1) else 0x3000
-                random_string = ''.join(chr(random.randint(0, unicode_range)) for _ in range(5))
+                        random_string += chr(random.randint(0, unicode_range))
+                    else:
+                        # These insertions let us understand how random
+                        # keyword/operator insertions work. Theoretically this
+                        # could also be done with unicode insertions, but the
+                        # fuzzer is just way more effective here.
+                        random_string += random.choice(_random_python_fragments)
                l = LineReplacement(line_nr, line[:column] + random_string + line[column:])
-            l.apply(lines)
            yield l

    def __init__(self, modification_list):