Feature: The diff parser fuzzer is now able to use random Python fragments

This hopefully leads to the fuzzer finding more issues in the diff parser,
and finding them faster.
This commit is contained in:
Dave Halter
2019-01-13 16:00:36 +01:00
parent eaee2b9ca0
commit f80d9de7a0

View File

@@ -34,6 +34,18 @@ from docopt import docopt
import parso import parso
from parso.utils import split_lines from parso.utils import split_lines
# Grammar that supplies the reserved strings below; loaded for Python 3.8.
_latest_grammar = parso.load_grammar(version='3.8')
# Every reserved syntax string of that grammar, collected into a tuple. Purely
# alphabetic entries get one trailing space appended; all others are unchanged.
_python_reserved_strings = tuple(
# Keywords are usually only interesting in combination with spaces after
# them. We don't put a space before keywords, to avoid indentation errors.
s + (' ' if s.isalpha() else '')
# NOTE(review): `_pgen_grammar` is a private attribute of the grammar object;
# presumably its layout can change between parso releases -- confirm on upgrade.
for s in _latest_grammar._pgen_grammar.reserved_syntax_strings.keys()
)
# Pool of insertable snippets: the reserved strings above plus whitespace
# characters (space, tab, newline, carriage return, form feed), prefixed
# string openers, bare single and triple quotes, a semicolon, a space-padded
# identifier and a single backslash.
_random_python_fragments = _python_reserved_strings + (
' ', '\t', '\n', '\r', '\f', 'f"', 'F"""', "fr'", "RF'''", '"', '"""', "'",
"'''", ';', ' some_random_word ', '\\'
)
def find_python_files_in_tree(file_path): def find_python_files_in_tree(file_path):
if not os.path.isdir(file_path): if not os.path.isdir(file_path):
@@ -51,7 +63,7 @@ class LineReplacement:
self._new_line = new_line self._new_line = new_line
def apply(self, code_lines): def apply(self, code_lines):
#print(repr(self._new_line)) # print(repr(self._new_line))
code_lines[self._line_nr] = self._new_line code_lines[self._line_nr] = self._new_line
@@ -92,23 +104,31 @@ class FileModification:
if not lines: if not lines:
break break
rand = random.randint(1, 3) rand = random.randint(1, 4)
if rand == 1: if rand == 1:
l = LineDeletion(random_line()) l = LineDeletion(random_line())
elif rand == 2: elif rand == 2:
# Copy / Insertion # Copy / Insertion
# Make it possible to insert into the first and the last line # Make it possible to insert into the first and the last line
l = LineCopy(random_line(), random_line(include_end=True)) l = LineCopy(random_line(), random_line(include_end=True))
elif rand == 3: elif rand in (3, 4):
# Modify a line in some weird random ways. # Modify a line in some weird random ways.
line_nr = random_line() line_nr = random_line()
line = lines[line_nr] line = lines[line_nr]
column = random.randint(0, len(line)) column = random.randint(0, len(line))
random_string = ''
for _ in range(random.randint(1, 7)):
if rand == 3:
# The lower characters cause way more issues. # The lower characters cause way more issues.
unicode_range = 0x1f if random.randint(0, 1) else 0x3000 unicode_range = 0x1f if random.randint(0, 1) else 0x3000
random_string = ''.join(chr(random.randint(0, unicode_range)) for _ in range(5)) random_string += chr(random.randint(0, unicode_range))
else:
# These insertions let us understand how random
# keyword/operator insertions work. Theoretically this
# could also be done with unicode insertions, but the
# fuzzer is just way more effective here.
random_string += random.choice(_random_python_fragments)
l = LineReplacement(line_nr, line[:column] + random_string + line[column:]) l = LineReplacement(line_nr, line[:column] + random_string + line[column:])
l.apply(lines)
yield l yield l
def __init__(self, modification_list): def __init__(self, modification_list):