Change fuzzer: Add ways to not always use correct parse input

This commit is contained in:
Dave Halter
2019-01-20 18:18:13 +01:00
parent 0da0a8655a
commit 069c08883a

View File

@@ -103,7 +103,11 @@ class LineCopy:
class FileModification: class FileModification:
@classmethod @classmethod
def generate(cls, code_lines, change_count): def generate(cls, code_lines, change_count):
return cls(list(cls._generate_line_modifications(code_lines, change_count))) return cls(
list(cls._generate_line_modifications(code_lines, change_count)),
# Work with changed trees more often than with normal ones: the
# original code is only checked for roughly 20% of the modifications.
check_original=random.random() > 0.8,
)
@staticmethod @staticmethod
def _generate_line_modifications(lines, change_count): def _generate_line_modifications(lines, change_count):
@@ -129,8 +133,8 @@ class FileModification:
line = lines[line_nr] line = lines[line_nr]
column = random.randint(0, len(line)) column = random.randint(0, len(line))
random_string = '' random_string = ''
for _ in range(random.randint(1, 7)): for _ in range(random.randint(1, 3)):
if rand == 3: if random.random() > 0.8:
# The lower characters cause way more issues. # The lower characters cause way more issues.
unicode_range = 0x1f if random.randint(0, 1) else 0x3000 unicode_range = 0x1f if random.randint(0, 1) else 0x3000
random_string += chr(random.randint(0, unicode_range)) random_string += chr(random.randint(0, unicode_range))
@@ -140,12 +144,22 @@ class FileModification:
# could also be done with unicode insertions, but the # could also be done with unicode insertions, but the
# fuzzer is just way more effective here. # fuzzer is just way more effective here.
random_string += random.choice(_random_python_fragments) random_string += random.choice(_random_python_fragments)
l = LineReplacement(line_nr, line[:column] + random_string + line[column:]) if random.random() > 0.5:
# In this case we insert at a very random place that
# probably breaks syntax.
line = line[:column] + random_string + line[column:]
else:
# Here we have a better chance of not breaking syntax, because
# we replace the whole line with a fragment that has
# indentation.
line = ' ' * random.randint(0, 12) + random_string + '\n'
l = LineReplacement(line_nr, line)
l.apply(lines) l.apply(lines)
yield l yield l
def __init__(self, modification_list): def __init__(self, modification_list, check_original):
self._modification_list = modification_list self._modification_list = modification_list
self._check_original = check_original
def _apply(self, code_lines): def _apply(self, code_lines):
changed_lines = list(code_lines) changed_lines = list(code_lines)
@@ -159,19 +173,25 @@ class FileModification:
modified_code = ''.join(modified_lines) modified_code = ''.join(modified_lines)
if print_code: if print_code:
print('Original:') if self._check_original:
_print_copyable_lines(code_lines) print('Original:')
_print_copyable_lines(code_lines)
print('\nModified:') print('\nModified:')
_print_copyable_lines(modified_lines) _print_copyable_lines(modified_lines)
print() print()
m = grammar.parse(code, diff_cache=True) if self._check_original:
start1 = _get_first_error_start_pos_or_none(m) m = grammar.parse(code, diff_cache=True)
m = grammar.parse(modified_code, diff_cache=True) start1 = _get_first_error_start_pos_or_none(m)
# Also check if it's possible to "revert" the changes.
m = grammar.parse(code, diff_cache=True) grammar.parse(modified_code, diff_cache=True)
start2 = _get_first_error_start_pos_or_none(m)
assert start1 == start2, (start1, start2) if self._check_original:
# Also check if it's possible to "revert" the changes.
m = grammar.parse(code, diff_cache=True)
start2 = _get_first_error_start_pos_or_none(m)
assert start1 == start2, (start1, start2)
class FileTests: class FileTests: