Improve .gitignore handling

At present, .gitignore patterns not starting with '/' are classified
as "ignored names" (as opposed to "ignored paths") and are not used for
filtering directories. However, according to the spec [1], the situation
is a bit different: all patterns apply to directories (and those
ending with '/' apply to directories only). Besides that, there are two
kinds of patterns: those that match only relative to the directory where
the defining .gitignore is located (they must contain a '/' at the
beginning or in the middle), which we call "absolute", and those that
also match in all subdirectories under the directory where the defining
.gitignore is located (they must not contain '/' or contain only a
trailing '/'), which we call "relative".

This commit implements handling of both "absolute" and "relative"
.gitignore patterns according to the spec. "Absolute" patterns are
handled mostly like `ignored_paths` were handled in the previous
implementation. "Relative" patterns are collected into a distinct set
containing `(defining_gitignore_dir, pattern)` tuples. For each
traversed `root_folder_io`, all applicable "relative" patterns are
expanded into a set of plain paths, which are then used for filtering
`folder_io`s.

While at it, also fix some minor issues. Explicitly skip negated
(starting with '!') and wildcard (containing '*') patterns, since we
don't handle them correctly anyway. Also, use '/' as the path separator
instead of `os.path.sep` when dealing with .gitignore, since the spec
explicitly says that '/' must be used on all platforms.

[1] https://git-scm.com/docs/gitignore
This commit is contained in:
Mikhail Rudenko
2022-08-21 21:12:32 +03:00
parent 7ff0d2d595
commit cfb7e300af

View File

@@ -180,26 +180,34 @@ def _check_fs(inference_state, file_io, regex):
return m.as_context() return m.as_context()
def gitignored_lines(folder_io, file_io): def gitignored_paths(folder_io, file_io):
ignored_paths = set() ignored_paths_abs = set()
ignored_names = set() ignored_paths_rel = set()
for l in file_io.read().splitlines(): for l in file_io.read().splitlines():
if not l or l.startswith(b'#'): if not l or l.startswith(b'#') or l.startswith(b'!') or b'*' in l:
continue continue
p = l.decode('utf-8', 'ignore') p = l.decode('utf-8', 'ignore').rstrip('/')
if p.startswith('/'): if '/' in p:
name = p[1:] name = p.lstrip('/')
if name.endswith(os.path.sep): ignored_paths_abs.add(os.path.join(folder_io.path, name))
name = name[:-1]
ignored_paths.add(os.path.join(folder_io.path, name))
else: else:
ignored_names.add(p) name = p
return ignored_paths, ignored_names ignored_paths_rel.add((folder_io.path, name))
return ignored_paths_abs, ignored_paths_rel
def expand_relative_ignore_paths(folder_io, relative_paths):
curr_path = folder_io.path
return {os.path.join(curr_path, p[1]) for p in relative_paths if curr_path.startswith(p[0])}
def recurse_find_python_folders_and_files(folder_io, except_paths=()): def recurse_find_python_folders_and_files(folder_io, except_paths=()):
except_paths = set(except_paths) except_paths = set(except_paths)
except_paths_relative = set()
for root_folder_io, folder_ios, file_ios in folder_io.walk(): for root_folder_io, folder_ios, file_ios in folder_io.walk():
# Delete folders that we don't want to iterate over. # Delete folders that we don't want to iterate over.
for file_io in file_ios: for file_io in file_ios:
@@ -209,20 +217,26 @@ def recurse_find_python_folders_and_files(folder_io, except_paths=()):
yield None, file_io yield None, file_io
if path.name == '.gitignore': if path.name == '.gitignore':
ignored_paths, ignored_names = \ ignored_paths_abs, ignored_paths_rel = gitignored_paths(
gitignored_lines(root_folder_io, file_io) root_folder_io, file_io
except_paths |= ignored_paths )
except_paths |= ignored_paths_abs
except_paths_relative |= ignored_paths_rel
except_paths_relative_expanded = expand_relative_ignore_paths(
root_folder_io, except_paths_relative
)
folder_ios[:] = [ folder_ios[:] = [
folder_io folder_io
for folder_io in folder_ios for folder_io in folder_ios
if folder_io.path not in except_paths if folder_io.path not in except_paths
and folder_io.path not in except_paths_relative_expanded
and folder_io.get_base_name() not in _IGNORE_FOLDERS and folder_io.get_base_name() not in _IGNORE_FOLDERS
] ]
for folder_io in folder_ios: for folder_io in folder_ios:
yield folder_io, None yield folder_io, None
def recurse_find_python_files(folder_io, except_paths=()): def recurse_find_python_files(folder_io, except_paths=()):
for folder_io, file_io in recurse_find_python_folders_and_files(folder_io, except_paths): for folder_io, file_io in recurse_find_python_folders_and_files(folder_io, except_paths):
if file_io is not None: if file_io is not None: