From cfb7e300af0c86b2aa6c824aadd1f3612166cc38 Mon Sep 17 00:00:00 2001 From: Mikhail Rudenko Date: Sun, 21 Aug 2022 21:12:32 +0300 Subject: [PATCH] Improve .gitignore handling At present, .gitignore patterns not starting with '/' are classified as "ignored names" (as opposed to "ignored paths") and are not used for filtering directories. But, according to the spec [1], the situation is a bit different: all patterns apply to directories (and those ending with '/' apply to directories only). Besides that, there are two kinds of patterns: those that match only relative to the directory where the defining .gitignore is located (they must contain a '/' at the beginning or in the middle), which we call "absolute", and those that also match in all subdirectories under the directory where the defining .gitignore is located (they must not contain '/' or contain only a trailing '/'), which we call "relative". This commit implements handling of both "absolute" and "relative" .gitignore patterns according to the spec. "Absolute" patterns are handled mostly like `ignored_paths` were handled in the previous implementation. "Relative" patterns are collected into a distinct set containing `(defining_gitignore_dir, pattern)` tuples. For each traversed `root_folder_io`, all applicable "relative" patterns are expanded into a set of plain paths, which are then used for filtering `folder_io`s. While at it, also fix some minor issues. Explicitly ignore negated and wildcard patterns, since we don't handle them correctly anyway. Also, use '/' as a path separator instead of `os.path.sep` when dealing with .gitignore, since the spec explicitly says that '/' must be used on all platforms. 
[1] https://git-scm.com/docs/gitignore --- jedi/inference/references.py | 46 +++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/jedi/inference/references.py b/jedi/inference/references.py index 6ffa160e..5a9973c4 100644 --- a/jedi/inference/references.py +++ b/jedi/inference/references.py @@ -180,26 +180,34 @@ def _check_fs(inference_state, file_io, regex): return m.as_context() -def gitignored_lines(folder_io, file_io): - ignored_paths = set() - ignored_names = set() +def gitignored_paths(folder_io, file_io): + ignored_paths_abs = set() + ignored_paths_rel = set() + for l in file_io.read().splitlines(): - if not l or l.startswith(b'#'): + if not l or l.startswith(b'#') or l.startswith(b'!') or b'*' in l: continue - p = l.decode('utf-8', 'ignore') - if p.startswith('/'): - name = p[1:] - if name.endswith(os.path.sep): - name = name[:-1] - ignored_paths.add(os.path.join(folder_io.path, name)) + p = l.decode('utf-8', 'ignore').rstrip('/') + if '/' in p: + name = p.lstrip('/') + ignored_paths_abs.add(os.path.join(folder_io.path, name)) else: - ignored_names.add(p) - return ignored_paths, ignored_names + name = p + ignored_paths_rel.add((folder_io.path, name)) + + return ignored_paths_abs, ignored_paths_rel + + +def expand_relative_ignore_paths(folder_io, relative_paths): + curr_path = folder_io.path + return {os.path.join(curr_path, p[1]) for p in relative_paths if curr_path.startswith(p[0])} def recurse_find_python_folders_and_files(folder_io, except_paths=()): except_paths = set(except_paths) + except_paths_relative = set() + for root_folder_io, folder_ios, file_ios in folder_io.walk(): # Delete folders that we don't want to iterate over. 
for file_io in file_ios: @@ -209,20 +217,26 @@ def recurse_find_python_folders_and_files(folder_io, except_paths=()): yield None, file_io if path.name == '.gitignore': - ignored_paths, ignored_names = \ - gitignored_lines(root_folder_io, file_io) - except_paths |= ignored_paths + ignored_paths_abs, ignored_paths_rel = gitignored_paths( + root_folder_io, file_io + ) + except_paths |= ignored_paths_abs + except_paths_relative |= ignored_paths_rel + + except_paths_relative_expanded = expand_relative_ignore_paths( + root_folder_io, except_paths_relative + ) folder_ios[:] = [ folder_io for folder_io in folder_ios if folder_io.path not in except_paths + and folder_io.path not in except_paths_relative_expanded and folder_io.get_base_name() not in _IGNORE_FOLDERS ] for folder_io in folder_ios: yield folder_io, None - def recurse_find_python_files(folder_io, except_paths=()): for folder_io, file_io in recurse_find_python_folders_and_files(folder_io, except_paths): if file_io is not None: