Use pathlib.Path instead of strings

This commit is contained in:
Dave Halter
2020-07-26 01:19:41 +02:00
parent 97c10facf7
commit ea6b01b968
5 changed files with 68 additions and 60 deletions

View File

@@ -3,6 +3,7 @@ import tempfile
import shutil import shutil
import logging import logging
import os import os
from pathlib import Path
import pytest import pytest
@@ -28,7 +29,7 @@ def clean_parso_cache():
""" """
old = cache._default_cache_path old = cache._default_cache_path
tmp = tempfile.mkdtemp(prefix='parso-test-') tmp = tempfile.mkdtemp(prefix='parso-test-')
cache._default_cache_path = tmp cache._default_cache_path = Path(str(tmp))
yield yield
cache._default_cache_path = old cache._default_cache_path = old
shutil.rmtree(tmp) shutil.rmtree(tmp)

View File

@@ -8,10 +8,9 @@ import platform
import logging import logging
import warnings import warnings
import pickle import pickle
from pathlib import Path
from typing import Dict, Any from typing import Dict, Any
from parso.file_io import FileIO
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
_CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10 # 10 minutes _CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10 # 10 minutes
@@ -66,13 +65,12 @@ See: http://docs.python.org/3/library/sys.html#sys.implementation
def _get_default_cache_path(): def _get_default_cache_path():
if platform.system().lower() == 'windows': if platform.system().lower() == 'windows':
dir_ = os.path.join(os.getenv('LOCALAPPDATA') dir_ = Path(os.getenv('LOCALAPPDATA') or os.path.expanduser('~'), 'Parso', 'Parso')
or os.path.expanduser('~'), 'Parso', 'Parso')
elif platform.system().lower() == 'darwin': elif platform.system().lower() == 'darwin':
dir_ = os.path.join('~', 'Library', 'Caches', 'Parso') dir_ = Path('~', 'Library', 'Caches', 'Parso')
else: else:
dir_ = os.path.join(os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso') dir_ = Path(os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso')
return os.path.expanduser(dir_) return dir_.expanduser()
_default_cache_path = _get_default_cache_path() _default_cache_path = _get_default_cache_path()
@@ -88,15 +86,15 @@ On Linux, if environment variable ``$XDG_CACHE_HOME`` is set,
_CACHE_CLEAR_THRESHOLD = 60 * 60 * 24 _CACHE_CLEAR_THRESHOLD = 60 * 60 * 24
def _get_cache_clear_lock(cache_path=None): def _get_cache_clear_lock_path(cache_path=None):
""" """
The path where the cache lock is stored. The path where the cache lock is stored.
Cache lock will prevent continuous cache clearing and only allow garbage Cache lock will prevent continuous cache clearing and only allow garbage
collection once a day (can be configured in _CACHE_CLEAR_THRESHOLD). collection once a day (can be configured in _CACHE_CLEAR_THRESHOLD).
""" """
cache_path = cache_path or _get_default_cache_path() cache_path = cache_path or _default_cache_path
return FileIO(os.path.join(cache_path, "PARSO-CACHE-LOCK")) return cache_path.joinpath("PARSO-CACHE-LOCK")
parser_cache: Dict[str, Any] = {} parser_cache: Dict[str, Any] = {}
@@ -213,18 +211,15 @@ def clear_inactive_cache(
inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL, inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL,
): ):
if cache_path is None: if cache_path is None:
cache_path = _get_default_cache_path() cache_path = _default_cache_path
if not os.path.exists(cache_path): if not cache_path.exists():
return False return False
for version_path in os.listdir(cache_path): for dirname in os.listdir(cache_path):
version_path = os.path.join(cache_path, version_path) version_path = cache_path.joinpath(dirname)
if not os.path.isdir(version_path): if not version_path.is_dir():
continue continue
for file in os.scandir(version_path): for file in os.scandir(version_path):
if ( if file.stat().st_atime + _CACHED_FILE_MAXIMUM_SURVIVAL <= time.time():
file.stat().st_atime + _CACHED_FILE_MAXIMUM_SURVIVAL
<= time.time()
):
try: try:
os.remove(file.path) os.remove(file.path)
except OSError: # silently ignore all failures except OSError: # silently ignore all failures
@@ -233,14 +228,29 @@ def clear_inactive_cache(
return True return True
def _touch(path):
try:
os.utime(path, None)
except FileNotFoundError:
try:
file = open(path, 'a')
file.close()
except (OSError, IOError): # TODO Maybe log this?
return False
return True
def _remove_cache_and_update_lock(cache_path=None): def _remove_cache_and_update_lock(cache_path=None):
lock = _get_cache_clear_lock(cache_path=cache_path) lock_path = _get_cache_clear_lock_path(cache_path=cache_path)
clear_lock_time = lock.get_last_modified() try:
clear_lock_time = os.path.getmtime(lock_path)
except FileNotFoundError:
clear_lock_time = None
if ( if (
clear_lock_time is None # first time clear_lock_time is None # first time
or clear_lock_time + _CACHE_CLEAR_THRESHOLD <= time.time() or clear_lock_time + _CACHE_CLEAR_THRESHOLD <= time.time()
): ):
if not lock._touch(): if not _touch(lock_path):
# First make sure that as few as possible other cleanup jobs also # First make sure that as few as possible other cleanup jobs also
# get started. There is still a race condition but it's probably # get started. There is still a race condition but it's probably
# not a big problem. # not a big problem.
@@ -252,14 +262,14 @@ def _remove_cache_and_update_lock(cache_path=None):
def _get_hashed_path(hashed_grammar, path, cache_path=None): def _get_hashed_path(hashed_grammar, path, cache_path=None):
directory = _get_cache_directory_path(cache_path=cache_path) directory = _get_cache_directory_path(cache_path=cache_path)
file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest() file_hash = hashlib.sha256(str(path).encode("utf-8")).hexdigest()
return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash)) return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash))
def _get_cache_directory_path(cache_path=None): def _get_cache_directory_path(cache_path=None):
if cache_path is None: if cache_path is None:
cache_path = _default_cache_path cache_path = _default_cache_path
directory = os.path.join(cache_path, _VERSION_TAG) directory = cache_path.joinpath(_VERSION_TAG)
if not os.path.exists(directory): if not directory.exists():
os.makedirs(directory) os.makedirs(directory)
return directory return directory

View File

@@ -1,8 +1,12 @@
import os import os
from pathlib import Path
from typing import Union
class FileIO: class FileIO:
def __init__(self, path): def __init__(self, path: Union[os.PathLike, str]):
if isinstance(path, str):
path = Path(path)
self.path = path self.path = path
def read(self): # Returns bytes/str def read(self): # Returns bytes/str
@@ -21,17 +25,6 @@ class FileIO:
except FileNotFoundError: except FileNotFoundError:
return None return None
def _touch(self):
try:
os.utime(self.path, None)
except FileNotFoundError:
try:
file = open(self.path, 'a')
file.close()
except (OSError, IOError): # TODO Maybe log this?
return False
return True
def __repr__(self): def __repr__(self):
return '%s(%s)' % (self.__class__.__name__, self.path) return '%s(%s)' % (self.__class__.__name__, self.path)

View File

@@ -1,6 +1,7 @@
import hashlib import hashlib
import os import os
from typing import Generic, TypeVar, Union, Dict, Optional, Any from typing import Generic, TypeVar, Union, Dict, Optional, Any
from pathlib import Path
from parso._compatibility import is_pypy from parso._compatibility import is_pypy
from parso.pgen2 import generate_grammar from parso.pgen2 import generate_grammar
@@ -49,11 +50,11 @@ class Grammar(Generic[_NodeT]):
code: Union[str, bytes] = None, code: Union[str, bytes] = None,
*, *,
error_recovery=True, error_recovery=True,
path: str = None, path: Union[os.PathLike, str] = None,
start_symbol: str = None, start_symbol: str = None,
cache=False, cache=False,
diff_cache=False, diff_cache=False,
cache_path: str = None, cache_path: Union[os.PathLike, str] = None,
file_io: FileIO = None) -> _NodeT: file_io: FileIO = None) -> _NodeT:
""" """
If you want to parse a Python file you want to start here, most likely. If you want to parse a Python file you want to start here, most likely.
@@ -92,6 +93,11 @@ class Grammar(Generic[_NodeT]):
if code is None and path is None and file_io is None: if code is None and path is None and file_io is None:
raise TypeError("Please provide either code or a path.") raise TypeError("Please provide either code or a path.")
if isinstance(path, str):
path = Path(path)
if isinstance(cache_path, str):
cache_path = Path(cache_path)
if start_symbol is None: if start_symbol is None:
start_symbol = self._start_nonterminal start_symbol = self._start_nonterminal
@@ -100,7 +106,7 @@ class Grammar(Generic[_NodeT]):
if file_io is None: if file_io is None:
if code is None: if code is None:
file_io = FileIO(path) file_io = FileIO(path) # type: ignore
else: else:
file_io = KnownContentFileIO(path, code) file_io = KnownContentFileIO(path, code)

View File

@@ -3,13 +3,12 @@ Test all things related to the ``jedi.cache`` module.
""" """
import os import os
import os.path
import pytest import pytest
import time import time
from pathlib import Path
from parso.cache import (_CACHED_FILE_MAXIMUM_SURVIVAL, _VERSION_TAG, from parso.cache import (_CACHED_FILE_MAXIMUM_SURVIVAL, _VERSION_TAG,
_get_cache_clear_lock, _get_hashed_path, _get_cache_clear_lock_path, _get_hashed_path,
_load_from_file_system, _NodeCacheItem, _load_from_file_system, _NodeCacheItem,
_remove_cache_and_update_lock, _save_to_file_system, _remove_cache_and_update_lock, _save_to_file_system,
load_module, parser_cache, try_to_save_module) load_module, parser_cache, try_to_save_module)
@@ -30,9 +29,8 @@ skip_pypy = pytest.mark.skipif(
def isolated_parso_cache(monkeypatch, tmpdir): def isolated_parso_cache(monkeypatch, tmpdir):
"""Set `parso.cache._default_cache_path` to a temporary directory """Set `parso.cache._default_cache_path` to a temporary directory
during the test. """ during the test. """
cache_path = str(os.path.join(str(tmpdir), "__parso_cache")) cache_path = Path(str(tmpdir), "__parso_cache")
monkeypatch.setattr(cache, '_default_cache_path', cache_path) monkeypatch.setattr(cache, '_default_cache_path', cache_path)
monkeypatch.setattr(cache, '_get_default_cache_path', lambda *args, **kwargs: cache_path)
return cache_path return cache_path
@@ -42,13 +40,13 @@ def test_modulepickling_change_cache_dir(tmpdir):
See: `#168 <https://github.com/davidhalter/jedi/pull/168>`_ See: `#168 <https://github.com/davidhalter/jedi/pull/168>`_
""" """
dir_1 = str(tmpdir.mkdir('first')) dir_1 = Path(str(tmpdir.mkdir('first')))
dir_2 = str(tmpdir.mkdir('second')) dir_2 = Path(str(tmpdir.mkdir('second')))
item_1 = _NodeCacheItem('bla', []) item_1 = _NodeCacheItem('bla', [])
item_2 = _NodeCacheItem('bla', []) item_2 = _NodeCacheItem('bla', [])
path_1 = 'fake path 1' path_1 = Path('fake path 1')
path_2 = 'fake path 2' path_2 = Path('fake path 2')
hashed_grammar = load_grammar()._hashed hashed_grammar = load_grammar()._hashed
_save_to_file_system(hashed_grammar, path_1, item_1, cache_path=dir_1) _save_to_file_system(hashed_grammar, path_1, item_1, cache_path=dir_1)
@@ -86,7 +84,7 @@ def test_modulepickling_simulate_deleted_cache(tmpdir):
module = 'fake parser' module = 'fake parser'
# Create the file # Create the file
path = tmpdir.dirname + '/some_path' path = Path(str(tmpdir.dirname), 'some_path')
with open(path, 'w'): with open(path, 'w'):
pass pass
io = file_io.FileIO(path) io = file_io.FileIO(path)
@@ -134,7 +132,7 @@ class _FixedTimeFileIO(file_io.KnownContentFileIO):
@pytest.mark.parametrize('diff_cache', [False, True]) @pytest.mark.parametrize('diff_cache', [False, True])
@pytest.mark.parametrize('use_file_io', [False, True]) @pytest.mark.parametrize('use_file_io', [False, True])
def test_cache_last_used_update(diff_cache, use_file_io): def test_cache_last_used_update(diff_cache, use_file_io):
p = '/path/last-used' p = Path('/path/last-used')
parser_cache.clear() # Clear, because then it's easier to find stuff. parser_cache.clear() # Clear, because then it's easier to find stuff.
parse('somecode', cache=True, path=p) parse('somecode', cache=True, path=p)
node_cache_item = next(iter(parser_cache.values()))[p] node_cache_item = next(iter(parser_cache.values()))[p]
@@ -157,21 +155,21 @@ def test_inactive_cache(tmpdir, isolated_parso_cache):
test_subjects = "abcdef" test_subjects = "abcdef"
for path in test_subjects: for path in test_subjects:
parse('somecode', cache=True, path=os.path.join(str(tmpdir), path)) parse('somecode', cache=True, path=os.path.join(str(tmpdir), path))
raw_cache_path = os.path.join(isolated_parso_cache, _VERSION_TAG) raw_cache_path = isolated_parso_cache.joinpath(_VERSION_TAG)
assert os.path.exists(raw_cache_path) assert raw_cache_path.exists()
paths = os.listdir(raw_cache_path) dir_names = os.listdir(raw_cache_path)
a_while_ago = time.time() - _CACHED_FILE_MAXIMUM_SURVIVAL a_while_ago = time.time() - _CACHED_FILE_MAXIMUM_SURVIVAL
old_paths = set() old_paths = set()
for path in paths[:len(test_subjects) // 2]: # make certain number of paths old for dir_name in dir_names[:len(test_subjects) // 2]: # make certain number of paths old
os.utime(os.path.join(raw_cache_path, path), (a_while_ago, a_while_ago)) os.utime(raw_cache_path.joinpath(dir_name), (a_while_ago, a_while_ago))
old_paths.add(path) old_paths.add(dir_name)
# nothing should be cleared while the lock is on # nothing should be cleared while the lock is on
assert os.path.exists(_get_cache_clear_lock().path) assert _get_cache_clear_lock_path().exists()
_remove_cache_and_update_lock() # it shouldn't clear anything _remove_cache_and_update_lock() # it shouldn't clear anything
assert len(os.listdir(raw_cache_path)) == len(test_subjects) assert len(os.listdir(raw_cache_path)) == len(test_subjects)
assert old_paths.issubset(os.listdir(raw_cache_path)) assert old_paths.issubset(os.listdir(raw_cache_path))
os.utime(_get_cache_clear_lock().path, (a_while_ago, a_while_ago)) os.utime(_get_cache_clear_lock_path(), (a_while_ago, a_while_ago))
_remove_cache_and_update_lock() _remove_cache_and_update_lock()
assert len(os.listdir(raw_cache_path)) == len(test_subjects) // 2 assert len(os.listdir(raw_cache_path)) == len(test_subjects) // 2
assert not old_paths.intersection(os.listdir(raw_cache_path)) assert not old_paths.intersection(os.listdir(raw_cache_path))