From f3eaf0056956da40ba164d46ad83e65ff8fbfb98 Mon Sep 17 00:00:00 2001 From: mutantsan Date: Thu, 2 May 2024 16:25:42 +0300 Subject: [PATCH] DATASHADES-321 / fix file ttl invalidation, add more tests --- ckanext/charts/cache.py | 27 +++++++++++++++---------- ckanext/charts/tests/test_cache.py | 32 ++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/ckanext/charts/cache.py b/ckanext/charts/cache.py index 39b29a8..b39f5a7 100644 --- a/ckanext/charts/cache.py +++ b/ckanext/charts/cache.py @@ -74,9 +74,8 @@ def __init__(self): def get_data(self, key: str) -> pd.DataFrame | None: """Return data from cache if exists""" - file_path = os.path.join( - self.directory, f"{self.generate_unique_consistent_filename(key)}.orc" - ) + + file_path = self.make_file_path_from_key(key) if os.path.exists(file_path): if self.is_file_cache_expired(file_path): @@ -89,21 +88,22 @@ def get_data(self, key: str) -> pd.DataFrame | None: def set_data(self, key: str, data: pd.DataFrame) -> None: """Save data to cache. 
The data will be stored as an ORC file.""" - file_path = os.path.join( - self.directory, f"{self.generate_unique_consistent_filename(key)}.orc" - ) + file_path = self.make_file_path_from_key(key) data.to_orc(file_path) def invalidate(self, key: str) -> None: """Remove data from cache""" - file_path = os.path.join( - self.directory, f"{self.generate_unique_consistent_filename(key)}.orc" - ) + file_path = self.make_file_path_from_key(key) if os.path.exists(file_path): os.remove(file_path) + def make_file_path_from_key(self, key: str) -> str: + return os.path.join( + self.directory, f"{self.generate_unique_consistent_filename(key)}.orc" + ) + def generate_unique_consistent_filename(self, key: str) -> str: """Generate unique and consistent filename based on the key""" hash_object = hashlib.sha256() @@ -112,8 +112,13 @@ def generate_unique_consistent_filename(self, key: str) -> str: @staticmethod def is_file_cache_expired(file_path: str) -> bool: - """Check if file cache is expired""" - return os.path.getmtime(file_path) + config.get_file_cache_ttl() < time.time() + """Check if file cache is expired. 
If TTL is 0 then cache never expires.""" + file_ttl = config.get_file_cache_ttl() + + if not file_ttl: + return False + + return time.time() - os.path.getmtime(file_path) > file_ttl def get_cache_manager(cache_strategy: str | None) -> CacheStrategy: diff --git a/ckanext/charts/tests/test_cache.py b/ckanext/charts/tests/test_cache.py index 238b823..2c5503c 100644 --- a/ckanext/charts/tests/test_cache.py +++ b/ckanext/charts/tests/test_cache.py @@ -6,8 +6,10 @@ from ckan.tests.helpers import call_action import ckanext.charts.fetchers as fetchers +import ckanext.charts.cache as cache import ckanext.charts.tests.helpers as helpers import ckanext.charts.const as const +import ckanext.charts.config as config @pytest.mark.ckan_config("ckan.plugins", "charts_view datastore") @@ -188,8 +190,30 @@ def test_invalidate_file_cache(self): assert fetcher.get_cached_data() is None -class TestCalculateFileCacheTTL: - def test_calculate_file_cache_ttl(self): +from freezegun import freeze_time +from datetime import timedelta, datetime + + +@pytest.mark.usefixtures("clean_file_cache") +@pytest.mark.ckan_config(config.CONF_FILE_CACHE_TTL, 100) +class TestCalculateFileExpiration: + def test_file_is_expired(self): + fetcher = fetchers.FileSystemDataFetcher( + helpers.get_file_path("sample.csv"), cache_strategy=const.CACHE_FILE + ) + + assert fetcher.get_cached_data() is None + + fetcher.fetch_data() + + assert isinstance(fetcher.get_cached_data(), pd.DataFrame) + + file_path = cache.FileCache().make_file_path_from_key(fetcher.make_cache_key()) + + with freeze_time(datetime.now() + timedelta(seconds=101)): + assert cache.FileCache().is_file_cache_expired(file_path) + + def test_file_is_not_expired(self): fetcher = fetchers.FileSystemDataFetcher( helpers.get_file_path("sample.csv"), cache_strategy=const.CACHE_FILE ) @@ -199,3 +223,7 @@ def test_calculate_file_cache_ttl(self): fetcher.fetch_data() assert isinstance(fetcher.get_cached_data(), pd.DataFrame) + + file_path = 
cache.FileCache().make_file_path_from_key(fetcher.make_cache_key()) + + assert not cache.FileCache().is_file_cache_expired(file_path)