diff --git a/alphabase/io/tempmmap.py b/alphabase/io/tempmmap.py index 80c2af30..2e9db3a3 100644 --- a/alphabase/io/tempmmap.py +++ b/alphabase/io/tempmmap.py @@ -25,14 +25,14 @@ def _init_temp_dir(prefix: str = "temp_mmap_") -> str: TEMP_DIR_NAME = _TEMP_DIR.name logging.info( - f"Temp mmap arrays are written to {TEMP_DIR_NAME}. " + f"Memory-mapped arrays are written to temporary directory {TEMP_DIR_NAME}. " "Cleanup of this folder is OS dependent and might need to be triggered manually!" ) return TEMP_DIR_NAME -def _change_temp_dir_location(abs_path: str) -> str: +def _change_temp_dir_location(abs_path: str) -> None: """ Check if the directory to which the temp arrays should be written exists, if so defines this as the new temp dir location. If not raise a value error. @@ -51,14 +51,14 @@ def _change_temp_dir_location(abs_path: str) -> str: if os.path.isdir(abs_path): TEMP_DIR_NAME = abs_path else: - raise ValueError(f"The path {abs_path} does not point to a directory.") + raise ValueError(f"The path '{abs_path}' does not point to a directory.") else: raise ValueError( - f"The directory {abs_path} in which the file should be created does not exist." + f"The directory '{abs_path}' in which the file should be created does not exist." ) -def _get_file_location(abs_file_path: str, overwrite=False) -> str: +def _get_file_location(abs_file_path: str, overwrite: bool = False) -> str: """ Check if the path specified for the new temporary file is valid. If not raise a value error. @@ -69,7 +69,7 @@ def _get_file_location(abs_file_path: str, overwrite=False) -> str: Parameters ---------- - abs_path : str + abs_file_path : str The absolute path to the new temporary file. Returns @@ -77,30 +77,26 @@ def _get_file_location(abs_file_path: str, overwrite=False) -> str: str The file path if it is valid. """ - # check overwrite status and existence of file if not overwrite and os.path.exists(abs_file_path): raise ValueError( - "The file already exists. Set overwrite to True to overwrite the file or choose a different name." + f"The file '{abs_file_path}' already exists. Set overwrite to True to overwrite the file or choose a different name." ) - # ensure that the filename conforms to the naming convention if not os.path.basename(abs_file_path).endswith(".hdf"): raise ValueError( f"The chosen file name '{os.path.basename(abs_file_path)}' needs to end with .hdf" ) - # ensure that the directory in which the file should be created exists - if os.path.isdir(os.path.dirname(abs_file_path)): - return abs_file_path - else: + if not os.path.isdir(os.path.dirname(abs_file_path)): raise ValueError( f"The directory '{os.path.dirname(abs_file_path)}' in which the file should be created does not exist." ) + return abs_file_path -def redefine_temp_location(path): - """ - Redfine the location where the temp arrays are written to. + +def redefine_temp_location(path: str) -> str: + """Redefine the location where the temp arrays are written to. Parameters ---------- @@ -113,28 +109,21 @@ def redefine_temp_location(path): """ - global _TEMP_DIR, TEMP_DIR_NAME + global TEMP_DIR_NAME - logging.warning( - f"""Folder {TEMP_DIR_NAME} with temp mmap arrays is being deleted. All existing temp mmapp arrays will be unusable!""" - ) + _clear() - # cleaup old temporary directory + # cleanup old temporary directory shutil.rmtree(TEMP_DIR_NAME, ignore_errors=True) # create new tempfile at desired location - _TEMP_DIR = tempfile.TemporaryDirectory(prefix=os.path.join(path, "temp_mmap_")) - TEMP_DIR_NAME = _TEMP_DIR.name + temp_dir_name = _init_temp_dir(prefix=os.path.join(path, "temp_mmap_")) - logging.warning( - f"""New temp folder location. Temp mmap arrays are written to {TEMP_DIR_NAME}. Cleanup of this folder is OS dependant, and might need to be triggered manually!""" - ) - - return TEMP_DIR_NAME + return temp_dir_name def array(shape: tuple, dtype: np.dtype, tmp_dir_abs_path: str = None) -> np.ndarray: - """Create a writable temporary mmapped array. + """Create a writable temporary memory-mapped array. Parameters ---------- @@ -150,7 +139,7 @@ def array(shape: tuple, dtype: np.dtype, tmp_dir_abs_path: str = None) -> np.nda Returns ------- type - A writable temporary mmapped array. + A writable temporary memory-mapped array. """ temp_dir_name = _init_temp_dir() @@ -160,17 +149,20 @@ def array(shape: tuple, dtype: np.dtype, tmp_dir_abs_path: str = None) -> np.nda _change_temp_dir_location(tmp_dir_abs_path) temp_dir_name = tmp_dir_abs_path - temp_file_name = os.path.join( + temp_file_path = os.path.join( temp_dir_name, f"temp_mmap_{np.random.randint(2**63, dtype=np.int64)}.hdf" ) - with h5py.File(temp_file_name, "w") as hdf_file: - array = hdf_file.create_dataset("array", shape=shape, dtype=dtype) - array[0] = np.string_("") if isinstance(dtype, np.dtypes.StrDType) else 0 - offset = array.id.get_offset() + with h5py.File(temp_file_path, "w") as hdf_file: + created_array = hdf_file.create_dataset("array", shape=shape, dtype=dtype) + created_array[0] = ( + np.string_("") if isinstance(dtype, np.dtypes.StrDType) else 0 + ) + offset = created_array.id.get_offset() - with open(temp_file_name, "rb+") as raw_hdf_file: + with open(temp_file_path, "rb+") as raw_hdf_file: mmap_obj = mmap.mmap(raw_hdf_file.fileno(), 0, access=mmap.ACCESS_WRITE) + return np.frombuffer( mmap_obj, dtype=dtype, count=np.prod(shape), offset=offset ).reshape(shape) @@ -218,23 +210,23 @@ def create_empty_mmap( # if path does not exist generate a random file name in the TEMP directory if file_path is None: - temp_file_name = os.path.join( + temp_file_path = os.path.join( temp_dir_name, f"temp_mmap_{np.random.randint(2**63, dtype=np.int64)}.hdf" ) else: - temp_file_name = _get_file_location( - file_path, overwrite=False - ) # TODO overwrite=overwrite + temp_file_path = _get_file_location(file_path, overwrite=overwrite) - with h5py.File(temp_file_name, "w") as hdf_file: - array = hdf_file.create_dataset("array", shape=shape, dtype=dtype) - array[0] = np.string_("") if isinstance(dtype, np.dtypes.StrDType) else 0 + with h5py.File(temp_file_path, "w") as hdf_file: + created_array = hdf_file.create_dataset("array", shape=shape, dtype=dtype) + created_array[0] = ( + np.string_("") if isinstance(dtype, np.dtypes.StrDType) else 0 + ) - return temp_file_name # TODO temp_file_path + return temp_file_path def mmap_array_from_path(hdf_file: str) -> np.ndarray: - """reconnect to an exisiting HDF5 file to generate a writable temporary mmapped array. + """reconnect to an exisiting HDF5 file to generate a writable temporary memory-mapped array. Parameters ---------- @@ -244,17 +236,17 @@ def mmap_array_from_path(hdf_file: str) -> np.ndarray: Returns ------- type - A writable temporary mmapped array. + A writable temporary memory-mapped array. """ path = os.path.join(hdf_file) # read parameters required to reinitialize the mmap object with h5py.File(path, "r") as hdf_file: - array = hdf_file["array"] - offset = array.id.get_offset() - shape = array.shape - dtype = array.dtype + array_ = hdf_file["array"] + offset = array_.id.get_offset() + shape = array_.shape + dtype = array_.dtype # reinitialize the mmap object with open(path, "rb+") as raw_hdf_file: @@ -265,7 +257,7 @@ def mmap_array_from_path(hdf_file: str) -> np.ndarray: def zeros(shape: tuple, dtype: np.dtype) -> np.ndarray: - """Create a writable temporary mmapped array filled with zeros. + """Create a writable temporary memory-mapped array filled with zeros. Parameters ---------- @@ -277,15 +269,15 @@ def zeros(shape: tuple, dtype: np.dtype) -> np.ndarray: Returns ------- type - A writable temporary mmapped array filled with zeros. + A writable temporary memory-mapped array filled with zeros. """ - _array = array(shape, dtype) - _array[:] = 0 - return _array + array_ = array(shape, dtype) + array_[:] = 0 + return array_ def ones(shape: tuple, dtype: np.dtype) -> np.ndarray: - """Create a writable temporary mmapped array filled with ones. + """Create a writable temporary memory-mapped array filled with ones. Parameters ---------- @@ -297,33 +289,37 @@ def ones(shape: tuple, dtype: np.dtype) -> np.ndarray: Returns ------- type - A writable temporary mmapped array filled with ones. + A writable temporary memory-mapped array filled with ones. """ - _array = array(shape, dtype) - _array[:] = 1 - return _array + array_ = array(shape, dtype) + array_[:] = 1 + return array_ @atexit.register def _clear() -> None: - """Reset the temporary folder containing temp mmapped arrays. + """Reset the temporary folder containing temp memory-mapped arrays. WARNING: All existing temp mmapp arrays will be unusable! """ global _TEMP_DIR, TEMP_DIR_NAME if _TEMP_DIR is not None: - logging.warning( - f"Folder {TEMP_DIR_NAME} with temp mmap arrays is being deleted. " - "All existing temp mmapp arrays will be unusable!" + logging.info( + f"Temporary folder {TEMP_DIR_NAME} with memory-mapped arrays is being deleted. " + "All existing memory-mapped arrays will be unusable!" ) - del _TEMP_DIR + _TEMP_DIR = None # TempDirectory will take care of the cleanup + if os.path.exists(TEMP_DIR_NAME): + logging.warning( + f"Temporary folder {TEMP_DIR_NAME} still exists, manual removal necessary." + ) TEMP_DIR_NAME = None def clear() -> str: - """Reset the temporary folder containing temp mmapped arrays and create a new one. + """Reset the temporary folder containing temp memory-mapped arrays and create a new one. WARNING: All existing temp mmapp arrays will be unusable! diff --git a/tests/unit/io/test_tempmmap.py b/tests/unit/io/test_tempmmap.py index d000759f..047426fb 100644 --- a/tests/unit/io/test_tempmmap.py +++ b/tests/unit/io/test_tempmmap.py @@ -22,9 +22,8 @@ def teardown_function(function): tempmmap = sys.modules["alphabase.io.tempmmap"] tempmmap._clear() # simulating @atexit.register - # # later: - # assert tempmmap._TEMP_DIR is None - # assert tempmmap.TEMP_DIR_NAME is None + assert tempmmap._TEMP_DIR is None + assert tempmmap.TEMP_DIR_NAME is None del sys.modules["alphabase.io.tempmmap"] @@ -46,6 +45,22 @@ def test_create_array(): assert tempmmap._TEMP_DIR is not None +def test_check_temp_dir_deletion(): + """Test that tempdir is deleted at exit.""" + tempmmap = sys.modules["alphabase.io.tempmmap"] + + _ = tempmmap.array((5, 5), np.float32) + temp_dir_name = tempmmap._TEMP_DIR.name + + # check presence of temp dir first + assert os.path.exists(temp_dir_name) + + # when + tempmmap._clear() + + assert not os.path.exists(temp_dir_name) + + def test_create_array_with_custom_temp_dir(): """Test creating and accessing an array with custom temp dir.""" tempmmap = sys.modules["alphabase.io.tempmmap"] @@ -66,6 +81,33 @@ def test_create_array_with_custom_temp_dir(): assert temp_dir == tempmmap.TEMP_DIR_NAME +def test_create_array_with_custom_temp_dir_nonexisting(): + """Test creating an array with custom temp dir: not existing.""" + tempmmap = sys.modules["alphabase.io.tempmmap"] + + temp_dir = "nonexisting_dir" + # when + with pytest.raises( + ValueError, + match="The directory 'nonexisting_dir' in which the file should be created does not exist.", + ): + _ = tempmmap.array((5, 5), np.int32, tmp_dir_abs_path=temp_dir) + + +def test_create_array_with_custom_temp_dir_not_a_dir(): + """Test creating an array with custom temp dir: not a directory.""" + tempmmap = sys.modules["alphabase.io.tempmmap"] + + with tempfile.TemporaryFile() as temp_file, pytest.raises( + ValueError, + match=f"The path '{temp_file.name}' does not point to a directory.", + ): + # when + _ = tempmmap.create_empty_mmap( + (5, 5), np.int32, tmp_dir_abs_path=temp_file.name + ) + + def test_mmap_array_from_path(): """Test reconnecting to an existing array.""" tempmmap = sys.modules["alphabase.io.tempmmap"] @@ -156,6 +198,27 @@ def test_create_empty_with_custom_file_path(): assert temp_dir != tempmmap.TEMP_DIR_NAME +def test_create_empty_with_custom_file_path_exists(): + """Test creating and accessing an empty array with custom file path that exists.""" + tempmmap = sys.modules["alphabase.io.tempmmap"] + + # when + with tempfile.TemporaryFile() as temp_file, pytest.raises( + ValueError, + match=f"The file '{temp_file.name}' already exists. Set overwrite to True to overwrite the file or choose a different name.", + ): + _ = tempmmap.create_empty_mmap((5, 5), np.float32, file_path=temp_file.name) + + # when 2 + with tempfile.TemporaryDirectory() as temp_dir, open( + f"{temp_dir}/temp_mmap.hdf", "w" + ) as temp_file: + _ = tempmmap.create_empty_mmap( + (5, 5), np.float32, file_path=temp_file.name, overwrite=True + ) + # did not raise -> OK + + def test_create_empty_with_custom_file_path_error_cases(): """Test creating and accessing an empty array: error cases.""" tempmmap = sys.modules["alphabase.io.tempmmap"]