Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor tempmmap2 #262

Merged
merged 11 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 62 additions & 66 deletions alphabase/io/tempmmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ def _init_temp_dir(prefix: str = "temp_mmap_") -> str:
TEMP_DIR_NAME = _TEMP_DIR.name

logging.info(
f"Temp mmap arrays are written to {TEMP_DIR_NAME}. "
f"Memory-mapped arrays are written to temporary directory {TEMP_DIR_NAME}. "
"Cleanup of this folder is OS dependent and might need to be triggered manually!"
)

return TEMP_DIR_NAME


def _change_temp_dir_location(abs_path: str) -> str:
def _change_temp_dir_location(abs_path: str) -> None:
"""
Check if the directory to which the temp arrays should be written exists. If so, define it as the new temp dir location; if not, raise a ValueError.

Expand All @@ -51,14 +51,14 @@ def _change_temp_dir_location(abs_path: str) -> str:
if os.path.isdir(abs_path):
TEMP_DIR_NAME = abs_path
else:
raise ValueError(f"The path {abs_path} does not point to a directory.")
raise ValueError(f"The path '{abs_path}' does not point to a directory.")
else:
raise ValueError(
f"The directory {abs_path} in which the file should be created does not exist."
f"The directory '{abs_path}' in which the file should be created does not exist."
)


def _get_file_location(abs_file_path: str, overwrite=False) -> str:
def _get_file_location(abs_file_path: str, overwrite: bool = False) -> str:
"""
Check if the path specified for the new temporary file is valid. If not raise a value error.

Expand All @@ -69,38 +69,34 @@ def _get_file_location(abs_file_path: str, overwrite=False) -> str:

Parameters
----------
abs_path : str
abs_file_path : str
The absolute path to the new temporary file.

Returns
-------
str
The file path if it is valid.
"""
# check overwrite status and existence of file
if not overwrite and os.path.exists(abs_file_path):
raise ValueError(
"The file already exists. Set overwrite to True to overwrite the file or choose a different name."
f"The file '{abs_file_path}' already exists. Set overwrite to True to overwrite the file or choose a different name."
)

# ensure that the filename conforms to the naming convention
if not os.path.basename(abs_file_path).endswith(".hdf"):
raise ValueError(
f"The chosen file name '{os.path.basename(abs_file_path)}' needs to end with .hdf"
)

# ensure that the directory in which the file should be created exists
if os.path.isdir(os.path.dirname(abs_file_path)):
return abs_file_path
else:
if not os.path.isdir(os.path.dirname(abs_file_path)):
raise ValueError(
f"The directory '{os.path.dirname(abs_file_path)}' in which the file should be created does not exist."
)

return abs_file_path

def redefine_temp_location(path):
"""
Redfine the location where the temp arrays are written to.

def redefine_temp_location(path: str) -> str:
"""Redefine the location where the temp arrays are written to.

Parameters
----------
Expand All @@ -113,28 +109,21 @@ def redefine_temp_location(path):

"""

global _TEMP_DIR, TEMP_DIR_NAME
global TEMP_DIR_NAME

logging.warning(
f"""Folder {TEMP_DIR_NAME} with temp mmap arrays is being deleted. All existing temp mmapp arrays will be unusable!"""
)
_clear()

# cleaup old temporary directory
# cleanup old temporary directory
shutil.rmtree(TEMP_DIR_NAME, ignore_errors=True)

# create new tempfile at desired location
_TEMP_DIR = tempfile.TemporaryDirectory(prefix=os.path.join(path, "temp_mmap_"))
TEMP_DIR_NAME = _TEMP_DIR.name
temp_dir_name = _init_temp_dir(prefix=os.path.join(path, "temp_mmap_"))

logging.warning(
f"""New temp folder location. Temp mmap arrays are written to {TEMP_DIR_NAME}. Cleanup of this folder is OS dependant, and might need to be triggered manually!"""
)

return TEMP_DIR_NAME
return temp_dir_name


def array(shape: tuple, dtype: np.dtype, tmp_dir_abs_path: str = None) -> np.ndarray:
"""Create a writable temporary mmapped array.
"""Create a writable temporary memory-mapped array.

Parameters
----------
Expand All @@ -150,7 +139,7 @@ def array(shape: tuple, dtype: np.dtype, tmp_dir_abs_path: str = None) -> np.nda
Returns
-------
np.ndarray
A writable temporary mmapped array.
A writable temporary memory-mapped array.
"""
temp_dir_name = _init_temp_dir()

Expand All @@ -160,17 +149,20 @@ def array(shape: tuple, dtype: np.dtype, tmp_dir_abs_path: str = None) -> np.nda
_change_temp_dir_location(tmp_dir_abs_path)
temp_dir_name = tmp_dir_abs_path

temp_file_name = os.path.join(
temp_file_path = os.path.join(
temp_dir_name, f"temp_mmap_{np.random.randint(2**63, dtype=np.int64)}.hdf"
)

with h5py.File(temp_file_name, "w") as hdf_file:
array = hdf_file.create_dataset("array", shape=shape, dtype=dtype)
array[0] = np.string_("") if isinstance(dtype, np.dtypes.StrDType) else 0
offset = array.id.get_offset()
with h5py.File(temp_file_path, "w") as hdf_file:
created_array = hdf_file.create_dataset("array", shape=shape, dtype=dtype)
created_array[0] = (
np.string_("") if isinstance(dtype, np.dtypes.StrDType) else 0
)
offset = created_array.id.get_offset()

with open(temp_file_name, "rb+") as raw_hdf_file:
with open(temp_file_path, "rb+") as raw_hdf_file:
mmap_obj = mmap.mmap(raw_hdf_file.fileno(), 0, access=mmap.ACCESS_WRITE)

return np.frombuffer(
mmap_obj, dtype=dtype, count=np.prod(shape), offset=offset
).reshape(shape)
Expand Down Expand Up @@ -218,23 +210,23 @@ def create_empty_mmap(

# if path does not exist generate a random file name in the TEMP directory
if file_path is None:
temp_file_name = os.path.join(
temp_file_path = os.path.join(
temp_dir_name, f"temp_mmap_{np.random.randint(2**63, dtype=np.int64)}.hdf"
)
else:
temp_file_name = _get_file_location(
file_path, overwrite=False
) # TODO overwrite=overwrite
temp_file_path = _get_file_location(file_path, overwrite=overwrite)

with h5py.File(temp_file_name, "w") as hdf_file:
array = hdf_file.create_dataset("array", shape=shape, dtype=dtype)
array[0] = np.string_("") if isinstance(dtype, np.dtypes.StrDType) else 0
with h5py.File(temp_file_path, "w") as hdf_file:
created_array = hdf_file.create_dataset("array", shape=shape, dtype=dtype)
created_array[0] = (
np.string_("") if isinstance(dtype, np.dtypes.StrDType) else 0
)

return temp_file_name # TODO temp_file_path
return temp_file_path


def mmap_array_from_path(hdf_file: str) -> np.ndarray:
"""reconnect to an exisiting HDF5 file to generate a writable temporary mmapped array.
"""reconnect to an exisiting HDF5 file to generate a writable temporary memory-mapped array.

Parameters
----------
Expand All @@ -244,17 +236,17 @@ def mmap_array_from_path(hdf_file: str) -> np.ndarray:
Returns
-------
np.ndarray
A writable temporary mmapped array.
A writable temporary memory-mapped array.
"""

path = os.path.join(hdf_file)

# read parameters required to reinitialize the mmap object
with h5py.File(path, "r") as hdf_file:
array = hdf_file["array"]
offset = array.id.get_offset()
shape = array.shape
dtype = array.dtype
array_ = hdf_file["array"]
offset = array_.id.get_offset()
shape = array_.shape
dtype = array_.dtype

# reinitialize the mmap object
with open(path, "rb+") as raw_hdf_file:
Expand All @@ -265,7 +257,7 @@ def mmap_array_from_path(hdf_file: str) -> np.ndarray:


def zeros(shape: tuple, dtype: np.dtype) -> np.ndarray:
"""Create a writable temporary mmapped array filled with zeros.
"""Create a writable temporary memory-mapped array filled with zeros.

Parameters
----------
Expand All @@ -277,15 +269,15 @@ def zeros(shape: tuple, dtype: np.dtype) -> np.ndarray:
Returns
-------
np.ndarray
A writable temporary mmapped array filled with zeros.
A writable temporary memory-mapped array filled with zeros.
"""
_array = array(shape, dtype)
_array[:] = 0
return _array
array_ = array(shape, dtype)
array_[:] = 0
return array_


def ones(shape: tuple, dtype: np.dtype) -> np.ndarray:
"""Create a writable temporary mmapped array filled with ones.
"""Create a writable temporary memory-mapped array filled with ones.

Parameters
----------
Expand All @@ -297,33 +289,37 @@ def ones(shape: tuple, dtype: np.dtype) -> np.ndarray:
Returns
-------
np.ndarray
A writable temporary mmapped array filled with ones.
A writable temporary memory-mapped array filled with ones.
"""
_array = array(shape, dtype)
_array[:] = 1
return _array
array_ = array(shape, dtype)
array_[:] = 1
return array_


@atexit.register
def _clear() -> None:
"""Reset the temporary folder containing temp mmapped arrays.
"""Reset the temporary folder containing temp memory-mapped arrays.

WARNING: All existing temp mmap arrays will be unusable!
"""
global _TEMP_DIR, TEMP_DIR_NAME

if _TEMP_DIR is not None:
logging.warning(
f"Folder {TEMP_DIR_NAME} with temp mmap arrays is being deleted. "
"All existing temp mmapp arrays will be unusable!"
logging.info(
f"Temporary folder {TEMP_DIR_NAME} with memory-mapped arrays is being deleted. "
"All existing memory-mapped arrays will be unusable!"
)

del _TEMP_DIR
_TEMP_DIR = None # TempDirectory will take care of the cleanup
if os.path.exists(TEMP_DIR_NAME):
logging.warning(
f"Temporary folder {TEMP_DIR_NAME} still exists, manual removal necessary."
)
TEMP_DIR_NAME = None


def clear() -> str:
"""Reset the temporary folder containing temp mmapped arrays and create a new one.
"""Reset the temporary folder containing temp memory-mapped arrays and create a new one.

WARNING: All existing temp mmap arrays will be unusable!

Expand Down
69 changes: 66 additions & 3 deletions tests/unit/io/test_tempmmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@ def teardown_function(function):
tempmmap = sys.modules["alphabase.io.tempmmap"]
tempmmap._clear() # simulating @atexit.register

# # later:
# assert tempmmap._TEMP_DIR is None
# assert tempmmap.TEMP_DIR_NAME is None
assert tempmmap._TEMP_DIR is None
assert tempmmap.TEMP_DIR_NAME is None

del sys.modules["alphabase.io.tempmmap"]

Expand All @@ -46,6 +45,22 @@ def test_create_array():
assert tempmmap._TEMP_DIR is not None


def test_check_temp_dir_deletion():
    """Verify that `_clear()` removes the temporary directory from disk."""
    module = sys.modules["alphabase.io.tempmmap"]

    # given: creating an array initializes the temporary directory
    module.array((5, 5), np.float32)
    created_dir = module._TEMP_DIR.name
    assert os.path.exists(created_dir)

    # when
    module._clear()

    # then
    dir_still_present = os.path.exists(created_dir)
    assert not dir_still_present


def test_create_array_with_custom_temp_dir():
"""Test creating and accessing an array with custom temp dir."""
tempmmap = sys.modules["alphabase.io.tempmmap"]
Expand All @@ -66,6 +81,33 @@ def test_create_array_with_custom_temp_dir():
assert temp_dir == tempmmap.TEMP_DIR_NAME


def test_create_array_with_custom_temp_dir_nonexisting():
    """Check that `array` rejects a custom temp dir that does not exist."""
    module = sys.modules["alphabase.io.tempmmap"]

    missing_dir = "nonexisting_dir"
    expected_message = "The directory 'nonexisting_dir' in which the file should be created does not exist."

    # when / then
    with pytest.raises(ValueError, match=expected_message):
        module.array((5, 5), np.int32, tmp_dir_abs_path=missing_dir)


def test_create_array_with_custom_temp_dir_not_a_dir():
    """Test creating an array with custom temp dir: path is a file, not a directory.

    A ``NamedTemporaryFile`` is used so that ``.name`` is a real file system
    path on every platform (``TemporaryFile().name`` is an integer file
    descriptor on POSIX). The expected message is regex-escaped because
    ``pytest.raises(match=...)`` interprets it as a regular expression, and
    paths may contain regex metacharacters (e.g. backslashes on Windows).
    """
    import re  # local import: only needed to escape the path in the pattern

    tempmmap = sys.modules["alphabase.io.tempmmap"]

    with tempfile.NamedTemporaryFile() as temp_file:
        expected_message = re.escape(
            f"The path '{temp_file.name}' does not point to a directory."
        )

        # when
        with pytest.raises(ValueError, match=expected_message):
            _ = tempmmap.create_empty_mmap(
                (5, 5), np.int32, tmp_dir_abs_path=temp_file.name
            )


def test_mmap_array_from_path():
"""Test reconnecting to an existing array."""
tempmmap = sys.modules["alphabase.io.tempmmap"]
Expand Down Expand Up @@ -156,6 +198,27 @@ def test_create_empty_with_custom_file_path():
assert temp_dir != tempmmap.TEMP_DIR_NAME


def test_create_empty_with_custom_file_path_exists():
    """Test creating an empty array with a custom file path that already exists.

    Covers both branches of the ``overwrite`` flag:

    1. ``overwrite`` left at its default -> a ``ValueError`` naming the file.
    2. ``overwrite=True`` -> the existing file is replaced and its path returned.
    """
    import re  # local import: only needed to escape the path in the pattern

    tempmmap = sys.modules["alphabase.io.tempmmap"]

    # when: file exists and overwrite is not requested
    # NamedTemporaryFile guarantees `.name` is a real path (TemporaryFile().name
    # is an integer file descriptor on POSIX); re.escape keeps path characters
    # from being interpreted as regex syntax by `match`.
    with tempfile.NamedTemporaryFile() as temp_file:
        expected_message = re.escape(
            f"The file '{temp_file.name}' already exists. Set overwrite to True to overwrite the file or choose a different name."
        )
        with pytest.raises(ValueError, match=expected_message):
            _ = tempmmap.create_empty_mmap(
                (5, 5), np.float32, file_path=temp_file.name
            )

    # when 2: file exists and overwrite is requested
    with tempfile.TemporaryDirectory() as temp_dir:
        existing_path = os.path.join(temp_dir, "temp_mmap.hdf")
        # create an empty placeholder and close it again, so no open handle
        # is held on the file while it is being overwritten
        with open(existing_path, "w"):
            pass

        returned_path = tempmmap.create_empty_mmap(
            (5, 5), np.float32, file_path=existing_path, overwrite=True
        )

        # did not raise, and the custom path is handed back to the caller
        assert returned_path == existing_path


def test_create_empty_with_custom_file_path_error_cases():
"""Test creating and accessing an empty array: error cases."""
tempmmap = sys.modules["alphabase.io.tempmmap"]
Expand Down
Loading