diff --git a/tests/test_archive.py b/tests/test_archive.py
new file mode 100644
index 00000000..3031237a
--- /dev/null
+++ b/tests/test_archive.py
@@ -0,0 +1,213 @@
+import os
+import tempfile
+import zipfile
+
+import numpy as np
+import pytest
+
+from wfdb import rdrecord, wrsamp
+from wfdb.io.archive import WFDBArchive
+
+np.random.seed(1234)
+
+
+@pytest.fixture
+def temp_record():
+    """
+    Create a temporary WFDB record and archive for testing.
+
+    This fixture generates a synthetic 2-channel signal, writes it to a
+    temporary directory using `wrsamp`, then creates a `.wfdb` archive
+    (ZIP container) holding the `.hea` and `.dat` files. The archive is
+    used to test read/write round-trip support for WFDB archives.
+
+    Yields
+    ------
+    dict
+        A dictionary containing:
+        - 'record_name': Path to the record base name (without extension).
+        - 'archive_path': Full path to the created `.wfdb` archive.
+        - 'original_signal': The original NumPy array of the signal.
+        - 'fs': The sampling frequency.
+    """
+    with tempfile.TemporaryDirectory() as tmpdir:
+        record_basename = "testrecord"
+        record_path = os.path.join(tmpdir, record_basename)
+        fs = 250
+        sig_len = 1000
+        sig = (np.random.randn(sig_len, 2) * 1000).astype(np.float32)
+
+        # Write into tmpdir with record name only
+        wrsamp(
+            record_name=record_basename,
+            fs=fs,
+            units=["mV", "mV"],
+            sig_name=["I", "II"],
+            p_signal=sig,
+            fmt=["24", "24"],
+            adc_gain=[200.0, 200.0],
+            baseline=[0, 0],
+            write_dir=tmpdir,
+        )
+
+        # Construct full paths for archive creation
+        hea_path = os.path.join(tmpdir, record_basename + ".hea")
+        dat_path = os.path.join(tmpdir, record_basename + ".dat")
+        archive_path = record_path + ".wfdb"
+
+        # Use the full record path so the archive is created inside tmpdir
+        # rather than in the current working directory
+        with WFDBArchive(record_name=record_path, mode="w") as archive:
+            archive.create_archive(
+                file_list=[hea_path, dat_path],
+                output_path=archive_path,
+            )
+
+        try:
+            yield {
+                "record_name": record_path,
+                "archive_path": archive_path,
+                "original_signal": sig,
+                "fs": fs,
+            }
+        finally:
+            # Clean up any open archive handles
+            from wfdb.io.archive import _archive_cache
+
+            for archive in _archive_cache.values():
+                if archive is not None:
+                    archive.close()
+            _archive_cache.clear()
+
+
+def test_wfdb_archive_inline_round_trip():
+    """
+    Test round-trip read/write using inline archive creation via
+    `wrsamp(..., wfdb_archive=...)`.
+
+    There are two ways of creating an archive:
+
+    1. Inline archive creation via wrsamp(..., wfdb_archive=...).
+       This creates the .hea and .dat files directly inside the archive
+       as part of the record writing step.
+
+    2. Two-step creation via wrsamp(...) followed by
+       WFDBArchive.create_archive(...). This writes regular WFDB files
+       to disk, which are then added to an archive container afterward.
+ """ + with tempfile.TemporaryDirectory() as tmpdir: + record_basename = "testrecord" + record_path = os.path.join(tmpdir, record_basename) + archive_path = record_path + ".wfdb" + fs = 250 + sig_len = 1000 + sig = (np.random.randn(sig_len, 2) * 1000).astype(np.float32) + + # Create archive inline using context manager + with WFDBArchive(record_path, mode="w") as wfdb_archive: + wrsamp( + record_name=record_basename, + fs=fs, + units=["mV", "mV"], + sig_name=["I", "II"], + p_signal=sig, + fmt=["24", "24"], + adc_gain=[200.0, 200.0], + baseline=[0, 0], + write_dir=tmpdir, + wfdb_archive=wfdb_archive, + ) + + assert os.path.exists(archive_path), "Archive was not created" + + # Read back from archive + record = rdrecord(archive_path) + + try: + assert record.fs == fs + assert record.n_sig == 2 + assert record.p_signal.shape == sig.shape + + # Add tolerance to account for loss of precision during archive round-trip + np.testing.assert_allclose( + record.p_signal, sig, rtol=1e-2, atol=3e-3 + ) + finally: + # Ensure we close the archive after reading + if ( + hasattr(record, "wfdb_archive") + and record.wfdb_archive is not None + ): + record.wfdb_archive.close() + + +def test_wfdb_archive_round_trip(temp_record): + record_name = temp_record["record_name"] + archive_path = temp_record["archive_path"] + original_signal = temp_record["original_signal"] + fs = temp_record["fs"] + + assert os.path.exists(archive_path), "Archive was not created" + + record = rdrecord(archive_path) + + assert record.fs == fs + assert record.n_sig == 2 + assert record.p_signal.shape == original_signal.shape + + # Add tolerance to account for loss of precision during archive round-trip + np.testing.assert_allclose( + record.p_signal, original_signal, rtol=1e-2, atol=3e-3 + ) + + +def test_archive_read_subset_channels(temp_record): + """ + Test reading a subset of channels from an archive. + """ + archive_path = temp_record["archive_path"] + original_signal = temp_record["original_signal"] + + record = rdrecord(archive_path, channels=[1]) + + assert record.n_sig == 1 + assert record.p_signal.shape[0] == original_signal.shape[0] + + # Add tolerance to account for loss of precision during archive round-trip + np.testing.assert_allclose( + record.p_signal[:, 0], original_signal[:, 1], rtol=1e-2, atol=3e-3 + ) + + +def test_archive_read_partial_samples(temp_record): + """ + Test reading a sample range from the archive. + """ + archive_path = temp_record["archive_path"] + original_signal = temp_record["original_signal"] + + start, stop = 100, 200 + record = rdrecord(archive_path, sampfrom=start, sampto=stop) + + assert record.p_signal.shape == (stop - start, original_signal.shape[1]) + np.testing.assert_allclose( + record.p_signal, original_signal[start:stop], rtol=1e-2, atol=1e-3 + ) + + +def test_archive_missing_file_error(temp_record): + """ + Ensure appropriate error is raised when expected files are missing from the archive. + """ + archive_path = temp_record["archive_path"] + + # Remove one file from archive (e.g. 
+    os.rename(archive_path, archive_path + ".bak")
+    with (
+        zipfile.ZipFile(archive_path + ".bak", "r") as zin,
+        zipfile.ZipFile(archive_path, "w") as zout,
+    ):
+        for item in zin.infolist():
+            if not item.filename.endswith(".dat"):
+                zout.writestr(item, zin.read(item.filename))
+    os.remove(archive_path + ".bak")
+
+    with pytest.raises(FileNotFoundError, match=r".*\.dat.*"):
+        rdrecord(archive_path)
diff --git a/wfdb/io/_header.py b/wfdb/io/_header.py
index 0d420521..d4d69e2c 100644
--- a/wfdb/io/_header.py
+++ b/wfdb/io/_header.py
@@ -1,11 +1,11 @@
 import datetime
+import os
 from typing import Any, Dict, List, Optional, Sequence, Tuple
 
 import numpy as np
 import pandas as pd
 
-from wfdb.io import _signal
-from wfdb.io import util
+from wfdb.io import _signal, util
 from wfdb.io.header import HeaderSyntaxError, rx_record, rx_segment, rx_signal
 
 """
@@ -278,7 +278,7 @@ def set_defaults(self):
         for f in sfields:
             self.set_default(f)
 
-    def wrheader(self, write_dir="", expanded=True):
+    def wrheader(self, write_dir="", expanded=True, wfdb_archive=None):
         """
         Write a WFDB header file. The signals are not used.
         Before writing:
@@ -325,7 +325,12 @@
         self.check_field_cohesion(rec_write_fields, list(sig_write_fields))
 
         # Write the header file using the specified fields
-        self.wr_header_file(rec_write_fields, sig_write_fields, write_dir)
+        self.wr_header_file(
+            rec_write_fields,
+            sig_write_fields,
+            write_dir,
+            wfdb_archive=wfdb_archive,
+        )
 
     def get_write_fields(self):
         """
@@ -508,7 +513,9 @@ def check_field_cohesion(self, rec_write_fields, sig_write_fields):
                 "Each file_name (dat file) specified must have the same byte offset"
             )
 
-    def wr_header_file(self, rec_write_fields, sig_write_fields, write_dir):
+    def wr_header_file(
+        self, rec_write_fields, sig_write_fields, write_dir, wfdb_archive=None
+    ):
         """
         Write a header file using the specified fields. Converts Record
         attributes into appropriate WFDB format strings.
@@ -522,6 +529,8 @@
             being equal to a list of channels to write for each field.
         write_dir : str
             The directory in which to write the header file.
+        wfdb_archive : WFDBArchive, optional
+            If provided, write the header into this archive instead of to disk.
 
         Returns
         -------
@@ -583,7 +592,13 @@
             comment_lines = ["# " + comment for comment in self.comments]
             header_lines += comment_lines
 
-        util.lines_to_file(self.record_name + ".hea", write_dir, header_lines)
+        header_str = "\n".join(header_lines) + "\n"
+        hea_filename = os.path.basename(self.record_name) + ".hea"
+
+        if wfdb_archive:
+            wfdb_archive.write(hea_filename, header_str.encode("utf-8"))
+        else:
+            util.lines_to_file(hea_filename, write_dir, header_lines)
 
 
 class MultiHeaderMixin(BaseHeaderMixin):
@@ -621,7 +636,7 @@ def set_defaults(self):
         for field in self.get_write_fields():
             self.set_default(field)
 
-    def wrheader(self, write_dir=""):
+    def wrheader(self, write_dir="", wfdb_archive=None):
         """
         Write a multi-segment WFDB header file. The signals or segments are not used.
         Before writing:
@@ -655,7 +670,7 @@
         self.check_field_cohesion()
 
         # Write the header file using the specified fields
-        self.wr_header_file(write_fields, write_dir)
+        self.wr_header_file(write_fields, write_dir, wfdb_archive=wfdb_archive)
 
     def get_write_fields(self):
         """
@@ -733,7 +748,7 @@ def check_field_cohesion(self):
                 "The sum of the 'seg_len' fields do not match the 'sig_len' field"
            )
 
-    def wr_header_file(self, write_fields, write_dir):
+    def wr_header_file(self, write_fields, write_dir, wfdb_archive=None):
         """
         Write a header file using the specified fields.
 
@@ -744,6 +759,8 @@
             and their dependencies.
         write_dir : str
             The output directory in which the header is written.
+        wfdb_archive : WFDBArchive, optional
+            If provided, write the header into this archive instead of to disk.
 
         Returns
         -------
@@ -779,7 +796,13 @@
             comment_lines = ["# " + comment for comment in self.comments]
             header_lines += comment_lines
 
-        util.lines_to_file(self.record_name + ".hea", write_dir, header_lines)
+        header_str = "\n".join(header_lines) + "\n"
+        hea_filename = os.path.basename(self.record_name) + ".hea"
+
+        if wfdb_archive:
+            wfdb_archive.write(hea_filename, header_str.encode("utf-8"))
+        else:
+            util.lines_to_file(hea_filename, write_dir, header_lines)
 
     def get_sig_segments(self, sig_name=None):
         """
diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py
index 6bfafdb5..735ac81c 100644
--- a/wfdb/io/_signal.py
+++ b/wfdb/io/_signal.py
@@ -1,3 +1,4 @@
+import io
 import math
 import os
 import posixpath
@@ -6,7 +7,7 @@
 import fsspec
 import numpy as np
 
-from wfdb.io import download, _coreio, util
+from wfdb.io import _coreio, download, util
 from wfdb.io._coreio import CLOUD_PROTOCOLS
 
 MAX_I32 = 2147483647
@@ -120,7 +121,7 @@ class SignalMixin(object):
 
     """
 
-    def wr_dats(self, expanded, write_dir):
+    def wr_dats(self, expanded, write_dir, wfdb_archive=None):
         """
         Write all dat files associated with a record
         expanded=True to use e_d_signal instead of d_signal.
@@ -132,6 +133,8 @@
             the `d_signal` attribute (False).
         write_dir : str
             The directory to write the output file to.
+        wfdb_archive : WFDBArchive, optional
+            If set, writes to a .wfdb archive instead of the filesystem.
 
         Returns
         -------
@@ -160,7 +163,9 @@
         self.check_sig_cohesion([], expanded)
 
         # Write each of the specified dat files
-        self.wr_dat_files(expanded=expanded, write_dir=write_dir)
+        self.wr_dat_files(
+            expanded=expanded, write_dir=write_dir, wfdb_archive=wfdb_archive
+        )
 
     def check_sig_cohesion(self, write_fields, expanded):
         """
@@ -958,7 +963,7 @@ def calc_checksum(self, expanded=False):
             cs = [int(c) for c in cs]
         return cs
 
-    def wr_dat_files(self, expanded=False, write_dir=""):
+    def wr_dat_files(self, expanded=False, write_dir="", wfdb_archive=None):
         """
         Write each of the specified dat files.
 
@@ -969,6 +974,8 @@
             the `d_signal` attribute (False).
         write_dir : str, optional
            The directory to write the output file to.
+        wfdb_archive : WFDBArchive, optional
+            If set, writes to a .wfdb archive instead of the local filesystem.
 
         Returns
         -------
@@ -1003,6 +1010,7 @@
                     [self.e_d_signal[ch] for ch in dat_channels[fn]],
                     [self.samps_per_frame[ch] for ch in dat_channels[fn]],
                     write_dir=write_dir,
+                    wfdb_archive=wfdb_archive,
                 )
         else:
             dsig = self.d_signal
@@ -1013,6 +1021,7 @@
                     dsig[:, dat_channels[fn][0] : dat_channels[fn][-1] + 1],
                     dat_offsets[fn],
                     write_dir=write_dir,
+                    wfdb_archive=wfdb_archive,
                 )
 
     def smooth_frames(self, sigtype="physical"):
@@ -1120,6 +1129,7 @@ def _rd_segment(
     no_file=False,
     sig_data=None,
     return_res=64,
+    wfdb_archive=None,
 ):
     """
     Read the digital samples from a single segment record's associated
@@ -1264,6 +1274,7 @@
             sampto=sampto,
             no_file=no_file,
             sig_data=sig_data,
+            wfdb_archive=wfdb_archive,
         )
 
         # Copy over the wanted signals
@@ -1288,6 +1299,7 @@ def _rd_dat_signals(
     sampto,
     no_file=False,
     sig_data=None,
+    wfdb_archive=None,
 ):
     """
     Read all signals from a WFDB dat file.
@@ -1390,7 +1402,13 @@
         )
     else:
         data_to_read = _rd_dat_file(
-            file_name, dir_name, pn_dir, fmt, start_byte, n_read_samples
+            file_name,
+            dir_name,
+            pn_dir,
+            fmt,
+            start_byte,
+            n_read_samples,
+            wfdb_archive=wfdb_archive,
         )
 
     if extra_flat_samples:
@@ -1630,7 +1648,9 @@ def _required_byte_num(mode, fmt, n_samp):
     return int(n_bytes)
 
 
-def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
+def _rd_dat_file(
+    file_name, dir_name, pn_dir, fmt, start_byte, n_samp, wfdb_archive=None
+):
     """
     Read data from a dat file, either local or remote, into a 1d numpy
     array.
@@ -1688,14 +1708,30 @@
        element_count = n_samp
        byte_count = n_samp * BYTES_PER_SAMPLE[fmt]
 
-    # Local or cloud dat file
-    if pn_dir is None:
+    # Local file or .wfdb archive
+    if wfdb_archive is not None:
+        # If the exact file name isn't found, look for any .dat file
+        if not wfdb_archive.exists(file_name):
+            dat_files = [
+                f for f in wfdb_archive.zipfile.namelist() if f.endswith(".dat")
+            ]
+            if not dat_files:
+                raise FileNotFoundError(
+                    f"No dat file found in archive for {file_name}"
+                )
+            file_name = dat_files[0]  # Use the first dat file found
+
+        with wfdb_archive.open(file_name, "rb") as fp:
+            fp.seek(start_byte)
+            sig_data = util.fromfile(
+                fp, dtype=np.dtype(DATA_LOAD_TYPES[fmt]), count=element_count
+            )
+    elif pn_dir is None:
         with fsspec.open(os.path.join(dir_name, file_name), "rb") as fp:
             fp.seek(start_byte)
             sig_data = util.fromfile(
                 fp, dtype=np.dtype(DATA_LOAD_TYPES[fmt]), count=element_count
             )
-    # Stream dat file from PhysioNet
     else:
         # check to make sure a cloud path isn't being passed under pn_dir
@@ -2312,6 +2348,7 @@ def wr_dat_file(
     e_d_signal=None,
     samps_per_frame=None,
     write_dir="",
+    wfdb_archive=None,
 ):
     """
     Write a dat file. All bytes are written one at a time to avoid
@@ -2509,16 +2546,30 @@
         else:
             raise ValueError(f"unknown format ({fmt})")
 
-        sf = soundfile.SoundFile(
-            file_path,
-            mode="w",
-            samplerate=96000,
-            channels=n_sig,
-            subtype=subtype,
-            format="FLAC",
-        )
-        with sf:
-            sf.write(d_signal)
+        if wfdb_archive:
+            with io.BytesIO() as f:
+                with soundfile.SoundFile(
+                    f,
+                    mode="w",
+                    samplerate=96000,
+                    channels=n_sig,
+                    subtype=subtype,
+                    format="FLAC",  # required for file-like objects
+                ) as sf:
+                    sf.write(d_signal)
+                wfdb_archive.write(os.path.basename(file_name), f.getvalue())
+            return
+        else:
+            sf = soundfile.SoundFile(
+                file_path,
+                mode="w",
+                samplerate=96000,
+                channels=n_sig,
+                subtype=subtype,
+                format="FLAC",
+            )
+            with sf:
+                sf.write(d_signal)
 
         return
     else:
@@ -2539,8 +2590,13 @@
         b_write = np.append(np.zeros(byte_offset, dtype="uint8"), b_write)
 
     # Write the bytes to the file
-    with open(file_path, "wb") as f:
-        b_write.tofile(f)
+    if wfdb_archive:
+        wfdb_archive.write(os.path.basename(file_name), b_write.tobytes())
+    else:
+        with open(file_path, "wb") as f:
+            b_write.tofile(f)
 
 
 def describe_list_indices(full_list):
diff --git a/wfdb/io/archive.py b/wfdb/io/archive.py
new file mode 100644
index 00000000..7f03b8cc
--- /dev/null
+++ b/wfdb/io/archive.py
@@ -0,0 +1,151 @@
+import io
+import os
+import shutil
+import zipfile
+from contextlib import contextmanager
+
+_archive_cache = {}
+
+
+class WFDBArchive:
+    """
+    Helper class for working with WFDB .wfdb ZIP archives.
+
+    If used for reading, the archive must already exist.
+    If used for writing, use mode='w' and call `write(...)` or
+    `create_archive(...)`.
+
+    Used only if:
+    - .wfdb is included in the record_name explicitly, or
+    - .wfdb is passed directly to the file loading function.
+    """
+
+    def __init__(self, record_name, mode="r"):
+        """
+        Initialize a WFDBArchive for a given record name (without extension).
+
+        Parameters
+        ----------
+        record_name : str
+            The base name of the archive, without the .wfdb extension.
+        mode : str
+            'r' for read (default), 'w' for write.
+        """
+        self.record_name = record_name
+        # Only append .wfdb if it's not already there
+        if record_name.endswith(".wfdb"):
+            self.archive_path = record_name
+        else:
+            self.archive_path = f"{record_name}.wfdb"
+        self.zipfile = None
+        self.mode = mode
+
+        if mode == "r":
+            if not os.path.exists(self.archive_path):
+                raise FileNotFoundError(
+                    f"Archive not found: {self.archive_path}"
+                )
+            if not zipfile.is_zipfile(self.archive_path):
+                raise ValueError(f"Invalid WFDB archive: {self.archive_path}")
+            self.zipfile = zipfile.ZipFile(self.archive_path, mode="r")
+
+        elif mode == "w":
+            # Create an empty archive file if needed
+            if not os.path.exists(self.archive_path):
+                # Create the directory if it doesn't exist
+                os.makedirs(
+                    os.path.dirname(os.path.abspath(self.archive_path)),
+                    exist_ok=True,
+                )
+                WFDBArchive.make_archive_file([], self.archive_path)
+            self.zipfile = zipfile.ZipFile(self.archive_path, mode="a")
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+    def exists(self, filename):
+        """
+        Check if a file exists in the archive.
+ """ + return self.zipfile and filename in self.zipfile.namelist() + + @staticmethod + def make_archive_file(file_list, output_path): + with zipfile.ZipFile(output_path, mode="w") as zf: + for file in file_list: + compress = zipfile.ZIP_DEFLATED + zf.write( + file, arcname=os.path.basename(file), compress_type=compress + ) + + @contextmanager + def open(self, filename, mode="r"): + """ + Open a file, either from disk or from the archive. + Mode 'r' (text) or 'rb' (binary) supported. + """ + if self.zipfile and filename in self.zipfile.namelist(): + with self.zipfile.open(filename, "r") as f: + if "b" in mode: + yield f + else: + yield io.TextIOWrapper(f) + else: + raise FileNotFoundError( + f"Could not find '{filename}' as loose file or inside " + f"'{self.archive_path}'." + ) + + def close(self): + """ + Close the archive if open. + """ + if self.zipfile: + self.zipfile.close() + + def write(self, filename, data): + """ + Write binary data to the archive (replaces if already exists). + """ + if self.zipfile is None: + self.zipfile = zipfile.ZipFile(self.archive_path, mode="w") + self.zipfile.writestr(filename, data) + return + + # If already opened in read or append mode, use replace-then-move + tmp_path = self.archive_path + ".tmp" + with zipfile.ZipFile(self.archive_path, mode="r") as zin: + with zipfile.ZipFile(tmp_path, mode="w") as zout: + for item in zin.infolist(): + if item.filename != filename: + zout.writestr(item, zin.read(item.filename)) + zout.writestr(filename, data) + shutil.move(tmp_path, self.archive_path) + self.zipfile = zipfile.ZipFile(self.archive_path, mode="a") + + def create_archive(self, file_list, output_path=None): + """ + Create a .wfdb archive containing the specified list of files. + If output_path is not specified, uses self.archive_path. + """ + output_path = output_path or self.archive_path + WFDBArchive.make_archive_file(file_list, output_path) + + # If this archive object points to the archive, reload the zipfile in append mode + if output_path == self.archive_path: + if self.zipfile: + self.zipfile.close() + self.zipfile = zipfile.ZipFile(self.archive_path, mode="a") + + +def get_archive(record_base_name, mode="r"): + """ + Get or create a WFDBArchive for the given record base name. + """ + if record_base_name not in _archive_cache: + _archive_cache[record_base_name] = WFDBArchive( + record_base_name, mode=mode + ) + return _archive_cache[record_base_name] diff --git a/wfdb/io/record.py b/wfdb/io/record.py index e611f364..ea195134 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -1,21 +1,16 @@ import datetime import multiprocessing.dummy -import posixpath import os +import posixpath import re import fsspec import numpy as np import pandas as pd -from wfdb.io import _header -from wfdb.io import _signal -from wfdb.io import _url -from wfdb.io import download -from wfdb.io import header -from wfdb.io import util +from wfdb.io import _header, _signal, _url, download, header, util from wfdb.io._coreio import CLOUD_PROTOCOLS - +from wfdb.io.archive import get_archive, WFDBArchive # -------------- WFDB Signal Calibration and Classification ---------- # @@ -904,7 +899,7 @@ def __eq__(self, other, verbose=False): return True - def wrsamp(self, expanded=False, write_dir=""): + def wrsamp(self, expanded=False, write_dir="", wfdb_archive=None): """ Write a WFDB header file and any associated dat files from this object. @@ -916,6 +911,10 @@ def wrsamp(self, expanded=False, write_dir=""): of the uniform signal (d_signal). 
         write_dir : str, optional
             The directory in which to write the files.
+        wfdb_archive : str or WFDBArchive, optional
+            If provided, writes the record to a .wfdb archive. Can be either:
+            - A string path to the archive file (e.g., 'record.wfdb')
+            - A WFDBArchive object for more advanced usage
 
         Returns
         -------
@@ -932,13 +931,31 @@
                     checksums[ch] = old_val
             self.checksum = checksums
 
+        # Handle wfdb_archive parameter
+        if wfdb_archive:
+            if isinstance(wfdb_archive, str):
+                # If a string path is provided, create a WFDBArchive object
+                wfdb_archive = get_archive(wfdb_archive, mode="w")
+            elif not isinstance(wfdb_archive, WFDBArchive):
+                raise TypeError(
+                    "wfdb_archive must be either a string path or WFDBArchive object"
+                )
+
         # Perform field validity and cohesion checks, and write the
         # header file.
-        self.wrheader(write_dir=write_dir, expanded=expanded)
+        self.wrheader(
+            write_dir=write_dir, expanded=expanded, wfdb_archive=wfdb_archive
+        )
         if self.n_sig > 0:
             # Perform signal validity and cohesion checks, and write the
             # associated dat files.
-            self.wr_dats(expanded=expanded, write_dir=write_dir)
+            self.wr_dats(
+                expanded=expanded,
+                write_dir=write_dir,
+                wfdb_archive=wfdb_archive,
+            )
 
     def _arrange_fields(self, channels, sampfrom, smooth_frames):
         """
@@ -1160,7 +1177,7 @@
         if not seg_len:
             self.seg_len = [segment.sig_len for segment in segments]
 
-    def wrsamp(self, write_dir=""):
+    def wrsamp(self, write_dir="", wfdb_archive=None):
         """
         Write a multi-segment header, along with headers and dat files
         for all segments, from this object.
@@ -1177,11 +1194,11 @@
         """
         # Perform field validity and cohesion checks, and write the
         # header file.
-        self.wrheader(write_dir=write_dir)
+        self.wrheader(write_dir=write_dir, wfdb_archive=wfdb_archive)
 
         # Perform record validity and cohesion checks, and write the
         # associated segments.
         for seg in self.segments:
-            seg.wrsamp(write_dir=write_dir)
+            seg.wrsamp(write_dir=write_dir, wfdb_archive=wfdb_archive)
 
     def _check_segment_cohesion(self):
         """
@@ -1826,7 +1843,11 @@ def rdheader(record_name, pn_dir=None, rd_segments=False):
 
     """
     dir_name, base_record_name = os.path.split(record_name)
-    file_name = f"{base_record_name}.hea"
+
+    if not base_record_name.endswith(".hea"):
+        file_name = f"{base_record_name}.hea"
+    else:
+        file_name = base_record_name
 
     # If this is a cloud path, use posixpath to construct the path and fsspec to open file
     if any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS):
@@ -1970,7 +1991,6 @@
         Option used to stream data from Physionet. The Physionet database
         directory from which to find the required record files. eg. For
         record '100' in 'http://physionet.org/content/mitdb'
-        pn_dir='mitdb'.
     m2s : bool, optional
         Used when reading multi-segment records. Specifies whether to
         directly return a WFDB MultiRecord object (False), or to convert
@@ -2028,27 +2048,62 @@ def rdrecord(
     --------
     >>> record = wfdb.rdrecord('sample-data/test01_00s', sampfrom=800,
                                channels=[1, 3])
 
     """
-    dir_name, base_record_name = os.path.split(record_name)
-    # Update the dir_name using abspath unless it is a cloud path
-    if not any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS):
-        dir_name = os.path.abspath(dir_name)
-
-    # Read the header fields
-    if pn_dir is not None:
-        # check to make sure a cloud path isn't being passed under pn_dir
-        if any(pn_dir.startswith(proto) for proto in CLOUD_PROTOCOLS):
-            raise ValueError(
-                "Cloud paths should be passed under record_name, not under pn_dir"
-            )
-        if "." not in pn_dir:
-            dir_list = pn_dir.split("/")
-            pn_dir = posixpath.join(
-                dir_list[0], download.get_version(dir_list[0]), *dir_list[1:]
-            )
-
-    record = rdheader(record_name, pn_dir=pn_dir, rd_segments=False)
+    wfdb_archive = None
+    is_wfdb_archive = record_name.endswith(".wfdb")
+
+    if is_wfdb_archive:
+        record_base = record_name[:-5]  # remove ".wfdb"
+        wfdb_archive = get_archive(record_base)
+
+        # Find any .hea file in the archive
+        hea_files = [
+            f for f in wfdb_archive.zipfile.namelist() if f.endswith(".hea")
+        ]
+        if not hea_files:
+            raise FileNotFoundError(
+                f"No header file found in archive {record_name}"
+            )
+        hea_file = hea_files[0]  # Use the first header file found
+
+        import tempfile
+
+        with wfdb_archive.open(hea_file, "r") as f:
+            header_str = f.read()
+
+        # Write the header to a named temporary file so rdheader can parse
+        # it, then remove the file once the record object has been built
+        with tempfile.NamedTemporaryFile(
+            "w+", suffix=".hea", delete=False
+        ) as tmpf:
+            tmpf.write(header_str)
+            tmpf.flush()
+        try:
+            record = rdheader(tmpf.name)
+        finally:
+            os.remove(tmpf.name)
+        record.wfdb_archive = wfdb_archive
+
+        # Set dir_name to the archive base (needed for _rd_segment)
+        dir_name = os.path.dirname(record_base)
+
+    else:
+        dir_name, base_record_name = os.path.split(record_name)
+        # Update the dir_name using abspath unless it is a cloud path
+        if not any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS):
+            dir_name = os.path.abspath(dir_name)
+
+        # Read the header fields
+        if pn_dir is not None:
+            # check to make sure a cloud path isn't being passed under pn_dir
+            if any(pn_dir.startswith(proto) for proto in CLOUD_PROTOCOLS):
+                raise ValueError(
+                    "Cloud paths should be passed under record_name, not under pn_dir"
+                )
+            if "." not in pn_dir:
+                dir_list = pn_dir.split("/")
+                pn_dir = posixpath.join(
+                    dir_list[0],
+                    download.get_version(dir_list[0]),
+                    *dir_list[1:],
+                )
+
+        record = rdheader(record_name, pn_dir=pn_dir, rd_segments=False)
 
     # Set defaults for sampto and channels input variables
     if sampto is None:
@@ -2150,6 +2205,7 @@
             no_file=no_file,
             sig_data=sig_data,
             return_res=return_res,
+            wfdb_archive=wfdb_archive,
         )
 
     # Only 1 sample/frame, or frames are smoothed. Return uniform numpy array
@@ -2861,6 +2917,7 @@ def wrsamp(
     base_date=None,
     base_datetime=None,
     write_dir="",
+    wfdb_archive=None,
 ):
     """
     Write a single segment WFDB record, creating a WFDB header file and any
@@ -2920,6 +2977,10 @@
         setting both `base_date` and `base_time`.
     write_dir : str, optional
         The directory in which to write the files.
+    wfdb_archive : str or WFDBArchive, optional
+        If provided, writes the record to a .wfdb archive. Can be either:
+        - A string path to the archive file (e.g., 'record.wfdb')
+        - A WFDBArchive object for more advanced usage
 
     Returns
     -------
@@ -2944,11 +3005,16 @@
     >>> # Write a local WFDB record (manually inserting fields)
     >>> wfdb.wrsamp('ecgrecord', fs = 250, units=['mV', 'mV'],
                     sig_name=['I', 'II'], p_signal=signals, fmt=['16', '16'])
+    >>> # Write to a .wfdb archive using a string path
+    >>> wfdb.wrsamp('ecgrecord', fs = 250, units=['mV', 'mV'],
+                    sig_name=['I', 'II'], p_signal=signals, fmt=['16', '16'],
+                    wfdb_archive='ecgrecord.wfdb')
 
     """
     # Check for valid record name
     if "." in record_name:
         raise Exception("Record name must not contain '.'")
+
     # Check input field combinations
     signal_list = [p_signal, d_signal, e_p_signal, e_d_signal]
     signals_set = sum(1 for var in signal_list if var is not None)
@@ -3047,8 +3113,22 @@
     else:
         expanded = False
 
+    # Handle wfdb_archive parameter
+    if wfdb_archive:
+        if isinstance(wfdb_archive, str):
+            # If a string path is provided, create a WFDBArchive object
+            wfdb_archive = get_archive(wfdb_archive, mode="w")
+        elif not isinstance(wfdb_archive, WFDBArchive):
+            raise TypeError(
+                "wfdb_archive must be either a string path or WFDBArchive object"
+            )
+
     # Write the record files - header and associated dat
-    record.wrsamp(write_dir=write_dir, expanded=expanded)
+    record.wrsamp(
+        write_dir=write_dir, expanded=expanded, wfdb_archive=wfdb_archive
+    )
 
 
 def dl_database(
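
Usage sketch (reviewer note, not part of the patch): a minimal end-to-end
example of the archive API added above, assuming a build that includes this
change. The `wrsamp(..., wfdb_archive=...)` and `rdrecord('<name>.wfdb')`
entry points are taken from the diff; the record name and signal values are
illustrative only.

    import numpy as np
    import wfdb

    # Synthetic 2-channel signal (illustrative values only)
    sig = (np.random.randn(1000, 2) * 1000).astype(np.float32)

    # Inline archive creation: the .hea and .dat files are written directly
    # into 'ecgrecord.wfdb' (a ZIP container) instead of as loose files.
    wfdb.wrsamp(
        "ecgrecord",
        fs=250,
        units=["mV", "mV"],
        sig_name=["I", "II"],
        p_signal=sig,
        fmt=["16", "16"],
        adc_gain=[200.0, 200.0],
        baseline=[0, 0],
        wfdb_archive="ecgrecord.wfdb",
    )

    # Reading: a record name ending in .wfdb routes rdrecord through
    # WFDBArchive, which attaches the open handle as record.wfdb_archive.
    record = wfdb.rdrecord("ecgrecord.wfdb")
    print(record.fs, record.n_sig, record.p_signal.shape)

    # Close the archive handle attached by rdrecord.
    if getattr(record, "wfdb_archive", None) is not None:
        record.wfdb_archive.close()

Note that get_archive() caches handles by name, so a long-lived process may
also want to close and clear wfdb.io.archive._archive_cache, as the test
fixture above does.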