Skip to content

Commit

Permalink
added a function to add metadata in output
Browse files Browse the repository at this point in the history
  • Loading branch information
abhi0395 committed Aug 5, 2024
1 parent fcb8de1 commit 0d42bb1
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 6 deletions.
2 changes: 1 addition & 1 deletion docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,6 @@ Parallel mode can be memory-intensive if the input FITS file is large in size. A

- **Use a rule of thumb for file size:** Ensure that the size of each individual file is no larger than `total_memory/ncpu` of your node or system. Based on this idea you can decide your `N`. I would suggest `N = 1000`.

- **Merge results at the end:** After processing, you can merge your results.
- **Merge results at the end:** After processing, you can merge your results using `qsoabsfind.utils.combine_fits_files <https://github.com/abhi0395/qsoabsfind/blob/main/qsoabsfind/utils.py>`_ function. Please read the description before using it.

In order to decide the right size of the FITS file, consider the total available memory and the number of CPUs in your system.
2 changes: 1 addition & 1 deletion qsoabsfind/absfinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from .config import load_constants
from .spec import QSOSpecRead
from numba import jit
from astropy.table import Table

constants = load_constants()
lines, oscillator_parameters, speed_of_light = constants.lines, constants.oscillator_parameters, constants.speed_of_light
Expand Down Expand Up @@ -127,7 +128,6 @@ def read_single_spectrum_and_find_absorber(fits_file, spec_index, absorber, **kw
if spec_index % 5000 == 0:
print(f'Detection finished up to spec index = {spec_index}', flush=True)


(index_spec, pure_z_abs, pure_gauss_fit, pure_gauss_fit_std, pure_ew_first_line_mean, pure_ew_second_line_mean, pure_ew_total_mean, pure_ew_first_line_error, pure_ew_second_line_error, pure_ew_total_error, redshift_err, sn1_all, sn2_all) = convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber, lam_obs, residual, error, lam_search, unmsk_residual, unmsk_error, **kwargs)

return (index_spec, pure_z_abs, pure_gauss_fit, pure_gauss_fit_std, pure_ew_first_line_mean, pure_ew_second_line_mean, pure_ew_total_mean, pure_ew_first_line_error, pure_ew_second_line_error, pure_ew_total_error, redshift_err, sn1_all, sn2_all)
Expand Down
16 changes: 13 additions & 3 deletions qsoabsfind/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,20 @@ def read_fits_file(fits_file, index=None):
wavelength = hdul['WAVELENGTH'].data # Assuming wavelength is common for all spectra
return flux, error, wavelength, metadata

def save_results_to_fits(results, output_file, headers, absorber):
def save_results_to_fits(results, input_file, output_file, headers, absorber):
"""
Save the results to a FITS file.
Save the absorber results to a FITS file along with the metadata of QSOs.
Args:
results (dict): The results dictionary.
input_file (str): The path to the input spectra FITS file.
output_file (str): The path to the output FITS file.
headers (dict): The headers to include in the FITS file.
absorber (str): The absorber type (MgII or CIV).
Returns:
A fits file containing detected absorber properties in 'ABSORBER' HDU and
corresponding QSO metadata in 'METADATA' HDU.
"""
EW_TOTAL = f'{absorber.upper()}_EW_TOTAL'
if absorber == 'MgII':
Expand Down Expand Up @@ -79,5 +84,10 @@ def save_results_to_fits(results, output_file, headers, absorber):
for key, header in headers.items():
hdr[key] = (header["value"], header["comment"])

hdul = fits.HDUList([fits.PrimaryHDU(header=hdr), hdu])
# load the QSO METADATA

_, _, _, metadata = read_fits_file(input_file, index=np.array(results['index_spec']))
qso_hdu = fits.BinTableHDU(metadata, name='METADATA')
import pdb;pdb.set_trace()
hdul = fits.HDUList([fits.PrimaryHDU(header=hdr), hdu, qso_hdu])
hdul.writeto(output_file, overwrite=True)
2 changes: 1 addition & 1 deletion qsoabsfind/parallel_convolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def main():
)

# Save the results to a FITS file
save_results_to_fits(results, args.output, headers, args.absorber)
save_results_to_fits(results, args.input_fits_file, args.output, headers, args.absorber)

# End timing
end_time = time.time()
Expand Down
48 changes: 48 additions & 0 deletions qsoabsfind/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,54 @@ def save_plot(x, y, plot_filename='qsoabsfind_plot.png', xlabel='X-axis', ylabel

print(f"Plot saved as {plot_path}")

def combine_fits_files(directory, output_filename='combined.fits'):
    """
    Combine HDUs from all FITS files in a directory into a single FITS file,
    preserving headers, and save the combined file in the same directory.

    Args:
        directory (str): Path to the directory containing the FITS files
            (one absorber file per spectra file).
        output_filename (str): Name of the output combined FITS file
            (default: 'combined.fits').

    Note:
        This function assumes that all the corresponding absorber FITS files
        are in the input directory and follow the same structure as the
        output of qsoabsfind. HDUs are de-duplicated by extension name
        (EXTNAME), so only the first HDU seen with a given name is kept.
        Any other FITS files in the directory (except a previous combined
        output, which is skipped) may cause the script to fail.
    """
    # Ensure the directory exists
    if not os.path.isdir(directory):
        raise FileNotFoundError(f"The directory {directory} does not exist.")

    # List to hold all HDUs for the combined FITS file
    combined_hdul = fits.HDUList()

    # Track added extension names to avoid duplicates. The primary HDU is
    # named 'PRIMARY', so it is also added only once — a valid FITS file
    # must not contain more than one primary HDU.
    added_extnames = set()

    # Iterate over all FITS files in the directory, sorted so the
    # combination order is deterministic across platforms
    for filename in sorted(os.listdir(directory)):
        # Skip non-FITS files and any previously combined output, so that
        # re-running the function does not ingest its own result
        if not filename.endswith(".fits") or filename == output_filename:
            continue
        file_path = os.path.join(directory, filename)
        print(f"Processing file: {file_path}")

        # Open the FITS file (memmap avoids loading full data into memory)
        with fits.open(file_path, memmap=True) as hdul:
            # Iterate through each HDU in the file
            for hdu in hdul:
                # Add only the first HDU seen with each extension name
                if hdu.name not in added_extnames:
                    # Copy the HDU to preserve data and header after the
                    # source file is closed
                    combined_hdul.append(hdu.copy())
                    added_extnames.add(hdu.name)
                    print(f"Added HDU '{hdu.name}' from {filename}")

    # Construct the output file path
    output_file_path = os.path.join(directory, output_filename)

    # Save the combined HDU list to a new FITS file
    combined_hdul.writeto(output_file_path, overwrite=True)
    print(f"Combined FITS file saved as {output_file_path}")

def validate_sizes(conv_arr, unmsk_residual, spec_index):
"""
Validate that all arrays have the same size.
Expand Down

0 comments on commit 0d42bb1

Please sign in to comment.