Skip to content

Commit

Permalink
added a function to add metadata in output
Browse files Browse the repository at this point in the history
  • Loading branch information
abhi0395 committed Aug 5, 2024
1 parent fcb8de1 commit 0d42bb1
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 6 deletions.
2 changes: 1 addition & 1 deletion docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,6 @@ Parallel mode can be memory-intensive if the input FITS file is large in size. A

- **Use a rule of thumb for file size:** Ensure that the size of each individual file is no larger than `total_memory/ncpu` of your node or system. Based on this idea you can decide your `N`. I would suggest `N = 1000`.

- **Merge results at the end:** After processing, you can merge your results.
- **Merge results at the end:** After processing, you can merge your results using `qsoabsfind.utils.combine_fits_files <https://github.com/abhi0395/qsoabsfind/blob/main/qsoabsfind/utils.py>`_ function. Please read the description before using it.

In order to decide the right size of the FITS file, consider the total available memory and the number of CPUs in your system.
2 changes: 1 addition & 1 deletion qsoabsfind/absfinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from .config import load_constants
from .spec import QSOSpecRead
from numba import jit
from astropy.table import Table

constants = load_constants()
lines, oscillator_parameters, speed_of_light = constants.lines, constants.oscillator_parameters, constants.speed_of_light
Expand Down Expand Up @@ -127,7 +128,6 @@ def read_single_spectrum_and_find_absorber(fits_file, spec_index, absorber, **kw
if spec_index % 5000 == 0:
print(f'Detection finished up to spec index = {spec_index}', flush=True)


(index_spec, pure_z_abs, pure_gauss_fit, pure_gauss_fit_std, pure_ew_first_line_mean, pure_ew_second_line_mean, pure_ew_total_mean, pure_ew_first_line_error, pure_ew_second_line_error, pure_ew_total_error, redshift_err, sn1_all, sn2_all) = convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber, lam_obs, residual, error, lam_search, unmsk_residual, unmsk_error, **kwargs)

return (index_spec, pure_z_abs, pure_gauss_fit, pure_gauss_fit_std, pure_ew_first_line_mean, pure_ew_second_line_mean, pure_ew_total_mean, pure_ew_first_line_error, pure_ew_second_line_error, pure_ew_total_error, redshift_err, sn1_all, sn2_all)
Expand Down
16 changes: 13 additions & 3 deletions qsoabsfind/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,20 @@ def read_fits_file(fits_file, index=None):
wavelength = hdul['WAVELENGTH'].data # Assuming wavelength is common for all spectra
return flux, error, wavelength, metadata

def save_results_to_fits(results, output_file, headers, absorber):
def save_results_to_fits(results, input_file, output_file, headers, absorber):
"""
Save the results to a FITS file.
Save the absorber results to a FITS file along with the metadata of QSOs.
Args:
results (dict): The results dictionary.
input_file (str): The path to the input spectra FITS file.
output_file (str): The path to the output FITS file.
headers (dict): The headers to include in the FITS file.
absorber (str): The absorber type (MgII or CIV).
Returns:
A fits file containing detected absorber properties in 'ABSORBER' HDU and
corresponding QSO metadata in 'METADATA' HDU.
"""
EW_TOTAL = f'{absorber.upper()}_EW_TOTAL'
if absorber == 'MgII':
Expand Down Expand Up @@ -79,5 +84,10 @@ def save_results_to_fits(results, output_file, headers, absorber):
for key, header in headers.items():
hdr[key] = (header["value"], header["comment"])

hdul = fits.HDUList([fits.PrimaryHDU(header=hdr), hdu])
# load the QSO METADATA

_, _, _, metadata = read_fits_file(input_file, index=np.array(results['index_spec']))
qso_hdu = fits.BinTableHDU(metadata, name='METADATA')
import pdb;pdb.set_trace()
hdul = fits.HDUList([fits.PrimaryHDU(header=hdr), hdu, qso_hdu])
hdul.writeto(output_file, overwrite=True)
2 changes: 1 addition & 1 deletion qsoabsfind/parallel_convolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def main():
)

# Save the results to a FITS file
save_results_to_fits(results, args.output, headers, args.absorber)
save_results_to_fits(results, args.input_fits_file, args.output, headers, args.absorber)

# End timing
end_time = time.time()
Expand Down
48 changes: 48 additions & 0 deletions qsoabsfind/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,54 @@ def save_plot(x, y, plot_filename='qsoabsfind_plot.png', xlabel='X-axis', ylabel

print(f"Plot saved as {plot_path}")

def combine_fits_files(directory, output_filename='combined.fits'):
    """
    Combine HDUs from all FITS files in a directory into a single FITS file,
    preserving headers, and save the combined file in the same directory.

    Args:
        directory (str): Path to the directory containing the FITS files
            (one absorber file per spectra file).
        output_filename (str): Name of the output combined FITS file
            (default: 'combined.fits').

    Note:
        This function assumes that all the corresponding absorber FITS files
        are in the input directory and follow the same structure as the
        output of qsoabsfind. HDUs are de-duplicated by extension name
        (EXTNAME), so only the first HDU seen with a given name is kept.
        Any other FITS files in the directory (except a previous combined
        output, which is skipped) may cause the script to fail.
    """
    # Ensure the directory exists
    if not os.path.isdir(directory):
        raise FileNotFoundError(f"The directory {directory} does not exist.")

    # List to hold all HDUs for the combined FITS file
    combined_hdul = fits.HDUList()

    # Track added extension names to avoid duplicates. The primary HDU is
    # named 'PRIMARY', so it is also added only once — a valid FITS file
    # must not contain more than one primary HDU.
    added_extnames = set()

    # Iterate over all FITS files in the directory, sorted so the
    # combination order is deterministic across platforms
    for filename in sorted(os.listdir(directory)):
        # Skip non-FITS files and any previously combined output, so that
        # re-running the function does not ingest its own result
        if not filename.endswith(".fits") or filename == output_filename:
            continue
        file_path = os.path.join(directory, filename)
        print(f"Processing file: {file_path}")

        # Open the FITS file (memmap avoids loading full data into memory)
        with fits.open(file_path, memmap=True) as hdul:
            # Iterate through each HDU in the file
            for hdu in hdul:
                # Add only the first HDU seen with each extension name
                if hdu.name not in added_extnames:
                    # Copy the HDU to preserve data and header after the
                    # source file is closed
                    combined_hdul.append(hdu.copy())
                    added_extnames.add(hdu.name)
                    print(f"Added HDU '{hdu.name}' from {filename}")

    # Construct the output file path
    output_file_path = os.path.join(directory, output_filename)

    # Save the combined HDU list to a new FITS file
    combined_hdul.writeto(output_file_path, overwrite=True)
    print(f"Combined FITS file saved as {output_file_path}")

def validate_sizes(conv_arr, unmsk_residual, spec_index):
"""
Validate that all arrays have the same size.
Expand Down

0 comments on commit 0d42bb1

Please sign in to comment.