ew error corrected, nb, data updated, vel_disp added

abhi0395 · Aug 30, 2024 · a002276 · a002276
1 parent 989e467
commit a002276
Show file tree

Hide file tree

Showing 11 changed files with 127 additions and 90 deletions.
diff --git a/data/CIV_cat.fits b/data/CIV_cat.fits
diff --git a/data/MgII_cat.fits b/data/MgII_cat.fits
diff --git a/docs/fileformat.rst b/docs/fileformat.rst
@@ -25,7 +25,6 @@ Then run `qsoabsfind` with the required FITS file. If using a custom constant fi
 ::
 
     qsoabsfind --input-fits-file data/qso_test.fits \
-               --n-qso 500 \
                --absorber MgII \
                --output test_MgII.fits \
                --headers SURVEY=SDSS AUTHOR=YOUR_NAME \
@@ -47,8 +46,9 @@ The **output** `fits file` will have two HDUs `ABSORBER` and `METADATA`:
 - **Z_ABS_ERR**: Measured error in the redshift of the absorber.
 - **GAUSS_FIT**: Rest-frame fitting parameters of double Gaussian to the absorber doublet (the width can be used to measure the velocity dispersion).
 - **GAUSS_FIT_STD**: Uncertainties in rest-frame fitting parameters of double Gaussian to the absorber doublet.
-- **SN_${METAL}_${LINE}**: Signal-to-noise ratio of the lines.
+- **SN_${METAL}_${LINE}**: Signal-to-noise ratio (SNR) of the lines, estimated from uncertainties on residual.
 - **${METAL}_EW_TOTAL**: Total EW of the lines in Angstroms.
 - **${METAL}_EW_TOTAL_ERROR**: Uncertainties in total EW of the lines in Angstroms.
+- **{METAL}_${LINE}_VDISP**: Rest-frame intrinsic velocity dispersion (corrected for instrumental resolution) in km/s.
 
 **METADATA** HDU will contain every metadata (corresponding to each absorber) that is available in input spectra file.
diff --git a/nb/example.ipynb b/nb/example.ipynb
diff --git a/qsoabsfind/absfinder.py b/qsoabsfind/absfinder.py
@@ -18,7 +18,7 @@
     calculate_doublet_ratio,
     group_and_select_weighted_redshift
 )
-from .ew import ( 
+from .ew import (
     measure_absorber_properties_double_gaussian
 )
 from .config import load_constants
@@ -88,12 +88,12 @@ def read_single_spectrum_and_find_absorber(fits_file, spec_index, absorber, **kw
 
     kwargs.pop("lam_edge_sep") # just remove this keyword as its not used the following function.
 
-    (index_spec, pure_z_abs, pure_gauss_fit, pure_gauss_fit_std, pure_ew_first_line_mean, pure_ew_second_line_mean, pure_ew_total_mean, pure_ew_first_line_error, pure_ew_second_line_error, pure_ew_total_error, redshift_err, sn1_all, sn2_all) = convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber, lam_obs, residual, error, lam_search, unmsk_residual, unmsk_error, **kwargs)
+    (index_spec, pure_z_abs, pure_gauss_fit, pure_gauss_fit_std, pure_ew_first_line_mean, pure_ew_second_line_mean, pure_ew_total_mean, pure_ew_first_line_error, pure_ew_second_line_error, pure_ew_total_error, redshift_err, sn1_all, sn2_all, vel_disp1, vel_disp2) = convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber, lam_obs, residual, error, lam_search, unmsk_residual, unmsk_error, **kwargs)
 
     # Print progress for every spectrum processed
     elapsed(start_time, f"\nTime taken to finish {absorber} detection for index = {spec_index} is: ")
 
-    return (index_spec, pure_z_abs, pure_gauss_fit, pure_gauss_fit_std, pure_ew_first_line_mean, pure_ew_second_line_mean, pure_ew_total_mean, pure_ew_first_line_error, pure_ew_second_line_error, pure_ew_total_error, redshift_err, sn1_all, sn2_all)
+    return (index_spec, pure_z_abs, pure_gauss_fit, pure_gauss_fit_std, pure_ew_first_line_mean, pure_ew_second_line_mean, pure_ew_total_mean, pure_ew_first_line_error, pure_ew_second_line_error, pure_ew_total_error, redshift_err, sn1_all, sn2_all, vel_disp1, vel_disp2)
 
 
 def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII', lam_obs=None, residual=None, error=None,
@@ -140,12 +140,17 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
             - errors EW1 (list): errors on Equivalent width of line 1 for each absorber
             - errors EW2 (list): errors on Equivalent width of line 2 for each absorber
             - errors EW total (list): errors on Total Equivalent width of line 1 and line 2 for each absorber
+            - zabs_err (list): errors on redshifts of absorbers detected
+            - sn1 (list): SNR of line 1 for each absorber
+            - sn2 (list): SNR of of line 2 for each absorber
+            - vel_disp1 (list): rest-frame velocity dispersion of line 1 for each absorber (in km/s)
+            - vel_disp2 (list): rest-frame velocity dispersion of line 2 for each absorber (in km/s)
     """
 
     # return if there are no wavelength pixels available to search for
     if lam_search.size==0 or lam_obs.size==0:
         print(f'INFO: No wavelength pixels available in search region, spec index = {spec_index}')
-        return ([spec_index], [0], [[0, 0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0]], [0], [0], [0], [0], [0], [0], [0], [0], [0])
+        return ([spec_index], [0], [[0, 0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0]], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0])
 
     else:
         # Constants
@@ -212,7 +217,7 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
         if len(combined_final_our_z)>0:
             z_abs, z_err, fit_param, fit_param_std, EW_first_line_mean, EW_second_line_mean, EW_total_mean, EW_first_line_error, EW_second_line_error, EW_total_error = measure_absorber_properties_double_gaussian(
                 index=spec_index, wavelength=lam_obs, flux=residual, error=error, absorber_redshift=combined_final_our_z, bound=bound, use_kernel=absorber, d_pix=d_pix, use_covariance=use_covariance)
-            
+
             pure_z_abs = np.zeros(len(z_abs))
             pure_gauss_fit = np.zeros((len(z_abs), 6))
             pure_gauss_fit_std = np.zeros((len(z_abs), 6))
@@ -225,15 +230,17 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
             redshift_err = np.zeros(len(z_abs))
             sn1_all = np.zeros(len(z_abs))
             sn2_all = np.zeros(len(z_abs))
-
+            vel_disp1 = np.zeros(len(z_abs))
+            vel_disp2 = np.zeros(len(z_abs))
+
             z_inds = [i for i, x in enumerate(z_abs) if not np.isnan(x) and x > 0]
-            
+
             for m in z_inds:
                 if len(fit_param[m]) > 0 and not np.all(np.isnan(fit_param[m])):
 
                     z_new, z_new_error, fit_param_temp, fit_param_std_temp, EW_first_temp_mean, EW_second_temp_mean, EW_total_temp_mean, EW_first_error_temp, EW_second_error_temp, EW_total_error_temp = measure_absorber_properties_double_gaussian(
                         index=spec_index, wavelength=lam_obs, flux=residual, error=error, absorber_redshift=[z_abs[m]], bound=bound, use_kernel=absorber, d_pix=d_pix)
-                    
+
                     if len(fit_param_temp[0]) > 0 and not np.all(np.isnan(fit_param_temp[0])):
                         gaussian_parameters = np.array(fit_param_temp[0])
                         lam_rest = lam_obs / (1 + z_abs[m])
@@ -254,7 +261,7 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
                         else:
                             dr, min_dr, max_dr = 0, 0, -1 #failure case
                             ew1_snr, ew2_snr = 0, 0 # failure case
-                            
+
                         if (gaussian_parameters > bound[0]+0.001).all() and (gaussian_parameters < bound[1]-0.001).all() and lower_del_lam <= c1 - c0 <= upper_del_lam and sn1 > sn_line1 and sn2 > sn_line2 and vel1 > 0 and vel2 > 0 and min_dr < dr < max_dr and ew1_snr >1 and ew2_snr>1:
                             pure_z_abs[m] = z_new
                             pure_gauss_fit[m] = fit_param_temp[0]
@@ -268,6 +275,8 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
                             redshift_err[m] = z_new_error
                             sn1_all[m] = sn1
                             sn2_all[m] = sn2
+                            vel_disp1[m] = vel1
+                            vel_disp2[m] = vel2
 
             valid_indices = pure_z_abs != 0
             pure_z_abs = pure_z_abs[valid_indices]
@@ -282,6 +291,8 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
             redshift_err = redshift_err[valid_indices]
             sn1_all = sn1_all[valid_indices]
             sn2_all = sn2_all[valid_indices]
+            vel_disp1 = vel_disp1[valid_indices]
+            vel_disp2 = vel_disp2[valid_indices]
 
             if len(pure_z_abs) > 0:
                 if absorber=='MgII':
@@ -305,17 +316,20 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
                 redshift_err = redshift_err[sel_indices]
                 sn1_all = sn1_all[sel_indices]
                 sn2_all = sn2_all[sel_indices]
+                vel_disp1 = vel_disp1[sel_indices]
+                vel_disp2 = vel_disp2[sel_indices]
             else:
                 redshift_err = np.array([0])
                 pure_z_abs = np.array([0])
                 pure_gauss_fit = pure_gauss_fit_std = np.array([[0, 0, 0, 0, 0, 0]])
                 pure_ew_first_line_mean = pure_ew_second_line_mean = pure_ew_total_mean = np.array([0])
                 pure_ew_first_line_error = pure_ew_second_line_error = pure_ew_total_error = np.array([0])
                 sn1_all = sn2_all = np.array([0])
+                vel_disp1 = vel_disp2= np.array([0])
 
             not_found = max(1, len(pure_z_abs))
             index_spec = [spec_index for _ in range(not_found)]
             return (index_spec, pure_z_abs.tolist(), pure_gauss_fit.tolist(), pure_gauss_fit_std.tolist(), pure_ew_first_line_mean.tolist(), pure_ew_second_line_mean.tolist(), pure_ew_total_mean.tolist(),
-                    pure_ew_first_line_error.tolist(), pure_ew_second_line_error.tolist(), pure_ew_total_error.tolist(), redshift_err.tolist(), sn1_all.tolist(), sn2_all.tolist())
+                    pure_ew_first_line_error.tolist(), pure_ew_second_line_error.tolist(), pure_ew_total_error.tolist(), redshift_err.tolist(), sn1_all.tolist(), sn2_all.tolist(), vel_disp1.tolist(), vel_disp2.tolist())
         else:
-            return ([spec_index], [0], [[0, 0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0]], [0], [0], [0], [0], [0], [0], [0], [0], [0])
+            return ([spec_index], [0], [[0, 0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0]], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0])
diff --git a/qsoabsfind/ew.py b/qsoabsfind/ew.py
@@ -6,7 +6,6 @@
 from scipy.optimize import curve_fit
 from .utils import double_gaussian
 from .absorberutils import redshift_estimate
-import copy
 from .config import load_constants
 
 constants = load_constants()
@@ -111,8 +110,9 @@ def calculate_ew_errors(popt, perr):
     EW1 = amp1 * np.sqrt(np.pi * 2 * sigma1 ** 2)
     EW2 = amp2 * np.sqrt(np.pi * 2 * sigma2 ** 2)
 
-    EW1_error = EW1 * np.sqrt((amp1_err / amp1) ** 2 + (2 * sigma1_err / sigma1) ** 2)
-    EW2_error = EW2 * np.sqrt((amp2_err / amp2) ** 2 + (2 * sigma2_err / sigma2) ** 2)
+    # using correlation between parameters
+    EW1_error = EW1 * np.sqrt((amp1_err / amp1) ** 2 + (sigma1_err / sigma1) ** 2) - 2 * amp1_err * sigma1_err / (amp1 * sigma1)
+    EW2_error = EW2 * np.sqrt((amp2_err / amp2) ** 2 + (sigma2_err / sigma2) ** 2) - 2 * amp2_err * sigma2_err / (amp2 * sigma2)
 
     EW_total_error = np.sqrt(EW1_error ** 2 + EW2_error ** 2)
 
@@ -158,7 +158,7 @@ def full_covariance_ew_errors(popt, pcov):
     # Total EW error, including cross-terms
     cross_term = 2 * np.dot(jacobian_EW1, np.dot(pcov, jacobian_EW2.T))
     EW_total_error = np.sqrt(EW1_var + EW2_var + cross_term)
-    
+
     return EW1_error, EW2_error, EW_total_error
 
 def measure_absorber_properties_double_gaussian(index, wavelength, flux, error, absorber_redshift, bound, use_kernel, d_pix, use_covariance=False):
@@ -175,7 +175,7 @@ def measure_absorber_properties_double_gaussian(index, wavelength, flux, error,
         absorber_redshift (list): List of potential absorbers identified previously.
         bound (tuple): Bounds for the fitting parameters.
         use_kernel (str, optional): Kernel type ('MgII, FeII, CIV).
-        d_pix (float, optional): wavelength pixel for tolerance 
+        d_pix (float, optional): wavelength pixel for tolerance
         use_covariance (bool): if want to use full covariance of scipy curvey_fit for EW error calculation (default is False)
 
     Returns:
@@ -226,7 +226,7 @@ def measure_absorber_properties_double_gaussian(index, wavelength, flux, error,
         absorber_rest_lam = wavelength / (1 + absorber_redshift[k]) # rest-frame conversion of wavelength
         lam_ind = np.where((absorber_rest_lam >= ix0) & (absorber_rest_lam <= ix1))[0]
         lam_fit = absorber_rest_lam[lam_ind]
-        nmf_resi = flux[lam_ind] 
+        nmf_resi = flux[lam_ind]
         error_flux = error[lam_ind]
         uniform = np.random.uniform
         if nmf_resi.size > 0 and not np.all(np.isnan(nmf_resi)):
@@ -256,24 +256,24 @@ def measure_absorber_properties_double_gaussian(index, wavelength, flux, error,
 
             fitting_param_for_spectrum[k], fitting_param_std_for_spectrum[k], EW_first_line[k], EW_second_line[k], EW_total[k], fitting_param_pcov_for_spectrum[k] = double_curve_fit(
                 index, double_gaussian, lam_fit, nmf_resi, error_fit=error_flux, bounds=bound, init_cond=init_cond, iter_n=1000)
-            
+
             fitted_l1 = fitting_param_for_spectrum[k][1]*(1+z_abs_array[k]) # in observed frame
             fitted_l2 = fitting_param_for_spectrum[k][4]*(1+z_abs_array[k])
             std_fitted_l1 = fitting_param_std_for_spectrum[k][1]*(1+z_abs_array[k])
             std_fitted_l2 = fitting_param_std_for_spectrum[k][4]*(1+z_abs_array[k])
 
             z_abs_array[k], z_abs_err[k] = redshift_estimate(fitted_l1, fitted_l2, std_fitted_l1, std_fitted_l2, line_centre1, line_centre2)
-            
+
             #best-fit corresponding to this best redshift
             absorber_rest_lam = wavelength / (1 + z_abs_array[k]) # rest-frame conversion of wavelength
             lam_ind = np.where((absorber_rest_lam >= ix0) & (absorber_rest_lam <= ix1))[0]
             lam_fit = absorber_rest_lam[lam_ind]
             nmf_resi = flux[lam_ind]
             error_flux = error[lam_ind]
-            
+
             fitting_param_for_spectrum[k], fitting_param_std_for_spectrum[k], EW_first_line[k], EW_second_line[k], EW_total[k], fitting_param_pcov_for_spectrum[k] = double_curve_fit(
                 index, double_gaussian, lam_fit, nmf_resi, error_fit=error_flux, bounds=bound, init_cond=init_cond, iter_n=1000)
-            
+
             ## errors on EW
             if not use_covariance:
                 EW_first_line_error[k], EW_second_line_error[k], EW_total_error[k] = calculate_ew_errors(fitting_param_for_spectrum[k], fitting_param_std_for_spectrum[k])
@@ -305,4 +305,4 @@ def measure_absorber_properties_double_gaussian(index, wavelength, flux, error,
         z_abs_array, z_abs_err, fitting_param_for_spectrum, fitting_param_std_for_spectrum,
         EW_first_line, EW_second_line, EW_total,
         EW_first_line_error, EW_second_line_error, EW_total_error
-    )
+    )
diff --git a/qsoabsfind/io.py b/qsoabsfind/io.py
@@ -56,11 +56,15 @@ def save_results_to_fits(results, input_file, output_file, headers, absorber):
         sn_2 = 'SN_MGII_2803'
         EW_1 = 'MGII_2796_EW'
         EW_2 = 'MGII_2803_EW'
+        VDISP1 = 'MGII_2796_VDISP'
+        VDISP2 = 'MGII_2803_VDISP'
     elif absorber == 'CIV':
         sn_1 = 'SN_CIV_1548'
         sn_2 = 'SN_CIV_1550'
         EW_1 = 'CIV_1548_EW'
         EW_2 = 'CIV_1550_EW'
+        VDISP1 = 'CIV_1548_VDISP'
+        VDISP2 = 'CIV_1550_VDISP'
     else:
         raise ValueError(f"Unsupported absorber: {absorber}")
 
@@ -77,7 +81,9 @@ def save_results_to_fits(results, input_file, output_file, headers, absorber):
         fits.Column(name=f'{EW_TOTAL}_ERROR', format='D', unit='Angstrom', array=np.array(results['ew_total_error'])),
         fits.Column(name='Z_ABS_ERR', format='D', array=np.array(results['z_abs_err'])),
         fits.Column(name=sn_1, format='D', array=np.array(results['sn_1'])),
-        fits.Column(name=sn_2, format='D', array=np.array(results['sn_2']))
+        fits.Column(name=sn_2, format='D', array=np.array(results['sn_2'])),
+        fits.Column(name=VDISP1, format='D', unit='km/s', array=np.array(results['vel_disp1'])),
+        fits.Column(name=VDISP2, format='D', unit='km/s', array=np.array(results['vel_disp2']))
     ], name='ABSORBER')
 
     hdr = fits.Header()

diff --git a/qsoabsfind/parallel_convolution.py b/qsoabsfind/parallel_convolution.py
@@ -64,11 +64,13 @@ def parallel_convolution_method_absorber_finder_QSO_spectra(fits_file, spec_indi
         'z_abs_err': [],
         'sn_1': [],
         'sn_2': [],
+        'vel_disp1': [],
+        'vel_disp2': [],
     }
 
     for result in results:
         (index_spec, z_abs, gauss_fit, gauss_fit_std, ew_1_mean, ew_2_mean, ew_total_mean,
-         ew_1_error, ew_2_error, ew_total_error, z_abs_err, sn_1, sn_2) = result
+         ew_1_error, ew_2_error, ew_total_error, z_abs_err, sn_1, sn_2, vel_disp1, vel_disp2) = result
 
         valid_indices = np.array(z_abs) > 0
 
@@ -85,6 +87,8 @@ def parallel_convolution_method_absorber_finder_QSO_spectra(fits_file, spec_indi
         combined_results['z_abs_err'].extend(np.array(z_abs_err)[valid_indices])
         combined_results['sn_1'].extend(np.array(sn_1)[valid_indices])
         combined_results['sn_2'].extend(np.array(sn_2)[valid_indices])
+        combined_results['vel_disp1'].extend(np.array(vel_disp1)[valid_indices])
+        combined_results['vel_disp2'].extend(np.array(vel_disp2)[valid_indices])
 
     return combined_results
 
@@ -129,7 +133,7 @@ def main():
         'SN_LINE1': {"value": constants.search_parameters[args.absorber]["sn_line1"], "comment": 'S/N threshold for first line (sn_line1)'},
         'SN_LINE2': {"value": constants.search_parameters[args.absorber]["sn_line2"], "comment": 'S/N threshold for second line (sn_line2)'},
         'EWCOVAR': {"value": constants.search_parameters[args.absorber]["use_covariance"], "comment": 'Use covariance for EW error (use_covariance)'},
-        'LOGWAVE': {"value": constants.search_parameters[args.absorber]["logwave"], "comment": 'Use log wavelength scaling (logwave)'}, 'LAM_ESEP': {"value": constants.search_parameters[args.absorber]["lam_edge_sep"], "comment": 'wavelength edges to avoid noisy regions'}
+        'LOGWAVE': {"value": constants.search_parameters[args.absorber]["logwave"], "comment": 'Use log wavelength scaling (logwave)'}, 'LAM_ESEP': {"value": constants.search_parameters[args.absorber]["lam_edge_sep"], "comment": 'lambda edges to avoid noisy regions (lam_edge_sep)'}
     })
 
     package_versions = get_package_versions()

diff --git a/qsoabsfind/utils.py b/qsoabsfind/utils.py
@@ -140,7 +140,7 @@ def convolution_fun(absorber, residual_arr_after_mask, width, log, wave_res, ind
     gauss_kernel = gauss_two_lines_kernel(lam_ker, a=ker_parm)
 
     result = np.convolve(gauss_kernel, residual_arr_after_mask, mode='same')
-    
+
     #check if input and output array size are same
     bad_conv = validate_sizes(result, residual_arr_after_mask, index)
     if bad_conv == 1:
@@ -234,6 +234,8 @@ def modify_units(col_name, col):
     """
     if 'EW' in col_name.upper():
         return 'Angstrom'
+    elif 'VDISP' in col_name.upper():
+        return 'km/s'
     else:
         return str(col.unit) if col.unit is not None else None
 
@@ -351,7 +353,7 @@ def vel_dispersion(c1, c2, sigma1, sigma2, resolution):
 
     v1_sig = sigma1 / c1 * speed_of_light
     v2_sig = sigma2 / c2 * speed_of_light
-    
+
     # FWHM of instrument
     resolution = resolution/2.355
 

diff --git a/tests/__pycache__/test_convolution.cpython-39.pyc b/tests/__pycache__/test_convolution.cpython-39.pyc
diff --git a/tests/test_convolution.py b/tests/test_convolution.py
@@ -25,7 +25,7 @@ def test_convolution_method_absorber_finder_in_QSO_spectra(self):
 
         # Validate the output
         self.assertIsInstance(result, tuple)
-        self.assertEqual(len(result), 13)  # Ensure the correct number of return values
+        self.assertEqual(len(result), 15)  # Ensure the correct number of return values
 
     def test_parallel_convolution_method_absorber_finder_QSO_spectra(self):
         # Set up the input parameters for the function