updated functions and better logic

abhi0395 · Aug 23, 2024 · 9c61321 · 9c61321
1 parent 175ca30
commit 9c61321
Show file tree

Hide file tree

Showing 4 changed files with 103 additions and 67 deletions.
diff --git a/qsoabsfind/absfinder.py b/qsoabsfind/absfinder.py
@@ -7,11 +7,11 @@
 from operator import add
 from .utils import convolution_fun, vel_dispersion, elapsed
 from .absorberutils import (
-    estimate_local_sigma_conv_array, group_and_weighted_mean_selection_function,
+    estimate_local_sigma_conv_array, 
     median_selection_after_combining,
     remove_Mg_falsely_come_from_Fe_absorber, z_abs_from_same_metal_absorber,
     contiguous_pixel_remover, estimate_snr_for_lines, absorber_search_window,
-    find_valid_indices, calculate_doublet_ratio
+    find_valid_indices, calculate_doublet_ratio, group_and_weighted_mean_selection_function
 )
 from .ew import measure_absorber_properties_double_gaussian
 from .config import load_constants
@@ -152,26 +152,30 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
             raise ValueError(f"No support for {absorber}, only supports MgII and CIV")
 
         line_sep = line2 - line1
+        del_z = line_sep / (0.5 * (line1+line2))
 
         if not logwave:
             # average resolution in case wavelength is on linear scale
             wave_pixel = np.nanmean(lam_search[1:] - lam_search[:-1])
-            resolution  = np.nanmean(wave_pixel/lam_search) * speed_of_light
-
-        del_sigma = line1 * resolution / speed_of_light  # in Ang
+            del_sigma = wave_pixel / 2.355
+            resolution  = np.nanmean(wave_pixel/lam_obs) * speed_of_light
+        else:
+            del_sigma = line1 * resolution / speed_of_light  # in Ang
+            del_sigma = del_sigma / 2.355 ## FWHM sqrt(8ln2)
 
-        bd_ct, x_sep = 2.0, 10 # multiple for bound definition (for line centres and widths of line)
+        bd_ct, x_sep = 1.0, 10 # multiple for bound definition (for line centres and widths of line)
 
         # bounds for gaussian fitting, to avoid very bad candidates
-        bound = ((np.array([2e-2, line1 - bd_ct * d_pix, del_sigma-0.1, 2e-2, line2 - bd_ct * d_pix, del_sigma-0.1])),
-                 (np.array([1.11, line1 + bd_ct * d_pix, x_sep * del_sigma+0.1, 1.11, line2 + bd_ct * d_pix, x_sep * del_sigma+0.1])))
-
+        edge = 0.05
+        bound = ((np.array([2e-2, line1 - bd_ct * d_pix, del_sigma - edge, 2e-2, line2 - bd_ct * d_pix, del_sigma - edge])),
+                 (np.array([1.11, line1 + bd_ct * d_pix, x_sep * del_sigma + edge, 1.11, line2 + bd_ct * d_pix, x_sep * del_sigma + edge])))
+
         # line separation tolerance (fitted line centers should not be outside, centre +/- d_pix)
         lower_del_lam = line_sep - d_pix
         upper_del_lam = line_sep + d_pix
 
         # Kernel width computation
-        width_kernel = np.array([ker * resolution * ((f1 * line1 + f2 * line2) / (f1 + f2)) / (speed_of_light * 2.35) for ker in ker_width_pixels])
+        width_kernel = np.array([ker * resolution * ((f1 * line1 + f2 * line2) / (f1 + f2)) / (speed_of_light * 2.355) for ker in ker_width_pixels])
 
         combined_final_our_z = []
 
@@ -190,20 +194,17 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
             residual_our_z = unmsk_residual[our_z_ind]
 
             new_our_z, new_res_arr = find_valid_indices(our_z, residual_our_z, lam_search, conv_arr, sigma_cr, coeff_sigma, d_pix, f1 / f2, line1, line2, logwave)
-
-            final_our_z = group_and_weighted_mean_selection_function(new_our_z, np.array(new_res_arr))
-
+    
+            #final_our_z = group_and_select_weighted_redshift(new_our_z, new_res_arr, delta_z_threshold=del_z)
+            final_our_z =  group_and_weighted_mean_selection_function(new_our_z, np.array(new_res_arr), delta_z=del_z)
             combined_final_our_z.append(final_our_z)
-
+    
         combined_final_our_z = reduce(add, combined_final_our_z)
         combined_final_our_z = list(set(combined_final_our_z))
         combined_final_our_z = median_selection_after_combining(combined_final_our_z, lam_obs, residual, d_pix=d_pix, use_kernel=absorber)
-        combined_final_our_z = np.array(combined_final_our_z)
-        combined_final_our_z = combined_final_our_z[~np.isnan(combined_final_our_z)]
-        combined_final_our_z = combined_final_our_z.tolist()
-
+        combined_final_our_z = [x for x in combined_final_our_z if not np.isnan(x)]
+
         if len(combined_final_our_z)>0:
-
             z_abs, z_err, fit_param, fit_param_std, EW_first_line_mean, EW_second_line_mean, EW_total_mean, EW_first_line_error, EW_second_line_error, EW_total_error = measure_absorber_properties_double_gaussian(
                 index=spec_index, wavelength=lam_obs, flux=residual, error=error, absorber_redshift=combined_final_our_z, bound=bound, use_kernel=absorber, d_pix=d_pix, use_covariance=use_covariance)
 
@@ -220,8 +221,8 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
             sn1_all = np.zeros(len(z_abs))
             sn2_all = np.zeros(len(z_abs))
 
-
-            for m in range(len(z_abs)):
+            z_inds = [i for i, x in enumerate(z_abs) if not np.isnan(x) and x > 0]
+            for m in z_inds:
                 if len(fit_param[m]) > 0 and not np.all(np.isnan(fit_param[m])):
 
                     z_new, z_new_error, fit_param_temp, fit_param_std_temp, EW_first_temp_mean, EW_second_temp_mean, EW_total_temp_mean, EW_first_error_temp, EW_second_error_temp, EW_total_error_temp = measure_absorber_properties_double_gaussian(
@@ -238,6 +239,7 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
                         # resolution corrected velocity dispersion (should be greater than 0)
                         vel1, vel2 = vel_dispersion(c0, c1, gaussian_parameters[2], gaussian_parameters[5], resolution)
                         # calculate best -fit doublet ratio and errors and check if they are within the range.
+
                         # usually 1 < DR < f1/f2 (doublet ratio =2, for MgII, CIV)
                         if EW_first_temp_mean[0] > 0 and EW_second_temp_mean[0] > 0:
                             dr, dr_error = calculate_doublet_ratio(EW_first_temp_mean[0], EW_second_temp_mean[0], EW_first_error_temp[0], EW_second_error_temp[0])
@@ -246,8 +248,8 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
                         else:
                             dr, min_dr, max_dr = 0, 0, -1 #failure case
                             ew1_snr, ew2_snr = 0, 0 # failure case
-
-                        if (gaussian_parameters > bound[0] + 0.01).all() and (gaussian_parameters < bound[1] - 0.01).all() and lower_del_lam <= c1 - c0 <= upper_del_lam and sn1 > sn_line1 and sn2 > sn_line2 and vel1 > 0 and vel2 > 0 and min_dr < dr < max_dr and ew1_snr >1 and ew2_snr>1:
+                                          
+                        if (gaussian_parameters > bound[0]).all() and (gaussian_parameters < bound[1]).all() and lower_del_lam <= c1 - c0 <= upper_del_lam and sn1 > sn_line1 and sn2 > sn_line2 and vel1 > 0 and vel2 > 0 and min_dr < dr < max_dr and ew1_snr >1 and ew2_snr>1 and c0 > bound[0][1] + 0.01 and c0 < bound[1][1] - 0.01:
                             pure_z_abs[m] = z_new
                             pure_gauss_fit[m] = fit_param_temp[0]
                             pure_gauss_fit_std[m] = fit_param_std_temp[0]
@@ -262,7 +264,6 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
                             sn2_all[m] = sn2
 
             valid_indices = pure_z_abs != 0
-
             pure_z_abs = pure_z_abs[valid_indices]
             pure_gauss_fit = pure_gauss_fit[valid_indices]
             pure_gauss_fit_std = pure_gauss_fit_std[valid_indices]
@@ -275,13 +276,14 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
             redshift_err = redshift_err[valid_indices]
             sn1_all = sn1_all[valid_indices]
             sn2_all = sn2_all[valid_indices]
+
             if len(pure_z_abs) > 0:
                 if absorber=='MgII':
                     match_abs1 = remove_Mg_falsely_come_from_Fe_absorber(spec_index, pure_z_abs, lam_obs, residual, error, d_pix, logwave)
                 else:
                     match_abs1 = -1*np.ones(len(pure_z_abs))
                 match_abs2 = z_abs_from_same_metal_absorber(pure_z_abs, lam_obs, residual, error, d_pix, absorber, logwave)
-                ind_z = contiguous_pixel_remover(pure_z_abs, sn1_all, sn2_all, absorber)
+                ind_z = contiguous_pixel_remover(pure_z_abs, sn1_all, sn2_all, absorber, pure_gauss_fit)
                 sel_indices = (match_abs1 == -1) & (match_abs2 == -1) & (ind_z == -1)  # pure final absorber candidates
 
                 # Select final quantities based on sel_indices

diff --git a/qsoabsfind/absorberutils.py b/qsoabsfind/absorberutils.py
@@ -131,9 +131,17 @@ def estimate_snr_for_lines(l1, l2, sig1, sig2, lam_rest, residual, error, log):
         tuple: Mean signal-to-noise ratios (SNR) around the specified wavelengths.
                Returns (mean_sn1, mean_sn2).
     """
-    nsig = 4 # for gaussian more than 99.7 percentile data is within 4sigma
-
-    delta1, delta2 = nsig * sig2, nsig * sig2
+    if sig1 is None or sig2 is None:
+        dpix = 5
+        if log:
+            delta1 = np.abs(l1 * (10**(dpix * 0.0001) - 1))
+            delta2 = np.abs(l2 * (10**(dpix * 0.0001) - 1))
+        else:
+            delta1 = dpix * (lam_rest[1]-lam_rest[0])
+            delta2 = delta1
+    else:
+        nsig = 4 # for gaussian more than 99.7 percentile data is within 4sigma
+        delta1, delta2 = nsig * sig2, nsig * sig2
 
     ind1 = np.where((lam_rest > l1 - delta1) & (lam_rest < l1 + delta1))[0]
     ind2 = np.where((lam_rest > l2 - delta2) & (lam_rest < l2 + delta2))[0]
@@ -228,7 +236,7 @@ def weighted_mean(z_values, residuals, gamma):
     weighted_z = (z_values / residuals**gamma)
     return np.nansum(weighted_z) / np.nansum(weights)
 
-def group_and_weighted_mean_selection_function(master_list_of_pot_absorber, residual, gamma=1):
+def group_and_weighted_mean_selection_function(master_list_of_pot_absorber, residual, delta_z, gamma=1):
     """
     Perform grouping, splitting, and median selection from the list of all
     potentially identified absorbers.
@@ -245,16 +253,16 @@ def group_and_weighted_mean_selection_function(master_list_of_pot_absorber, resi
         return master_list_of_pot_absorber
 
     z_ind = []  # Final list of median redshifts for each spectrum
-    abs_list = np.array(master_list_of_pot_absorber)
-    z_check = np.log10(1 + abs_list)
-
+    z_check = np.array(master_list_of_pot_absorber)
+    #z_check = np.log10(1 + abs_list)
+    
     # Grouping contiguous pixels separated by ~0.0006 on redshift scale
-    z_temp, res_temp = group_contiguous_pixel(z_check, residual, avg=0.0006)
+    z_temp, res_temp = group_contiguous_pixel(z_check, residual, avg=delta_z)
     for group_z, group_res in zip(z_temp, res_temp):
         if isinstance(group_res, list):
             group_res, group_z = np.array(group_res), np.array(group_z)
         if group_res.size > 0 and np.all(group_res != 0):
-            abs_z = weighted_mean(10**group_z - 1, group_res, gamma)
+            abs_z = weighted_mean(group_z, group_res, gamma)
             z_ind.append(abs_z)
 
     return z_ind
@@ -359,10 +367,10 @@ def remove_Mg_falsely_come_from_Fe_absorber(index, z_after_grouping, lam_obs, re
 
                 if n_new > 0:
                     lam_rest = lam_obs / (1 + z[p])
-                    sn_fe1, sn_fe2 = estimate_snr_for_lines(fe1, fe2, lam_rest, residual, error, logwave)
+                    sn_fe1, sn_fe2 = estimate_snr_for_lines(fe1, fe2, None, None, lam_rest, residual, error, logwave)
                     for k in range(n_new):
                         lam_rest = lam_obs / (1 + z[ind_fin[k]])
-                        sn_mg1, sn_mg2 = estimate_snr_for_lines(mg1, mg2, lam_rest, residual, error, logwave)
+                        sn_mg1, sn_mg2 = estimate_snr_for_lines(mg1, mg2, None, None, lam_rest, residual, error, logwave)
                         if (sn_fe1 > sn_mg1) or (sn_fe1 > sn_mg2) or (sn_fe2 > sn_mg1) or (sn_fe2 > sn_mg2):
                             match_abs[ind_fin[k]] = 1  # False positive MgII absorber due to FeII lines
                         else:
@@ -416,11 +424,11 @@ def z_abs_from_same_metal_absorber(first_list_z, lam_obs, residual, error, d_pix
 
                 if ind_del.size > 0:
                     lam_rest = lam_obs / (1 + z[p])
-                    sn_mg0, _ = estimate_snr_for_lines(mg1, mg2, lam_rest, residual, error, logwave)
+                    sn_mg0, _ = estimate_snr_for_lines(mg1, mg2, None, None, lam_rest, residual, error, logwave)
 
                     for k in ind_del:
                         lam_rest1 = lam_obs / (1 + z[k])
-                        sn_mg1, _ = estimate_snr_for_lines(mg1, mg2, lam_rest1, residual, error, logwave)
+                        sn_mg1, _ = estimate_snr_for_lines(mg1, mg2, None, None, lam_rest1, residual, error, logwave)
 
                         if sn_mg0 > sn_mg1:
                             match_abs[k] = 1  # Matched within limits, not true absorbers
@@ -430,43 +438,65 @@ def z_abs_from_same_metal_absorber(first_list_z, lam_obs, residual, error, d_pix
     return match_abs
 
 #@jit(nopython=False)
-def contiguous_pixel_remover(abs_z, sn1_all, sn2_all, use_kernel):
+def contiguous_pixel_remover(abs_z, sn1_all, sn2_all, use_kernel, fitted_params):
     """
-    Remove contiguous pixels by evaluating the signal-to-noise ratio (SNR)
-    for absorbers.
+    Remove contiguous or duplicate absorbers by evaluating the signal-to-noise ratio (SNR)
+    and comparing their line centers.
 
     Args:
         abs_z (list or numpy.ndarray): List of absorber redshifts.
         sn1_all (list or numpy.ndarray): List of SNR values for the first line.
         sn2_all (list or numpy.ndarray): List of SNR values for the second line.
         use_kernel (str, optional): Kernel type (MgII, CIV).
+        fitted_params (list of arrays): corresponding gaussian fitting parameters for those redshifts
 
     Returns:
-        list: Updated list of absorbers with false positives removed.
+        list: Updated list of indices indicating bad (1) or good (-1) absorbers.
     """
-    if use_kernel=='MgII':
-        thresh = (lines['MgII_2803']-lines['MgII_2796'])/lines['MgII_2796']
-    if use_kernel=='CIV':
-        thresh = (lines['CIV_1550']-lines['CIV_1548'])/lines['CIV_1548']
+    # Define constants based on the kernel type
+    if use_kernel == 'MgII':
+        c0, c1 = lines['MgII_2796'], lines["MgII_2803"]
+    elif use_kernel == 'CIV':
+        c0, c1 = lines["CIV_1548"], lines["CIV_1550"]
+    else:
+        raise ValueError("Unknown kernel type")
+
+    thresh = (c1 - c0) / c0 
+
     abs_z = np.array(abs_z)
     sn1_all = np.array(sn1_all)
     sn2_all = np.array(sn2_all)
     nabs = abs_z.size
-    ind_true = -1 * np.ones(nabs, dtype='int32')
+    ind_true = np.ones(nabs, dtype='int32')  # Initialize all as bad (1)
 
     if nabs > 1:
         for k in range(nabs):
+            if ind_true[k] == -1:  # Skip if already marked as good
+                continue
+
+            # Calculate differences between current absorber and others
             diff = np.abs(abs_z[k] - abs_z)
             ix = np.where((diff > 0) & (diff <= thresh))[0]
+
             if ix.size > 0:
-                sn1_temp = sn1_all[ix]
-
-                for j in range(ix.size):
-                    if sn1_all[k] > sn1_temp[j]:
-                        ind_true[k] = -1
-                    else:
-                        ind_true[k] = 1
+                # Consider the current absorber and its closely spaced ones
+                candidates = np.append(k, ix)
+                best_idx = candidates[0]  # Default to the current one
+
+                # Compare SNR and line positions to decide the best absorber
+                for j in candidates:
+                    line_diff = abs(fitted_params[j][1] - c0)
+                    if line_diff < abs(fitted_params[best_idx][1] - c0):
+                        best_idx = j
+                    elif line_diff == abs(fitted_params[best_idx][1] - c0):
+                        if sn1_all[j] > sn1_all[best_idx]:
+                            best_idx = j
+
+                # Mark the best one as good (-1) and others as bad (1)
+                ind_true[best_idx] = -1
+                ind_true[candidates[candidates != best_idx]] = 1
             else:
+                # No closely spaced absorbers, mark the current one as good (-1)
                 ind_true[k] = -1
     else:
         ind_true[0] = -1

diff --git a/qsoabsfind/ew.py b/qsoabsfind/ew.py
@@ -63,7 +63,8 @@ def double_curve_fit(index, fun_to_run, lam_fit_range, nmf_resi_fit, error_fit,
     EW_first = np.nan
     EW_second = np.nan
     EW_total = np.nan
-
+    if bounds is None:
+        bounds = (-np.inf, np.inf)
     try:
         popt, pcov = curve_fit(
             fun_to_run, lam_fit_range, nmf_resi_fit,
@@ -219,24 +220,24 @@ def measure_absorber_properties_double_gaussian(index, wavelength, flux, error,
             EW_first_line_error, EW_second_line_error, EW_total_error
         )
 
-    np.random.seed(1234) # for reproducibility of initital condition
+    #np.random.seed(1234) # for reproducibility of initital condition
     for k in range(size_array):
         absorber_rest_lam = wavelength / (1 + absorber_redshift[k]) # rest-frame conversion of wavelength
         lam_ind = np.where((absorber_rest_lam >= ix0) & (absorber_rest_lam <= ix1))[0]
         lam_fit = absorber_rest_lam[lam_ind]
-        nmf_resi = flux[lam_ind]
+        nmf_resi = flux[lam_ind] 
         error_flux = error[lam_ind]
-
+        uniform = np.random.uniform
         if nmf_resi.size > 0 and not np.all(np.isnan(nmf_resi)):
             #random initial condition
-            amp_first_nmf = np.nanmin(nmf_resi) + np.random.normal(0, d_pix/10)
-            line_first = line_centre1 + np.random.normal(0, d_pix)
-            sigma1 = 1.5 + np.random.normal(0, d_pix/2)
-            sigma2 = 1.5 + np.random.normal(0, d_pix/2)
-            line_second = line_first + (line_centre2 - line_centre1) +  np.random.normal(0, d_pix)
+            amp_first_nmf = 1 - np.nanmin(nmf_resi)
+            line_first = line_centre1
+            sigma1 = uniform(bound[0][2], bound[1][2])
+            sigma2 = uniform(bound[0][5], bound[1][5])
+            line_second = line_centre2
             init_cond = [amp_first_nmf, line_first, sigma1, 0.54 * amp_first_nmf, line_second, sigma2]
             fitting_param_for_spectrum[k], fitting_param_std_for_spectrum[k], EW_first_line[k], EW_second_line[k], EW_total[k],_ = double_curve_fit(
-                index, double_gaussian, lam_fit, nmf_resi, error_fit=error_flux, bounds=bound, init_cond=init_cond, iter_n=1000)
+                index, double_gaussian, lam_fit, nmf_resi, error_fit=error_flux, bounds=bound, init_cond=init_cond, iter_n=2000)
 
             fitted_l1 = fitting_param_for_spectrum[k][1]*(1+absorber_redshift[k]) # in observed frame
             fitted_l2 = fitting_param_for_spectrum[k][4]*(1+absorber_redshift[k])