Skip to content

Commit

Permalink
updated functions and better logic
Browse files Browse the repository at this point in the history
  • Loading branch information
abhi0395 committed Aug 23, 2024
1 parent 175ca30 commit 9c61321
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 67 deletions.
52 changes: 27 additions & 25 deletions qsoabsfind/absfinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
from operator import add
from .utils import convolution_fun, vel_dispersion, elapsed
from .absorberutils import (
estimate_local_sigma_conv_array, group_and_weighted_mean_selection_function,
estimate_local_sigma_conv_array,
median_selection_after_combining,
remove_Mg_falsely_come_from_Fe_absorber, z_abs_from_same_metal_absorber,
contiguous_pixel_remover, estimate_snr_for_lines, absorber_search_window,
find_valid_indices, calculate_doublet_ratio
find_valid_indices, calculate_doublet_ratio, group_and_weighted_mean_selection_function
)
from .ew import measure_absorber_properties_double_gaussian
from .config import load_constants
Expand Down Expand Up @@ -152,26 +152,30 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
raise ValueError(f"No support for {absorber}, only supports MgII and CIV")

line_sep = line2 - line1
del_z = line_sep / (0.5 * (line1+line2))

if not logwave:
# average resolution in case wavelength is on linear scale
wave_pixel = np.nanmean(lam_search[1:] - lam_search[:-1])
resolution = np.nanmean(wave_pixel/lam_search) * speed_of_light

del_sigma = line1 * resolution / speed_of_light # in Ang
del_sigma = wave_pixel / 2.355
resolution = np.nanmean(wave_pixel/lam_obs) * speed_of_light
else:
del_sigma = line1 * resolution / speed_of_light # in Ang
del_sigma = del_sigma / 2.355 ## FWHM sqrt(8ln2)

bd_ct, x_sep = 2.0, 10 # multiple for bound definition (for line centres and widths of line)
bd_ct, x_sep = 1.0, 10 # multiple for bound definition (for line centres and widths of line)

# bounds for gaussian fitting, to avoid very bad candidates
bound = ((np.array([2e-2, line1 - bd_ct * d_pix, del_sigma-0.1, 2e-2, line2 - bd_ct * d_pix, del_sigma-0.1])),
(np.array([1.11, line1 + bd_ct * d_pix, x_sep * del_sigma+0.1, 1.11, line2 + bd_ct * d_pix, x_sep * del_sigma+0.1])))

edge = 0.05
bound = ((np.array([2e-2, line1 - bd_ct * d_pix, del_sigma - edge, 2e-2, line2 - bd_ct * d_pix, del_sigma - edge])),
(np.array([1.11, line1 + bd_ct * d_pix, x_sep * del_sigma + edge, 1.11, line2 + bd_ct * d_pix, x_sep * del_sigma + edge])))

# line separation tolerance (fitted line centers should not be outside, centre +/- d_pix)
lower_del_lam = line_sep - d_pix
upper_del_lam = line_sep + d_pix

# Kernel width computation
width_kernel = np.array([ker * resolution * ((f1 * line1 + f2 * line2) / (f1 + f2)) / (speed_of_light * 2.35) for ker in ker_width_pixels])
width_kernel = np.array([ker * resolution * ((f1 * line1 + f2 * line2) / (f1 + f2)) / (speed_of_light * 2.355) for ker in ker_width_pixels])

combined_final_our_z = []

Expand All @@ -190,20 +194,17 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
residual_our_z = unmsk_residual[our_z_ind]

new_our_z, new_res_arr = find_valid_indices(our_z, residual_our_z, lam_search, conv_arr, sigma_cr, coeff_sigma, d_pix, f1 / f2, line1, line2, logwave)

final_our_z = group_and_weighted_mean_selection_function(new_our_z, np.array(new_res_arr))

#final_our_z = group_and_select_weighted_redshift(new_our_z, new_res_arr, delta_z_threshold=del_z)
final_our_z = group_and_weighted_mean_selection_function(new_our_z, np.array(new_res_arr), delta_z=del_z)
combined_final_our_z.append(final_our_z)

combined_final_our_z = reduce(add, combined_final_our_z)
combined_final_our_z = list(set(combined_final_our_z))
combined_final_our_z = median_selection_after_combining(combined_final_our_z, lam_obs, residual, d_pix=d_pix, use_kernel=absorber)
combined_final_our_z = np.array(combined_final_our_z)
combined_final_our_z = combined_final_our_z[~np.isnan(combined_final_our_z)]
combined_final_our_z = combined_final_our_z.tolist()

combined_final_our_z = [x for x in combined_final_our_z if not np.isnan(x)]

if len(combined_final_our_z)>0:

z_abs, z_err, fit_param, fit_param_std, EW_first_line_mean, EW_second_line_mean, EW_total_mean, EW_first_line_error, EW_second_line_error, EW_total_error = measure_absorber_properties_double_gaussian(
index=spec_index, wavelength=lam_obs, flux=residual, error=error, absorber_redshift=combined_final_our_z, bound=bound, use_kernel=absorber, d_pix=d_pix, use_covariance=use_covariance)

Expand All @@ -220,8 +221,8 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
sn1_all = np.zeros(len(z_abs))
sn2_all = np.zeros(len(z_abs))


for m in range(len(z_abs)):
z_inds = [i for i, x in enumerate(z_abs) if not np.isnan(x) and x > 0]
for m in z_inds:
if len(fit_param[m]) > 0 and not np.all(np.isnan(fit_param[m])):

z_new, z_new_error, fit_param_temp, fit_param_std_temp, EW_first_temp_mean, EW_second_temp_mean, EW_total_temp_mean, EW_first_error_temp, EW_second_error_temp, EW_total_error_temp = measure_absorber_properties_double_gaussian(
Expand All @@ -238,6 +239,7 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
# resolution corrected velocity dispersion (should be greater than 0)
vel1, vel2 = vel_dispersion(c0, c1, gaussian_parameters[2], gaussian_parameters[5], resolution)
# calculate best -fit doublet ratio and errors and check if they are within the range.

# usually 1 < DR < f1/f2 (doublet ratio =2, for MgII, CIV)
if EW_first_temp_mean[0] > 0 and EW_second_temp_mean[0] > 0:
dr, dr_error = calculate_doublet_ratio(EW_first_temp_mean[0], EW_second_temp_mean[0], EW_first_error_temp[0], EW_second_error_temp[0])
Expand All @@ -246,8 +248,8 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
else:
dr, min_dr, max_dr = 0, 0, -1 #failure case
ew1_snr, ew2_snr = 0, 0 # failure case

if (gaussian_parameters > bound[0] + 0.01).all() and (gaussian_parameters < bound[1] - 0.01).all() and lower_del_lam <= c1 - c0 <= upper_del_lam and sn1 > sn_line1 and sn2 > sn_line2 and vel1 > 0 and vel2 > 0 and min_dr < dr < max_dr and ew1_snr >1 and ew2_snr>1:
if (gaussian_parameters > bound[0]).all() and (gaussian_parameters < bound[1]).all() and lower_del_lam <= c1 - c0 <= upper_del_lam and sn1 > sn_line1 and sn2 > sn_line2 and vel1 > 0 and vel2 > 0 and min_dr < dr < max_dr and ew1_snr >1 and ew2_snr>1 and c0 > bound[0][1] + 0.01 and c0 < bound[1][1] - 0.01:
pure_z_abs[m] = z_new
pure_gauss_fit[m] = fit_param_temp[0]
pure_gauss_fit_std[m] = fit_param_std_temp[0]
Expand All @@ -262,7 +264,6 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
sn2_all[m] = sn2

valid_indices = pure_z_abs != 0

pure_z_abs = pure_z_abs[valid_indices]
pure_gauss_fit = pure_gauss_fit[valid_indices]
pure_gauss_fit_std = pure_gauss_fit_std[valid_indices]
Expand All @@ -275,13 +276,14 @@ def convolution_method_absorber_finder_in_QSO_spectra(spec_index, absorber='MgII
redshift_err = redshift_err[valid_indices]
sn1_all = sn1_all[valid_indices]
sn2_all = sn2_all[valid_indices]

if len(pure_z_abs) > 0:
if absorber=='MgII':
match_abs1 = remove_Mg_falsely_come_from_Fe_absorber(spec_index, pure_z_abs, lam_obs, residual, error, d_pix, logwave)
else:
match_abs1 = -1*np.ones(len(pure_z_abs))
match_abs2 = z_abs_from_same_metal_absorber(pure_z_abs, lam_obs, residual, error, d_pix, absorber, logwave)
ind_z = contiguous_pixel_remover(pure_z_abs, sn1_all, sn2_all, absorber)
ind_z = contiguous_pixel_remover(pure_z_abs, sn1_all, sn2_all, absorber, pure_gauss_fit)
sel_indices = (match_abs1 == -1) & (match_abs2 == -1) & (ind_z == -1) # pure final absorber candidates

# Select final quantities based on sel_indices
Expand Down
88 changes: 59 additions & 29 deletions qsoabsfind/absorberutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,17 @@ def estimate_snr_for_lines(l1, l2, sig1, sig2, lam_rest, residual, error, log):
tuple: Mean signal-to-noise ratios (SNR) around the specified wavelengths.
Returns (mean_sn1, mean_sn2).
"""
nsig = 4 # for gaussian more than 99.7 percentile data is within 4sigma

delta1, delta2 = nsig * sig2, nsig * sig2
if sig1 is None or sig2 is None:
dpix = 5
if log:
delta1 = np.abs(l1 * (10**(dpix * 0.0001) - 1))
delta2 = np.abs(l2 * (10**(dpix * 0.0001) - 1))
else:
delta1 = dpix * (lam_rest[1]-lam_rest[0])
delta2 = delta1
else:
nsig = 4 # for gaussian more than 99.7 percentile data is within 4sigma
delta1, delta2 = nsig * sig2, nsig * sig2

ind1 = np.where((lam_rest > l1 - delta1) & (lam_rest < l1 + delta1))[0]
ind2 = np.where((lam_rest > l2 - delta2) & (lam_rest < l2 + delta2))[0]
Expand Down Expand Up @@ -228,7 +236,7 @@ def weighted_mean(z_values, residuals, gamma):
weighted_z = (z_values / residuals**gamma)
return np.nansum(weighted_z) / np.nansum(weights)

def group_and_weighted_mean_selection_function(master_list_of_pot_absorber, residual, gamma=1):
def group_and_weighted_mean_selection_function(master_list_of_pot_absorber, residual, delta_z, gamma=1):
"""
Perform grouping, splitting, and median selection from the list of all
potentially identified absorbers.
Expand All @@ -245,16 +253,16 @@ def group_and_weighted_mean_selection_function(master_list_of_pot_absorber, resi
return master_list_of_pot_absorber

z_ind = [] # Final list of median redshifts for each spectrum
abs_list = np.array(master_list_of_pot_absorber)
z_check = np.log10(1 + abs_list)

z_check = np.array(master_list_of_pot_absorber)
#z_check = np.log10(1 + abs_list)
# Grouping contiguous pixels separated by ~0.0006 on redshift scale
z_temp, res_temp = group_contiguous_pixel(z_check, residual, avg=0.0006)
z_temp, res_temp = group_contiguous_pixel(z_check, residual, avg=delta_z)
for group_z, group_res in zip(z_temp, res_temp):
if isinstance(group_res, list):
group_res, group_z = np.array(group_res), np.array(group_z)
if group_res.size > 0 and np.all(group_res != 0):
abs_z = weighted_mean(10**group_z - 1, group_res, gamma)
abs_z = weighted_mean(group_z, group_res, gamma)
z_ind.append(abs_z)

return z_ind
Expand Down Expand Up @@ -359,10 +367,10 @@ def remove_Mg_falsely_come_from_Fe_absorber(index, z_after_grouping, lam_obs, re

if n_new > 0:
lam_rest = lam_obs / (1 + z[p])
sn_fe1, sn_fe2 = estimate_snr_for_lines(fe1, fe2, lam_rest, residual, error, logwave)
sn_fe1, sn_fe2 = estimate_snr_for_lines(fe1, fe2, None, None, lam_rest, residual, error, logwave)
for k in range(n_new):
lam_rest = lam_obs / (1 + z[ind_fin[k]])
sn_mg1, sn_mg2 = estimate_snr_for_lines(mg1, mg2, lam_rest, residual, error, logwave)
sn_mg1, sn_mg2 = estimate_snr_for_lines(mg1, mg2, None, None, lam_rest, residual, error, logwave)
if (sn_fe1 > sn_mg1) or (sn_fe1 > sn_mg2) or (sn_fe2 > sn_mg1) or (sn_fe2 > sn_mg2):
match_abs[ind_fin[k]] = 1 # False positive MgII absorber due to FeII lines
else:
Expand Down Expand Up @@ -416,11 +424,11 @@ def z_abs_from_same_metal_absorber(first_list_z, lam_obs, residual, error, d_pix

if ind_del.size > 0:
lam_rest = lam_obs / (1 + z[p])
sn_mg0, _ = estimate_snr_for_lines(mg1, mg2, lam_rest, residual, error, logwave)
sn_mg0, _ = estimate_snr_for_lines(mg1, mg2, None, None, lam_rest, residual, error, logwave)

for k in ind_del:
lam_rest1 = lam_obs / (1 + z[k])
sn_mg1, _ = estimate_snr_for_lines(mg1, mg2, lam_rest1, residual, error, logwave)
sn_mg1, _ = estimate_snr_for_lines(mg1, mg2, None, None, lam_rest1, residual, error, logwave)

if sn_mg0 > sn_mg1:
match_abs[k] = 1 # Matched within limits, not true absorbers
Expand All @@ -430,43 +438,65 @@ def z_abs_from_same_metal_absorber(first_list_z, lam_obs, residual, error, d_pix
return match_abs

#@jit(nopython=False)
def contiguous_pixel_remover(abs_z, sn1_all, sn2_all, use_kernel):
def contiguous_pixel_remover(abs_z, sn1_all, sn2_all, use_kernel, fitted_params):
"""
Remove contiguous pixels by evaluating the signal-to-noise ratio (SNR)
for absorbers.
Remove contiguous or duplicate absorbers by evaluating the signal-to-noise ratio (SNR)
and comparing their line centers.
Args:
abs_z (list or numpy.ndarray): List of absorber redshifts.
sn1_all (list or numpy.ndarray): List of SNR values for the first line.
sn2_all (list or numpy.ndarray): List of SNR values for the second line.
use_kernel (str, optional): Kernel type (MgII, CIV).
fitted_params (list of arrays): corresponding gaussian fitting parameters for those redshifts
Returns:
list: Updated list of absorbers with false positives removed.
list: Updated list of indices indicating bad (1) or good (-1) absorbers.
"""
if use_kernel=='MgII':
thresh = (lines['MgII_2803']-lines['MgII_2796'])/lines['MgII_2796']
if use_kernel=='CIV':
thresh = (lines['CIV_1550']-lines['CIV_1548'])/lines['CIV_1548']
# Define constants based on the kernel type
if use_kernel == 'MgII':
c0, c1 = lines['MgII_2796'], lines["MgII_2803"]
elif use_kernel == 'CIV':
c0, c1 = lines["CIV_1548"], lines["CIV_1550"]
else:
raise ValueError("Unknown kernel type")

thresh = (c1 - c0) / c0

abs_z = np.array(abs_z)
sn1_all = np.array(sn1_all)
sn2_all = np.array(sn2_all)
nabs = abs_z.size
ind_true = -1 * np.ones(nabs, dtype='int32')
ind_true = np.ones(nabs, dtype='int32') # Initialize all as bad (1)

if nabs > 1:
for k in range(nabs):
if ind_true[k] == -1: # Skip if already marked as good
continue

# Calculate differences between current absorber and others
diff = np.abs(abs_z[k] - abs_z)
ix = np.where((diff > 0) & (diff <= thresh))[0]

if ix.size > 0:
sn1_temp = sn1_all[ix]

for j in range(ix.size):
if sn1_all[k] > sn1_temp[j]:
ind_true[k] = -1
else:
ind_true[k] = 1
# Consider the current absorber and its closely spaced ones
candidates = np.append(k, ix)
best_idx = candidates[0] # Default to the current one

# Compare SNR and line positions to decide the best absorber
for j in candidates:
line_diff = abs(fitted_params[j][1] - c0)
if line_diff < abs(fitted_params[best_idx][1] - c0):
best_idx = j
elif line_diff == abs(fitted_params[best_idx][1] - c0):
if sn1_all[j] > sn1_all[best_idx]:
best_idx = j

# Mark the best one as good (-1) and others as bad (1)
ind_true[best_idx] = -1
ind_true[candidates[candidates != best_idx]] = 1
else:
# No closely spaced absorbers, mark the current one as good (-1)
ind_true[k] = -1
else:
ind_true[0] = -1
Expand Down
21 changes: 11 additions & 10 deletions qsoabsfind/ew.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ def double_curve_fit(index, fun_to_run, lam_fit_range, nmf_resi_fit, error_fit,
EW_first = np.nan
EW_second = np.nan
EW_total = np.nan

if bounds is None:
bounds = (-np.inf, np.inf)
try:
popt, pcov = curve_fit(
fun_to_run, lam_fit_range, nmf_resi_fit,
Expand Down Expand Up @@ -219,24 +220,24 @@ def measure_absorber_properties_double_gaussian(index, wavelength, flux, error,
EW_first_line_error, EW_second_line_error, EW_total_error
)

np.random.seed(1234) # for reproducibility of initital condition
#np.random.seed(1234) # for reproducibility of initital condition
for k in range(size_array):
absorber_rest_lam = wavelength / (1 + absorber_redshift[k]) # rest-frame conversion of wavelength
lam_ind = np.where((absorber_rest_lam >= ix0) & (absorber_rest_lam <= ix1))[0]
lam_fit = absorber_rest_lam[lam_ind]
nmf_resi = flux[lam_ind]
nmf_resi = flux[lam_ind]
error_flux = error[lam_ind]

uniform = np.random.uniform
if nmf_resi.size > 0 and not np.all(np.isnan(nmf_resi)):
#random initial condition
amp_first_nmf = np.nanmin(nmf_resi) + np.random.normal(0, d_pix/10)
line_first = line_centre1 + np.random.normal(0, d_pix)
sigma1 = 1.5 + np.random.normal(0, d_pix/2)
sigma2 = 1.5 + np.random.normal(0, d_pix/2)
line_second = line_first + (line_centre2 - line_centre1) + np.random.normal(0, d_pix)
amp_first_nmf = 1 - np.nanmin(nmf_resi)
line_first = line_centre1
sigma1 = uniform(bound[0][2], bound[1][2])
sigma2 = uniform(bound[0][5], bound[1][5])
line_second = line_centre2
init_cond = [amp_first_nmf, line_first, sigma1, 0.54 * amp_first_nmf, line_second, sigma2]
fitting_param_for_spectrum[k], fitting_param_std_for_spectrum[k], EW_first_line[k], EW_second_line[k], EW_total[k],_ = double_curve_fit(
index, double_gaussian, lam_fit, nmf_resi, error_fit=error_flux, bounds=bound, init_cond=init_cond, iter_n=1000)
index, double_gaussian, lam_fit, nmf_resi, error_fit=error_flux, bounds=bound, init_cond=init_cond, iter_n=2000)

fitted_l1 = fitting_param_for_spectrum[k][1]*(1+absorber_redshift[k]) # in observed frame
fitted_l2 = fitting_param_for_spectrum[k][4]*(1+absorber_redshift[k])
Expand Down
Loading

0 comments on commit 9c61321

Please sign in to comment.