Skip to content

Commit

Permalink
some functions moved to utils
Browse files Browse the repository at this point in the history
  • Loading branch information
abhi0395 committed Aug 8, 2024
1 parent c95a919 commit d9bafd4
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 48 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Features

- Convolution-based adaptive S/N approach for detecting absorbers in QSO spectra.
- Gaussian fitting for accurate measurement of absorber properties (such as EW, line widths, and centers).
- Parallel processing for efficient computation on a large number of spectra.
- Parallel processing using multiprocessing for efficient computation on a large number of spectra.

Documentation
-------------
Expand Down Expand Up @@ -76,9 +76,9 @@ Useful notes:
Parallel mode can be memory-intensive if the input FITS file is large. Because the code accesses the FITS file to read QSO spectra while running in parallel, memory can become a bottleneck and the code may fail. For now, I suggest the following:

- **Divide your file into smaller chunks:** Split the FITS file into several smaller files, each containing approximately `N` spectra. Then run the code on these smaller files.

- **Use a rule of thumb for file size:** Ensure that the size of each individual file is no larger than `total_memory/ncpu` of your node or system. Based on this idea you can decide your `N`. I would suggest `N = 1000`.

- **Merge results at the end:** After processing, you can merge your results.

In order to decide the right size of the FITS file, consider the total available memory and the number of CPUs in your system.
Expand Down
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Features

- Convolution-based adaptive S/N approach for detecting absorbers in QSO spectra.
- Gaussian fitting for accurate measurement of absorber properties (such as EW, line widths, and centers).
- Parallel processing for efficient computation on a large number of spectra.
- Parallel processing using multiprocessing for efficient computation on a large number of spectra.

.. toctree::
:maxdepth: 1
Expand Down
46 changes: 2 additions & 44 deletions qsoabsfind/parallel_convolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,12 @@
import numpy as np
import argparse
import time
import os
from multiprocessing import Pool
from .absfinder import read_single_spectrum_and_find_absorber
from .io import save_results_to_fits
import re
import os
import pkg_resources
from .config import load_constants
from .utils import read_nqso_from_header

def get_package_versions():
    """
    Get the versions of qsoabsfind and other relevant packages.

    A package whose distribution metadata cannot be found (for example when
    qsoabsfind is run from a source checkout without being installed) is
    reported as ``'unknown'`` instead of aborting the whole lookup.

    Returns:
        dict: A dictionary mapping each package name to its version string.
    """
    # Local import: importlib.metadata is the standard-library replacement for
    # the deprecated pkg_resources API and needs no module-level change.
    from importlib import metadata

    packages = ['qsoabsfind', 'numpy', 'astropy', 'scipy', 'numba', 'matplotlib']
    versions = {}
    for pkg in packages:
        try:
            versions[pkg] = metadata.version(pkg)
        except metadata.PackageNotFoundError:
            # Keep the key so callers always see the full set of packages.
            versions[pkg] = 'unknown'
    return versions

from .utils import read_nqso_from_header, get_package_versions, parse_qso_sequence

def run_convolution_method_absorber_finder_QSO_spectra(fits_file, spec_index, absorber, kwargs):
"""
Expand All @@ -42,34 +28,6 @@ def run_convolution_method_absorber_finder_QSO_spectra(fits_file, spec_index, ab
"""
return read_single_spectrum_and_find_absorber(fits_file, spec_index, absorber, **kwargs)

def parse_qso_sequence(qso_sequence):
    """
    Parse a bash-like sequence or a single integer to generate QSO indices.

    Args:
        qso_sequence (str or int): Either an integer ``n`` (or a string of
            digits), which yields the indices ``0 .. n-1``, or a bash-like
            sequence ``'start-end'`` / ``'start-end:step'`` (e.g., '1-1000',
            '1-1000:10'), which yields ``start, start+step, ...`` up to and
            including ``end``. Surrounding whitespace in strings is ignored.

    Returns:
        numpy.array: Array of QSO indices.

    Raises:
        ValueError: If the input is neither an integer nor a string matching
            ``start-end[:step]`` exactly, or if ``step`` is zero.
    """
    if isinstance(qso_sequence, int):
        return np.arange(qso_sequence)

    # Handle string input
    if isinstance(qso_sequence, str):
        text = qso_sequence.strip()

        # isdecimal() only accepts characters that int() can convert, unlike
        # isdigit(), which also accepts e.g. superscript digits.
        if text.isdecimal():
            return np.arange(int(text))

        # fullmatch (not match) so trailing garbage such as '1-10abc' or a
        # dangling ':' is rejected instead of being silently ignored.
        match = re.fullmatch(r"(\d+)-(\d+)(?::(\d+))?", text)
        if match:
            start, end, step = match.groups()
            step = int(step) if step else 1
            # A zero step cannot define a sequence; fall through to the error.
            if step > 0:
                return np.arange(int(start), int(end) + 1, step)

    # If none of the conditions matched, raise an error
    raise ValueError(f"Invalid QSO sequence format: '{qso_sequence}'. Use 'start-end[:step]' or an integer.")

def parallel_convolution_method_absorber_finder_QSO_spectra(fits_file, spec_indices, absorber, n_jobs, **kwargs):
"""
Run convolution_method_absorber_finder_in_QSO_spectra in parallel using
Expand Down
42 changes: 42 additions & 0 deletions qsoabsfind/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import os
from astropy.io import fits
from astropy.table import Table
import re
import pkg_resources

# Configure logging
#logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
Expand All @@ -20,6 +22,46 @@
constants = load_constants()
lines, amplitude_dict, speed_of_light = constants.lines, constants.amplitude_dict, constants.speed_of_light

def get_package_versions():
    """
    Get the versions of qsoabsfind and other relevant packages.

    A package whose distribution metadata cannot be found (for example when
    qsoabsfind is run from a source checkout without being installed) is
    reported as ``'unknown'`` instead of aborting the whole lookup.

    Returns:
        dict: A dictionary mapping each package name to its version string.
    """
    # Local import: importlib.metadata is the standard-library replacement for
    # the deprecated pkg_resources API and needs no module-level change.
    from importlib import metadata

    packages = ['qsoabsfind', 'numpy', 'astropy', 'scipy', 'numba', 'matplotlib']
    versions = {}
    for pkg in packages:
        try:
            versions[pkg] = metadata.version(pkg)
        except metadata.PackageNotFoundError:
            # Keep the key so callers always see the full set of packages.
            versions[pkg] = 'unknown'
    return versions

def parse_qso_sequence(qso_sequence):
    """
    Parse a bash-like sequence or a single integer to generate QSO indices.

    Args:
        qso_sequence (str or int): Either an integer ``n`` (or a string of
            digits), which yields the indices ``0 .. n-1``, or a bash-like
            sequence ``'start-end'`` / ``'start-end:step'`` (e.g., '1-1000',
            '1-1000:10'), which yields ``start, start+step, ...`` up to and
            including ``end``. Surrounding whitespace in strings is ignored.

    Returns:
        numpy.array: Array of QSO indices.

    Raises:
        ValueError: If the input is neither an integer nor a string matching
            ``start-end[:step]`` exactly, or if ``step`` is zero.
    """
    if isinstance(qso_sequence, int):
        return np.arange(qso_sequence)

    # Handle string input
    if isinstance(qso_sequence, str):
        text = qso_sequence.strip()

        # isdecimal() only accepts characters that int() can convert, unlike
        # isdigit(), which also accepts e.g. superscript digits.
        if text.isdecimal():
            return np.arange(int(text))

        # fullmatch (not match) so trailing garbage such as '1-10abc' or a
        # dangling ':' is rejected instead of being silently ignored.
        match = re.fullmatch(r"(\d+)-(\d+)(?::(\d+))?", text)
        if match:
            start, end, step = match.groups()
            step = int(step) if step else 1
            # A zero step cannot define a sequence; fall through to the error.
            if step > 0:
                return np.arange(int(start), int(end) + 1, step)

    # If none of the conditions matched, raise an error
    raise ValueError(f"Invalid QSO sequence format: '{qso_sequence}'. Use 'start-end[:step]' or an integer.")


def elapsed(start, msg):
"""
Prints the elapsed time since `start`.
Expand Down

0 comments on commit d9bafd4

Please sign in to comment.