Skip to content

Commit

Permalink
Merge pull request #2376 from desihub/modernize_proctables
Browse files Browse the repository at this point in the history
Add script to update processing table column layout
  • Loading branch information
sbailey authored Sep 30, 2024
2 parents 67201d7 + 43cd53f commit 4eae355
Show file tree
Hide file tree
Showing 7 changed files with 251 additions and 44 deletions.
40 changes: 0 additions & 40 deletions bin/desi_reformat_exposure_tables

This file was deleted.

12 changes: 12 additions & 0 deletions bin/desi_reformat_exptables
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env python
# coding: utf-8

## Thin command-line wrapper: all of the real logic lives in
## desispec.scripts.reformat_exptables.
from desispec.scripts.reformat_exptables import get_parser, reformat_exposure_tables


if __name__ == '__main__':
    options = get_parser().parse_args()

    reformat_exposure_tables(**vars(options))
14 changes: 14 additions & 0 deletions bin/desi_reformat_proctables
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env python
# coding: utf-8

## Thin command-line wrapper: all of the real logic lives in
## desispec.scripts.reformat_proctables.
from desispec.scripts.reformat_proctables import get_parser, reformat_processing_tables


if __name__ == '__main__':
    options = get_parser().parse_args()

    reformat_processing_tables(**vars(options))
3 changes: 3 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,9 @@ desispec API
.. automodule:: desispec.scripts.reformat_exptables
:members:

.. automodule:: desispec.scripts.reformat_proctables
:members:

.. automodule:: desispec.scripts.rejectcosmics
:members:

Expand Down
30 changes: 27 additions & 3 deletions py/desispec/scripts/reformat_exptables.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""
desispec.scripts.updateexptables
================================
desispec.scripts.reformat_exptables
===================================
"""
import argparse
import os
import sys
import numpy as np
Expand All @@ -20,8 +21,31 @@
from desispec.scripts.exposuretable import create_exposure_tables


def get_parser():
    """
    Create the command-line argument parser for the
    desi_reformat_exposure_tables script.

    Returns:
        argparse.ArgumentParser: parser for the options documented in each
            add_argument() help string below.
    """
    # argparse usage strings use %-style interpolation, so "{prog}" would be
    # printed literally; "%(prog)s" substitutes the program name.
    parser = argparse.ArgumentParser(usage="%(prog)s [options]")
    parser.add_argument("-n", "--nights", type=str, default=None,
                        help="nights as comma separated string")
    # Note: the concatenated help fragments each end with a space so the
    # rendered help text doesn't run words together.
    parser.add_argument("--night-range", type=str, default=None,
                        help="comma separated pair of nights in form YYYYMMDD,YYYYMMDD "
                             "for first_night,last_night specifying the beginning "
                             "and end of a range of nights to be generated. "
                             "last_night should be inclusive.")
    parser.add_argument("--obstypes", type=str, default=None,
                        help="comma separated list of exposure types to include in "
                             "the exposure table, e.g. science,arc,flat,dark,zero, ...")
    parser.add_argument("-i", "--path-to-data", type=str, default=None,
                        help="path to the raw input data")
    parser.add_argument("-o", "--exp-table-path", type=str, default=None,
                        help="path to save exposure tables, without monthly subdirectory")
    parser.add_argument("--orig-filetype", type=str, default='csv',
                        help="format type for original exposure tables")
    parser.add_argument("--out-filetype", type=str, default='csv',
                        help="format type for output exposure tables")
    parser.add_argument("--verbose", action="store_true",
                        help="print verbose output")
    parser.add_argument("--dry-run", action="store_true",
                        help="Perform a dry run, printing the changes that would be made "
                             "and the final output table "
                             "but not overwriting the actual files on disk.")
    parser.add_argument("--no-specprod", action="store_true",
                        help="Create exposure table in repository location "
                             "rather than the SPECPROD location.")
    return parser

def update_exposure_tables(nights=None, night_range=None, path_to_data=None,
def reformat_exposure_tables(nights=None, night_range=None, path_to_data=None,
exp_table_path=None, obstypes=None, orig_filetype='csv',
out_filetype='csv', verbose=False, no_specprod=False,
dry_run=False):
Expand Down
194 changes: 194 additions & 0 deletions py/desispec/scripts/reformat_proctables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
"""
desispec.scripts.reformat_proctables
====================================
"""
import argparse
import os
import glob
import sys
import numpy as np
import re
import time
from astropy.table import Table

from desispec.io.meta import findfile
from desispec.workflow.proctable import get_processing_table_column_defs
from desispec.workflow.utils import define_variable_from_environment, listpath, \
pathjoin
from desispec.workflow.tableio import write_table, load_table
from desispec.scripts.exposuretable import create_exposure_tables


def get_parser():
    """
    Create the command-line argument parser for the
    desi_reformat_processing_tables script.

    Returns:
        argparse.ArgumentParser: parser for the options documented in each
            add_argument() help string below.
    """
    # argparse usage strings use %-style interpolation, so "{prog}" would be
    # printed literally; "%(prog)s" substitutes the program name.
    parser = argparse.ArgumentParser(usage="%(prog)s [options]")
    parser.add_argument("-n", "--nights", type=str, default=None,
                        help="nights as comma separated string")
    # Note: the concatenated help fragments each end with a space so the
    # rendered help text doesn't run words together.
    parser.add_argument("--night-range", type=str, default=None,
                        help="comma separated pair of nights in form YYYYMMDD,YYYYMMDD "
                             "for first_night,last_night specifying the beginning "
                             "and end of a range of nights to be generated. "
                             "last_night should be inclusive.")
    # Help text fixed: this script operates on processing tables, not
    # exposure tables (copy-paste from the exptables parser).
    parser.add_argument("--orig-filetype", type=str, default='csv',
                        help="format type for original processing tables")
    parser.add_argument("--out-filetype", type=str, default='csv',
                        help="format type for output processing tables")
    parser.add_argument("--dry-run", action="store_true",
                        help="Perform a dry run, printing the changes that would be made "
                             "and the final output table "
                             "but not overwriting the actual files on disk.")
    return parser

def reformat_processing_tables(nights=None, night_range=None, orig_filetype='csv',
                               out_filetype='csv', dry_run=False):
    """
    Regenerate processing tables for the requested nights using the current
    column layout. Requires a processing table to already exist on disk for
    each night: this script updates tables, it does not create them.

    Args:
        nights: str, int, or comma separated list. The night(s) to update
            processing tables for. 'all' (or None with night_range set)
            means every night that has a processing table on disk.
        night_range: str. Comma separated pair of nights in form
            YYYYMMDD,YYYYMMDD for first_night,last_night specifying the
            beginning and end of a range of nights. Both endpoints are
            inclusive.
        orig_filetype: str. The file extension (without the '.') of the
            input processing tables.
        out_filetype: str. The file extension (without the '.') of the
            output processing tables.
        dry_run: bool. If True, print the table that would be written but
            do not move or overwrite any files on disk.

    Returns:
        Nothing

    Raises:
        ValueError: if neither nights nor night_range is given, or if
            night_range is malformed.
    """
    # log = get_logger()
    ## Make sure user specified what nights to run on
    if nights is None and night_range is None:
        raise ValueError("Must specify either nights or night_range."
                         + " To process all nights give nights=all")

    ## Get all nights in 2020's with data
    proctab_template = findfile('proctable', night=99999999)
    proctab_template = proctab_template.replace('99999999', '202[0-9][01][0-9][0-3][0-9]')
    proctab_template = proctab_template.replace('.csv', f'.{orig_filetype}')
    nights_with_proctables = list()
    for ptabfn in glob.glob(proctab_template):
        ## nights are 202YMMDD; raw string so \d is a regex class, not an escape
        matches = re.findall(r'202\d{5}', os.path.basename(ptabfn))
        if len(matches) == 1:
            nights_with_proctables.append(int(matches[0]))
        else:
            print(f"Couldn't parse a night from proctable file: {ptabfn}")

    ## If unspecified or given "all", set nights to all nights with data
    check_night = False
    if nights is None or nights == 'all':
        nights = nights_with_proctables
        ## No need to check nights since derived from disk
    else:
        nights = [int(val.strip()) for val in nights.split(",")]
        ## If nights are specified, make sure we check that there is actually data
        check_night = True
    nights = np.sort(nights)

    ## If user specified a night range, cut nights to that range of dates
    if night_range is not None:
        if ',' not in night_range:
            raise ValueError("night_range must be a comma separated pair of "
                             + "nights in form YYYYMMDD,YYYYMMDD")
        nightpair = night_range.split(',')
        if len(nightpair) != 2 or not nightpair[0].isnumeric() \
                or not nightpair[1].isnumeric():
            raise ValueError("night_range must be a comma separated pair of "
                             + "nights in form YYYYMMDD,YYYYMMDD")
        first_night, last_night = nightpair
        nights = nights[np.where(int(first_night) <= nights.astype(int))[0]]
        nights = nights[np.where(int(last_night) >= nights.astype(int))[0]]

    ## Get current set of expected columns
    ptab_cols, ptab_dtypes, ptab_defs = get_processing_table_column_defs(return_default_values=True)
    ptab_cols, ptab_dtypes = np.array(ptab_cols), np.array(ptab_dtypes)

    ## Tell user the final list of nights and starting looping over them
    print("Nights: ", nights)
    for night in nights:
        if check_night and night not in nights_with_proctables:
            print(f"Night {night} doesn't have a processing table: Skipping.")
            continue

        ## If the processing table doesn't exist, skip, since we are updating
        ## not generating.
        orig_pathname = findfile('proctable', night=night).replace('.csv', f'.{orig_filetype}')
        if not os.path.exists(orig_pathname):
            print(f'Could not find processing table for night={night} at:'
                  + f' {orig_pathname}. Skipping this night.')
            continue

        ## Load the old table and compare its columns to the expected layout
        origtable = load_table(orig_pathname, tabletype='proctab')
        curr_colnames = np.array(list(origtable.colnames))
        expected_cols = np.isin(curr_colnames, ptab_cols)
        found_cols = np.isin(ptab_cols, curr_colnames)

        ## If everything is present, don't try to do anything
        if np.all(expected_cols) and np.all(found_cols):
            print(f"{orig_pathname} has all of the expected columns, not updating this table.")
            continue

        unexpected = list(curr_colnames[~expected_cols])
        missing = list(ptab_cols[~found_cols])
        print(f"Found the following unexpected columns: {unexpected}")
        print(f"Found the following missing columns: {missing}")

        ## Solving the only cases I'm currently aware of
        if 'CAMWORD' in unexpected and 'PROCCAMWORD' in missing:
            print("CAMWORD listed instead of PROCCAMWORD. Updating that.")
            origtable.rename_column('CAMWORD', 'PROCCAMWORD')
            ## Bug fix: this previously removed 'CAWORD' (typo), which raised
            ## ValueError because that name was never in the list.
            unexpected.remove('CAMWORD')
            missing.remove('PROCCAMWORD')

        if len(unexpected) > 0:
            print("WARNING: Script detected unexpected columns. Only handle "
                  + "the case where 'CAMWORD' is defined instead of PROCCAMWORD. "
                  + "The following unexpected columns will be dropped without "
                  + f"using the information they contain: {unexpected}.")
            for colname in unexpected:
                origtable.remove_column(colname)

        ## Add any missing columns, filled with the current default values
        for colname in missing:
            if colname not in ['BADAMPS', 'LASTSTEP', 'EXPFLAG']:
                print(f"WARNING: Script didn't expect {colname} to be missing. "
                      + "Replacing with default values, but this may have "
                      + "downstream consequences.")
            colindex = np.where(ptab_cols == colname)[0][0]
            newdat = [ptab_defs[colindex]] * len(origtable)
            newcol = Table.Column(name=colname, data=newdat, dtype=ptab_dtypes[colindex])
            origtable.add_column(newcol)

        ## Finally, reorder to the current column ordering
        origtable = origtable[list(ptab_cols)]

        ## If just testing, print the table and a cell-by-cell equality test
        ## for the scalar columns
        ## If not testing, move the original table to an archived filename
        ## and save the updated table to the official proctable pathname
        if dry_run:
            print("\n\nOutput file would have been:")
            origtable.pprint_all()
        else:
            ftime = time.strftime("%Y%m%d_%Hh%Mm")
            replaced_pathname = orig_pathname.replace(f".{orig_filetype}",
                                                      f".replaced-{ftime}.{orig_filetype}")
            print(f"Moving original file from {orig_pathname} to {replaced_pathname}")
            os.rename(orig_pathname, replaced_pathname)
            ## brief pause so the archived file's mtime precedes the new file's
            time.sleep(0.1)
            out_pathname = orig_pathname.replace(f".{orig_filetype}", f".{out_filetype}")
            write_table(origtable, out_pathname)
            print(f"Updated file saved to {out_pathname}. Original archived as {replaced_pathname}")

        print("\n\n")

    ## Flush the outputs
    sys.stdout.flush()
    sys.stderr.flush()
    print("Processing table regenerations complete")
2 changes: 1 addition & 1 deletion py/desispec/workflow/proctable.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def get_processing_table_column_defs(return_default_values=False,
coldeflt2 = [ 'a0123456789' , 0 , -99 , '' , 'unknown', defqid ]

colnames2 += [ 'SUBMIT_DATE', 'STATUS' , 'SCRIPTNAME']
coltypes2 += [ int , 'S14' , 'S40' ]
coltypes2 += [ int , 'S14' , 'S50' ]
coldeflt2 += [ -99 , 'UNSUBMITTED', '' ]

colnames2 += ['INT_DEP_IDS' , 'LATEST_DEP_QID' , 'ALL_QIDS' ]
Expand Down

0 comments on commit 4eae355

Please sign in to comment.