Skip to content

Commit af31cad

Browse files
committed
adjust per @msweier's changes
1 parent 3759826 commit af31cad

File tree

1 file changed

+23
-7
lines changed

1 file changed

+23
-7
lines changed

dags/cumulus/aprfc_qpf_06h.py

+23-7
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
default_args = {
2121
"owner": "airflow",
2222
"depends_on_past": False,
23-
"start_date": (datetime.now(timezone.utc) - timedelta(hours=72)).replace(
23+
"start_date": (datetime.now(timezone.utc) - timedelta(hours=36)).replace(
2424
minute=0, second=0
2525
),
2626
"catchup_by_default": False,
@@ -31,6 +31,24 @@
3131
}
3232

3333

34+
def get_latest_files(filenames):
    """
    Reduce ``filenames`` to a single entry per unique timestamp key.

    Each name is searched for ``qpf06f_has_<N>f_<YYYYMMDD_HH>_awips_<digits>``.
    The ``<YYYYMMDD_HH>`` stamp and the digits following ``awips_`` are joined
    with an underscore to form the grouping key. When several names share a
    key, the one that compares greatest as a plain string is kept.

    Args:
        filenames: iterable of candidate file names. Entries that are not
            strings, or that do not contain the pattern, are ignored.

    Returns:
        A list holding one file name per key, ordered by the first
        appearance of each key in ``filenames``.
    """
    # Compile once up front rather than handing the raw pattern to
    # re.search on every iteration.
    pattern = re.compile(r"qpf06f_has_\d+f_(\d{8}_\d{2})_awips_(\d+)")

    # Maps each timestamp key to the winning file name seen so far.
    latest_files = {}

    for filename in filenames:
        # Names scraped from anchor tags may be None when a tag carries no
        # href; skip those instead of letting the regex raise TypeError.
        if not isinstance(filename, str):
            continue

        match = pattern.search(filename)
        if match is None:
            continue

        key = match.group(1) + "_" + match.group(2)
        current = latest_files.get(key)
        # NOTE(review): this is a lexicographic comparison of the whole name,
        # so "..._6f_..." outranks "..._12f_..." and ".grb.gz" outranks
        # ".grb" -- confirm that this is the intended meaning of "latest".
        if current is None or filename > current:
            latest_files[key] = filename

    return list(latest_files.values())
50+
51+
3452
# ALR QPF filename generator
3553
def get_filenames(edate, url):
3654
"""
@@ -39,22 +57,20 @@ def get_filenames(edate, url):
3957
for the specified date.
4058
"""
4159
d_t1 = edate.strftime("%Y%m%d")
42-
d_t2 = (edate - timedelta(hours=24)).strftime("%Y%m%d")
4360

4461
page = requests.get(url)
4562
soup = BeautifulSoup(page.content, "html.parser")
4663
links = [node.get("href") for node in soup.find_all("a")]
4764
filenames = []
48-
for d in [d_t2, d_t1]:
49-
regex = f"^qpf06f_has_.*.awips_{d}\d+.grb.gz$"
50-
filenames = filenames + [link for link in links if re.match(regex, link)]
65+
regex = f"^qpf06f_has_\\d+f_\\d{{8}}_\\d{{2}}_awips_{d_t1}.*\\.grb(\\.gz)?$"
66+
filenames = [link for link in links if re.match(regex, link)]
5167

52-
return filenames
68+
return get_latest_files(filenames)
5369

5470

5571
@dag(
5672
default_args=default_args,
57-
schedule="40 14,5 * * *",
73+
schedule="20 9,15,19 * * *",
5874
tags=["cumulus", "precip", "QPF", "APRFC"],
5975
max_active_runs=1,
6076
max_active_tasks=1,

0 commit comments

Comments
 (0)