|
| 1 | +import os |
| 2 | +from typing import List |
| 3 | +from datetime import datetime |
| 4 | + |
| 5 | + |
| 6 | +# util.py |
def make_bbox(location, margin=0.05):
    """Build a square bounding box around a point.

    Args:
        location: Two-element sequence unpacked as ``(lat, lon)``.
        margin: Amount subtracted from / added to each coordinate.

    Returns:
        ``[lon - margin, lat - margin, lon + margin, lat + margin]``.
    """
    latitude, longitude = location
    west, east = longitude - margin, longitude + margin
    south, north = latitude - margin, latitude + margin
    return [west, south, east, north]
| 10 | + |
| 11 | + |
def make_first_and_last_day_of_year(year):
    """Return the first and last day of *year* as naive midnight datetimes.

    Returns:
        A ``(january_first, december_thirty_first)`` tuple.
    """
    return datetime(year, 1, 1), datetime(year, 12, 31)
| 17 | + |
| 18 | + |
| 19 | +# search.py |
| 20 | +from pystac_client import Client |
| 21 | + |
| 22 | + |
class Searcher:
    """Query a STAC API and flatten the matching items into plain dicts."""

    def __init__(self, search_url: str = 'https://landsatlook.usgs.gov/stac-server'):
        """Open a STAC client against *search_url*."""
        self.client = Client.open(search_url)

    def search(
        self,
        bbox: List[float],
        start_at: datetime,
        end_at: datetime,
        collections: str = 'landsat-c2l1',
        query: 'dict | None' = None,
    ):
        """Search the catalogue and return the asset links of every match.

        Args:
            bbox: Bounding box passed through to the client unchanged.
            start_at: Start of the date range; only the date part is used.
            end_at: End of the date range; only the date part is used.
            collections: Name of the single collection to search.
            query: Extra query filter. ``None`` selects the Landsat 8/9
                platform filter.

        Returns:
            ``{item_id: {asset_key: asset_href}}`` for all matching items.
        """
        # Build the default per call so one caller's mutation of the dict
        # cannot leak into later calls.
        if query is None:
            query = {"platform": {"in": ["LANDSAT_8", "LANDSAT_9"]}}

        s_dt = start_at.strftime('%Y-%m-%d')
        e_dt = end_at.strftime('%Y-%m-%d')

        searched_data = self.client.search(
            collections=[collections],
            bbox=bbox,
            query=query,
            datetime=f'{s_dt}/{e_dt}',
        )

        # The search object is a lazy query handle; .items() yields the
        # matching items themselves.
        return self._serialize_dataset(searched_data.items())

    @staticmethod
    def _serialize_dataset(plain_data):
        """Map each item's id to a ``{asset_key: href}`` dict.

        Args:
            plain_data: Iterable of items exposing ``id`` and an ``assets``
                mapping whose values expose ``href``.
        """
        # ``assets`` is a mapping: iterate key/value pairs, because the keys
        # alone are plain strings and carry no ``href``.
        return {
            item.id: {key: asset.href for key, asset in item.assets.items()}
            for item in plain_data
        }
| 60 | + |
| 61 | + |
| 62 | +# download.py |
| 63 | +from selenium import webdriver |
| 64 | +from selenium.webdriver.common.by import By |
| 65 | + |
| 66 | + |
class Downloader:
    """Fetch URLs through a Chrome WebDriver session, logging in on demand."""

    # Prefix of the login page the browser is redirected to when the
    # session is not authenticated.
    LOGIN_URL_PREFIX = 'https://ers.cr.usgs.gov/login?'

    # Upper bound on login attempts per download, so rejected credentials
    # raise an error instead of retrying forever.
    MAX_LOGIN_ATTEMPTS = 3

    def __init__(self, username: str, password: str):
        """Store the credentials and start a Chrome WebDriver session."""
        self.username = username
        self.password = password

        self.driver = webdriver.Chrome()

    def download(self, download_url: str):
        """Navigate to *download_url*, signing in first if redirected.

        Raises:
            RuntimeError: If the browser is still on the login page after
                ``MAX_LOGIN_ATTEMPTS`` sign-in attempts.
        """
        for _ in range(self.MAX_LOGIN_ATTEMPTS):
            self.driver.get(download_url)
            if not self.driver.current_url.startswith(self.LOGIN_URL_PREFIX):
                return
            self._handle_login_redirect()

        # One last request after the final sign-in attempt.
        self.driver.get(download_url)
        if self.driver.current_url.startswith(self.LOGIN_URL_PREFIX):
            raise RuntimeError(
                f'Still redirected to the login page after '
                f'{self.MAX_LOGIN_ATTEMPTS} attempts: {download_url}'
            )

    def close(self):
        """Quit the WebDriver session and release the browser."""
        self.driver.quit()

    def _set_download_path(self):
        # Not implemented yet.
        pass

    def _handle_login_redirect(self):
        """Fill in the login form on the current page and submit it."""
        username_field = self.driver.find_element(By.NAME, 'username')
        password_field = self.driver.find_element(By.NAME, 'password')

        username_field.send_keys(self.username)
        password_field.send_keys(self.password)

        sign_in_button = self.driver.find_element(By.ID, 'loginButton')
        sign_in_button.click()
| 96 | + |
| 97 | + |
| 98 | +# main.py |
# Sites to crawl, as (name, [latitude, longitude]) pairs.
tiles = [
    ('Salzgitter', [52.1554604, 10.3953505]),
    ('Bremen', [53.1257501, 8.6898810]),
    ('Eisenhuttenstadt', [52.1644183, 14.6395639]),
]
# Every year from 2015 through 2025 inclusive.
years = list(range(2015, 2026))
| 105 | + |
| 106 | + |
def main(save_path: str, searcher: Searcher, downloader: Downloader):
    """Search every tile for every year and download each asset found.

    Args:
        save_path: Destination directory; not used yet (see TODO below).
        searcher: Supplies ``search(bbox, start_at, end_at)``, expected to
            return ``{item_id: {asset_key: href}}``.
        downloader: Supplies ``download(url)`` for a single URL.
    """
    for _t_name, t_locate in tiles:
        # The bounding box depends only on the tile, so compute it once
        # instead of once per year.
        bbox = make_bbox(t_locate)

        for year in years:
            start_at, end_at = make_first_and_last_day_of_year(year)

            dataset = searcher.search(bbox, start_at, end_at)

            # TODO: save_path
            # download() takes a single URL, so walk the per-item asset
            # links instead of handing it the whole dataset dict.
            for assets in dataset.values():
                for href in assets.values():
                    downloader.download(href)
| 117 | + |
| 118 | + |
if __name__ == '__main__':
    searcher = Searcher()
    # Take the credentials from the environment so they do not have to be
    # written into the source; unset variables fall back to empty strings.
    downloader = Downloader(
        username=os.environ.get('USGS_USERNAME', ''),
        password=os.environ.get('USGS_PASSWORD', ''),
    )

    try:
        main('/Volumes/Work/Crawler/ncpl/', searcher, downloader)
    finally:
        # Always shut the browser down, even when the crawl fails part-way.
        downloader.driver.quit()
0 commit comments