Skip to content

Commit b312d83

Browse files
committed
[Python] :: Landsat Scrapper demo
1 parent fb8200c commit b312d83

File tree

2 files changed

+123
-0
lines changed

2 files changed

+123
-0
lines changed

python/scrapper/__init__.py

Whitespace-only changes.

python/scrapper/landsat.py

+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import os
from datetime import datetime
from typing import List, Optional
4+
5+
6+
# util.py
7+
def make_bbox(location, margin=0.05):
    """Build a WGS84 bounding box around a ``(lat, lon)`` point.

    Returns ``[west, south, east, north]`` — the coordinate order STAC
    searches expect — padded by ``margin`` degrees on every side.
    """
    latitude, longitude = location
    west, east = longitude - margin, longitude + margin
    south, north = latitude - margin, latitude + margin
    return [west, south, east, north]
10+
11+
12+
def make_first_and_last_day_of_year(year):
    """Return ``(Jan 1, Dec 31)`` of ``year`` as naive ``datetime``s."""
    return datetime(year, 1, 1), datetime(year, 12, 31)
17+
18+
19+
# search.py
20+
from pystac_client import Client
21+
22+
23+
class Searcher:
    """Searches the USGS Landsat STAC catalogue for scenes.

    Wraps a pystac-client ``Client`` pointed at the landsatlook STAC
    server and flattens search results into plain dicts of asset hrefs.
    """

    # Default platform filter: Landsat 8 and 9 scenes only.
    # Kept as a class constant so the method default is not a shared
    # mutable dict (the original used a mutable default argument).
    _DEFAULT_QUERY = {"platform": {"in": ["LANDSAT_8", "LANDSAT_9"]}}

    def __init__(self, search_url: str = 'https://landsatlook.usgs.gov/stac-server'):
        self.client = Client.open(search_url)

    def search(
        self,
        bbox: List[float],
        start_at: datetime,
        end_at: datetime,
        collections: str = 'landsat-c2l1',
        query: Optional[dict] = None,
    ):
        """Search ``collections`` for items inside ``bbox`` between two dates.

        Args:
            bbox: ``[west, south, east, north]`` in WGS84 degrees.
            start_at: start of the acquisition window (date part used).
            end_at: end of the acquisition window (date part used).
            collections: STAC collection id to search.
            query: STAC query filter; ``None`` selects the Landsat 8/9
                platform filter.

        Returns:
            dict mapping item id -> ``{asset name: asset href}``.
        """
        if query is None:
            query = self._DEFAULT_QUERY

        s_dt = start_at.strftime('%Y-%m-%d')
        e_dt = end_at.strftime('%Y-%m-%d')

        searched_data = self.client.search(
            collections=[collections],
            bbox=bbox,
            query=query,
            datetime=f'{s_dt}/{e_dt}'
        )
        return self._serialize_dataset(searched_data)

    @staticmethod
    def _serialize_dataset(plain_data):
        """Flatten STAC items into ``{item_id: {asset_name: href}}``.

        ``item.assets`` is a mapping of asset name -> Asset object, so we
        iterate ``.items()``. Iterating the mapping directly yields only
        the string keys, which have no ``.href`` — the original code did
        exactly that and would raise AttributeError on real results.
        """
        dataset = {}
        for result in plain_data:
            dataset[result.id] = {
                name: asset.href for name, asset in result.assets.items()
            }
        return dataset
60+
61+
62+
# download.py
63+
from selenium import webdriver
64+
from selenium.webdriver.common.by import By
65+
66+
67+
class Downloader:
    """Drives a Chrome browser to fetch downloads from USGS.

    USGS download links bounce unauthenticated sessions to the EROS
    login page; when that redirect is detected the credentials are
    submitted and the download is attempted again.
    """

    def __init__(self, username: str, password: str):
        self.username = username
        self.password = password

        self.driver = webdriver.Chrome()

    def download(self, download_url: str):
        """Open ``download_url``; on a login redirect, sign in and retry.

        NOTE(review): retries recursively with no attempt limit — if the
        login keeps failing this never terminates; confirm intended.
        """
        self.driver.get(download_url)

        landed_on_login = self.driver.current_url.startswith(
            'https://ers.cr.usgs.gov/login?')
        if landed_on_login:
            self._handle_login_redirect()
            self.download(download_url)

    def _set_download_path(self):
        # Placeholder: the browser download directory is not configured yet.
        pass

    def _handle_login_redirect(self):
        """Fill the EROS login form with the stored credentials and submit."""
        credentials = (('username', self.username), ('password', self.password))
        for field_name, value in credentials:
            self.driver.find_element(By.NAME, field_name).send_keys(value)

        self.driver.find_element(By.ID, 'loginButton').click()
96+
97+
98+
# main.py
99+
# Areas of interest: (name, [lat, lon]) centre point for each tile.
tiles = [
    ('Salzgitter', [52.1554604, 10.3953505]),
    ('Bremen', [53.1257501, 8.6898810]),
    ('Eisenhuttenstadt', [52.1644183, 14.6395639])
]
# Acquisition years to crawl: 2015 through 2025 inclusive.
years = list(range(2015, 2026))
105+
106+
107+
def main(save_path: str, searcher: Searcher, downloader: Downloader):
    """Search every (tile, year) combination and download each asset.

    Args:
        save_path: target directory for downloads — not wired up yet,
            see the TODO below and ``Downloader._set_download_path``.
        searcher: configured STAC searcher.
        downloader: browser-based downloader holding EROS credentials.
    """
    for t_name, t_locate in tiles:
        for year in years:
            bbox = make_bbox(t_locate)
            start_at, end_at = make_first_and_last_day_of_year(year)

            dataset = searcher.search(bbox, start_at, end_at)

            # TODO: route downloads into save_path
            # ``dataset`` maps scene id -> {asset name: href} and the
            # downloader takes a single URL, so walk every asset href.
            # (The original passed the whole dict to download(), which
            # webdriver cannot open.)
            for assets in dataset.values():
                for href in assets.values():
                    downloader.download(href)
117+
118+
119+
if __name__ == '__main__':
    # NOTE(review): credentials are blank — supply a real EROS
    # username/password before running.
    main(
        '/Volumes/Work/Crawler/ncpl/',
        Searcher(),
        Downloader(username='', password=''),
    )

0 commit comments

Comments
 (0)