-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_format.py
26 lines (23 loc) · 1.32 KB
/
data_format.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import pandas as pd
def generate_data(
    dunne_jerolmack_path=r"data/GlobalDatasets.xlsx",
    deal_ds_path="data/HG_data_comp_complete.csv",
    output_path='data/based_input_data.csv',
):
    """Merge two channel-geometry tables into one CSV.

    Reads an Excel sheet (``dunne_jerolmack_path``) and a CSV
    (``deal_ds_path``), normalises both, stacks them row-wise, drops rows
    lacking any of width/slope/discharge/depth, and writes the result to
    ``output_path``.

    Args:
        dunne_jerolmack_path: Excel workbook. It must contain the columns
            ``'tau_*bf'`` and ``'D50 (m)'`` (both dropped) plus exactly six
            others, which are renamed by position to source, site_id,
            slope, width, depth and discharge.
        deal_ds_path: CSV file. It must contain ``river_class``, ``source``
            and the bookkeeping columns dropped below.
        output_path: Destination of the combined CSV.

    Returns:
        None. The combined table is written to ``output_path``.

    Raises:
        KeyError: If a column that is dropped or cast is missing.
        ValueError: If the Excel sheet does not end up with exactly seven
            columns for the positional rename.
    """
    required_measurements = ['width', 'slope', 'discharge', 'depth']

    # ---- Excel table ----------------------------------------------------
    dunne_jerolmack = pd.read_excel(dunne_jerolmack_path)
    dunne_jerolmack = dunne_jerolmack.drop(['tau_*bf', 'D50 (m)'], axis=1)
    # Every row from this table is flagged as a bankfull measurement.
    dunne_jerolmack['bankfull'] = True
    # Positional rename: relies on the sheet's remaining column order.
    dunne_jerolmack.columns = [
        'source', 'site_id', 'slope', 'width', 'depth', 'discharge',
        'bankfull',
    ]
    dunne_jerolmack['source'] = dunne_jerolmack['source'].astype(str)
    # Cast site_id from itself. The previous code assigned the 'source'
    # column here, overwriting every site identifier with its source label.
    dunne_jerolmack['site_id'] = dunne_jerolmack['site_id'].astype(str)
    # Store slopes as magnitudes regardless of sign in the sheet.
    dunne_jerolmack['slope'] = dunne_jerolmack['slope'].abs()

    # ---- CSV table ------------------------------------------------------
    deal_ds = pd.read_csv(deal_ds_path)
    # Rows whose river_class is -1.0 are excluded.
    deal_ds = deal_ds.query("river_class != -1.0")
    deal_ds = deal_ds.drop(
        [
            'notes', 'area', 'sed_discharge', 'd90',
            'bedload_discharge', 'erosion_rate', 'velocity',
            'd50', 'd84', 'Unnamed: 0', 'DOI', 'primary_source',
            'river_class',
        ],
        axis=1,
    )
    deal_ds['source'] = deal_ds['source'].astype(str)
    if 'site_id' in deal_ds.columns:
        # Same copy-paste fix as above: keep the file's own site ids.
        deal_ds['site_id'] = deal_ds['site_id'].astype(str)
    else:
        # NOTE(review): the CSV schema is not visible here. If it has no
        # site_id column, keep the old behaviour of reusing the source label.
        deal_ds['site_id'] = deal_ds['source']

    # ---- Combine and write ----------------------------------------------
    based_input_data = pd.concat([deal_ds, dunne_jerolmack], axis=0)
    based_input_data = based_input_data.dropna(subset=required_measurements)
    # The index is written as-is (labels from both inputs are kept), which
    # matches the file layout produced before this change.
    based_input_data.to_csv(output_path)
# Script entry point: rebuild the combined dataset only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    generate_data()