-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
276 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# import functions and packages | ||
from functions_book_chapter_SCA import * | ||
|
||
dir_raster = './data/planet/20180528_181110_1025_3B_AnalyticMS_SR_clip.tif' | ||
planet = rasterio.open(dir_raster).read()/10000 | ||
planet = np.where(planet[0,:,:] == 0, np.nan, planet) # the default nan data value is 0, replace with np.nan | ||
|
||
fig, axs = plt.subplots(2,2,figsize=(15,7.5)) | ||
im1 = axs[0,0].imshow(planet[0,:,:],cmap='jet') | ||
axs[0,0].set_title("Surface reflectance of blue band", fontsize=16) | ||
|
||
im2 = axs[0,1].imshow(planet[1,:,:], cmap='jet') | ||
axs[0,1].set_title("Surface reflectance of green band", fontsize=16) | ||
|
||
im3 = axs[1,0].imshow(planet[2,:,:], cmap='jet') | ||
axs[1,0].set_title("Surface reflectance of red band", fontsize=16) | ||
|
||
im4 = axs[1,1].imshow(planet[3,:,:], cmap='jet') | ||
axs[1,1].set_title("Surface reflectance of NIR band", fontsize=16) | ||
|
||
cbar_ax = fig.add_axes([0.95, 0.15, 0.02, 0.7]) | ||
fig.colorbar(im1, cax=cbar_ax) | ||
|
||
# read model input features and labels | ||
data = pd.read_csv('./data/samples/sample_100K.csv', index_col = False) | ||
print("Sample dimentions:".format(), data.shape) | ||
print(data.head()) | ||
X = data[['blue','green','red','nir']] | ||
y = data['label'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# prepare data | ||
data = pd.read_csv('./data/samples/sample_10K.csv', index_col = False) | ||
print("Sample dimentions:".format(), data.shape) | ||
data.head() | ||
X = data[['blue','green','red','nir']] | ||
y = data['label'] | ||
|
||
# customize models with different sample sizes | ||
models = get_models_size() | ||
results, names = list(), list() | ||
for name, model in models.items(): | ||
# evaluate models using k-fold cross-validation | ||
scores = evaluate_model(model, X, y) | ||
results.append(scores) | ||
names.append(name) | ||
# print the mean and standard deviation of models | ||
print('>%s Mean Score: %.6f (Score SD: %.6f)' % ('Sample size: ' + str(int(float(name) * 10000)), scores.mean(), scores.std())) | ||
|
||
# display model performance | ||
plt.figure(figsize=(10,5)) | ||
plt.boxplot(results, labels=names, showmeans=True) | ||
plt.show() | ||
|
||
# customize models with different model feature sizes | ||
models = get_models_feature() | ||
# evaluate the models and store results | ||
results, names = list(), list() | ||
for name, model in models.items(): | ||
# evaluate models using k-fold cross-validation | ||
scores = evaluate_model(model, X, y) | ||
results.append(scores) | ||
names.append(name) | ||
# print the mean and standard deviation of models | ||
# print('>%s %.3f (%.3f)' % (name, scores.mean(), scores.std())) | ||
print('>%s Mean Score: %.6f (Score SD: %.6f)' % ('Features: ' + name, scores.mean(), scores.std())) | ||
# display model performance | ||
plt.boxplot(results, labels=names, showmeans=True) | ||
plt.show() | ||
|
||
# customize models with different tree numbers | ||
models = get_models_tree() | ||
# evaluate the models and store results | ||
results, names = list(), list() | ||
for name, model in models.items(): | ||
# evaluate models using k-fold cross-validation | ||
scores = evaluate_model(model, X, y) | ||
results.append(scores) | ||
names.append(name) | ||
# print the mean and standard deviation of models | ||
# print('>%s %.3f (%.3f)' % (name, scores.mean(), scores.std())) | ||
print('>%s Mean Score: %.6f (Score SD: %.6f)' % ('Tree numbers: ' + name, scores.mean(), scores.std())) | ||
# display model performance | ||
plt.boxplot(results, labels=names, showmeans=True) | ||
plt.show() | ||
|
||
# customize models with different tree depths | ||
models = get_models_depth() | ||
# evaluate the models and store results | ||
results, names = list(), list() | ||
for name, model in models.items(): | ||
# evaluate models using k-fold cross-validation | ||
scores = evaluate_model(model, X, y) | ||
results.append(scores) | ||
names.append(name) | ||
# print the mean and standard deviation of models | ||
# print('>%s %.3f (%.3f)' % (name, scores.mean(), scores.std())) | ||
print('>%s Mean Score: %.6f (Score SD: %.6f)' % ('Tree Depth: ' + name, scores.mean(), scores.std())) | ||
# display model performance | ||
plt.figure(figsize=(10,5)) | ||
plt.boxplot(results, labels=names, showmeans=True) | ||
plt.show() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# read model input features and labels | ||
data = pd.read_csv('./data/samples/sample_100K.csv', index_col = False) | ||
print("Sample dimentions:".format(), data.shape) | ||
print(data.head()) | ||
X = data[['blue','green','red','nir']] | ||
y = data['label'] | ||
|
||
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.96,random_state=1) | ||
|
||
# define the model | ||
model = RandomForestClassifier(n_estimators=10, max_depth=10, max_features=4) | ||
|
||
# evaluate the model | ||
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=1000) | ||
n_scores = cross_val_score(model, X_train, y_train, scoring='accuracy', cv=cv, n_jobs=-1) | ||
# report model performance | ||
print('Mean Score: %.6f (SD: %.6f)' % (n_scores.mean(),n_scores.std())) | ||
|
||
# the histogram of the scores | ||
n, bins, patches = plt.hist(n_scores, density=True, facecolor='blue', alpha=0.75) | ||
plt.text(0.91, 15, r'mean = ' + str(n_scores.mean().round(6)) + ' '+ 'SD = ' + str(n_scores.std().round(6))) | ||
plt.xlim(0.9, 1.01) | ||
plt.xlabel('Acuuracy') | ||
plt.ylabel('Probability (%)') | ||
plt.grid(True) | ||
plt.show() | ||
|
||
model.fit(X_train,y_train) | ||
result = permutation_importance(model, X_train, y_train, n_repeats=1000, random_state=42, n_jobs=2) | ||
print('Permutation importance - average:'.format(), X_train.columns) | ||
print([round(i, 6) for i in result.importances_mean]) | ||
|
||
# displace feature importance | ||
fig, ax = plt.subplots(figsize=(6,5)) | ||
ax.boxplot(result.importances.T) | ||
ax.set_title("Permutation Importances", fontsize = 16) | ||
ax.set_xticklabels(labels=X_train.columns, fontsize=14) | ||
plt.show() | ||
|
||
# save model | ||
dir_model = "./models/random_forest_SCA_binary.joblib" | ||
joblib.dump(model, dir_model) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
dir_aso = './data/ASO/ASO_3M_SD_USCATE_20180528_clip.tif' | ||
raso = rasterio.open(dir_aso,'r').read() | ||
raso = np.where(raso[0,:,:] < 0, np.nan, raso) | ||
|
||
th = 0.1 # using 10 cm threshold | ||
raso_binary = np.where(raso >= 0.1, 1, 0) # if the SD is higher than 10 cm, then snow; otherwise, no-snow | ||
|
||
fig, axs = plt.subplots(1,2,figsize=(16,6)) | ||
im1 = axs[0].imshow(raso[0,:,:],cmap = 'Blues',vmin = 0, vmax = 5) | ||
axs[0].set_title('ASO snow depth', fontsize=16) | ||
fig.colorbar(im1, ax = axs[0], label = 'snow depth (meter)', extend = 'max') | ||
|
||
im2 = axs[1].imshow(raso_binary[0,:,:], cmap = 'gray', interpolation = 'none') | ||
axs[1].set_title('ASO snow cover (TH = 10 cm)', fontsize=16) | ||
|
||
dir_model = "./models/random_forest_SCA_binary.joblib" | ||
# load model | ||
model = joblib.load(dir_model) | ||
|
||
df = pd.DataFrame() | ||
df['obs'] = y_test | ||
df['predict'] = model.predict(X_test) | ||
|
||
# Cross-tabulate predictions | ||
print(pd.crosstab(df['obs'], df['predict'], margins=True)) | ||
print(calculate_metrics(df)) | ||
|
||
dir_raster = './data/planet/20180528_181110_1025_3B_AnalyticMS_SR_clip.tif' | ||
dir_out = './data/SCA/' | ||
nodata_flag = 9 | ||
run_sca_prediction(dir_raster, dir_out, nodata_flag, model) | ||
|
||
dir_planet = './data/planet/20180528_181110_1025_3B_AnalyticMS_SR_clip.tif' | ||
r_na_flag = rasterio.open(dir_planet, 'r').read() | ||
r_planet = rasterio.open(dir_planet, 'r').read([4,3,2])/10000 | ||
|
||
dir_sca = './data/SCA/20180528_181110_1025_3B_AnalyticMS_SR_clip_SCA.tif' | ||
r_sca = rasterio.open(dir_sca, 'r') | ||
|
||
fig, (ax1, ax2) = plt.subplots(1,2, figsize = (16,4)) | ||
show(r_planet, ax= ax1, cmap='jet', interpolation = 'none',title = 'Planet false color Image') | ||
show(r_sca.read().squeeze(), ax= ax2, interpolation = 'none',title = 'Planet Snow Cover') | ||
|
||
dir_planet_ext = './data/GIS/extent/CATE_20180528_181110_img_ext.shp' | ||
with fiona.open(dir_planet_ext, "r") as shapefile: | ||
shapes = [feature["geometry"] for feature in shapefile] | ||
|
||
dir_aso = "./data/ASO/ASO_3M_SD_USCATE_20180528_binary_clip.tif" | ||
with rasterio.open(dir_aso,'r') as src: | ||
r_aso = rasterio.mask.mask(src, shapes, crop=True, filled = False) | ||
|
||
dir_pred = './data/SCA/20180528_181110_1025_3B_AnalyticMS_SR_clip_SCA.tif' | ||
with rasterio.open(dir_pred,'r') as src: | ||
r_predict = rasterio.mask.mask(src, shapes, crop=True, filled = False) | ||
|
||
dir_watermask = './data/mask/waterbody_TB_UTM11_clip.tif' | ||
with rasterio.open(dir_watermask,'r') as src: | ||
r_watermask = rasterio.mask.mask(src, shapes, crop=True, filled = False) | ||
|
||
dir_glaciermask = './data/mask/02_rgi60_WesternCanadaUS_hypso_TB_clip.tif' | ||
with rasterio.open(dir_glaciermask,'r') as src: | ||
r_glaciermask = rasterio.mask.mask(src, shapes, crop=True, filled = False) | ||
|
||
|
||
df = pd.DataFrame() | ||
df['predict'] = r_predict[0].ravel() | ||
df['obs'] = r_aso[0].ravel() | ||
df['watermask'] = r_watermask[0].ravel() | ||
df['glaciermask'] = r_watermask[0].ravel() | ||
|
||
# remove NA data region, water bodies, and glaciers | ||
df_mask = df[(df.predict >= 0) & (df.watermask != 0) & (df.glaciermask != 0)] | ||
# print(df) | ||
|
||
print("overall model performance:") | ||
print(calculate_metrics(df_mask)) | ||
|
||
file_landcover = './data/ASO/ASO_3M_CHM_USCATB_20140827_binary_clip.tif' # 1 - forest, 0 open area | ||
with rasterio.open(file_landcover,'r') as src: | ||
r_landcover = rasterio.mask.mask(src, shapes, crop=True, filled = False) | ||
|
||
df['landcover'] = r_landcover[0].ravel() | ||
|
||
df_mask = df[(df.predict >= 0) & (df.watermask != 0) & (df.glaciermask != 0)] | ||
df_open = df_mask[df_mask.landcover == 0] | ||
print("Model performance in open areas:") | ||
print(calculate_metrics(df_open)) | ||
|
||
df_forest = df_mask[df_mask.landcover == 1] | ||
print("Model performance in forested areas:") | ||
print(calculate_metrics(df_forest)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
[{ | ||
"id" : "rgfp44", | ||
"name" : "data_preparation", | ||
"description" : "python", | ||
"code" : "# import functions and packages\nfrom functions_book_chapter_SCA import *\n\ndir_raster = './data/planet/20180528_181110_1025_3B_AnalyticMS_SR_clip.tif'\nplanet = rasterio.open(dir_raster).read()/10000\nplanet = np.where(planet[0,:,:] == 0, np.nan, planet) # the default nan data value is 0, replace with np.nan\n\nfig, axs = plt.subplots(2,2,figsize=(15,7.5))\nim1 = axs[0,0].imshow(planet[0,:,:],cmap='jet')\naxs[0,0].set_title(\"Surface reflectance of blue band\", fontsize=16)\n\nim2 = axs[0,1].imshow(planet[1,:,:], cmap='jet')\naxs[0,1].set_title(\"Surface reflectance of green band\", fontsize=16)\n\nim3 = axs[1,0].imshow(planet[2,:,:], cmap='jet')\naxs[1,0].set_title(\"Surface reflectance of red band\", fontsize=16)\n\nim4 = axs[1,1].imshow(planet[3,:,:], cmap='jet')\naxs[1,1].set_title(\"Surface reflectance of NIR band\", fontsize=16)\n\ncbar_ax = fig.add_axes([0.95, 0.15, 0.02, 0.7])\nfig.colorbar(im1, cax=cbar_ax)\n\n# read model input features and labels \ndata = pd.read_csv('./data/samples/sample_100K.csv', index_col = False)\nprint(\"Sample dimentions:\".format(), data.shape)\nprint(data.head())\nX = data[['blue','green','red','nir']]\ny = data['label']", | ||
"lang" : "python", | ||
"owner" : "111111", | ||
"confidential" : "FALSE" | ||
},{ | ||
"id" : "4p0kit", | ||
"name" : "hyper_parameter_tuning", | ||
"description" : "python", | ||
"code" : "# prepare data \ndata = pd.read_csv('./data/samples/sample_10K.csv', index_col = False)\nprint(\"Sample dimentions:\".format(), data.shape)\ndata.head()\nX = data[['blue','green','red','nir']]\ny = data['label']\n\n# customize models with different sample sizes\nmodels = get_models_size()\nresults, names = list(), list()\nfor name, model in models.items():\n # evaluate models using k-fold cross-validation\n scores = evaluate_model(model, X, y)\n results.append(scores)\n names.append(name)\n # print the mean and standard deviation of models \n print('>%s Mean Score: %.6f (Score SD: %.6f)' % ('Sample size: ' + str(int(float(name) * 10000)), scores.mean(), scores.std()))\n \n# display model performance \nplt.figure(figsize=(10,5))\nplt.boxplot(results, labels=names, showmeans=True)\nplt.show()\n\n# customize models with different model feature sizes\nmodels = get_models_feature()\n# evaluate the models and store results\nresults, names = list(), list()\nfor name, model in models.items():\n # evaluate models using k-fold cross-validation\n scores = evaluate_model(model, X, y)\n results.append(scores)\n names.append(name)\n # print the mean and standard deviation of models \n # print('>%s %.3f (%.3f)' % (name, scores.mean(), scores.std()))\n print('>%s Mean Score: %.6f (Score SD: %.6f)' % ('Features: ' + name, scores.mean(), scores.std()))\n# display model performance \nplt.boxplot(results, labels=names, showmeans=True)\nplt.show()\n\n# customize models with different tree numbers\nmodels = get_models_tree()\n# evaluate the models and store results\nresults, names = list(), list()\nfor name, model in models.items():\n # evaluate models using k-fold cross-validation\n scores = evaluate_model(model, X, y)\n results.append(scores)\n names.append(name)\n # print the mean and standard deviation of models \n # print('>%s %.3f (%.3f)' % (name, scores.mean(), scores.std()))\n print('>%s Mean Score: %.6f (Score SD: %.6f)' % ('Tree numbers: ' + name, scores.mean(), scores.std()))\n# display model performance \nplt.boxplot(results, labels=names, showmeans=True)\nplt.show()\n\n# customize models with different tree depths\nmodels = get_models_depth()\n# evaluate the models and store results\nresults, names = list(), list()\nfor name, model in models.items():\n # evaluate models using k-fold cross-validation\n scores = evaluate_model(model, X, y)\n results.append(scores)\n names.append(name)\n # print the mean and standard deviation of models \n # print('>%s %.3f (%.3f)' % (name, scores.mean(), scores.std()))\n print('>%s Mean Score: %.6f (Score SD: %.6f)' % ('Tree Depth: ' + name, scores.mean(), scores.std()))\n# display model performance \nplt.figure(figsize=(10,5))\nplt.boxplot(results, labels=names, showmeans=True)\nplt.show()", | ||
"lang" : "python", | ||
"owner" : "111111", | ||
"confidential" : "FALSE" | ||
},{ | ||
"id" : "xcw4h1", | ||
"name" : "model_evaluation", | ||
"description" : "python", | ||
"code" : "dir_aso = './data/ASO/ASO_3M_SD_USCATE_20180528_clip.tif'\nraso = rasterio.open(dir_aso,'r').read()\nraso = np.where(raso[0,:,:] < 0, np.nan, raso)\n\nth = 0.1 # using 10 cm threshold\nraso_binary = np.where(raso >= 0.1, 1, 0) # if the SD is higher than 10 cm, then snow; otherwise, no-snow\n\nfig, axs = plt.subplots(1,2,figsize=(16,6))\nim1 = axs[0].imshow(raso[0,:,:],cmap = 'Blues',vmin = 0, vmax = 5)\naxs[0].set_title('ASO snow depth', fontsize=16)\nfig.colorbar(im1, ax = axs[0], label = 'snow depth (meter)', extend = 'max')\n\nim2 = axs[1].imshow(raso_binary[0,:,:], cmap = 'gray', interpolation = 'none')\naxs[1].set_title('ASO snow cover (TH = 10 cm)', fontsize=16)\n\ndir_model = \"./models/random_forest_SCA_binary.joblib\"\n# load model\nmodel = joblib.load(dir_model)\n\ndf = pd.DataFrame()\ndf['obs'] = y_test\ndf['predict'] = model.predict(X_test)\n\n# Cross-tabulate predictions\nprint(pd.crosstab(df['obs'], df['predict'], margins=True))\nprint(calculate_metrics(df))\n\ndir_raster = './data/planet/20180528_181110_1025_3B_AnalyticMS_SR_clip.tif'\ndir_out = './data/SCA/'\nnodata_flag = 9\nrun_sca_prediction(dir_raster, dir_out, nodata_flag, model)\n\ndir_planet = './data/planet/20180528_181110_1025_3B_AnalyticMS_SR_clip.tif'\nr_na_flag = rasterio.open(dir_planet, 'r').read()\nr_planet = rasterio.open(dir_planet, 'r').read([4,3,2])/10000\n\ndir_sca = './data/SCA/20180528_181110_1025_3B_AnalyticMS_SR_clip_SCA.tif'\nr_sca = rasterio.open(dir_sca, 'r')\n\nfig, (ax1, ax2) = plt.subplots(1,2, figsize = (16,4))\nshow(r_planet, ax= ax1, cmap='jet', interpolation = 'none',title = 'Planet false color Image')\nshow(r_sca.read().squeeze(), ax= ax2, interpolation = 'none',title = 'Planet Snow Cover')\n\ndir_planet_ext = './data/GIS/extent/CATE_20180528_181110_img_ext.shp'\nwith fiona.open(dir_planet_ext, \"r\") as shapefile:\n shapes = [feature[\"geometry\"] for feature in shapefile]\n \ndir_aso = \"./data/ASO/ASO_3M_SD_USCATE_20180528_binary_clip.tif\"\nwith rasterio.open(dir_aso,'r') as src:\n r_aso = rasterio.mask.mask(src, shapes, crop=True, filled = False)\n \ndir_pred = './data/SCA/20180528_181110_1025_3B_AnalyticMS_SR_clip_SCA.tif'\nwith rasterio.open(dir_pred,'r') as src:\n r_predict = rasterio.mask.mask(src, shapes, crop=True, filled = False)\n\ndir_watermask = './data/mask/waterbody_TB_UTM11_clip.tif'\nwith rasterio.open(dir_watermask,'r') as src:\n r_watermask = rasterio.mask.mask(src, shapes, crop=True, filled = False)\n \ndir_glaciermask = './data/mask/02_rgi60_WesternCanadaUS_hypso_TB_clip.tif'\nwith rasterio.open(dir_glaciermask,'r') as src:\n r_glaciermask = rasterio.mask.mask(src, shapes, crop=True, filled = False)\n \n \ndf = pd.DataFrame()\ndf['predict'] = r_predict[0].ravel()\ndf['obs'] = r_aso[0].ravel()\ndf['watermask'] = r_watermask[0].ravel()\ndf['glaciermask'] = r_watermask[0].ravel()\n\n# remove NA data region, water bodies, and glaciers \ndf_mask = df[(df.predict >= 0) & (df.watermask != 0) & (df.glaciermask != 0)]\n# print(df)\n\nprint(\"overall model performance:\")\nprint(calculate_metrics(df_mask))\n\nfile_landcover = './data/ASO/ASO_3M_CHM_USCATB_20140827_binary_clip.tif' # 1 - forest, 0 open area\nwith rasterio.open(file_landcover,'r') as src:\n r_landcover = rasterio.mask.mask(src, shapes, crop=True, filled = False)\n \ndf['landcover'] = r_landcover[0].ravel()\n\ndf_mask = df[(df.predict >= 0) & (df.watermask != 0) & (df.glaciermask != 0)]\ndf_open = df_mask[df_mask.landcover == 0]\nprint(\"Model performance in open areas:\")\nprint(calculate_metrics(df_open))\n\ndf_forest = df_mask[df_mask.landcover == 1]\nprint(\"Model performance in forested areas:\")\nprint(calculate_metrics(df_forest))", | ||
"lang" : "python", | ||
"owner" : "111111", | ||
"confidential" : "FALSE" | ||
},{ | ||
"id" : "8e59lt", | ||
"name" : "modeL-training", | ||
"description" : "python", | ||
"code" : "# read model input features and labels \ndata = pd.read_csv('./data/samples/sample_100K.csv', index_col = False)\nprint(\"Sample dimentions:\".format(), data.shape)\nprint(data.head())\nX = data[['blue','green','red','nir']]\ny = data['label']\n\nX_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.96,random_state=1)\n\n# define the model\nmodel = RandomForestClassifier(n_estimators=10, max_depth=10, max_features=4)\n\n# evaluate the model\ncv = RepeatedStratifiedKFold(n_splits=10, n_repeats=1000)\nn_scores = cross_val_score(model, X_train, y_train, scoring='accuracy', cv=cv, n_jobs=-1)\n# report model performance\nprint('Mean Score: %.6f (SD: %.6f)' % (n_scores.mean(),n_scores.std()))\n\n# the histogram of the scores\nn, bins, patches = plt.hist(n_scores, density=True, facecolor='blue', alpha=0.75)\nplt.text(0.91, 15, r'mean = ' + str(n_scores.mean().round(6)) + ' '+ 'SD = ' + str(n_scores.std().round(6)))\nplt.xlim(0.9, 1.01)\nplt.xlabel('Acuuracy')\nplt.ylabel('Probability (%)')\nplt.grid(True)\nplt.show()\n\nmodel.fit(X_train,y_train)\nresult = permutation_importance(model, X_train, y_train, n_repeats=1000, random_state=42, n_jobs=2)\nprint('Permutation importance - average:'.format(), X_train.columns)\nprint([round(i, 6) for i in result.importances_mean])\n\n# displace feature importance\nfig, ax = plt.subplots(figsize=(6,5))\nax.boxplot(result.importances.T)\nax.set_title(\"Permutation Importances\", fontsize = 16)\nax.set_xticklabels(labels=X_train.columns, fontsize=14)\nplt.show()\n\n# save model \ndir_model = \"./models/random_forest_SCA_binary.joblib\"\njoblib.dump(model, dir_model)", | ||
"lang" : "python", | ||
"owner" : "111111", | ||
"confidential" : "FALSE" | ||
}] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
[] |
Oops, something went wrong.