forked from wizmik12/crowdsourcing-digital-pathology-GPs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract_features.py
74 lines (60 loc) · 3.19 KB
/
extract_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import skimage.io as io
import os
from tqdm import tqdm
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.layers import Input
import numpy as np
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D, AveragePooling2D
from keras import backend as k
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping
from keras.utils import to_categorical
# Build the two feature extractors used by this script.
#
# `vgg_model` is the ImageNet-pretrained VGG16 network without its classifier
# head (include_top=False), fed by a fixed 224x224 RGB input tensor.
input_tensor = Input(shape=(224, 224, 3))
vgg_model = VGG16(weights='imagenet',
                  include_top=False,
                  input_tensor=input_tensor)

# Add a custom pooling layer on top of the VGG output.
# AveragePooling2D downsamples the input along its spatial dimensions (height
# and width) by taking the average value over a window of size `pool_size`
# for each channel of the input.
x = vgg_model.output
x = AveragePooling2D(pool_size=(7, 7))(x)

# Create the final model.
# Keras 2 names these keyword arguments `inputs` / `outputs`; the singular
# `input` / `output` spelling is the Keras 1 form and is not accepted by
# newer Keras releases.
model_final = Model(inputs=vgg_model.input, outputs=x)

# Compile the model.
# NOTE(review): in the visible code this model is only used through
# predict(), so the loss/optimizer settings look unused -- kept unchanged to
# preserve the script's behaviour.
model_final.compile(loss="categorical_crossentropy",
                    optimizer=optimizers.SGD(lr=0.0001, momentum=0.9),
                    metrics=["accuracy"])
def extract_features(path, final_model, vgg_model):
    """
    Extract features from the images under ``path`` and collect their labels
    and file names.

    The tree rooted at ``path`` is traversed with ``os.walk``. Every file
    found is read as an image, and the name of the directory that directly
    contains it is parsed as the image's integer class label.

    Parameters
    ----------
    path : str
        Root directory to walk.
    final_model : model object exposing ``predict``
        Model whose prediction is reshaped to 512 components per image
        (the "pool512" features).
    vgg_model : model object exposing ``predict``
        Model used to compute the un-pooled features.

    Returns
    -------
    images_list : numpy array of the preprocessed images.
    features_vgg : output of ``vgg_model.predict`` on ``images_list``.
    features_vgg_pool512 : output of ``final_model.predict`` reshaped to
        ``(-1, 512)``.
    label_list : one-hot labels with the column for class 0 removed.
    name_list : numpy array of the image file names, in reading order.

    Raises
    ------
    ValueError
        If a directory that contains files does not have an integer name, or
        if no image is found under ``path``.
    """
    images = []
    names = []
    labels = []
    # os.walk yields a 3-tuple (dirpath, dirnames, filenames) for each
    # directory in the tree rooted at `path`, including `path` itself.
    for root, _subdirs, files in os.walk(path):
        if not files:
            # Nothing to read in this directory, so its name does not have to
            # be a label (this is normally the case for `path` itself).
            continue

        # The label is the last component of the directory path. Using
        # os.path instead of splitting on '/' keeps this working with other
        # path separators and with a trailing separator on `path`.
        label_name = os.path.basename(os.path.normpath(root))
        try:
            label = int(label_name)
        except ValueError as err:
            raise ValueError(
                "Directory {!r} contains files but its name is not an "
                "integer class label".format(root)) from err

        for name in tqdm(files):  # tqdm prints a progress bar
            # Read the image and keep only the first three channels
            # (RGB), dropping the alpha channel of RGBA images.
            image = io.imread(os.path.join(root, name))[:, :, :3]
            images.append(preprocess_input(image))
            names.append(name)
            labels.append(label)

    if not images:
        # Fail early with a clear message instead of failing later inside
        # to_categorical / predict on an empty array.
        raise ValueError("No images found under {!r}".format(path))

    images_list = np.array(images)
    name_list = np.array(names)
    label_list = np.array(labels)
    # One-hot encode and drop column 0, so label k ends up in column k - 1.
    # NOTE(review): presumably the label directories are numbered from 1 --
    # confirm against the dataset layout.
    label_list = to_categorical(label_list)[:, 1:]

    features_vgg_pool512 = final_model.predict(images_list)
    features_vgg_pool512 = features_vgg_pool512.reshape(-1, 512)
    features_vgg = vgg_model.predict(images_list, batch_size=8, verbose=1)
    return images_list, features_vgg, features_vgg_pool512, label_list, name_list
# Run the extraction on the training and test image trees.
images_train, X_train, X_train_pool512, y_train, name_list_train = extract_features(
    '../Train_non_experts_simple', model_final, vgg_model)
images_test, X_test, X_test_pool512, y_test, name_list_test = extract_features(
    '../Test', model_final, vgg_model)

# Persist every result with np.save, one file per array, written to the
# current working directory in the order listed here.
for output_name, array in (
        ("X_train", X_train),
        ("X_test", X_test),
        ("X_train_512pool", X_train_pool512),
        ("X_test_512pool", X_test_pool512),
        ("y_train", y_train),
        ("y_test", y_test),
        ("name_list_train", name_list_train),
        ("name_list_test", name_list_test),
):
    np.save(output_name, array)