diff --git a/configs/exp_resnet18.yaml b/configs/exp_resnet18.yaml
index c88854f..4499086 100644
--- a/configs/exp_resnet18.yaml
+++ b/configs/exp_resnet18.yaml
@@ -3,15 +3,16 @@
 # In Python, this is all read as a dict.
 
 # environment/computational parameters
-device: CUDA
+device: cuda
 num_workers: 4
 
 # dataset parameters
-data_root: /path/to/dataset
-num_classes: 32
+data_root: datasets/CaltechCT
+num_classes: 16
 
 # training hyperparameters
+image_size: [224, 224]
 num_epochs: 200
 batch_size: 128
-learning_rate: 1e-3
-weight_decay: 1e-3
\ No newline at end of file
+learning_rate: 0.001
+weight_decay: 0.001
\ No newline at end of file
diff --git a/ct_classifier/dataset.py b/ct_classifier/dataset.py
index 4a4593a..010a613 100644
--- a/ct_classifier/dataset.py
+++ b/ct_classifier/dataset.py
@@ -15,20 +15,23 @@
 import os
 import json
 from torch.utils.data import Dataset
-from torchvision.transforms import ToTensor
+from torchvision.transforms import Compose, Resize, ToTensor
 from PIL import Image
 
 
 class CTDataset(Dataset):
 
-    def __init__(self, data_root, split='train'):
+    def __init__(self, cfg, split='train'):
         '''
             Constructor. Here, we collect and index the dataset inputs and labels.
         '''
 
-        self.data_root = data_root
+        self.data_root = cfg['data_root']
         self.split = split
-        self.transform = ToTensor()
+        self.transform = Compose([          # Transforms. Here's where we could add data augmentation (see Björn's lecture on August 11).
+            Resize((cfg['image_size'])),    # For now, we just resize the images to the same dimensions...
+            ToTensor()                      # ...and convert them to torch.Tensor.
+        ])
 
         # index data into list
         self.data = []
@@ -40,6 +43,23 @@ def __init__(self, data_root, split='train'):
             'train_annotations.json' if self.split=='train' else 'cis_val_annotations.json'
         )
         meta = json.load(open(annoPath, 'r'))
+
+        images = dict([[i['id'], i['file_name']] for i in meta['images']])          # image id to filename lookup
+        labels = dict([[c['id'], idx] for idx, c in enumerate(meta['categories'])]) # custom labelclass indices that start at zero
+
+        # since we're doing classification, we're just taking the first annotation per image and dropping the rest
+        images_covered = set()      # all those images for which we have already assigned a label
+        for anno in meta['annotations']:
+            imgID = anno['image_id']
+            if imgID in images_covered:
+                continue
+
+            # append image-label tuple to data
+            imgFileName = images[imgID]
+            label = anno['category_id']
+            labelIndex = labels[label]
+            self.data.append([imgFileName, labelIndex])
+            images_covered.add(imgID)       # make sure image is only added once to dataset
 
 
     def __len__(self):
@@ -54,17 +74,13 @@ def __getitem__(self, idx):
             Returns a single data point at given idx.
             Here's where we actually load the image.
         '''
-        image_name, label = self.data[idx]
+        image_name, label = self.data[idx]              # see line 57 above where we added these two items to the self.data list
 
         # load image
-        image_path = os.path.join(self.data_root, image_path)
-        img = Image.open(image_path)
+        image_path = os.path.join(self.data_root, 'eccv_18_all_images_sm', image_name)
+        img = Image.open(image_path).convert('RGB')     # the ".convert" makes sure we always get three bands in Red, Green, Blue order
 
-        # transform: convert to torch.Tensor
-        # here's where we could do data augmentation:
-        # https://pytorch.org/vision/stable/transforms.html
-        # see Björn's lecture on Thursday, August 11.
-        # For now, we only convert the image to torch.Tensor
+        # transform: see lines 31ff above where we define our transformations
         img_tensor = self.transform(img)
 
         return img_tensor, label
\ No newline at end of file
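A quick way to sanity-check the reworked, cfg-driven `CTDataset` is to index the training split and pull one sample. This snippet is illustrative and not part of the patch; it assumes the dataset has already been downloaded, that it is run from inside `ct_classifier/`, and that the relative config path matches your checkout:

```python
# Sanity check for the cfg-driven CTDataset (sketch; not part of this patch).
# Assumes working directory ct_classifier/ and a downloaded dataset.
import yaml
from dataset import CTDataset

cfg = yaml.safe_load(open('../configs/exp_resnet18.yaml', 'r'))
dataset = CTDataset(cfg, split='train')

print(len(dataset))                 # number of indexed image-label pairs
img_tensor, label = dataset[0]      # triggers __getitem__: load, resize, convert to tensor
print(img_tensor.shape, label)      # expected: torch.Size([3, 224, 224]) and a class index in [0, 16)
```

If the tensor comes out with anything other than three bands, the `.convert('RGB')` fix above is what addresses it.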
diff --git a/ct_classifier/model.py b/ct_classifier/model.py
index d4d876e..3bf5246 100644
--- a/ct_classifier/model.py
+++ b/ct_classifier/model.py
@@ -23,10 +23,10 @@ def __init__(self, num_classes):
         # replace the very last layer from the original, 1000-class output
         # ImageNet to a new one that outputs num_classes
         last_layer = self.feature_extractor.fc                    # tip: print(self.feature_extractor) to get info on how model is set up
-        num_features = last_layer.num_features
+        in_features = last_layer.in_features                      # number of input dimensions to last (classifier) layer
         self.feature_extractor.fc = nn.Identity()                 # discard last layer...
 
-        self.classifier = nn.Linear(num_features, num_classes)    # ...and create a new one
+        self.classifier = nn.Linear(in_features, num_classes)     # ...and create a new one
 
 
     def forward(self, x):
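The `in_features` fix is easy to verify in an interpreter: `nn.Linear` exposes its input dimensionality as `in_features` (and its output as `out_features`); there is no `num_features` attribute, which is why the old line failed. A minimal sketch (the `pretrained` flag matches the torchvision API this repo was written against; newer releases prefer the `weights` argument):

```python
# Minimal check of the attribute fix (sketch, not part of the patch).
from torchvision.models import resnet18

model = resnet18(pretrained=True)      # newer torchvision: resnet18(weights='IMAGENET1K_V1')
print(model.fc)                        # Linear(in_features=512, out_features=1000, bias=True)
print(model.fc.in_features)            # 512: the input size the new classifier head needs
# model.fc.num_features                # AttributeError: 'Linear' object has no attribute 'num_features'
```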
diff --git a/ct_classifier/train.py b/ct_classifier/train.py
index 0501b7d..fbaa258 100644
--- a/ct_classifier/train.py
+++ b/ct_classifier/train.py
@@ -5,6 +5,7 @@
     2022 Benjamin Kellenberger
 '''
 
+import os
 import argparse
 import yaml
 import glob
@@ -16,8 +17,8 @@
 from torch.optim import SGD
 
 # let's import our own classes and functions!
-from ct_classifier.dataset import CTDataset
-from ct_classifier.model import CustomResNet18
 
 
 
+from dataset import CTDataset
+from model import CustomResNet18
@@ -26,7 +27,7 @@ def create_dataloader(cfg, split='train'):
         Loads a dataset according to the provided split and wraps it in a
         PyTorch DataLoader object.
     '''
-    dataset_instance = CTDataset(cfg['data_root'], split)      # create an object instance of our CTDataset class
+    dataset_instance = CTDataset(cfg, split)                   # create an object instance of our CTDataset class
 
     dataLoader = DataLoader(
         dataset=dataset_instance,
@@ -66,6 +67,9 @@ def load_model(cfg):
 
 
 def save_model(epoch, model, stats):
+    # make sure save directory exists; create if not
+    os.makedirs('model_states', exist_ok=True)
+
     # get model parameters and add to stats...
     stats['model'] = model.state_dict()
 
@@ -100,11 +104,11 @@ def train(cfg, dataLoader, model, optimizer):
     criterion = nn.CrossEntropyLoss()
 
     # running averages
-    loss_total, oa_total = 0.0, 0.0    # for now, we just log the loss and overall accuracy (OA)
+    loss_total, oa_total = 0.0, 0.0         # for now, we just log the loss and overall accuracy (OA)
 
     # iterate over dataLoader
     progressBar = trange(len(dataLoader))
-    for idx, (data, labels) in enumerate(dataLoader):
+    for idx, (data, labels) in enumerate(dataLoader):       # see the last line of file "dataset.py" where we return the image tensor (data) and label
 
         # put data and labels on device
         data, labels = data.to(device), labels.to(device)
@@ -125,18 +129,19 @@ def train(cfg, dataLoader, model, optimizer):
         optimizer.step()
 
         # log statistics
-        loss_total += loss.item()      # the .item() command retrieves the value of a single-valued tensor, regardless of its data type and device of tensor
+        loss_total += loss.item()           # the .item() command retrieves the value of a single-valued tensor, regardless of its data type and device
 
-        pred_label = torch.argmax(prediction)               # the predicted label is the one at position (class index) with highest predicted value
+        pred_label = torch.argmax(prediction, dim=1)        # the predicted label is the one at position (class index) with highest predicted value
         oa = torch.mean((pred_label == labels).float())     # OA: number of correct predictions divided by batch size (i.e., average/mean)
         oa_total += oa.item()
 
         progressBar.set_description(
-            '[Train] Loss: {:.2f}; OA: {:.2f}'.format(
+            '[Train] Loss: {:.2f}; OA: {:.2f}%'.format(
                 loss_total/(idx+1),
-                oa_total/(idx+1)
+                100*oa_total/(idx+1)
             )
         )
+        progressBar.update(1)
 
     # end of epoch; finalize
     progressBar.close()
@@ -179,16 +184,17 @@ def validate(cfg, dataLoader, model):
             # log statistics
             loss_total += loss.item()
 
-            pred_label = torch.argmax(prediction)
+            pred_label = torch.argmax(prediction, dim=1)
             oa = torch.mean((pred_label == labels).float())
             oa_total += oa.item()
 
             progressBar.set_description(
-                '[Val ] Loss: {:.2f}; OA: {:.2f}'.format(
+                '[Val ] Loss: {:.2f}; OA: {:.2f}%'.format(
                     loss_total/(idx+1),
-                    oa_total/(idx+1)
+                    100*oa_total/(idx+1)
                 )
             )
+            progressBar.update(1)
 
     # end of epoch; finalize
     progressBar.close()
diff --git a/license b/license
index d1ca00f..0b9bdb3 100644
--- a/license
+++ b/license
@@ -1,6 +1,6 @@
     MIT License
 
-    Copyright (c) Microsoft Corporation. All rights reserved.
+    Copyright (c) ECEO, EPFL. All rights reserved.
 
     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to deal
diff --git a/readme.md b/readme.md
index ba76e07..6131840 100644
--- a/readme.md
+++ b/readme.md
@@ -17,8 +17,10 @@ pip install -r requirements.txt
 
 3. Download dataset
 
+**NOTE:** Requires the [azcopy CLI](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10) to be installed and set up on your machine.
+
 ```bash
-./scripts/download_dataset.sh
+sh scripts/download_dataset.sh
 ```
 
 This downloads the [CCT20](https://lila.science/datasets/caltech-camera-traps) subset to the `datasets/CaltechCT` folder.
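One note on the `torch.argmax(prediction, dim=1)` fix in `train()` and its twin in `validate()`: without `dim=1`, argmax flattens the entire `[batch_size, num_classes]` logits tensor and returns a single scalar index, so the subsequent comparison with `labels` silently broadcasts that scalar and the reported OA is meaningless. A small illustration with made-up logits (batch of two, three classes):

```python
# Why dim=1 matters for per-sample predictions (illustrative values only).
import torch

prediction = torch.tensor([[0.1, 2.0, 0.3],    # logits for sample 0
                           [1.5, 0.2, 0.1]])   # logits for sample 1
labels = torch.tensor([1, 0])

print(torch.argmax(prediction))                # tensor(1): index into the *flattened* tensor
print(torch.argmax(prediction, dim=1))         # tensor([1, 0]): class index per sample

pred_label = torch.argmax(prediction, dim=1)
oa = torch.mean((pred_label == labels).float())
print(oa)                                      # tensor(1.): both samples correct
```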