Showing
15 changed files
with
332 additions
and
0 deletions
final_code/Brixia_Regression.ipynb
0 → 100644
This diff could not be displayed because it is too large.
final_code/DB/1000186638823204855.jpg
0 → 100644
3.93 KB
final_code/DB/10005836788378209022.jpg
0 → 100644
5.76 KB
final_code/DB/10011454155587105152.jpg
0 → 100644
3.83 KB
final_code/DB/10015354220486554048.jpg
0 → 100644
4.17 KB
final_code/DB/10026271850367430724.jpg
0 → 100644
4.17 KB
final_code/DB/10027044307414466695.jpg
0 → 100644
3.91 KB
final_code/DB/10027500604909952472.jpg
0 → 100644
5.19 KB
final_code/DB/10028581328861447555.jpg
0 → 100644
4.1 KB
final_code/DB/10030929591921881379.jpg
0 → 100644
4.16 KB
final_code/DB/10062027240959229488.jpg
0 → 100644
3.9 KB
final_code/cxr_dataset.py
0 → 100644
| 1 | +import pandas as pd | ||
| 2 | +import torch | ||
| 3 | +import numpy as np | ||
| 4 | +from torch.utils.data import Dataset | ||
| 5 | +import os | ||
| 6 | +from PIL import Image | ||
| 7 | + | ||
| 8 | + | ||
class CXRDataset(Dataset):
    """Chest X-ray dataset backed by a label CSV and a directory of images.

    The label CSV and the target returned by ``__getitem__`` depend on the
    ``fine_tune`` / ``regression`` flags:

    * ``fine_tune=False``: ``nih_original_split.csv``; the target is an
      integer vector with one entry per name in ``PRED_LABEL`` (14 findings).
    * ``fine_tune=True, regression=False``:
      ``brixia_split_classification.csv``; the target is an integer vector
      built from the ``Detector01`` / ``Detector2`` / ``Detector3`` columns.
    * ``fine_tune=True, regression=True``: ``brixia_split_regression.csv``;
      the target is the ``BrixiaScoreGlobal`` value as a float32 array.

    Args:
        path_to_images: Directory containing the image files named in the
            ``Image Index`` column.
        fold: Value of the ``fold`` column to keep, or ``'BBox'`` to keep the
            rows that also appear in ``BBox_List_2017.csv``.
        transform: Optional callable applied to the RGB PIL image.
        transform_bb: Optional callable applied to the bounding box
            (``'BBox'`` fold only).
        finding: ``"any"`` for no filtering; otherwise a ``Finding Label``
            value (``fine_tune=False``) or the name of a column that must
            equal 1 (classification fine-tuning). Ignored for regression.
        fine_tune: Selects the Brixia label files instead of the NIH one.
        regression: With ``fine_tune``, selects the regression label file.
        label_path: Directory containing the label CSV files.
    """

    _NIH_LABELS = (
        'Atelectasis',
        'Cardiomegaly',
        'Effusion',
        'Infiltration',
        'Mass',
        'Nodule',
        'Pneumonia',
        'Pneumothorax',
        'Consolidation',
        'Edema',
        'Emphysema',
        'Fibrosis',
        'Pleural_Thickening',
        'Hernia',
    )
    _BRIXIA_LABELS = (
        'Detector01',
        'Detector2',
        'Detector3',
    )

    def __init__(
            self,
            path_to_images,
            fold,
            transform=None,
            transform_bb=None,
            finding="any",
            fine_tune=False,
            regression=False,
            label_path="/content/gdrive/MyDrive/ColabNotebooks/brixia/labels"):

        self.transform = transform
        self.transform_bb = transform_bb
        self.path_to_images = path_to_images
        self.fold = fold
        self.fine_tune = fine_tune
        self.regression = regression

        if not fine_tune:
            split_file = "/nih_original_split.csv"
        elif not regression:
            split_file = "/brixia_split_classification.csv"
        else:
            split_file = "/brixia_split_regression.csv"
        self.df = pd.read_csv(label_path + split_file)

        if fold == 'BBox':
            # Keep only the images that have a bounding-box annotation.
            bbox_images_df = pd.read_csv(label_path + "/BBox_List_2017.csv")
            self.df = pd.merge(left=self.df, right=bbox_images_df, how="inner", on="Image Index")
        else:
            self.df = self.df[self.df['fold'] == fold]

        self.PRED_LABEL = list(self._BRIXIA_LABELS if self.fine_tune else self._NIH_LABELS)

        # Optional filter for positive findings of one kind; useful for evaluation.
        if finding != "any":
            if not fine_tune:
                self.df = self.df[self.df['Finding Label'] == finding]
            elif not regression:
                self.df = self.df[self.df[finding] == 1]

        self.df = self.df.set_index("Image Index")

    def __len__(self):
        """Return the number of rows (images) left after filtering."""
        return len(self.df)

    def _load_target(self, idx):
        """Build the target for row ``idx`` according to the dataset mode."""
        if not self.fine_tune:
            target = np.zeros(len(self.PRED_LABEL), dtype=int)
            for i, name in enumerate(self.PRED_LABEL):
                value = self.df[name.strip()].iloc[idx].astype('int')
                # Non-positive values stay zero; positive values are copied.
                if value > 0:
                    target[i] = value
            return target

        if not self.regression:
            target = np.zeros(len(self.PRED_LABEL), dtype=int)
            for i, name in enumerate(self.PRED_LABEL):
                target[i] = self.df[name].iloc[idx]
            return target

        return np.array(self.df['BrixiaScoreGlobal'].iloc[idx].astype('float32'))

    def __getitem__(self, idx):
        """Return the sample at position ``idx``.

        Returns:
            ``(image, target, image_name)``, or
            ``(image, target, image_name, bounding_box)`` when the fold is
            ``"BBox"``. The bounding box is passed through ``transform_bb``
            when one was given and returned unchanged otherwise.
        """
        image_name = self.df.index[idx]
        # The context manager releases the file handle as soon as the pixel
        # data has been converted.
        with Image.open(os.path.join(self.path_to_images, image_name)) as raw_image:
            image = raw_image.convert('RGB')

        target = self._load_target(idx)

        if self.transform:
            image = self.transform(image)

        if self.fold == "BBox":
            # Bounding box values are read positionally from the merged frame.
            # NOTE(review): assumes columns -7:-3 hold x, y, w, h -- confirm
            # against the layout of BBox_List_2017.csv.
            bounding_box = self.df.iloc[idx, -7:-3].to_numpy()
            if self.transform_bb:
                bounding_box = self.transform_bb(bounding_box)
            return image, target, image_name, bounding_box

        return image, target, image_name

    def pos_neg_balance_weights(self):
        """Return the negatives/positives ratio for every label in ``PRED_LABEL``.

        Returns:
            A float32 tensor with one ratio per label, placed on the current
            CUDA device when CUDA is available and on the CPU otherwise.

        Raises:
            ZeroDivisionError: If a label has no rows equal to 1.
        """
        pos_neg_weights = []

        for name in self.PRED_LABEL:
            column = self.df[name.strip()]
            num_negatives = int((column == 0).sum())
            num_positives = int((column == 1).sum())
            if num_positives == 0:
                raise ZeroDivisionError(
                    "no positive samples for label {!r}; cannot compute "
                    "pos/neg weight".format(name))
            pos_neg_weights.append(num_negatives / num_positives)

        target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        return torch.tensor(pos_neg_weights, dtype=torch.float32, device=target_device)
| 127 | + | ||
| 128 | + | ||
class RescaleBB(object):
    """Rescale a 4-element bounding box to match a resized image.

    All four values are multiplied by the single factor
    ``output_image_size / original_image_size``, so the same scale is applied
    to every coordinate.

    Args:
        output_image_size (int): Size of the image after resizing.
        original_image_size (int or float): Size of the image the bounding
            box values refer to.

    Raises:
        AssertionError: If ``output_image_size`` is not an ``int``.
    """

    def __init__(self, output_image_size, original_image_size):
        # Raised explicitly (instead of an ``assert`` statement) so the check
        # still runs under ``python -O``; the exception type is unchanged.
        if not isinstance(output_image_size, int):
            raise AssertionError("output_image_size must be an int")
        self.output_image_size = output_image_size
        self.original_image_size = original_image_size

    def __call__(self, sample):
        """Return the rescaled box as a new NumPy array of shape ``(4,)``.

        Args:
            sample: Four values ``x, y, w, h`` as a NumPy array or any other
                4-element sequence.

        Raises:
            AssertionError: If ``sample`` does not have shape ``(4,)``.
            ZeroDivisionError: If ``original_image_size`` is zero.
        """
        box = np.asarray(sample)
        if box.shape != (4,):
            raise AssertionError(
                "expected a bounding box of shape (4,), got {}".format(box.shape))
        if box.dtype == object:
            # Rows pulled out of a mixed-type DataFrame arrive as object
            # arrays; convert so the result is a numeric array.
            box = box.astype(np.float64)

        scale_factor = self.output_image_size / self.original_image_size
        return box * scale_factor
| 150 | + | ||
class BrixiaScoreLocal:
    """Look up the six-digit ``BrixiaScore`` of an image by file name.

    Args:
        label_path: Directory containing ``metadata_global_v2.csv``
            (semicolon separated, indexed here by its ``Filename`` column).
    """

    def __init__(self, label_path):
        self.data_brixia = pd.read_csv(label_path + "/metadata_global_v2.csv", sep=";")
        self.data_brixia.set_index("Filename", inplace=True)

    def getScore(self, filename, print_score=False):
        """Return the six digits of the ``BrixiaScore`` for ``filename``.

        Args:
            filename: Image file name; ``".jpg"`` is replaced with ``".dcm"``
                to form the lookup key in the metadata table.
            print_score: When true, also print the digits as a 3x2 grid with
                digits 0-2 in the left column and digits 3-5 in the right.

        Returns:
            A list of ints, one per character of the score after it has been
            left-padded with zeros to six characters.

        Raises:
            KeyError: If the file is not present in the metadata table.
        """
        raw_score = self.data_brixia.loc[filename.replace(".jpg", ".dcm"), "BrixiaScore"]
        # ``str()`` works whether pandas parsed the column as integers (which
        # drops leading zeros) or kept it as text; ``zfill`` restores the
        # leading zeros.
        score = str(raw_score).zfill(6)
        if print_score:
            print('Brixia 6 regions Score: ')
            print(score[0], ' | ', score[3])
            print(score[1], ' | ', score[4])
            print(score[2], ' | ', score[5])
        return [int(digit) for digit in score]
| 165 | + | ||
| 166 | + |
final_code/eval_model.py
0 → 100644
| 1 | +import torch | ||
| 2 | +import pandas as pd | ||
| 3 | +import cxr_dataset as CXR | ||
| 4 | +from torch.utils.data import Dataset, DataLoader | ||
| 5 | +import sklearn.metrics as sklm | ||
| 6 | +import numpy as np | ||
| 7 | + | ||
| 8 | + | ||
# Device used by the evaluation helpers below: the first CUDA GPU when one is
# available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
| 10 | + | ||
| 11 | + | ||
def make_pred_multilabel(dataloader, model, save_as_csv=False, fine_tune=False):
    """Run ``model`` over ``dataloader`` and compute per-label AUC and AP.

    Args:
        dataloader: Yields batches whose first three items are
            ``(inputs, labels, image_names)``. ``dataloader.dataset`` must
            expose ``PRED_LABEL``, the list of label names in output order.
        model: Network whose raw outputs are passed through a sigmoid to get
            per-label probabilities.
        save_as_csv: When true, write ``results/preds.csv`` and
            ``results/aucs.csv`` (the directory is created if missing).
        fine_tune: Selects which label names are scored: the three
            ``Detector*`` labels when true, the 14 NIH findings otherwise.

    Returns:
        pred_df: One row per image with ``Image Index`` and a ``prob_<label>``
            column for every label.
        auc_df: One row per scored label with ``label``, ``auc`` and ``AP``;
            a metric that could not be computed is left as NaN.
    """
    if fine_tune:
        scored_labels = (
            'Detector01',
            'Detector2',
            'Detector3',
        )
    else:
        scored_labels = (
            'Atelectasis',
            'Cardiomegaly',
            'Effusion',
            'Infiltration',
            'Mass',
            'Nodule',
            'Pneumonia',
            'Pneumothorax',
            'Consolidation',
            'Edema',
            'Emphysema',
            'Fibrosis',
            'Pleural_Thickening',
            'Hernia',
        )

    # set model to eval mode; required for proper predictions given use of batchnorm
    model.train(False)

    label_names = list(dataloader.dataset.PRED_LABEL)
    pred_rows = []
    true_rows = []

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for batch in dataloader:
            # Use the image names delivered with the batch rather than
            # recomputing them from the batch position, so rows stay correctly
            # labelled when the loader shuffles.
            inputs, labels, image_names = batch[0], batch[1], batch[2]

            true_labels = labels.cpu().numpy()
            outputs = torch.sigmoid(model(inputs.to(device)))
            probs = outputs.detach().cpu().numpy()

            for j in range(true_labels.shape[0]):
                pred_row = {"Image Index": image_names[j]}
                true_row = {"Image Index": image_names[j]}
                # Each entry of the prediction vector corresponds to one label.
                for k, name in enumerate(label_names):
                    pred_row["prob_" + name] = probs[j, k]
                    true_row[name] = true_labels[j, k]
                pred_rows.append(pred_row)
                true_rows.append(true_row)

    # Build each frame once; DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every call.
    pred_df = pd.DataFrame(
        pred_rows, columns=["Image Index"] + ["prob_" + name for name in label_names])
    true_df = pd.DataFrame(true_rows, columns=["Image Index"] + label_names)

    auc_rows = []
    for column in true_df:
        if column not in scored_labels:
            continue
        row = {'label': column, 'auc': np.nan, 'AP': np.nan}
        try:
            actual = true_df[column].to_numpy().astype(int)
            pred = pred_df["prob_" + column].to_numpy()
            row['auc'] = sklm.roc_auc_score(actual, pred)
            row['AP'] = sklm.average_precision_score(actual, pred)
        except Exception:
            # Best effort: e.g. a label with a single class present has no
            # defined AUC. KeyboardInterrupt/SystemExit are not swallowed.
            print("can't calculate auc for " + str(column))
        auc_rows.append(row)
    auc_df = pd.DataFrame(auc_rows, columns=["label", "auc", "AP"])

    if save_as_csv:
        import os

        os.makedirs("results", exist_ok=True)
        pred_df.to_csv("results/preds.csv", index=False)
        auc_df.to_csv("results/aucs.csv", index=False)

    return pred_df, auc_df
| 111 | + | ||
| 112 | + | ||
def evaluate_mae(dataloader, model):
    """Run ``model`` over ``dataloader`` and compute the mean absolute error.

    Args:
        dataloader: Yields batches whose first three items are
            ``(inputs, ground_truths, image_names)``.
        model: Regression network; its raw outputs are used as predictions.

    Returns:
        A ``(mae, true_df, pred_df)`` tuple. ``true_df`` holds ``Image Index``
        and ``true_score``; ``pred_df`` holds ``Image Index`` and
        ``pred_score`` (the model output for that image, stored as returned).
        If the metric cannot be computed a message is printed and ``mae`` is
        NaN, so the tuple can always be unpacked.
    """
    # set model to eval mode; required for proper predictions given use of batchnorm
    model.train(False)

    pred_rows = []
    true_rows = []

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for batch in dataloader:
            # Use the image names delivered with the batch rather than
            # recomputing them from the batch position, so rows stay correctly
            # labelled when the loader shuffles.
            inputs, ground_truths, image_names = batch[0], batch[1], batch[2]

            true_scores = ground_truths.cpu().numpy()
            preds = model(inputs.to(device)).detach().cpu().numpy()

            for j in range(true_scores.shape[0]):
                pred_rows.append({"Image Index": image_names[j], "pred_score": preds[j]})
                true_rows.append({"Image Index": image_names[j], "true_score": true_scores[j]})

    # Build each frame once; DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every call.
    pred_df = pd.DataFrame(pred_rows, columns=["Image Index", "pred_score"])
    true_df = pd.DataFrame(true_rows, columns=["Image Index", "true_score"])

    mae = np.nan
    try:
        # Flatten to plain 1-D float arrays: per-sample predictions may be
        # stored as 1-element arrays, and true scores are kept as floats
        # instead of being truncated to int.
        # NOTE(review): assumes one predicted value per image -- confirm
        # against the model head.
        actual = np.asarray(true_df["true_score"].tolist(), dtype=float).reshape(-1)
        pred = np.asarray(pred_df["pred_score"].tolist(), dtype=float).reshape(-1)
        mae = sklm.mean_absolute_error(actual, pred)
    except Exception:
        # Best effort, but KeyboardInterrupt/SystemExit are not swallowed.
        print("can't calculate mae")

    return mae, true_df, pred_df
| 166 | + |
final_code/model.py
0 → 100644
This diff is collapsed. Click to expand it.
final_code/model_l1.py
0 → 100644
This diff is collapsed. Click to expand it.
-
Please register or login to post a comment