YOLO Detection of Lesions in ISIC Dataset #188

Open
wants to merge 14 commits into base: topic-recognition
19 changes: 2 additions & 17 deletions README.md
@@ -1,19 +1,4 @@
# Pattern Analysis
Pattern Analysis of various datasets by COMP3710 students in 2024 at the University of Queensland.

We create a pattern recognition and image processing library for TensorFlow (TF), PyTorch or JAX.
# Please see the updated pull request for the implemented feedback! All details are under Topic_Recognition
https://github.com/shakes76/PatternAnalysis-2024/pull/193#issue-2650661510

This library is created and maintained by The University of Queensland [COMP3710](https://my.uq.edu.au/programs-courses/course.html?course_code=comp3710) students.

The library includes the following implemented in TensorFlow:
* fractals
* recognition problems

In the recognition folder, you will find many recognition problems solved including:
* segmentation
* classification
* graph neural networks
* StyleGAN
* Stable diffusion
* transformers, etc.
76 changes: 76 additions & 0 deletions dataset.py
@@ -0,0 +1,76 @@
import torch
from torch.utils.data import Dataset
import pandas as pd
import os
import cv2
import numpy as np

class ISICDataset(Dataset):
    """Custom Dataset class for YOLO model with ISIC data."""

    def __init__(self, image_dir, mask_dir, labels_path, image_size):
        self.image_size = image_size
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.labels = pd.read_csv(labels_path)

        # Load all image file names in the directory
        self.image_files = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]
        self.samples = [self._process_sample(i) for i in range(len(self.image_files))]

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        return self.samples[idx]

    def _process_sample(self, idx):
        """Helper function to process and return a single sample (image and target vector)."""
        # Load image and mask
        image = self._load_image(idx)
        mask = self._load_mask(idx)

        # Resize image and mask to the target size
        image = cv2.resize(image, (self.image_size, self.image_size)).astype(np.float32) / 255.0
        mask = cv2.resize(mask, (self.image_size, self.image_size))

        # Obtain bounding box coordinates from the mask
        x, y, w, h = self._extract_bounding_box(mask)

        # Retrieve label probabilities
        label1, label2 = self.labels.iloc[idx, 1:3]
        total_prob = label1 + label2

        # Create target vector: (centre x, centre y, width, height, objectness, class 1, class 2)
        target_vector = np.array(
            [x + w / 2, y + h / 2, w, h, total_prob, label1, label2],
            dtype=np.float32
        )

        # Convert image to tensor format (C, H, W)
        image_tensor = torch.tensor(image.transpose(2, 0, 1), dtype=torch.float32)
        target_tensor = torch.tensor(target_vector, dtype=torch.float32)

        return image_tensor, target_tensor

    def _load_image(self, idx):
        """Loads an image given an index."""
        img_name = os.path.join(self.image_dir, self.image_files[idx])
        return cv2.imread(img_name)

    def _load_mask(self, idx):
        """Loads the mask corresponding to the image at the given index."""
        mask_name = os.path.join(
            self.mask_dir, self.image_files[idx].replace('.jpg', '_segmentation.png')
        )
        return cv2.imread(mask_name, cv2.IMREAD_GRAYSCALE)
    def _extract_bounding_box(self, mask):
        """Extracts the bounding box from the mask image."""
        _, thresh = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        if contours:
            # Use the largest contour so stray specks in the mask cannot displace the box
            largest = max(contours, key=cv2.contourArea)
            x, y, w, h = cv2.boundingRect(largest)
            return x, y, w, h
        return 0, 0, 0, 0  # Return zero box if no contours are found
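
For reference, a minimal usage sketch of this dataset class (not part of the diff; the directory paths and CSV name are placeholders, assuming the usual ISIC layout of .jpg images, *_segmentation.png masks and a per-image probability CSV):

# Hypothetical usage sketch: paths below are placeholders, not from this PR
from torch.utils.data import DataLoader

dataset = ISICDataset(
    image_dir="data/ISIC/images",        # folder of *.jpg lesion images
    mask_dir="data/ISIC/masks",          # folder of *_segmentation.png masks
    labels_path="data/ISIC/labels.csv",  # CSV: image id, class 1 prob, class 2 prob
    image_size=416,
)
loader = DataLoader(dataset, batch_size=8, shuffle=True)
images, targets = next(iter(loader))
print(images.shape)   # torch.Size([8, 3, 416, 416])
print(targets.shape)  # torch.Size([8, 7])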
237 changes: 237 additions & 0 deletions modules.py
@@ -0,0 +1,237 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("cuda" if torch.cuda.is_available() else "cpu")
class YOLO(nn.Module):

    # REFERENCE: yolov3-tiny.cfg from https://github.com/pjreddie/darknet/blob/master/cfg
    # Used as the basis for which layers were needed
    def __init__(self, num_classes):
        super(YOLO, self).__init__()
        self.num_classes = num_classes
        layers = []
        filters = [16, 32, 64, 128, 256, 512]
        in_channels = 3
        # Convolution blocks: conv -> batch norm -> leaky ReLU -> max pooling
        for i in filters:
            layers.append(nn.Conv2d(in_channels, i, kernel_size=3, stride=1, padding=1, bias=False))
            in_channels = i
            layers.append(nn.BatchNorm2d(i))
            layers.append(nn.LeakyReLU(0.1, True))  # inplace=True saves memory
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        layers.append(nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1, bias=False))
        layers.append(nn.BatchNorm2d(1024))
        layers.append(nn.LeakyReLU(0.1, True))

        # Note: padding=1 on the 1x1 convolutions below enlarges the feature map;
        # darknet's pad=1 flag means pad = size // 2, which is 0 for a 1x1 kernel
        layers.append(nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=1, bias=False))
        layers.append(nn.BatchNorm2d(256))
        layers.append(nn.LeakyReLU(0.1, True))

        layers.append(nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1, bias=False))
        layers.append(nn.BatchNorm2d(512))
        layers.append(nn.LeakyReLU(0.1, True))

        layers.append(nn.Conv2d(512, 255, kernel_size=1, stride=1, padding=1, bias=True))
        self.conv_start = nn.Sequential(*layers)

        # Detection layer - anchors taken from yolov3-tiny.cfg, in pixels for 416x416 inputs
        self.anchor1 = [(81, 82), (135, 169), (344, 319)]

        # A route layer from yolov3-tiny could go here
        self.conv_mid = nn.Sequential(
            nn.Conv2d(255, 128, kernel_size=1, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.1, True),
            nn.Upsample(scale_factor=2, mode="bilinear"))
        # Another route layer could go here
        self.conv_end = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.1, True),
            nn.Conv2d(256, 255, kernel_size=1, stride=1, padding=1, bias=True))

        # Second detection layer
        self.anchor2 = [(10, 14), (23, 27), (37, 58)]

    def forward(self, x):
        out = self.conv_start(x)
        # .data detaches the tensor from autograd, so gradients cannot flow back
        # through the backbone; end-to-end training would require removing these calls
        out = out.data
        a = self.predict_transform(out, 416, self.anchor1, self.num_classes)
        out = self.conv_mid(out)
        out = self.conv_end(out)
        out = out.data
        b = self.predict_transform(out, 416, self.anchor2, self.num_classes)
        return torch.cat((a, b), 1)

    def predict_transform(self, prediction, inp_dim, anchors, num_classes):
        """
        Decodes the output from the convolution layers and arranges the information into a usable format.
        The below reference was used as a base for this function.
        REFERENCE: refer to reference 2 in README.
        """
        batch_size = prediction.size(0)
        stride = inp_dim // prediction.size(2)
        grid_size = inp_dim // stride
        bbox_attrs = 5 + num_classes
        num_anchors = len(anchors)

        # Rearrange the feature map to (batch_size, number of boxes, box attributes)
        prediction = prediction.view(batch_size, bbox_attrs * num_anchors, grid_size * grid_size)
        prediction = prediction.transpose(1, 2).contiguous()
        prediction = prediction.view(batch_size, grid_size * grid_size * num_anchors, bbox_attrs)
        anchors = [(a[0] / stride, a[1] / stride) for a in anchors]

        # Squash the centre x, centre y and object confidence to between 0 and 1
        prediction[:, :, 0] = torch.sigmoid(prediction[:, :, 0])
        prediction[:, :, 1] = torch.sigmoid(prediction[:, :, 1])
        prediction[:, :, 4] = torch.sigmoid(prediction[:, :, 4])

        # Add the centre offsets of each grid cell
        grid = np.arange(grid_size)
        a, b = np.meshgrid(grid, grid)

        x_offset = torch.FloatTensor(a).view(-1, 1)
        y_offset = torch.FloatTensor(b).view(-1, 1)

        x_offset = x_offset.to(device)
        y_offset = y_offset.to(device)

        x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1, num_anchors).view(-1, 2).unsqueeze(0)

        prediction[:, :, :2] += x_y_offset

        # Log-space transform of the height and width
        # so that all boxes are on the same scale
        anchors = torch.FloatTensor(anchors)
        anchors = anchors.to(device)

        anchors = anchors.repeat(grid_size * grid_size, 1).unsqueeze(0)
        prediction[:, :, 2:4] = torch.exp(prediction[:, :, 2:4]) * anchors

        # Arrange the probabilities of the classes
        prediction[:, :, 5: 5 + num_classes] = torch.sigmoid(prediction[:, :, 5: 5 + num_classes])

        # Scale the box coordinates back up to input-image pixels
        prediction[:, :, :4] *= stride
        return prediction


def calculate_iou(pred, label):
    """
    Calculates the IoUs of a given list of boxes.
    Used to determine the accuracy of the predicted bounding boxes,
    and is a key part of the loss function.
    """
    px, py, pw, ph = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
    lx, ly, lw, lh = label[0], label[1], label[2], label[3]
    box_a = [px - (pw / 2), py - (ph / 2), px + (pw / 2), py + (ph / 2)]
    box_b = [lx - (lw / 2), ly - (lh / 2), lx + (lw / 2), ly + (lh / 2)]

    # determine the (x, y) corners of the intersection area
    ax = torch.clamp(box_a[0], min=box_b[0])
    ay = torch.clamp(box_a[1], min=box_b[1])
    bx = torch.clamp(box_a[2], max=box_b[2])
    by = torch.clamp(box_a[3], max=box_b[3])

    # compute the area of intersection
    intersect = torch.abs(torch.clamp((bx - ax), min=0) * torch.clamp((by - ay), min=0))

    # compute the area of both the prediction and ground-truth boxes
    area_a = torch.abs((box_a[2] - box_a[0]) * (box_a[3] - box_a[1]))
    area_b = torch.abs((box_b[2] - box_b[0]) * (box_b[3] - box_b[1]))

    # compute the IoU
    iou = intersect / (area_a + area_b - intersect)
    # 2328 predicted boxes, grouped as 776 rows of 3 (hard-coded for 416x416 input)
    iou = torch.reshape(iou, (776, 3))
    return iou

class YOLO_loss(nn.Module):
    """
    Given one image's predictions at a time, the loss is calculated.
    The formulas used to calculate loss are from the reference below.
    REFERENCE: refer to reference 3 in README.
    """
    def __init__(self):
        super(YOLO_loss, self).__init__()

    def forward(self, pred, label):
        # Constants
        no_object = 0.5  # Puts less emphasis on loss from boxes with no object

        # Rearrange predictions so each row holds the 3 anchor boxes of one cell
        boxes = torch.reshape(pred, (776, 3, -1))

        # IoU of every predicted box against the label box
        iou = calculate_iou(pred, label)
        iou, best_boxes = torch.max(iou, dim=1)

        # Loss set up
        class_loss = torch.zeros(776)
        coord_loss = torch.zeros(776)
        conf_loss = torch.zeros(776)

        # Calculate loss from the best (highest-IoU) box in each row
        i = 0
        for idx in best_boxes:
            box = boxes[i][idx]
            # coordinate loss (square roots on w/h, as in the YOLO paper)
            xy_loss = (label[0] - box[0]) ** 2 + (label[1] - box[1]) ** 2
            wh_loss = (label[2] ** 0.5 - box[2] ** 0.5) ** 2 + (label[3] ** 0.5 - box[3] ** 0.5) ** 2
            coord_loss[i] = xy_loss + wh_loss
            # Check if there was a detection
            if box[4] > 0.8:  # There was
                # classification loss
                class_loss[i] = (label[5] - box[5]) ** 2 + (label[6] - box[6]) ** 2
                # confidence loss
                conf_loss[i] = (label[4] - box[4]) ** 2
            else:  # There wasn't
                conf_loss[i] = no_object * ((label[4] - box[4]) ** 2)
            i += 1

        # Final count
        total_loss = torch.sum(coord_loss) + torch.sum(class_loss) + torch.sum(conf_loss)
        return total_loss
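
A loss sketch reusing the names from the sketches above (one image at a time, moved to the CPU so the indexed writes into the zero-initialised loss tensors succeed; note the .data calls in forward already block gradients from reaching the backbone):

# Hypothetical loss sketch, reusing model, images and targets from above
loss_fn = YOLO_loss()
pred = model(images[:1].to(device))        # (1, 2328, 85)
loss = loss_fn(pred[0].cpu(), targets[0])  # target is the 7-vector from ISICDataset
print(loss)  # scalar total loss over the 776 * 3 candidate boxes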

def single_iou(pred, label):
    """
    Calculates the IoU of each predicted box against a single label box.
    """
    px, py, pw, ph = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
    lx, ly, lw, lh = label[0], label[1], label[2], label[3]
    box_a = [px - (pw / 2), py - (ph / 2), px + (pw / 2), py + (ph / 2)]
    box_b = [lx - (lw / 2), ly - (lh / 2), lx + (lw / 2), ly + (lh / 2)]

    # determine the (x, y) corners of the intersection area
    ax = torch.clamp(box_a[0], min=box_b[0])
    ay = torch.clamp(box_a[1], min=box_b[1])
    bx = torch.clamp(box_a[2], max=box_b[2])
    by = torch.clamp(box_a[3], max=box_b[3])

    # compute the area of intersection
    intersect = torch.abs(torch.clamp((bx - ax), min=0) * torch.clamp((by - ay), min=0))

    # compute the area of both the prediction and ground-truth boxes
    area_a = torch.abs((box_a[2] - box_a[0]) * (box_a[3] - box_a[1]))
    area_b = torch.abs((box_b[2] - box_b[0]) * (box_b[3] - box_b[1]))

    # compute the IoU
    iou = intersect / (area_a + area_b - intersect)
    return iou
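
A quick numeric check of the IoU math with hand-picked values (illustrative only, not from the repository):

# Two 2x2 boxes whose centres are one unit apart in x and y
pred = torch.tensor([[2.0, 2.0, 2.0, 2.0]])  # (cx, cy, w, h)
label = torch.tensor([3.0, 3.0, 2.0, 2.0])
print(single_iou(pred, label))  # intersection 1, union 7 -> tensor([0.1429])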

def filter_boxes(pred):
    """
    Returns the box with the highest objectness confidence.
    """
    best_box = None
    highest_conf = 0
    for i in range(pred.size(0)):
        box = pred[i, :]
        if box[4] >= highest_conf:
            best_box = box
            highest_conf = box[4]
    return best_box
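
Finally, a minimal end-to-end inference sketch tying the pieces together (a sketch only, not part of the diff; dataset and model as in the earlier sketches):

# Hypothetical inference sketch, reusing dataset and model from above
model.eval()
image, target = dataset[0]
with torch.no_grad():
    pred = model(image.unsqueeze(0).to(device))  # (1, 2328, 85)
best = filter_boxes(pred[0])
print(best[:4])  # predicted (cx, cy, w, h) in 416x416 input pixels
print(best[4])   # objectness confidence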