YOLO Detection of Lesions in ISIC Dataset #188

Open
wants to merge 14 commits into base: topic-recognition
19 changes: 2 additions & 17 deletions README.md
@@ -1,19 +1,4 @@
# Pattern Analysis
Pattern Analysis of various datasets by COMP3710 students in 2024 at the University of Queensland.

We create a pattern recognition and image processing library for TensorFlow (TF), PyTorch or JAX.
# Please see the updated pull request for the implemented feedback! All details are under Topic_Recognition
https://github.com/shakes76/PatternAnalysis-2024/pull/193#issue-2650661510

This library is created and maintained by The University of Queensland [COMP3710](https://my.uq.edu.au/programs-courses/course.html?course_code=comp3710) students.

The library includes the following implemented in TensorFlow:
* fractals
* recognition problems

In the recognition folder, you will find many recognition problems solved including:
* segmentation
* classification
* graph neural networks
* StyleGAN
* Stable diffusion
* transformers, etc.
76 changes: 76 additions & 0 deletions dataset.py
@@ -0,0 +1,76 @@
import torch
from torch.utils.data import Dataset
import pandas as pd
import os
import cv2
import numpy as np

class ISICDataset(Dataset):
    """Custom Dataset class for YOLO model with ISIC data."""

    def __init__(self, image_dir, mask_dir, labels_path, image_size):
        self.image_size = image_size
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.labels = pd.read_csv(labels_path)

        # Load all image file names in the directory
        self.image_files = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]
        self.samples = [self._process_sample(i) for i in range(len(self.image_files))]

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        return self.samples[idx]

    def _process_sample(self, idx):
        """Helper function to process and return a single sample (image and target vector)."""
        # Load image and mask
        image = self._load_image(idx)
        mask = self._load_mask(idx)

        # Resize image and mask to the target size
        image = cv2.resize(image, (self.image_size, self.image_size)).astype(np.float32) / 255.0
        mask = cv2.resize(mask, (self.image_size, self.image_size))

        # Obtain bounding box coordinates from the mask
        x, y, w, h = self._extract_bounding_box(mask)

        # Retrieve label probabilities
        label1, label2 = self.labels.iloc[idx, 1:3]
        total_prob = label1 + label2

        # Create target vector: (centre x, centre y, width, height, objectness, class 1, class 2)
        target_vector = np.array(
            [x + w / 2, y + h / 2, w, h, total_prob, label1, label2],
            dtype=np.float32
        )

        # Convert image to tensor format (C, H, W)
        image_tensor = torch.tensor(image.transpose(2, 0, 1), dtype=torch.float32)
        target_tensor = torch.tensor(target_vector, dtype=torch.float32)

        return image_tensor, target_tensor

    def _load_image(self, idx):
        """Loads an image given an index."""
        img_name = os.path.join(self.image_dir, self.image_files[idx])
        return cv2.imread(img_name)

    def _load_mask(self, idx):
        """Loads the mask corresponding to the image at the given index."""
        mask_name = os.path.join(
            self.mask_dir, self.image_files[idx].replace('.jpg', '_segmentation.png')
        )
        return cv2.imread(mask_name, cv2.IMREAD_GRAYSCALE)
    def _extract_bounding_box(self, mask):
        """Extracts the bounding box from the mask image."""
        _, thresh = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        if contours:
            # Use the largest contour so stray specks in the mask cannot displace the box
            largest = max(contours, key=cv2.contourArea)
            x, y, w, h = cv2.boundingRect(largest)
            return x, y, w, h
        return 0, 0, 0, 0  # Return zero box if no contours are found
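
For reference, a minimal usage sketch of this dataset class (not part of the diff; the directory paths and CSV name are placeholders, assuming the usual ISIC layout of .jpg images, *_segmentation.png masks and a per-image probability CSV):

# Hypothetical usage sketch: paths below are placeholders, not from this PR
from torch.utils.data import DataLoader

dataset = ISICDataset(
    image_dir="data/ISIC/images",        # folder of *.jpg lesion images
    mask_dir="data/ISIC/masks",          # folder of *_segmentation.png masks
    labels_path="data/ISIC/labels.csv",  # CSV: image id, class 1 prob, class 2 prob
    image_size=416,
)
loader = DataLoader(dataset, batch_size=8, shuffle=True)
images, targets = next(iter(loader))
print(images.shape)   # torch.Size([8, 3, 416, 416])
print(targets.shape)  # torch.Size([8, 7])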
237 changes: 237 additions & 0 deletions modules.py
@@ -0,0 +1,237 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("cuda" if torch.cuda.is_available() else "cpu")
class YOLO(nn.Module):

    # REFERENCE: yolov3-tiny.cfg from https://github.com/pjreddie/darknet/blob/master/cfg
    # Used as the basis for which layers were needed
    def __init__(self, num_classes):
        super(YOLO, self).__init__()
        self.num_classes = num_classes
        layers = []
        filters = [16, 32, 64, 128, 256, 512]
        in_channels = 3
        # Convolution blocks: conv -> batch norm -> leaky ReLU -> max pooling
        for i in filters:
            layers.append(nn.Conv2d(in_channels, i, kernel_size=3, stride=1, padding=1, bias=False))
            in_channels = i
            layers.append(nn.BatchNorm2d(i))
            layers.append(nn.LeakyReLU(0.1, True))  # inplace=True saves memory
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        layers.append(nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1, bias=False))
        layers.append(nn.BatchNorm2d(1024))
        layers.append(nn.LeakyReLU(0.1, True))

        # Note: padding=1 on the 1x1 convolutions below enlarges the feature map;
        # darknet's pad=1 flag means pad = size // 2, which is 0 for a 1x1 kernel
        layers.append(nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=1, bias=False))
        layers.append(nn.BatchNorm2d(256))
        layers.append(nn.LeakyReLU(0.1, True))

        layers.append(nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1, bias=False))
        layers.append(nn.BatchNorm2d(512))
        layers.append(nn.LeakyReLU(0.1, True))

        layers.append(nn.Conv2d(512, 255, kernel_size=1, stride=1, padding=1, bias=True))
        self.conv_start = nn.Sequential(*layers)

        # Detection layer - anchors taken from yolov3-tiny.cfg, in pixels for 416x416 inputs
        self.anchor1 = [(81, 82), (135, 169), (344, 319)]

        # A route layer from yolov3-tiny could go here
        self.conv_mid = nn.Sequential(
            nn.Conv2d(255, 128, kernel_size=1, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.1, True),
            nn.Upsample(scale_factor=2, mode="bilinear"))
        # Another route layer could go here
        self.conv_end = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.1, True),
            nn.Conv2d(256, 255, kernel_size=1, stride=1, padding=1, bias=True))

        # Second detection layer
        self.anchor2 = [(10, 14), (23, 27), (37, 58)]

    def forward(self, x):
        out = self.conv_start(x)
        # .data detaches the tensor from autograd, so gradients cannot flow back
        # through the backbone; end-to-end training would require removing these calls
        out = out.data
        a = self.predict_transform(out, 416, self.anchor1, self.num_classes)
        out = self.conv_mid(out)
        out = self.conv_end(out)
        out = out.data
        b = self.predict_transform(out, 416, self.anchor2, self.num_classes)
        return torch.cat((a, b), 1)

    def predict_transform(self, prediction, inp_dim, anchors, num_classes):
        """
        Decodes the output from the convolution layers and arranges the information into a usable format.
        The below reference was used as a base for this function.
        REFERENCE: refer to reference 2 in README.
        """
        batch_size = prediction.size(0)
        stride = inp_dim // prediction.size(2)
        grid_size = inp_dim // stride
        bbox_attrs = 5 + num_classes
        num_anchors = len(anchors)

        # Rearrange the feature map to (batch_size, number of boxes, box attributes)
        prediction = prediction.view(batch_size, bbox_attrs * num_anchors, grid_size * grid_size)
        prediction = prediction.transpose(1, 2).contiguous()
        prediction = prediction.view(batch_size, grid_size * grid_size * num_anchors, bbox_attrs)
        anchors = [(a[0] / stride, a[1] / stride) for a in anchors]

        # Squash the centre x, centre y and object confidence to between 0 and 1
        prediction[:, :, 0] = torch.sigmoid(prediction[:, :, 0])
        prediction[:, :, 1] = torch.sigmoid(prediction[:, :, 1])
        prediction[:, :, 4] = torch.sigmoid(prediction[:, :, 4])

        # Add the centre offsets of each grid cell
        grid = np.arange(grid_size)
        a, b = np.meshgrid(grid, grid)

        x_offset = torch.FloatTensor(a).view(-1, 1)
        y_offset = torch.FloatTensor(b).view(-1, 1)

        x_offset = x_offset.to(device)
        y_offset = y_offset.to(device)

        x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1, num_anchors).view(-1, 2).unsqueeze(0)

        prediction[:, :, :2] += x_y_offset

        # Log-space transform of the height and width
        # so that all boxes are on the same scale
        anchors = torch.FloatTensor(anchors)
        anchors = anchors.to(device)

        anchors = anchors.repeat(grid_size * grid_size, 1).unsqueeze(0)
        prediction[:, :, 2:4] = torch.exp(prediction[:, :, 2:4]) * anchors

        # Arrange the probabilities of the classes
        prediction[:, :, 5: 5 + num_classes] = torch.sigmoid(prediction[:, :, 5: 5 + num_classes])

        # Scale the box coordinates back up to input-image pixels
        prediction[:, :, :4] *= stride
        return prediction


def calculate_iou(pred, label):
    """
    Calculates the IoUs of a given list of boxes.
    Used to determine the accuracy of the predicted bounding boxes,
    and is a key part of the loss function.
    """
    px, py, pw, ph = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
    lx, ly, lw, lh = label[0], label[1], label[2], label[3]
    box_a = [px - (pw / 2), py - (ph / 2), px + (pw / 2), py + (ph / 2)]
    box_b = [lx - (lw / 2), ly - (lh / 2), lx + (lw / 2), ly + (lh / 2)]

    # determine the (x, y) corners of the intersection area
    ax = torch.clamp(box_a[0], min=box_b[0])
    ay = torch.clamp(box_a[1], min=box_b[1])
    bx = torch.clamp(box_a[2], max=box_b[2])
    by = torch.clamp(box_a[3], max=box_b[3])

    # compute the area of intersection
    intersect = torch.abs(torch.clamp((bx - ax), min=0) * torch.clamp((by - ay), min=0))

    # compute the area of both the prediction and ground-truth boxes
    area_a = torch.abs((box_a[2] - box_a[0]) * (box_a[3] - box_a[1]))
    area_b = torch.abs((box_b[2] - box_b[0]) * (box_b[3] - box_b[1]))

    # compute the IoU
    iou = intersect / (area_a + area_b - intersect)
    # 2328 predicted boxes, grouped as 776 rows of 3 (hard-coded for 416x416 input)
    iou = torch.reshape(iou, (776, 3))
    return iou

class YOLO_loss(nn.Module):
    """
    Given one image's predictions at a time, the loss is calculated.
    The formulas used to calculate loss are from the reference below.
    REFERENCE: refer to reference 3 in README.
    """
    def __init__(self):
        super(YOLO_loss, self).__init__()

    def forward(self, pred, label):
        # Constants
        no_object = 0.5  # Puts less emphasis on loss from boxes with no object

        # Rearrange predictions so each row holds the 3 anchor boxes of one cell
        boxes = torch.reshape(pred, (776, 3, -1))

        # IoU of every predicted box against the label box
        iou = calculate_iou(pred, label)
        iou, best_boxes = torch.max(iou, dim=1)

        # Loss set up
        class_loss = torch.zeros(776)
        coord_loss = torch.zeros(776)
        conf_loss = torch.zeros(776)

        # Calculate loss from the best (highest-IoU) box in each row
        i = 0
        for idx in best_boxes:
            box = boxes[i][idx]
            # coordinate loss (square roots on w/h, as in the YOLO paper)
            xy_loss = (label[0] - box[0]) ** 2 + (label[1] - box[1]) ** 2
            wh_loss = (label[2] ** 0.5 - box[2] ** 0.5) ** 2 + (label[3] ** 0.5 - box[3] ** 0.5) ** 2
            coord_loss[i] = xy_loss + wh_loss
            # Check if there was a detection
            if box[4] > 0.8:  # There was
                # classification loss
                class_loss[i] = (label[5] - box[5]) ** 2 + (label[6] - box[6]) ** 2
                # confidence loss
                conf_loss[i] = (label[4] - box[4]) ** 2
            else:  # There wasn't
                conf_loss[i] = no_object * ((label[4] - box[4]) ** 2)
            i += 1

        # Final count
        total_loss = torch.sum(coord_loss) + torch.sum(class_loss) + torch.sum(conf_loss)
        return total_loss
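
A loss sketch reusing the names from the sketches above (one image at a time, moved to the CPU so the indexed writes into the zero-initialised loss tensors succeed; note the .data calls in forward already block gradients from reaching the backbone):

# Hypothetical loss sketch, reusing model, images and targets from above
loss_fn = YOLO_loss()
pred = model(images[:1].to(device))        # (1, 2328, 85)
loss = loss_fn(pred[0].cpu(), targets[0])  # target is the 7-vector from ISICDataset
print(loss)  # scalar total loss over the 776 * 3 candidate boxes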

def single_iou(pred, label):
    """
    Calculates the IoU of each predicted box against a single label box.
    """
    px, py, pw, ph = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
    lx, ly, lw, lh = label[0], label[1], label[2], label[3]
    box_a = [px - (pw / 2), py - (ph / 2), px + (pw / 2), py + (ph / 2)]
    box_b = [lx - (lw / 2), ly - (lh / 2), lx + (lw / 2), ly + (lh / 2)]

    # determine the (x, y) corners of the intersection area
    ax = torch.clamp(box_a[0], min=box_b[0])
    ay = torch.clamp(box_a[1], min=box_b[1])
    bx = torch.clamp(box_a[2], max=box_b[2])
    by = torch.clamp(box_a[3], max=box_b[3])

    # compute the area of intersection
    intersect = torch.abs(torch.clamp((bx - ax), min=0) * torch.clamp((by - ay), min=0))

    # compute the area of both the prediction and ground-truth boxes
    area_a = torch.abs((box_a[2] - box_a[0]) * (box_a[3] - box_a[1]))
    area_b = torch.abs((box_b[2] - box_b[0]) * (box_b[3] - box_b[1]))

    # compute the IoU
    iou = intersect / (area_a + area_b - intersect)
    return iou
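
A quick numeric check of the IoU math with hand-picked values (illustrative only, not from the repository):

# Two 2x2 boxes whose centres are one unit apart in x and y
pred = torch.tensor([[2.0, 2.0, 2.0, 2.0]])  # (cx, cy, w, h)
label = torch.tensor([3.0, 3.0, 2.0, 2.0])
print(single_iou(pred, label))  # intersection 1, union 7 -> tensor([0.1429])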

def filter_boxes(pred):
    """
    Returns the box with the highest objectness confidence.
    """
    best_box = None
    highest_conf = 0
    for i in range(pred.size(0)):
        box = pred[i, :]
        if box[4] >= highest_conf:
            best_box = box
            highest_conf = box[4]
    return best_box
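
Finally, a minimal end-to-end inference sketch tying the pieces together (a sketch only, not part of the diff; dataset and model as in the earlier sketches):

# Hypothetical inference sketch, reusing dataset and model from above
model.eval()
image, target = dataset[0]
with torch.no_grad():
    pred = model(image.unsqueeze(0).to(device))  # (1, 2328, 85)
best = filter_boxes(pred[0])
print(best[:4])  # predicted (cx, cy, w, h) in 416x416 input pixels
print(best[4])   # objectness confidence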