Merge branch 'main' of github.com:weecology/MillionTrees
bw4sz committed Nov 20, 2024
2 parents c5453f7 + d59814a commit 478157d
Showing 6 changed files with 162 additions and 91 deletions.
10 changes: 5 additions & 5 deletions milliontrees/common/metrics/all_metrics.py
@@ -434,8 +434,8 @@ class KeypointAccuracy(ElementwiseMetric):
"""Given a specific Intersection over union threshold, determine the
accuracy achieved for a one-class detector."""

def __init__(self, iou_threshold=0.5, score_threshold=0.5, name=None):
self.iou_threshold = iou_threshold
def __init__(self, distance_threshold=5, score_threshold=5, name=None):
self.distance_threshold = distance_threshold
self.score_threshold = score_threshold
if name is None:
name = "keypoint_acc"
@@ -462,13 +462,13 @@ def _compute_element_wise(self, y_pred, y_true):
def _point_iou(self, src_keypoints, pred_keypoints):
return torch.cdist(src_keypoints, pred_keypoints, p=2)

def _accuracy(self, src_keypoints, pred_keypoints, iou_threshold):
def _accuracy(self, src_keypoints, pred_keypoints, distance_threshold):
total_gt = len(src_keypoints)
total_pred = len(pred_keypoints)
if total_gt > 0 and total_pred > 0:
# Define the matcher and distance matrix based on iou
matcher = Matcher(iou_threshold,
iou_threshold,
matcher = Matcher(distance_threshold,
distance_threshold,
allow_low_quality_matches=False)
match_quality_matrix = self._point_iou(src_keypoints, pred_keypoints)
results = matcher(match_quality_matrix)
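For context, this metric now matches predicted points to ground truth by Euclidean pixel distance rather than box IoU. The following is a minimal sketch of that idea (an illustration only, not the Matcher-based implementation above): a ground-truth point counts as matched when some prediction falls within distance_threshold pixels of it.

import torch

def keypoint_recall(src_keypoints, pred_keypoints, distance_threshold=5.0):
    # src_keypoints: (N, 2) ground-truth points; pred_keypoints: (M, 2) predictions
    if len(src_keypoints) == 0 or len(pred_keypoints) == 0:
        return torch.tensor(0.0)
    # Pairwise Euclidean distances, shape (N, M)
    dist = torch.cdist(src_keypoints, pred_keypoints, p=2)
    # A ground-truth point is matched if its nearest prediction is close enough
    matched = dist.min(dim=1).values <= distance_threshold
    return matched.float().mean()

# Example: one prediction within 5 px of the first ground-truth point, none near the second
gt = torch.tensor([[133.0, 155.0], [100.0, 190.0]])
pred = torch.tensor([[134.4, 156.8]])
print(keypoint_recall(gt, pred))  # tensor(0.5000)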
20 changes: 0 additions & 20 deletions milliontrees/datasets/TreePoints.py
@@ -30,26 +30,6 @@ class TreePointsDataset(MillionTreesDataset):
Each image is annotated with the following metadata
- location (int): location id
Website:
https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1009180
Original publication:
The following publications are included in this dataset
@article{Weinstein2020,
title={A benchmark dataset for canopy crown detection and delineation in co-registered airborne RGB, LiDAR and hyperspectral imagery from the National Ecological Observation Network.},
author={Weinstein BG, Graves SJ, Marconi S, Singh A, Zare A, Stewart D, et al.},
journal={PLoS Comput Biol},
year={2021},
doi={10.1371/journal.pcbi.1009180}
}
Original publication:
The following publications are included in this dataset
@article{Weinstein2020,
title={A benchmark dataset for canopy crown detection and delineation in co-registered airborne RGB, LiDAR and hyperspectral imagery from the National Ecological Observation Network.},
author={Weinstein BG, Graves SJ, Marconi S, Singh A, Zare A, Stewart D, et al.},
journal={PLoS Comput Biol},
year={2021},
doi={10.1371/journal.pcbi.1009180}
}
License:
This dataset is distributed under Creative Commons Attribution License
"""
65 changes: 37 additions & 28 deletions milliontrees/datasets/TreePolygons.py
@@ -5,12 +5,15 @@
from PIL import Image, ImageDraw
import pandas as pd
import numpy as np
import torch
from shapely import from_wkt
from milliontrees.datasets.milliontrees_dataset import MillionTreesDataset
from milliontrees.common.grouper import CombinatorialGrouper
from milliontrees.common.metrics.all_metrics import Accuracy, Recall, F1

import albumentations as A
from albumentations.pytorch import ToTensorV2
from torchvision.tv_tensors import BoundingBoxes, Mask
import torchvision.transforms as transforms
from torchvision.ops import masks_to_boxes
import torch

class TreePolygonsDataset(MillionTreesDataset):
"""The TreePolygons dataset is a collection of tree annotations annotated
@@ -30,20 +30,6 @@ class TreePolygonsDataset(MillionTreesDataset):
- location (int): location id
- source (int): source id
Website:
https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1009180
Original publication:
# Ventura et al. 2022
@article{ventura2022individual,
title={Individual tree detection in large-scale urban environments using high-resolution multispectral imagery},
author={Ventura, Jonathan and Pawlak, Camille and Honsberger, Milo and Gonsalves, Cameron and Rice, Julian and Love, Natalie LR and Han, Skyler and Nguyen, Viet and Sugano, Keilana and Doremus, Jacqueline and others},
journal={arXiv preprint arXiv:2208.10607},
year={2022}
}
# TreeFormer
#etc....
License:
This dataset is distributed under Creative Commons Attribution License
"""
@@ -90,11 +79,17 @@ def __init__(self,
'id_test': 'Test (ID/Cis)'
}

unique_files = df.drop_duplicates(subset=['filename'], inplace=False).reset_index(drop=True)
unique_files['split_id'] = unique_files['split'].apply(lambda x: self._split_dict[x])
self._split_array = unique_files['split_id'].values

df['split_id'] = df['split'].apply(lambda x: self._split_dict[x])
self._split_array = df['split_id'].values

# Filenames
self._input_array = df['filename'].values
self._input_array = unique_files.filename

# Create lookup table for which index to select for each filename
self._input_lookup = df.groupby('filename').apply(lambda x: x.index.values).to_dict()

# Convert each polygon to shapely objects
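As an aside on the lookup table built just above: it maps each filename to the row indices of all of its annotations, so a single image can carry several polygons. A quick sketch of the structure with a toy DataFrame (illustration only):

import pandas as pd

df = pd.DataFrame({
    'filename': ['a.jpg', 'a.jpg', 'b.jpg'],
    'polygon': ['POLYGON((0 0, 1 0, 1 1, 0 0))'] * 3,
})
input_lookup = df.groupby('filename').apply(lambda x: x.index.values).to_dict()
print(input_lookup)  # {'a.jpg': array([0, 1]), 'b.jpg': array([2])}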
@@ -105,7 +100,9 @@ def __init__(self,

# Labels -> just 'Tree'
self._n_classes = 1
self._y_size = 2

# Not clear what this is, since we have a polygon, unknown size
self._y_size = 4

# Create source locations with a numeric ID
df["source_id"] = df.source.astype('category').cat.codes
@@ -129,12 +126,16 @@ def __getitem__(self, idx):
# Any transformations are handled by the WILDSSubset
# since different subsets (e.g., train vs test) might have different transforms
x = self.get_input(idx)
y_polygon = self._y_array[idx]

y = self.create_polygon_mask(x.shape[-2:], y_polygon)
y_indices = self._input_lookup[self._input_array[idx]]
y_polygons = [self._y_array[i] for i in y_indices]
mask_imgs = [self.create_polygon_mask(x.shape[-2:], y_polygon) for y_polygon in y_polygons]
masks = torch.concat([Mask(transforms.PILToTensor()(mask_img), dtype=torch.bool) for mask_img in mask_imgs])
bboxes = BoundingBoxes(data=masks_to_boxes(masks), format='xyxy', canvas_size=x.size[::-1])

metadata = self.metadata_array[idx]
targets = {"y": masks, "bboxes": bboxes, "labels": np.zeros(len(masks), dtype=int)}

return x, y, metadata
return metadata, x, targets

def create_polygon_mask(self, image_size, vertices):
"""
@@ -198,18 +199,26 @@ def eval(self, y_pred, y_true, metadata, prediction_fn=None):

return results, results_str


def get_input(self, idx):
"""
Args:
- idx (int): Index of a data point
Output:
- x (Tensor): Input features of the idx-th data point
- x (np.ndarray): Input features of the idx-th data point
"""
# All images are in the images folder
img_path = os.path.join(self.data_dir / 'images' /
self._input_array[idx])
img_path = os.path.join(self._data_dir / 'images' / self._input_array[idx])
img = Image.open(img_path)
# Channels first input
img = torch.tensor(np.array(img)).permute(2, 0, 1)
img = np.array(img.convert('RGB'))/255
img = np.array(img, dtype=np.float32)

return img

def _transform_(self):
self.transform = A.Compose([
A.Resize(height=448, width=448, p=1.0),
ToTensorV2()
])

return self.transform
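The new __getitem__ rasterizes each WKT polygon into a boolean mask and derives boxes from the masks. A compact sketch of that pipeline, assuming shapely >= 2.0 and a recent torchvision (illustration only, not the dataset's exact code):

import numpy as np
import torch
from PIL import Image, ImageDraw
from shapely import from_wkt
from torchvision.ops import masks_to_boxes

def polygon_to_mask_and_box(wkt, height, width):
    # Rasterize the polygon exterior into a (1, H, W) boolean mask
    polygon = from_wkt(wkt)
    mask_img = Image.new('L', (width, height), 0)
    ImageDraw.Draw(mask_img).polygon(list(polygon.exterior.coords), fill=1)
    mask = torch.from_numpy(np.array(mask_img)).bool().unsqueeze(0)
    # Derive an xyxy bounding box from the mask
    box = masks_to_boxes(mask)
    return mask, box

mask, box = polygon_to_mask_and_box("POLYGON((10 15, 50 15, 50 55, 10 55, 10 15))", 100, 100)
print(mask.shape, box)  # torch.Size([1, 100, 100]) tensor([[10., 15., 50., 55.]])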
3 changes: 1 addition & 2 deletions tests/conftest.py
@@ -119,11 +119,10 @@ def generate_polygon_dataset(image_dir):
# Assuming you have a list of polygon coordinates and corresponding image file paths
polygon_wkt = ["POLYGON((10 15, 50 15, 50 55, 10 55, 10 15))", "POLYGON((20 25, 60 25, 60 65, 20 65, 20 25))", "POLYGON((30 35, 70 35, 70 75, 30 75, 30 35))"]
locations = [0,0,1]
resolution = [1,1,10]
image_files = ['image1.jpg', 'image2.jpg', 'image3.jpg']

# Create a pandas DataFrame
df = pd.DataFrame({'polygon': polygon_wkt, 'filename': image_files, "source":locations,"resolution":resolution})
df = pd.DataFrame({'polygon': polygon_wkt, 'filename': image_files, "source":locations})

# Create images and save them to disk within image_dir
for i, row in df.iterrows():
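The fixture's image-writing loop is truncated in the diff above. Purely as an illustration (not the repository's exact code), blank RGB test images matching such a DataFrame could be generated like this:

import os
import numpy as np
import pandas as pd
from PIL import Image

def write_blank_images(df, image_dir, size=(100, 100)):
    # One flat RGB image per unique filename referenced by the annotations
    os.makedirs(image_dir, exist_ok=True)
    for filename in df['filename'].unique():
        Image.fromarray(np.zeros((*size, 3), dtype=np.uint8)).save(
            os.path.join(image_dir, filename))

df = pd.DataFrame({
    'polygon': ["POLYGON((10 15, 50 15, 50 55, 10 55, 10 15))"],
    'filename': ['image1.jpg'],
    'source': [0],
})
write_blank_images(df, 'images')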
5 changes: 3 additions & 2 deletions tests/test_TreePoints.py
@@ -94,15 +94,16 @@ def test_TreePoints_eval(dataset):
all_metadata = []
# Get predictions for the full test set
for metadata, x, y_true in test_loader:
y_pred = [{'y': torch.tensor([[30.0, 70.0]]), 'label': torch.tensor([0]), 'score': torch.tensor([0.54])} for _ in range(x.shape[0])]
# Make one point close and one point far
y_pred = [{'y': torch.tensor([[133.0, 155.0], [100.0, 190.0]]), 'label': torch.tensor([0]), 'score': torch.tensor([0.54, 0.75])} for _ in range(x.shape[0])]
# Accumulate y_true, y_pred, metadata
all_y_pred.append(y_pred)
all_y_true.append(y_true)
all_metadata.append(metadata)

# Evaluate
eval_results, eval_string = dataset.eval(all_y_pred, all_y_true, all_metadata)

assert eval_results["keypoint_acc_avg"] == 0.5
assert len(eval_results)
assert "keypoint_acc_avg" in eval_results.keys()

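For reference, each per-image prediction handed to dataset.eval in this test is a plain dict of tensors. A minimal sketch of building a batch of such predictions without a real model (one label and one score per predicted point is our assumption here):

import torch

def dummy_point_predictions(batch_size):
    # 'y' holds (N, 2) keypoints in pixel coordinates
    return [
        {
            'y': torch.tensor([[133.0, 155.0], [100.0, 190.0]]),
            'label': torch.tensor([0, 0]),
            'score': torch.tensor([0.54, 0.75]),
        }
        for _ in range(batch_size)
    ]

print(dummy_point_predictions(2)[0]['y'].shape)  # torch.Size([2, 2])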
150 changes: 116 additions & 34 deletions tests/test_TreePolygons.py
@@ -1,7 +1,11 @@
from milliontrees.datasets.TreePolygons import TreePolygonsDataset
from milliontrees.common.data_loaders import get_train_loader, get_eval_loader

import torch
import pytest
import os
import torchvision.transforms.v2 as transforms
import pandas as pd
import numpy as np

# Check if running on hipergator
if os.path.exists("/orange"):
@@ -12,55 +16,133 @@
# Test structure without real annotation data to ensure format is correct
def test_TreePolygons_generic(dataset):
dataset = TreePolygonsDataset(download=False, root_dir=dataset)
for image, label, metadata in dataset:
assert image.shape == (3, 100, 100)
assert label.shape == (100, 100)
assert len(metadata) == 2
for metadata, image, targets in dataset:
polygons, labels = targets["y"], targets["labels"]
assert image.shape == (100, 100, 3)
assert image.dtype == np.float32
assert image.min() >= 0.0 and image.max() <= 1.0
assert polygons.shape == (2, 2)
assert labels.shape == (2,)
assert metadata.shape == (2,)
break

transform = transforms.Compose([
transforms.Resize((448, 448)),
transforms.RandomHorizontalFlip(p=0.5),
transforms.ToTensor()
])
train_dataset = dataset.get_subset("train", transform=transform)
train_dataset = dataset.get_subset("train")

for image, label, metadata in train_dataset:
for metadata, image, targets in train_dataset:
polygons, labels = targets["y"], targets["labels"]
assert image.shape == (3, 448, 448)
assert label.shape == (448, 448)
assert len(metadata) == 2
assert image.dtype == torch.float32
assert image.min() >= 0.0 and image.max() <= 1.0
assert torch.is_tensor(polygons)
assert polygons.shape == (2, 2)
assert len(labels) == 2
assert metadata.shape == (2,)
break

@pytest.mark.parametrize("batch_size", [1, 2])
def test_get_train_dataloader(dataset, batch_size):
dataset = TreePolygonsDataset(download=False, root_dir=dataset)
train_dataset = dataset.get_subset("train")
train_loader = get_train_loader('standard', train_dataset, batch_size=batch_size)
for metadata, x, targets in train_loader:
y = targets[0]["y"]
assert torch.is_tensor(targets[0]["y"])
assert x.shape == (batch_size, 3, 448, 448)
assert x.dtype == torch.float32
assert x.min() >= 0.0 and x.max() <= 1.0
assert y.shape[1] == 2
assert len(metadata) == batch_size
break

def test_get_test_dataloader(dataset):
dataset = TreePolygonsDataset(download=False, root_dir=dataset)
test_dataset = dataset.get_subset("test")

for metadata, image, targets in test_dataset:
polygons, labels = targets["y"], targets["labels"]
assert image.shape == (3, 448, 448)
assert image.dtype == torch.float32
assert image.min() >= 0.0 and image.max() <= 1.0
assert polygons.shape == (2, 2)
assert labels.shape == (2,)
assert metadata.shape == (2,)
break

# Assert that test_dataset[0] == "image3.jpg"
metadata, image, targets = test_dataset[0]
assert metadata[1] == 1
assert metadata[0] == "image3.jpg"

test_loader = get_eval_loader('standard', test_dataset, batch_size=1)
for metadata, x, targets in test_loader:
y = targets[0]["y"]
assert torch.is_tensor(targets[0]["y"])
assert x.shape == (1, 3, 448, 448)
assert x.dtype == torch.float32
assert x.min() >= 0.0 and x.max() <= 1.0
assert y.shape[1] == 2
assert len(metadata) == 1
break

def test_TreePolygons_eval(dataset):
dataset = TreePolygonsDataset(download=False, root_dir=dataset)
test_dataset = dataset.get_subset("test")
test_loader = get_eval_loader('standard', test_dataset, batch_size=2)

all_y_pred = []
all_y_true = []
all_metadata = []
# Get predictions for the full test set
for metadata, x, y_true in test_loader:
y_pred = [{'y': torch.tensor([[134.4, 156.8]]), 'label': torch.tensor([0]), 'score': torch.tensor([0.54])} for _ in range(x.shape[0])]
# Accumulate y_true, y_pred, metadata
all_y_pred.append(y_pred)
all_y_true.append(y_true)
all_metadata.append(metadata)

# Evaluate
eval_results, eval_string = dataset.eval(all_y_pred, all_y_true, all_metadata)
assert eval_results["keypoint_acc_avg"] == 0.5
assert len(eval_results)
assert "keypoint_acc_avg" in eval_results.keys()

# Test structure with real annotation data to ensure format is correct
# Do not run on github actions
# Do not run on github actions, long running.
@pytest.mark.skipif(not on_hipergator, reason="Do not run on github actions")
def test_TreePolygons_release():
# Lookup size of the train dataset on disk
dataset = TreePolygonsDataset(download=False, root_dir="/orange/ewhite/DeepForest/MillionTrees/")
transform = transforms.Compose([
transforms.Resize((448, 448)),
transforms.RandomHorizontalFlip(p=0.5),
transforms.ToTensor()
])
train_dataset = dataset.get_subset("train", transform=transform)
train_dataset = dataset.get_subset("train")

for image, label, metadata in train_dataset:
for metadata, image, targets in train_dataset:
polygons = targets["y"]
labels = targets["labels"]
assert image.shape == (3, 448, 448)
assert label.shape == (4,)
assert image.dtype == torch.float32
assert image.min() >= 0.0 and image.max() <= 1.0
assert polygons.shape[1] == 2
assert metadata.shape[0] == 2

train_loader = get_train_loader('standard', train_dataset, batch_size=2)
for metadata, x, targets in train_loader:
y = targets[0]["y"]
assert torch.is_tensor(targets[0]["y"])
assert x.shape == (2, 3, 448, 448)
assert x.dtype == torch.float32
assert x.min() >= 0.0 and x.max() <= 1.0
assert y.shape[1] == 2
assert len(metadata) == 2
break

# Test download structure
def test_TreePolygons_download(tmpdir):
dataset = TreePolygonsDataset(download=True, root_dir=tmpdir)
transform = transforms.Compose([
transforms.Resize((448, 448)),
transforms.RandomHorizontalFlip(p=0.5),
transforms.ToTensor()
])
train_dataset = dataset.get_subset("train", transform=transform)
train_dataset = dataset.get_subset("train")

for image, label, metadata in train_dataset:
for metadata, image, targets in train_dataset:
polygons = targets["y"]
assert image.shape == (3, 448, 448)
assert label.shape == (4,)
assert len(metadata) == 2
break
assert image.dtype == torch.float32
assert image.min() >= 0.0 and image.max() <= 1.0
assert polygons.shape[1] == 2
assert metadata.shape[0] == 2
break
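Putting the pieces together, a minimal consumer of the polygon loaders exercised in these tests might look like the following sketch (the root_dir path is a placeholder; the (metadata, x, targets) batch layout is taken from the tests above):

import torch
from milliontrees.datasets.TreePolygons import TreePolygonsDataset
from milliontrees.common.data_loaders import get_train_loader

# root_dir is a placeholder; point it at a local copy of the dataset
dataset = TreePolygonsDataset(download=False, root_dir="/path/to/MillionTrees")
train_dataset = dataset.get_subset("train")
train_loader = get_train_loader('standard', train_dataset, batch_size=2)

for metadata, x, targets in train_loader:
    # x: (batch, 3, 448, 448) float images scaled to [0, 1]
    # targets: one dict per image with "y" (masks), "bboxes", and "labels"
    masks = targets[0]["y"]
    print(x.shape, masks.shape, len(metadata))
    break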
