diff --git a/Example_firstgalaxydata.py b/Example_firstgalaxydata.py index 11752bb..a3fe8a0 100644 --- a/Example_firstgalaxydata.py +++ b/Example_firstgalaxydata.py @@ -2,15 +2,20 @@ import torchvision.transforms as transforms - if __name__ == "__main__": transformRGB = transforms.Compose( [transforms.ToTensor(), transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])]) + # Basic usage of splitting train, valid and test + data = FIRSTGalaxyData(root="./", selected_split="train", input_data_list=["galaxy_data_h5.h5"], is_PIL=True, is_RGB=True, transform=transformRGB) + print(data) + data = FIRSTGalaxyData(root="./", selected_split="valid", input_data_list=["galaxy_data_h5.h5"], + is_PIL=True, is_RGB=True, transform=transformRGB) + print(data) data = FIRSTGalaxyData(root="./", selected_split="test", input_data_list=["galaxy_data_h5.h5"], @@ -18,5 +23,26 @@ selected_catalogues=["Gendre", "MiraBest", "Capetti2017a", "Capetti2017b", "Baldi2018", "Proctor_Tab1"], is_PIL=True, is_RGB=True, transform=transformRGB) + print(data) + + # Usage of 5-fold cross validation set + + data = FIRSTGalaxyData(root="./", selected_split="train", input_data_list=["galaxy_data_crossvalid_0_h5.h5"], + is_PIL=True, is_RGB=True, transform=transformRGB) + + print(data) + + data = FIRSTGalaxyData(root="./", selected_split="valid", input_data_list=["galaxy_data_crossvalid_0_h5.h5"], + is_PIL=True, is_RGB=True, transform=transformRGB) + + print(data) + + data = FIRSTGalaxyData(root="./", selected_split="test", input_data_list=["galaxy_data_crossvalid_test_h5.h5"], + selected_classes=["FRI", "FRII", "Compact", "Bent"], + selected_catalogues=["Gendre", "MiraBest", "Capetti2017a", "Capetti2017b", + "Baldi2018", "Proctor_Tab1"], + is_PIL=True, is_RGB=True, transform=transformRGB) + print(data) + + - print(data) \ No newline at end of file diff --git a/README.md b/README.md index 4155b17..e64a923 100644 --- a/README.md +++ b/README.md @@ -13,9 +13,25 @@ Further, the following catalogues are 
included in this dataset: * Baldi 2018 [[7]](https://www.aanda.org/articles/aa/full_html/2018/01/aa31333-17/aa31333-17.html), [Table](https://www.aanda.org/articles/aa/full_html/2018/01/aa31333-17/T1.html) * Proctor [[8]](https://ui.adsabs.harvard.edu/abs/2011ApJS..194...31P/abstract), [Table](https://iopscience.iop.org/article/10.1088/0067-0049/194/2/31#apjs390184t1), data from Table 1 with label “WAT” and “NAT” -When using the literature class definition of FRI, FRII, Compact and Bent as schown below, +Examples for the class definitions of FRI, FRII, Compact and Bent are shown below, ![image](img/Classification_Scheme.png) -the dataset contains the following number of samples per class. +with the labels + +| classes | Label | +| ----------- | ----------- | +| FRI | 0 | +| FRII | 1 | +| Compact| 2 | +| Bent | 3 | + +The dataset has the following total number of samples per class. + +| classes/split | FRI | FRII | Compact | Bent | Total | +| ----------- | ----------- |----------- |----------- |----------- |-----------| +| total | 495 |924 |391 |348 |2158 | + +We provide two splitting options for the dataset. +The first splitting option (galaxy_data_h5.zip) provides three splits (train, valid and test) with the following number of samples per class. | classes/split | FRI | FRII | Compact | Bent | Total | | ----------- | ----------- |----------- |----------- |----------- |-----------| @@ -24,12 +40,14 @@ the dataset contains the following number of samples per class. | test | 50 | 50 | 50 | 50 |200 | | total | 495 |924 |391 |348 |2158 | -| classes | Label | -| ----------- | ----------- | -| FRI | 0 | -| FRII | 1 | -| Compact| 2 | -| Bent | 3 | +The second splitting option (galaxy_data_crossvalid_0_h5.zip to galaxy_data_crossvalid_4_h5.zip and galaxy_data_crossvalid_test_h5.zip) provides a 5-fold cross validation dataset with a larger test set. 
+ +| classes/split | FRI | FRII | Compact | Bent | Total | +| ----------- | ----------- |----------- |----------- |----------- |-----------| +| 5-fold cross train | 316 | 659 | 232 | 198 |1405 | +| 5-fold cross valid | 79 | 165 | 59 | 50 |353 | +| test | 100 | 100 | 100 | 100 |400 | +| total | 495 |924 |391 |348 |2158 | # Installation usage with pytorch If you want to use the dataset via the dataset class `FIRSTGalaxyData` with pytorch, install the necessary packages with diff --git a/firstgalaxydata.py b/firstgalaxydata.py index 26ff0dc..97c5ba6 100644 --- a/firstgalaxydata.py +++ b/firstgalaxydata.py @@ -13,7 +13,7 @@ class FIRSTGalaxyData(data.Dataset): """ - FIRSTGalaxyData class provides FIRST images from various different data catalogs + FIRSTGalaxyData class provides FIRST images with labels from various different data catalogs Attributes ---------- @@ -39,12 +39,18 @@ class FIRSTGalaxyData(data.Dataset): show_coords(): shows the coordinates of the images in a Aitoff projection __repr__() - presents import information aboout the dataset in the Repl + presents important information about the dataset in the REPL """ urls = { "galaxy_data.zip": "https://syncandshare.desy.de/index.php/s/yWNQEoCxbpwxCWm/download", - "galaxy_data_h5.zip": "https://syncandshare.desy.de/index.php/s/9kLKJGxwARZdfiZ/download" + "galaxy_data_h5.zip": "https://syncandshare.desy.de/index.php/s/9kLKJGxwARZdfiZ/download", + "galaxy_data_crossvalid_0_h5.zip": "https://syncandshare.desy.de/index.php/s/QPjZBZE4WeCPDxg/download", + "galaxy_data_crossvalid_1_h5.zip": "https://syncandshare.desy.de/index.php/s/8iDGAAqxtdM2tj9/download", + "galaxy_data_crossvalid_2_h5.zip": "https://syncandshare.desy.de/index.php/s/j6gKn8HiMa8nB4J/download", + "galaxy_data_crossvalid_3_h5.zip": "https://syncandshare.desy.de/index.php/s/oSz6WdWtnZq4s8M/download", + "galaxy_data_crossvalid_4_h5.zip": "https://syncandshare.desy.de/index.php/s/qoNsik3sF39cRBG/download", + "galaxy_data_crossvalid_test_h5.zip": 
"https://syncandshare.desy.de/index.php/s/aM9eGcr8ydddCNM/download" } def __init__(self, root, input_data_list=None, selected_split="train", selected_classes=None, @@ -161,7 +167,6 @@ def __getitem__(self, index): img = Image.fromarray(img, mode="L") if self.is_RGB: img = img.convert("RGB") - # else...return numpy array directly if self.transform is not None: img = self.transform(img) diff --git a/galaxy_data_crossvalid_0_h5.zip b/galaxy_data_crossvalid_0_h5.zip new file mode 100644 index 0000000..14f16bd Binary files /dev/null and b/galaxy_data_crossvalid_0_h5.zip differ diff --git a/galaxy_data_crossvalid_1_h5.zip b/galaxy_data_crossvalid_1_h5.zip new file mode 100644 index 0000000..842efb6 Binary files /dev/null and b/galaxy_data_crossvalid_1_h5.zip differ diff --git a/galaxy_data_crossvalid_2_h5.zip b/galaxy_data_crossvalid_2_h5.zip new file mode 100644 index 0000000..705f62a Binary files /dev/null and b/galaxy_data_crossvalid_2_h5.zip differ diff --git a/galaxy_data_crossvalid_3_h5.zip b/galaxy_data_crossvalid_3_h5.zip new file mode 100644 index 0000000..2669b2b Binary files /dev/null and b/galaxy_data_crossvalid_3_h5.zip differ diff --git a/galaxy_data_crossvalid_4_h5.zip b/galaxy_data_crossvalid_4_h5.zip new file mode 100644 index 0000000..7fed0f4 Binary files /dev/null and b/galaxy_data_crossvalid_4_h5.zip differ diff --git a/galaxy_data_crossvalid_test_h5.zip b/galaxy_data_crossvalid_test_h5.zip new file mode 100644 index 0000000..d81abab Binary files /dev/null and b/galaxy_data_crossvalid_test_h5.zip differ