#!/usr/bin/env python
# coding: utf-8
# File: MapYourCity_ExampleDataLoader.py
# Example DataLoader for the MapYourCity dataset
"""
ABOUT SCRIPT:
This file creates an example DataLoader for the training, validation, and test sets using PyTorch
This code is generated by Nikolaos Dionelis @ESA
LAST EDITED: 05/02/2024
"""
# Python library imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
import os
import cv2
# We use PyTorch
import torch
class Dataset(torch.utils.data.Dataset):
    """
    This class defines the data with all the 3 modalities

    Every sample is read from the folder ``train_path + ID``, where
    ``train_path`` is a module-level variable that must be defined before the
    first item is requested. The folder is expected to contain the files
    ``street.jpg``, ``orthophoto.tif``, ``s2_l2a.tif`` and ``label.txt``.
    """

    def __init__(self, list_IDs):
        """
        This function initializes the data class - constructor function
        :param list_IDs: the PID numbers - (i.e. the pid)
        """
        self.list_IDs = list_IDs

    def __len__(self):
        """
        :return: the number of sample IDs held by this dataset
        """
        return len(self.list_IDs)

    @staticmethod
    def _read_resized(path):
        """
        Read an image with OpenCV and resize it to 256 x 256 pixels
        :param path: path of the image file
        :return: the resized image array
        :raises OSError: if OpenCV cannot read the file
        """
        image = cv2.imread(path)
        # cv2.imread signals a missing/unreadable file by returning None
        # instead of raising, so fail here with the offending path.
        if image is None:
            raise OSError("Could not read image file: " + path)
        return cv2.resize(image, (256, 256))

    def __getitem__(self, index):
        """
        Load the three modalities and the label of one sample
        :param index: position of the sample in list_IDs
        :return: tuple (X, X2, X3, y) - the resized street image, the resized
                 orthophoto, the s2_l2a.tif raster with its first axis moved
                 to the end, and the integer label
        :raises OSError: if the street image or the orthophoto cannot be read
        """
        ID = self.list_IDs[index]
        sample_dir = train_path + ID

        X = self._read_resized(sample_dir + '/street.jpg')
        X2 = self._read_resized(sample_dir + '/orthophoto.tif')

        # Close the raster dataset as soon as its pixels have been read.
        with rasterio.open(sample_dir + '/s2_l2a.tif') as raster:
            X3 = raster.read()
        # Move the first axis (the bands, as returned by read()) to the end.
        X3 = np.transpose(X3, [1, 2, 0])

        # Close the label file deterministically instead of leaking the handle.
        with open(sample_dir + '/label.txt', "r") as label_file:
            y = int(label_file.read())

        return X, X2, X3, y
# Define the batch size
BATCH_SIZE = 32

# Define the paths to the data
# input_path = "directory with MapYourCity image files"
input_path = "/Data/ndionelis/building-age-dataset/" # This line has to be modified/ changed
train_path = input_path + "train/data/"
test_path = input_path + "test/data/"

# Load the csv files
test_df = pd.read_csv(input_path + "test/test-set.csv")
train_df = pd.read_csv(input_path + "train/train-set.csv")

# For the datasets
names_data = os.listdir(train_path) # to not load all data in a single tensor, load only the names
length_names = len(names_data)

# The train/validation split is read from a previously saved index file.
# That file was produced once from a random permutation with:
#   idx = perm[:round(0.8*length_names)] # draw round(0.8*length_names) samples
#   torch.save(idx, 'indexForTrainVal.pt')
# NOTE(review): the saved indices point into names_data, and os.listdir returns
# the names in arbitrary order - confirm the listing order matches the one used
# when the index file was created.
perm = torch.randperm(length_names)
idx = torch.load('indexForTrainVal.pt')

# For the training data
names_data = np.array(names_data)
idx = idx.numpy()
training_data = names_data[idx]

# For the held-out data: every name that was not selected by idx
mask = np.ones(names_data.size, dtype=bool)
mask[idx] = False
test_data = names_data[mask]

# For the training set
train_set = Dataset(training_data.tolist())
train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)

# Example for the held-out set (it is read from train_path as well)
test_set = Dataset(test_data.tolist())
test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE)

# For the DataLoaders
# We use train_loader and test_loader
train_dataloader = train_loader
valid_dataloader = test_loader

# The sizes depend on the BATCH_SIZE
# We use PyTorch .shape
# Fetch a single batch and print the shape of each of its four entries
# (X, X2, X3, y); building a new iterator per print would load a full batch
# from disk each time.
first_batch = next(iter(train_dataloader))
for batch_entry in first_batch:
    print(batch_entry.shape)
# To run this script: python MapYourCity_ExampleDataLoader.py
# Also: The main function below
# if __name__ == '__main__':
# BATCH_SIZE = 32
# input_path = "/Data/ndionelis/building-age-dataset/" # This line has to be modified/ changed
# train_path = input_path + "train/data/"
# train_df = pd.read_csv(input_path + "train/train-set.csv")
# train_df.head()
# names_data = os.listdir(train_path) # to not load all data in a single tensor, load only the names
# train_set = Dataset(names_data)
# train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
# print(next(iter(train_dataloader))[0].shape)
# print(next(iter(train_dataloader))[1].shape)
# print(next(iter(train_dataloader))[2].shape)
# print(next(iter(train_dataloader))[3].shape)