# build-dataset.py
# Copies the labelled images into per-class train/validation/test folders.
from imutils import paths
import numpy as np
import shutil
import os
import pandas as pd
# Location of the HAM10000 images.
DATASET_PATH = "/data/images"

# Output roots for the three splits: training, validation and test.
TRAIN, VAL, TEST = "/data/train", "/data/val", "/data/test"

# Input height and width.
INPUT_HEIGHT, INPUT_WIDTH = 450, 600

# Batch size.
BATCH_SIZE = 8

# Fractions of the images assigned to the validation and test splits;
# the remainder is used for training.
VAL_SPLIT, TEST_SPLIT = 0.1, 0.2
def copy_images(Images, folder, DF, source_dir='data/images'):
    """Copy each listed image into a per-label subfolder of ``folder``.

    For every name in ``Images`` the label is looked up in ``DF`` and
    ``<source_dir>/<name>.jpg`` is copied to ``<folder>/<label>/<name>.jpg``.
    ``folder`` and the label subfolders are created when missing.

    Args:
        Images: iterable of image names, without directory or ``.jpg``
            extension.
        folder: root output directory for this split.
        DF: DataFrame with an ``image`` column of image names and a
            ``label`` column giving the subfolder name for each image.
        source_dir: directory containing the source ``.jpg`` files. The
            default is the relative path that used to be hard-coded here.

    Raises:
        ValueError: if a name in ``Images`` has no row in ``DF``.

    Note:
        If a name occurs several times in ``DF`` the label of its last row
        is used.
    """
    os.makedirs(folder, exist_ok=True)

    # Build the name -> label map once; filtering the DataFrame for every
    # image would rescan the whole table on each iteration.
    labels = dict(zip(DF['image'], DF['label']))

    for image_name in Images:
        if image_name not in labels:
            raise ValueError(
                "no label found for image {!r}".format(image_name))

        # Each label gets its own subfolder below the split root.
        label_folder = os.path.join(folder, labels[image_name])
        os.makedirs(label_folder, exist_ok=True)

        file_name = image_name + '.jpg'
        shutil.copy(os.path.join(source_dir, file_name),
                    os.path.join(label_folder, file_name))
def split_images(images, val_split, test_split):
    """Partition ``images`` into disjoint train, validation and test lists.

    The validation and test sizes are ``int(len(images) * fraction)``; the
    training list receives everything that is left. The three lists are
    consecutive slices of ``images``, so no item appears in more than one.

    Args:
        images: list of items to split, already in the desired order.
        val_split: fraction of the items for the validation list.
        test_split: fraction of the items for the test list.

    Returns:
        A ``(train, val, test)`` tuple of lists.
    """
    total = len(images)
    val_len = int(total * val_split)
    test_len = int(total * test_split)
    train_len = total - val_len - test_len
    val_end = train_len + val_len
    return images[:train_len], images[train_len:val_end], images[val_end:]


def main():
    """Read the ground-truth table and copy the images into split folders."""
    df = pd.read_csv('data/GroundTruth.csv')

    # For each row collect the columns whose value equals 1 and keep the
    # first one as the label (None when no column is set).
    # NOTE(review): presumably the CSV is one-hot encoded per class - confirm.
    hot_columns = df.apply(lambda row: row[row == 1].index.tolist(), axis=1)
    labels = hot_columns.apply(lambda cols: cols[0] if cols else None)
    df_new = pd.DataFrame({'image': df['image'], 'label': labels})

    images = list(df_new['image'])
    np.random.shuffle(images)

    # The slices must not overlap: previously the validation slice also
    # contained the test portion and the test slice was taken from the
    # start of the training data.
    train, val, test = split_images(images, VAL_SPLIT, TEST_SPLIT)

    copy_images(train, TRAIN, df_new)
    copy_images(val, VAL, df_new)
    copy_images(test, TEST, df_new)


if __name__ == "__main__":
    main()