forked from Lyken17/Efficient-PyTorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tfpack2lmdb.py
26 lines (22 loc) · 963 Bytes
/
tfpack2lmdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import numpy as np
from tensorpack.dataflow import *
class BinaryILSVRC12(dataset.ILSVRC12Files):
    """ILSVRC12 file dataflow that yields raw, undecoded file bytes.

    The parent ``dataset.ILSVRC12Files`` is iterated for ``(filename, label)``
    pairs; each filename is replaced by the full binary content of that file
    (presumably an encoded JPEG) wrapped in a 1-D ``uint8`` NumPy array. The
    image is never decoded here.
    """

    def __iter__(self):
        """Yield ``[encoded_bytes, label]`` for every file the parent yields."""
        parent_items = super(BinaryILSVRC12, self).__iter__()
        for path, cls_label in parent_items:
            # Read the whole file in binary mode; the handle is closed before
            # the datapoint is handed to the consumer.
            with open(path, 'rb') as handle:
                raw = handle.read()
            # Going through bytearray gives NumPy a mutable buffer, so the
            # resulting array is a regular writable uint8 vector.
            encoded = np.asarray(bytearray(raw), dtype='uint8')
            yield [encoded, cls_label]
from tensorpack.dataflow.serialize import LMDBSerializer
if __name__ == "__main__":
    # Command-line entry point: dump one ILSVRC12 split into
    # ``<out>/<split>.lmdb`` with the raw file bytes and label of every image.
    import argparse
    import os.path as osp
    import warnings

    parser = argparse.ArgumentParser(
        description="Serialize an ILSVRC12 split (raw image bytes + label) "
                    "into an LMDB file.")
    # The dataset directory has no sensible default, so fail early with a
    # clear argparse error instead of passing None to the dataset class.
    parser.add_argument('--ds', type=str, required=True,
                        help="path to the ILSVRC12 dataset directory")
    parser.add_argument('-s', '--split', type=str, default="val",
                        help="dataset split to convert (default: val)")
    parser.add_argument('--out', type=str, default=".",
                        help="directory that receives <split>.lmdb")
    # Default is a single worker: the multi-process prefetcher forks
    # independent copies of the dataflow, and only with one process is the
    # output guaranteed to be the same data in the same order as the input.
    parser.add_argument('-p', '--procs', type=int, default=1,
                        help="number of prefetch processes; values above 1 "
                             "may duplicate or drop samples (default: 1)")
    args = parser.parse_args()

    if args.procs > 1:
        warnings.warn(
            "--procs=%d: each prefetch process iterates the dataset "
            "independently, so the written LMDB may contain duplicated "
            "samples and miss others; use --procs 1 for an exact copy."
            % args.procs)

    ds0 = BinaryILSVRC12(args.ds, args.split)
    ds1 = PrefetchDataZMQ(ds0, nr_proc=args.procs)
    LMDBSerializer.save(ds1, osp.join(args.out, '%s.lmdb' % args.split))