This repository has been archived by the owner on Dec 13, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 52
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
API/ENH: Add image database step, postproc module
This is a big one. We add a processing step: load all images in parallel (we use multiprocessing) into an in-memory db (a dict) and resize them to the dimensions used for the NN model. It turns out that loading many images from disk (even < 10 MB/image, even with an SSD) is slow. With the images in memory, the fingerprints loop actually loads all CPU cores ~100% with TensorFlow w/o waiting for IO. Functions such as fingerprint(s), which used to get a file name or a list of those, now work with (the dict of) image arrays. Rename imagecluster.py -> calc.py. Add postproc.py and move make_links() there. Add a function to plot a grid of images arranged into clusters. We build a numpy array out of images and use only one imshow() call .. cool eh? We use the in-memory images here as well. Else, plotting would be painfully slow.
- Loading branch information
Showing
3 changed files
with
193 additions
and
81 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import os | ||
import shutil | ||
|
||
from matplotlib import pyplot as plt | ||
import numpy as np | ||
|
||
from . import calc as ic | ||
|
||
pj = os.path.join | ||
|
||
|
||
def plot_clusters(clusters, ias, maxelem=None):
    """Render `clusters` of the images in `ias` as a single image grid.

    Every cluster occupies one column of the grid and its images are
    stacked top to bottom. Columns are ordered by ascending cluster size.
    The whole grid is assembled into one array and drawn with a single
    ``imshow()`` call.

    For interactive work, use :func:`visualize` instead.

    Parameters
    ----------
    clusters : see :func:`imagecluster.cluster`
    ias : see :func:`imagecluster.image_arrays`
        Indexed by the file names stored in `clusters`. The cell size of
        the grid is taken from the first entry; all images are assumed to
        share that shape.
    maxelem : int, optional
        Upper limit for the number of grid rows, i.e. at most this many
        images are shown per cluster. Default: no limit.

    Returns
    -------
    fig, ax
        The matplotlib figure and axes holding the grid.
    """
    stats = ic.cluster_stats(clusters)
    n_cols = sum(stats.values())
    n_rows = max(stats.keys())
    if maxelem is not None:
        n_rows = min(maxelem, n_rows)

    # Cell geometry (height, width) comes from the first image in `ias`.
    first_key = list(ias.keys())[0]
    shape = ias[first_key].shape[:2]
    cell_h = shape[0]
    cell_w = shape[1]

    # White canvas; cells of clusters shorter than n_rows stay white.
    canvas = np.full((n_rows * cell_h, n_cols * cell_w, 3), 255, dtype=int)

    # One column per cluster, walking the cluster sizes in ascending order.
    columns = (
        members
        for size in np.sort(list(clusters))
        for members in clusters[size]
    )
    for col_idx, members in enumerate(columns):
        col_span = slice(col_idx * cell_w, (col_idx + 1) * cell_w)
        for row_idx, filename in enumerate(members[:n_rows]):
            row_span = slice(row_idx * cell_h, (row_idx + 1) * cell_h)
            canvas[row_span, col_span, :] = ias[filename]

    # Scale the figure so that one grid cell is one inch tall.
    inches_per_pixel = 1 / cell_h
    figsize = np.array([canvas.shape[1], canvas.shape[0]]) * inches_per_pixel
    fig, ax = plt.subplots(figsize=figsize)
    ax.imshow(canvas)
    ax.axis('off')
    fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
    return fig, ax
|
||
|
||
def visualize(*args, **kwds):
    """Interactive variant of :func:`plot_clusters`.

    All arguments are forwarded unchanged to :func:`plot_clusters`; the
    resulting figure is then displayed with ``plt.show()``. Nothing is
    returned.
    """
    # The (fig, ax) pair returned by plot_clusters() is deliberately dropped.
    plot_clusters(*args, **kwds)
    plt.show()
|
||
|
||
def _unique_link_path(directory, name):
    """Return a path for `name` inside `directory` that is not taken yet.

    If `name` is already present, a numeric suffix is inserted before the
    file extension (``img.png`` -> ``img_1.png``, ``img_2.png``, ...).
    """
    candidate = os.path.join(directory, name)
    stem, ext = os.path.splitext(name)
    counter = 0
    # lexists() also detects dangling symlinks, which exists() would miss.
    while os.path.lexists(candidate):
        counter += 1
        candidate = os.path.join(
            directory, '{}_{}{}'.format(stem, counter, ext))
    return candidate


def make_links(clusters, cluster_dr):
    """Create a directory tree of symlinks that mirrors `clusters`.

    The layout is::

        <cluster_dr>/cluster_with_<nelem>/cluster_<index>/<file basename>

    where each link points to the absolute path of the original file. An
    existing `cluster_dr` is deleted first, so the tree always reflects
    only the current `clusters`.

    Files in the same cluster that share a basename (e.g. they live in
    different source directories) do not clash: later ones get a numeric
    suffix such as ``img_1.png``.

    Parameters
    ----------
    clusters : dict
        Maps the cluster size `nelem` to a list of clusters, each cluster
        being a sequence of file names.
    cluster_dr : str
        Root directory of the link tree. Removed and re-created.
    """
    print("cluster dir: {}".format(cluster_dr))
    if os.path.exists(cluster_dr):
        shutil.rmtree(cluster_dr)
    for nelem, group in clusters.items():
        for iclus, cluster in enumerate(group):
            dr = os.path.join(cluster_dr,
                              'cluster_with_{}'.format(nelem),
                              'cluster_{}'.format(iclus))
            for fn in cluster:
                # Created lazily so that an empty cluster leaves no directory.
                os.makedirs(dr, exist_ok=True)
                link = _unique_link_path(dr, os.path.basename(fn))
                os.symlink(os.path.abspath(fn), link)