Skip to content
This repository has been archived by the owner on Dec 13, 2024. It is now read-only.

Commit

Permalink
ENH: Add optional PCA before clustering in main
Browse files Browse the repository at this point in the history
  • Loading branch information
elcorto committed Feb 18, 2019
1 parent 0a4da92 commit 1cafed3
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 2 deletions.
2 changes: 1 addition & 1 deletion examples/example_main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from imagecluster import main

main.main('pics/', sim=0.65, vis=True, maxelem=30)
main.main('pics/', sim=0.65, vis=True, maxelem=10, pca=True)
17 changes: 16 additions & 1 deletion imagecluster/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
import os
from collections import OrderedDict

import numpy as np
from sklearn.decomposition import PCA

from imagecluster import calc as ic
from imagecluster import common as co
Expand All @@ -11,7 +15,7 @@


def main(imagedir, sim=0.5, layer='fc2', size=(224,224), links=True, vis=False,
maxelem=None):
maxelem=None, pca=False, pca_params=dict(n_components=0.9)):
"""Example main app using this library.
Upon first invocation, the image and fingerprint databases are built and
Expand Down Expand Up @@ -40,6 +44,10 @@ def main(imagedir, sim=0.5, layer='fc2', size=(224,224), links=True, vis=False,
plot images in clusters
maxelem : max number of images per cluster for visualization (see
:mod:`~postproc`)
pca : bool
Perform PCA on fingerprints before clustering, using `pca_params`.
pca_params : dict
Keyword arguments passed to sklearn's PCA (``sklearn.decomposition.PCA``).
Notes
-----
Expand Down Expand Up @@ -67,6 +75,13 @@ def main(imagedir, sim=0.5, layer='fc2', size=(224,224), links=True, vis=False,
else:
print(f"loading fingerprints database {fps_fn} ...")
fps = co.read_pk(fps_fn)
if pca:
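# Dicts preserve insertion order since Python 3.7 (in CPython 3.6 as an
# implementation detail), but use OrderedDict anyway so that the pairing of
# keys with PCA-transformed rows is explicit.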
_fps = OrderedDict(fps)
X = np.array(list(_fps.values()))
Xp = PCA(**pca_params).fit(X).transform(X)
fps = {k:v for k,v in zip(_fps.keys(), Xp)}
print("pca dims:", Xp.shape[1])
print("clustering ...")
clusters = ic.cluster(fps, sim)
if links:
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ numpy
tensorflow
keras
Pillow
scikit-learn

0 comments on commit 1cafed3

Please sign in to comment.