diff --git a/README.rst b/README.rst
index 194bb1f..8494930 100644
--- a/README.rst
+++ b/README.rst
@@ -20,6 +20,9 @@ or::

 Usage
 =====
+We use a pre-trained keras NN model. The weights will be downloaded *once* by
+keras automatically upon first use and placed into ``~/.keras/models/``.
+
 See ``imagecluster.main.main()`` for a usage example.

 If there is no fingerprints database, it will first run all images through the
@@ -28,50 +31,53 @@ the fingerprints and a similarity index (more details below).

 Example session::

-    >>> from imagecluster import main
-    >>> main.main('/path/to/testpics/', sim=0.5)
-    no fingerprints database /path/to/testpics/fingerprints.pk found
-    running all images thru NN model ...
-    /path/to/testpics/DSC_1061.JPG
-    /path/to/testpics/DSC_1080.JPG
-    ...
-    /path/to/testpics/DSC_1087.JPG
-    clustering ...
-    cluster dir: /path/to/testpics/clusters
-    items per cluster : number of such clusters
-    2 : 7
-    3 : 2
-    4 : 4
-    5 : 1
-    10 : 1
+    >>> from imagecluster import main
+    >>> main.main('/path/to/testpics/', sim=0.5)
+    no fingerprints database /path/to/testpics/fingerprints.pk found
+    running all images through NN model ...
+    /path/to/testpics/DSC_1061.JPG
+    /path/to/testpics/DSC_1080.JPG
+    ...
+    /path/to/testpics/DSC_1087.JPG
+    clustering ...
+    cluster dir: /path/to/testpics/clusters
+    items per cluster : number of such clusters
+    2 : 7
+    3 : 2
+    4 : 4
+    5 : 1
+    10 : 1

 Have a look at the clusters (as dirs with symlinks to the relevant files)::

-    $ tree /path/to/testpics
-    /path/to/testpics/clusters
-    ├── cluster_with_10
-    │   └── cluster_0
-    │       ├── DSC_1068.JPG -> /path/to/testpics/DSC_1068.JPG
-    │       ├── DSC_1070.JPG -> /path/to/testpics/DSC_1070.JPG
-    │       ├── DSC_1071.JPG -> /path/to/testpics/DSC_1071.JPG
-    │       ├── DSC_1072.JPG -> /path/to/testpics/DSC_1072.JPG
-    │       ├── DSC_1073.JPG -> /path/to/testpics/DSC_1073.JPG
-    │       ├── DSC_1074.JPG -> /path/to/testpics/DSC_1074.JPG
-    │       ├── DSC_1075.JPG -> /path/to/testpics/DSC_1075.JPG
-    │       ├── DSC_1076.JPG -> /path/to/testpics/DSC_1076.JPG
-    │       ├── DSC_1077.JPG -> /path/to/testpics/DSC_1077.JPG
-    │       └── DSC_1078.JPG -> /path/to/testpics/DSC_1078.JPG
-    ├── cluster_with_2
-    │   ├── cluster_0
-    │   │   ├── DSC_1037.JPG -> /path/to/testpics/DSC_1037.JPG
-    │   │   └── DSC_1038.JPG -> /path/to/testpics/DSC_1038.JPG
-    │   ├── cluster_1
-    │   │   ├── DSC_1053.JPG -> /path/to/testpics/DSC_1053.JPG
-    │   │   └── DSC_1054.JPG -> /path/to/testpics/DSC_1054.JPG
-    │   ├── cluster_2
-    │   │   ├── DSC_1046.JPG -> /path/to/testpics/DSC_1046.JPG
-    │   │   └── DSC_1047.JPG -> /path/to/testpics/DSC_1047.JPG
-    ...
+    $ tree /path/to/testpics
+    /path/to/testpics/clusters
+    ├── cluster_with_10
+    │   └── cluster_0
+    │       ├── DSC_1068.JPG -> /path/to/testpics/DSC_1068.JPG
+    │       ├── DSC_1070.JPG -> /path/to/testpics/DSC_1070.JPG
+    │       ├── DSC_1071.JPG -> /path/to/testpics/DSC_1071.JPG
+    │       ├── DSC_1072.JPG -> /path/to/testpics/DSC_1072.JPG
+    │       ├── DSC_1073.JPG -> /path/to/testpics/DSC_1073.JPG
+    │       ├── DSC_1074.JPG -> /path/to/testpics/DSC_1074.JPG
+    │       ├── DSC_1075.JPG -> /path/to/testpics/DSC_1075.JPG
+    │       ├── DSC_1076.JPG -> /path/to/testpics/DSC_1076.JPG
+    │       ├── DSC_1077.JPG -> /path/to/testpics/DSC_1077.JPG
+    │       └── DSC_1078.JPG -> /path/to/testpics/DSC_1078.JPG
+    ├── cluster_with_2
+    │   ├── cluster_0
+    │   │   ├── DSC_1037.JPG -> /path/to/testpics/DSC_1037.JPG
+    │   │   └── DSC_1038.JPG -> /path/to/testpics/DSC_1038.JPG
+    │   ├── cluster_1
+    │   │   ├── DSC_1053.JPG -> /path/to/testpics/DSC_1053.JPG
+    │   │   └── DSC_1054.JPG -> /path/to/testpics/DSC_1054.JPG
+    │   ├── cluster_2
+    │   │   ├── DSC_1046.JPG -> /path/to/testpics/DSC_1046.JPG
+    │   │   └── DSC_1047.JPG -> /path/to/testpics/DSC_1047.JPG
+    ...
+
+If you run this again on the same directory, only the clustering will be
+repeated.

 Methods
 =======
@@ -111,14 +117,14 @@ Now with NN-based fingerprints, we also cluster all sorts of images which have,
 e.g. mountains, tents, or beaches, so this is far better. However, if you run
 this on a large collection of images which contain images with tents or
 beaches, then the system won't recognize that certain images belong together
-because they were taken on the same trip. All tent images will be in one
-cluster, and so will all beaches images. This is probably b/c in this case, the
-classification of the image happens by looking at the background. A tent in the
-center of the image will always look the same, but it is the background which
-makes humans distinguish the context. The problem is: VGG16 and all the other
-popular networks have been trained on ridiculously small images of 224x224 size
-because of computational limitations, where it is impossible to recognize
-background details.
+because they were taken on the same trip, for instance. All tent images will be
+in one cluster, and so will all beach images. This is probably because in this
+case, the human classification of the image happens by looking at the
+background. A tent in the center of the image will always look the same, but it
+is the background which makes humans distinguish the context. The problem is:
+VGG16 and all the other popular networks have been trained on ridiculously
+small images of 224x224 size because of computational limitations, where it is
+impossible to recognize background details.

 Clustering
 ----------
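As a quick illustration of the method described above: the fingerprint of an
image is the activation of VGG16's second-to-last fully connected layer 'fc2'.
A minimal standalone sketch of that idea (the image path is a placeholder; it
mirrors get_model()/fingerprint() in the patch below, but uses keras' default
load_img() resizing for brevity)::

    import numpy as np
    from keras.applications.vgg16 import VGG16, preprocess_input
    from keras.preprocessing import image
    from keras.models import Model

    # VGG16 with the classification layer dropped: use the 4096-dim
    # 'fc2' activations as the image fingerprint
    base_model = VGG16(weights='imagenet', include_top=True)
    model = Model(inputs=base_model.input,
                  outputs=base_model.get_layer('fc2').output)

    # placeholder image path; any RGB image works
    img = image.load_img('/path/to/testpics/DSC_1061.JPG', target_size=(224, 224))
    arr = image.img_to_array(img)[None, ...]          # shape (1, 224, 224, 3)
    fp = model.predict(preprocess_input(arr))[0, :]   # fingerprint, shape (4096,)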
diff --git a/imagecluster/imagecluster.py b/imagecluster/imagecluster.py
index b5acc6e..dda818d 100644
--- a/imagecluster/imagecluster.py
+++ b/imagecluster/imagecluster.py
@@ -3,7 +3,7 @@
 import numpy as np
 from matplotlib import pyplot as plt

-import PIL.Image, os, multiprocessing, shutil
+import PIL.Image, os, multiprocessing, shutil, pickle
 from keras.applications.vgg16 import VGG16
 from keras.preprocessing import image
 from keras.applications.vgg16 import preprocess_input
@@ -12,6 +12,8 @@ pj = os.path.join


 def get_model():
+    """Keras Model of the VGG16 network, with the output layer set to the
+    second-to-last fully connected layer 'fc2' of shape (4096,)."""
     # base_model.summary():
     #     ....
     #     block5_conv4 (Conv2D)        (None, 15, 15, 512)       2359808
@@ -26,7 +28,6 @@ def get_model():
     #     _________________________________________________________________
     #     predictions (Dense)          (None, 1000)              4097000
     #
-    # model: get output from pre-last fully connected layer 'fc2'
     base_model = VGG16(weights='imagenet', include_top=True)
     model = Model(inputs=base_model.input,
                   outputs=base_model.get_layer('fc2').output)
@@ -34,6 +35,21 @@
 def fingerprint(fn, model, size):
+    """Load image from file `fn`, resize to `size` and run through `model`
+    (keras.models.Model).
+
+    Parameters
+    ----------
+    fn : str
+        filename
+    model : keras.models.Model instance
+    size : tuple
+        input image size (width, height), must match `model`, e.g. (224,224)
+
+    Returns
+    -------
+    fingerprint : 1d array
+    """
     # keras.preprocessing.image.load_img() uses img.rezize(shape) with the
     # default interpolation which is pretty bad (see
     # imagecluster/play/pil_resample_methods.py). Given that we are restricted
@@ -54,56 +70,45 @@ def fingerprint(fn, model, size):
     return model.predict(arr4d_pp)[0,:]


-def _worker(fn, model, size):
-    print(fn)
-    return fn, fingerprint(fn, model, size)
-
-
-def fingerprints(files, model, size=(224,224)):
-    # Cannot use multiprocessing:
-    # TypeError: can't pickle _thread.lock objects
-    # The error doesn't come from functools.partial since those objects are
-    # pickable since python3. The reason is the keras.model.Model, which is not
-    # pickable. However keras with tensorflow backend runs multithreaded
-    # (model.predict()), so we don't need that.
+# Cannot use multiprocessing:
+# TypeError: can't pickle _thread.lock objects
+# The error doesn't come from functools.partial since those objects are
+# picklable since python3. The reason is the keras.models.Model, which is not
+# picklable. However keras with tensorflow backend runs multi-threaded
+# (model.predict()), so we don't need that. I guess it will scale better if we
+# parallelize over images than to run a multi-threaded tensorflow on each
+# image, but OK. On low core counts (2-4), it won't matter.
+#
+##def _worker(fn, model, size):
+##    print(fn)
+##    return fn, fingerprint(fn, model, size)
+##
+##def fingerprints(files, model, size=(224,224)):
 ##    worker = functools.partial(_worker,
 ##                               model=model,
 ##                               size=size)
 ##    pool = multiprocessing.Pool(multiprocessing.cpu_count())
 ##    return dict(pool.map(worker, files))
-    return dict(_worker(fn, model, size) for fn in files)
-
-
-def make_links(clusters, cluster_dr):
-    # [[list_of_files], [list_of_files], ...]
-    clst_multi = [x for x in clusters.values() if len(x) > 1]
-
-    # {number_of_files1: [[list_of_files], [list_of_files],...],
-    #  number_of_files2: [[list_of_files],...],
-    # }
-    cdct_multi = {}
-    for x in clst_multi:
-        nn = len(x)
-        if not (nn in cdct_multi.keys()):
-            cdct_multi[nn] = [x]
-        else:
-            cdct_multi[nn].append(x)
-
-    print("cluster dir: {}".format(cluster_dr))
-    print("items per cluster : number of such clusters")
-    if os.path.exists(cluster_dr):
-        shutil.rmtree(cluster_dr)
-    for n_in_cluster in np.sort(list(cdct_multi.keys())):
-        cluster_list = cdct_multi[n_in_cluster]
-        print("{} : {}".format(n_in_cluster, len(cluster_list)))
-        for iclus, lst in enumerate(cluster_list):
-            dr = pj(cluster_dr,
-                    'cluster_with_{}'.format(n_in_cluster),
-                    'cluster_{}'.format(iclus))
-            for fn in lst:
-                link = pj(dr, os.path.basename(fn))
-                os.makedirs(os.path.dirname(link), exist_ok=True)
-                os.symlink(os.path.abspath(fn), link)
+def fingerprints(files, model, size=(224,224)):
+    """Calculate fingerprints for all `files`.
+
+    Parameters
+    ----------
+    files : sequence
+        image filenames
+    model, size : see :func:`fingerprint`
+
+    Returns
+    -------
+    fingerprint : dict
+        {filename1: array([...]),
+         filename2: array([...]),
+         ...
+        }
+    """
+    return dict((fn, fingerprint(fn, model, size)) for fn in files)


 def get_files(dr):
@@ -113,7 +118,9 @@ def cluster(files, fps, sim=0.5, method='average', metric='euclidean'):
     """Hierarchical clustering of images `files` based on image fingerprints
     `fps`.
-
+
+    Parameters
+    ----------
     files : list of file names
     sim : float 0..1
         similarity tolerance (1=max. allowed similarity tolerance, all images
@@ -126,6 +133,16 @@
         the same result
     metric : see scipy.hierarchy.linkage(), make sure to use 'euclidean' in
         case of method='centroid', 'median' or 'ward'
+
+    Returns
+    -------
+    clusters : dict
+        key = number of the cluster, value = list of filenames in the cluster
+        {1: [filename1, filename5],
+         2: [filename23],
+         3: [filename48, filename2, filename42, ...],
+         ...
+        }
     """
     dfps = distance.pdist(fps, metric)
     # hierarchical/agglomerative clustering (Z = linkage matrix, construct
@@ -139,8 +156,50 @@
     return clusters


+def make_links(clusters, cluster_dr):
+    # group all clusters (cluster = list_of_files) of equal size together
+    # {number_of_files1: [[list_of_files], [list_of_files],...],
+    #  number_of_files2: [[list_of_files],...],
+    # }
+    cdct_multi = {}
+    for x in (x for x in clusters.values() if len(x) > 1):
+        nn = len(x)
+        if not (nn in cdct_multi.keys()):
+            cdct_multi[nn] = [x]
+        else:
+            cdct_multi[nn].append(x)
+
+    print("cluster dir: {}".format(cluster_dr))
+    print("items per cluster : number of such clusters")
+    if os.path.exists(cluster_dr):
+        shutil.rmtree(cluster_dr)
+    for n_in_cluster in np.sort(list(cdct_multi.keys())):
+        cluster_list = cdct_multi[n_in_cluster]
+        print("{} : {}".format(n_in_cluster, len(cluster_list)))
+        for iclus, lst in enumerate(cluster_list):
+            dr = pj(cluster_dr,
+                    'cluster_with_{}'.format(n_in_cluster),
+                    'cluster_{}'.format(iclus))
+            for fn in lst:
+                link = pj(dr, os.path.basename(fn))
+                os.makedirs(os.path.dirname(link), exist_ok=True)
+                os.symlink(os.path.abspath(fn), link)
+
+
 def view_image_list(lst):
     for filename in lst:
         fig,ax = plt.subplots()
         ax.imshow(plt.imread(filename))
         plt.show()
+
+
+def read_pk(fn):
+    with open(fn, 'rb') as fd:
+        ret = pickle.load(fd)
+    return ret
+
+
+def write_pk(obj, fn):
+    with open(fn, 'wb') as fd:
+        pickle.dump(obj, fd)
+
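The `sim` argument of cluster() maps a similarity between 0 and 1 onto a
distance cutoff for scipy's flat clustering of the linkage matrix. The exact
cutoff formula is not visible in the hunks above, so the mapping below is only
an assumed illustration, using random stand-in fingerprints and made-up
filenames::

    import numpy as np
    from scipy.spatial import distance
    from scipy.cluster import hierarchy

    fps = np.random.rand(10, 4096)            # stand-in for real fingerprints
    files = ['img_{}.jpg'.format(i) for i in range(10)]   # made-up filenames

    dfps = distance.pdist(fps, 'euclidean')   # condensed pairwise distances
    Z = hierarchy.linkage(dfps, method='average')

    sim = 0.5
    cut = dfps.max() * (1.0 - sim)            # assumed similarity -> distance mapping
    labels = hierarchy.fcluster(Z, t=cut, criterion='distance')

    # group filenames by cluster label, like the dict cluster() returns
    clusters = {}
    for fn, lab in zip(files, labels):
        clusters.setdefault(int(lab), []).append(fn)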
diff --git a/imagecluster/main.py b/imagecluster/main.py
index f7bce90..11870aa 100644
--- a/imagecluster/main.py
+++ b/imagecluster/main.py
@@ -1,24 +1,30 @@
-import os, pickle
+import os
 import numpy as np
 from imagecluster import imagecluster as ic

 pj = os.path.join

+
 def main(imagedir, sim=0.5):
-    """Example main app using this library. """
+    """Example main app using this library.
+
+    Parameters
+    ----------
+    imagedir : str
+        path to directory with images
+    sim : float (0..1)
+        similarity index (see imagecluster.cluster())
+    """
     dbfn = pj(imagedir, 'fingerprints.pk')
     if not os.path.exists(dbfn):
         print("no fingerprints database {} found".format(dbfn))
         files = ic.get_files(imagedir)
         model = ic.get_model()
-        print("running all images thru NN model ...".format(dbfn))
+        print("running all images through NN model ...")
         fps = ic.fingerprints(files, model, size=(224,224))
-        with open(dbfn, 'wb') as fd:
-            pickle.dump(fps, fd)
-            fd.close()
+        ic.write_pk(fps, dbfn)
     else:
         print("loading fingerprints database {} ...".format(dbfn))
-        with open(dbfn, 'rb') as fd:
-            fps = pickle.load(fd)
+        fps = ic.read_pk(dbfn)
     print("clustering ...")
     clusters = ic.cluster(list(fps.keys()), np.array(list(fps.values())),
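With these changes, main.main() is a thin wrapper around the library functions
shown above. A hand-rolled session doing the same steps might look like this
sketch (the directory is a placeholder; sim=0.5 as in the README example)::

    import os
    import numpy as np
    from imagecluster import imagecluster as ic

    imagedir = '/path/to/testpics/'
    dbfn = os.path.join(imagedir, 'fingerprints.pk')

    files = ic.get_files(imagedir)      # collect image filenames
    model = ic.get_model()              # VGG16 truncated at the 'fc2' layer
    fps = ic.fingerprints(files, model, size=(224, 224))
    ic.write_pk(fps, dbfn)              # cache fingerprints for the next run

    # fps maps filename -> 1d fingerprint; cluster on the stacked array
    clusters = ic.cluster(list(fps.keys()), np.array(list(fps.values())),
                          sim=0.5)
    ic.make_links(clusters, os.path.join(imagedir, 'clusters'))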