diff --git a/doc/source/pages/analysis.rst b/doc/source/pages/analysis.rst
index 85659f2..2611f15 100644
--- a/doc/source/pages/analysis.rst
+++ b/doc/source/pages/analysis.rst
@@ -1 +1,7 @@
 .. automodule:: pbxplore.analysis
+
+K-means
+-------
+
+.. automodule:: pbxplore.analysis.kmeans
+   :members:
diff --git a/pbxplore/analysis/kmeans.py b/pbxplore/analysis/kmeans.py
new file mode 100644
index 0000000..66ea009
--- /dev/null
+++ b/pbxplore/analysis/kmeans.py
@@ -0,0 +1,255 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from __future__ import print_function, absolute_import
+
+# Imports for the clustering
+import random
+import sys
+import numpy
+import collections
+import copy
+
+from ..PB import NAMES
+
+
+def count_per_position_partial(sequences, seq_indices):
+    """
+    Count the population of each block at each position.
+
+    Parameters
+    ----------
+    sequences
+        a list of sequences
+    seq_indices
+        a list of indices; only the sequences from `sequences`
+        which have their index in `seq_indices` will be taken into
+        account.
+
+    Returns
+    -------
+    counts: list of collections.defaultdict
+        one defaultdict per position, with the blocks as keys and their
+        population as values; the default value is the integer 0.
+    """
+    counts = [collections.defaultdict(int)
+              for _ in range(len(sequences[0]))]
+    for seq_idx in seq_indices:
+        seq = sequences[seq_idx]
+        for block, count in zip(seq, counts):
+            count[block] += 1
+    return counts
+
+
+def make_profile_partial(sequences, seq_indices):
+    """
+    Calculate a frequency profile from a list of sequences.
+
+    A profile is the frequency of each block at each position.
+
+    Parameters
+    ----------
+    sequences
+        a list of sequences
+    seq_indices
+        a list of indices; only the sequences from `sequences`
+        which have their index in `seq_indices` will be taken into
+        account.
+
+    Returns
+    -------
+    profile: numpy array
+        the frequency profile as a numpy array with a row for each
+        block and a column for each position.
+    """
+    counts = count_per_position_partial(sequences, seq_indices)
+    profile = numpy.zeros((16, len(counts)))
+    for pos_idx, position in enumerate(counts):
+        for block_idx, block in enumerate(NAMES):
+            profile[block_idx, pos_idx] = position[block]
+    profile /= len(seq_indices)
+    return profile
+
+
+def compatibility(profile, sequence):
+    """
+    Compute the compatibility of a sequence with a profile.
+
+    Parameters
+    ----------
+    profile
+        a frequency profile as a numpy array with a row for each
+        block and a column for each position
+    sequence
+        the block sequence to test, as a string
+
+    Returns
+    -------
+    probability: float
+        cumulative probability of the given sequence given the profile
+    """
+    probabilities = numpy.zeros((profile.shape[1], ))
+    for pos_idx, block in enumerate(sequence):
+        block_idx = NAMES.find(block)
+        probabilities[pos_idx] = profile[block_idx, pos_idx]
+    return numpy.sum(probabilities)
+
+
+def _argmax(values):
+    """
+    Return the index of the maximum value of a sequence.
+    """
+    iter_values = iter(values)
+    argmax = 0
+    valmax = next(iter_values)
+    for arg, val in enumerate(iter_values, start=1):
+        if val > valmax:
+            valmax = val
+            argmax = arg
+    return argmax
+
+
+def attribute_center(centers, sequences):
+    """
+    Assign the most compatible center to each sequence.
+
+    For a list of frequency profiles and a list of sequences, assign to
+    each sequence the most compatible frequency profile.
+
+    Parameters
+    ----------
+    centers
+        a list of frequency profiles, each profile is a numpy array
+        with rows for the blocks and columns for the positions
+    sequences
+        a list of block sequences
+
+    Returns
+    -------
+    groups: list
+        a list of the profile index for each sequence
+    """
+    groups = [0 for _ in range(len(sequences))]
+    for seq_idx, sequence in enumerate(sequences):
+        compatibilities = [compatibility(center, sequence)
+                           for center in centers]
+        groups[seq_idx] = _argmax(compatibilities)
+    return groups
+
+
+def update_centers(sequences, groups, ngroups):
+    """
+    Calculate the frequency profile of each group.
+
+    Parameters
+    ----------
+    sequences
+        a list of block sequences
+    groups
+        a list with a group identifier for each sequence
+    ngroups
+        the number of groups; this number cannot be reliably deduced
+        from the `groups` list since some groups can be empty and
+        therefore missing from that list
+
+    Returns
+    -------
+    centers: list
+        a frequency profile for each group
+    """
+    grouped_indices = [[] for _ in range(ngroups)]
+    for seq_idx, group_idx in enumerate(groups):
+        grouped_indices[group_idx].append(seq_idx)
+    centers = []
+    for group in grouped_indices:
+        centers.append(make_profile_partial(sequences, group))
+    return centers
+
+
+def get_medoids(groups):
+    raise NotImplementedError
+
+
+def initial_centers(sequences, ngroups):
+    """
+    Create single-sequence frequency profiles from randomly chosen sequences.
+
+    Randomly choose `ngroups` sequences and build a single-sequence
+    frequency profile for each of them. These profiles are meant to be
+    used as initial centers for the clustering.
+
+    Parameters
+    ----------
+    sequences
+        a list of block sequences
+    ngroups
+        the number of profiles to build
+
+    Returns
+    -------
+    centers: list
+        a list of frequency profiles
+    """
+    centers = []
+    sample = random.sample(range(len(sequences)), ngroups)
+    assert len(sample) == len(set(sample)), \
+        'Redundancies in the initial sampling'
+    assert len([sequences[i] for i in sample]) \
+        == len(set([sequences[i] for i in sample])), \
+        'Redundancies in the initial sampling'
+    for sequence_idx in sample:
+        centers.append(make_profile_partial(sequences, [sequence_idx, ]))
+    return centers
+
+
+def _is_converged(old_centers, new_centers):
+    """
+    Return True if all `new_centers` are the same as the `old_centers`.
+    """
+    for old, new in zip(old_centers, new_centers):
+        if not numpy.all(old == new):
+            return False
+    return True
+
+
+def k_means(sequences, ngroups, max_iter, logfile=sys.stdout):
+    """
+    Carry out a K-means clustering on block sequences.
+
+    Cluster the sequences into `ngroups` groups. The center of each
+    group is computed as a frequency profile of the sequences in the
+    group. The distance metric used to assign a center to a sequence
+    is the probability of obtaining that sequence from the given
+    frequency profile.
+
+    Parameters
+    ----------
+    sequences
+        a list of block sequences to cluster
+    ngroups
+        the number of clusters to build
+    max_iter
+        the maximum number of iterations to run
+    logfile
+        a file object to write logs to, stdout by default
+
+    Returns
+    -------
+    groups: list
+        a list of cluster identifiers, one for each sequence
+    centers: list
+        the frequency profile of each cluster
+    convergence: bool
+        True if the clustering converged, else False
+    """
+    convergence = False
+    centers = initial_centers(sequences, ngroups)
+    for iteration in range(1, max_iter + 1):
+        groups = attribute_center(centers, sequences)
+        print(iteration, collections.Counter(groups), len(centers),
+              file=logfile)
+        new_centers = update_centers(sequences, groups, ngroups)
+        if _is_converged(centers, new_centers):
+            print('Convergence reached in {} iterations'
+                  .format(iteration), file=logfile)
+            convergence = True
+            break
+        centers = copy.copy(new_centers)
+    else:
+        print(('K-means reached {} iterations before '
+               'reaching convergence.').format(max_iter),
+              file=logfile)
+    return groups, centers, convergence
diff --git a/pbxplore/scripts/PBkmeans.py b/pbxplore/scripts/PBkmeans.py
new file mode 100644
index 0000000..5f113cc
--- /dev/null
+++ b/pbxplore/scripts/PBkmeans.py
@@ -0,0 +1,93 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Cluster protein structures based on their PB sequences using the k-means
+algorithm.
+
+2013 - P. Poulain, A. G. de Brevern
+"""
+
+
+# Use print as a function for python 3 compatibility
+from __future__ import print_function
+
+import argparse
+import os
+import sys
+
+import pbxplore as pbx
+from pbxplore.analysis import kmeans
+from pbxplore.scripts import PBclust
+
+try:
+    import builtins
+except ImportError:
+    import itertools
+    zip = itertools.izip
+else:
+    del builtins
+
+
+def write_kmeans_clusters(fname, headers, groups):
+    with open(fname, 'w') as outfile:
+        for header, group in zip(headers, groups):
+            print('{0}\t{1}'.format(header, group), file=outfile)
+
+
+def user_input():
+    """
+    Handle PBkmeans command line arguments
+    """
+    parser = argparse.ArgumentParser(
+        description=("Cluster protein structures based "
+                     "on their PB sequences using the k-means "
+                     "algorithm."))
+
+    # mandatory arguments
+    parser.add_argument("-f", action="append", required=True,
+                        help="name(s) of the PBs file (in fasta format)")
+    parser.add_argument("-o", action="store", required=True,
+                        help="name for results")
+    parser.add_argument("--clusters", action="store", type=int, required=True,
+                        help="number of wanted clusters")
+
+    # options
+    parser.add_argument("--max-iter", dest='max_iter', type=int, default=100,
+                        help="maximum number of iterations")
+
+    # get all arguments
+    options = parser.parse_args()
+
+    # test if the number of clusters is valid
+    if options.clusters <= 0:
+        parser.error("Number of clusters must be > 0.")
+
+    # check if input files exist
+    for name in options.f:
+        if not os.path.isfile(name):
+            parser.error("{0}: not a valid file. Bye".format(name))
Bye".format(name)) + + return options + + +def pbkmeans_cli(): + # Read user inputs + options = user_input() + header_lst, seq_lst = pbx.io.read_several_fasta(options.f) + + # Do the clustering + groups, _, convergeance = kmeans.k_means(seq_lst, + ngroups=options.clusters, + max_iter=options.max_iter) + + # Write the output + output_fname = options.o + ".PB.kmeans" + write_kmeans_clusters(output_fname, header_lst, seq_lst) + + # Write the report + PBclust.display_clust_report(groups) + + return 0 + +if __name__ =='__main__': + sys.exit(pbkmeans_cli()) diff --git a/pbxplore/test/test_functions.py b/pbxplore/test/test_functions.py index 8b4f138..14adac8 100644 --- a/pbxplore/test/test_functions.py +++ b/pbxplore/test/test_functions.py @@ -16,8 +16,11 @@ import collections import os +import numpy + import pbxplore as pbx from pbxplore.structure import structure +from pbxplore.analysis import kmeans here = os.path.abspath(os.path.dirname(__file__)) @@ -174,5 +177,130 @@ def test_read_fasta(self): 'mmmmmnoopacddddddehkllmmmmngoilmmmm' 'mmmmmmmmnopacdcddZZ']) + +class TestKMeans(unittest.TestCase): + """ + Test individual functions for K-Means clustering + """ + + def test_count_per_position_partial(self): + sequences = ['abcdef', 'bcdefg', 'cdefgh', + 'defghi', 'efghij', 'fghijk', # ignore in the test + 'ghijkl', 'hijklm', 'ijklmn', + 'ghijkl', 'hijklm', 'ijklmn', + 'jklmno', 'klmnop', ] # ignore in the test + indices = [0, 1, 2, 6, 7, 8, 9, 10, 11] + + # Shorten defaultdict call to have a nicer table + dd = collections.defaultdict + # Create a list with the right number of elements. The type of each + # element does not matter as the elements will be overwritten + ref_count = ['' for _ in range(6)] + # Fill the reference. Each element of the list corresponds to a + # position in the sequences. Only the sequences that have their index + # in the ``indices`` list will be used in the count. 
+        ref_count[0] = dd(int, {'a': 1, 'b': 1, 'c': 1, 'g': 2, 'h': 2, 'i': 2})
+        ref_count[1] = dd(int, {'b': 1, 'c': 1, 'd': 1, 'h': 2, 'i': 2, 'j': 2})
+        ref_count[2] = dd(int, {'c': 1, 'd': 1, 'e': 1, 'i': 2, 'j': 2, 'k': 2})
+        ref_count[3] = dd(int, {'d': 1, 'e': 1, 'f': 1, 'j': 2, 'k': 2, 'l': 2})
+        ref_count[4] = dd(int, {'e': 1, 'f': 1, 'g': 1, 'k': 2, 'l': 2, 'm': 2})
+        ref_count[5] = dd(int, {'f': 1, 'g': 1, 'h': 1, 'l': 2, 'm': 2, 'n': 2})
+
+        count = kmeans.count_per_position_partial(sequences, indices)
+        self.assertEqual(count, ref_count)
+
+    def test_make_profile_partial(self):
+        sequences = ['abcdef', 'bcdefg', 'cdefgh',
+                     'defghi', 'efghij', 'fghijk',  # ignored in the test
+                     'ghijkl', 'hijklm', 'ijklmn',
+                     'ghijkl', 'hijklm', 'ijklmn',
+                     'jklmno', 'klmnop',  # ignored in the test
+                     'ijklmn']
+        # Using 10 sequences makes the expected frequencies easier to write
+        indices = [0, 1, 2, 6, 7, 8, 9, 10, 11, 14]
+        ref_profile = numpy.array([[0.1, 0.0, 0.0, 0.0, 0.0, 0.0],   # a
+                                   [0.1, 0.1, 0.0, 0.0, 0.0, 0.0],   # b
+                                   [0.1, 0.1, 0.1, 0.0, 0.0, 0.0],   # c
+                                   [0.0, 0.1, 0.1, 0.1, 0.0, 0.0],   # d
+                                   [0.0, 0.0, 0.1, 0.1, 0.1, 0.0],   # e
+                                   [0.0, 0.0, 0.0, 0.1, 0.1, 0.1],   # f
+                                   [0.2, 0.0, 0.0, 0.0, 0.1, 0.1],   # g
+                                   [0.2, 0.2, 0.0, 0.0, 0.0, 0.1],   # h
+                                   [0.3, 0.2, 0.2, 0.0, 0.0, 0.0],   # i
+                                   [0.0, 0.3, 0.2, 0.2, 0.0, 0.0],   # j
+                                   [0.0, 0.0, 0.3, 0.2, 0.2, 0.0],   # k
+                                   [0.0, 0.0, 0.0, 0.3, 0.2, 0.2],   # l
+                                   [0.0, 0.0, 0.0, 0.0, 0.3, 0.2],   # m
+                                   [0.0, 0.0, 0.0, 0.0, 0.0, 0.3],   # n
+                                   [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],   # o
+                                   [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])  # p
+        profile = kmeans.make_profile_partial(sequences, indices)
+        self.assertTrue(numpy.allclose(ref_profile, profile))
+
+    def test_compatibility_identity(self):
+        profile = numpy.array([[1.0, 0.0, 0.0, 0.0],   # a
+                               [0.0, 1.0, 0.0, 0.0],   # b
+                               [0.0, 0.0, 1.0, 0.0],   # c
+                               [0.0, 0.0, 0.0, 1.0],   # d
+                               [0.0, 0.0, 0.0, 0.0],   # e
+                               [0.0, 0.0, 0.0, 0.0],   # f
+                               [0.0, 0.0, 0.0, 0.0],   # g
+                               [0.0, 0.0, 0.0, 0.0],   # h
+                               [0.0, 0.0, 0.0, 0.0],   # i
+                               [0.0, 0.0, 0.0, 0.0],   # j
+                               [0.0, 0.0, 0.0, 0.0],   # k
+                               [0.0, 0.0, 0.0, 0.0],   # l
+                               [0.0, 0.0, 0.0, 0.0],   # m
+                               [0.0, 0.0, 0.0, 0.0],   # n
+                               [0.0, 0.0, 0.0, 0.0],   # o
+                               [0.0, 0.0, 0.0, 0.0]])  # p
+        sequence = 'abcd'
+        reference_compatibility = len(sequence)
+        compatibility = kmeans.compatibility(profile, sequence)
+        self.assertAlmostEqual(compatibility, reference_compatibility)
+
+    def test_compatibility(self):
+        # The profile corresponds to the following sequences:
+        # - abcd    - ioph    - alep
+        # - jiop    - fgad    - hobe
+        # - abjp    - piep
+        # - hbno    - jojo
+        profile = numpy.array([[0.3, 0.0, 0.1, 0.0],   # a
+                               [0.0, 0.3, 0.1, 0.0],   # b
+                               [0.0, 0.0, 0.1, 0.0],   # c
+                               [0.0, 0.0, 0.0, 0.2],   # d
+                               [0.0, 0.1, 0.2, 0.1],   # e
+                               [0.1, 0.0, 0.0, 0.0],   # f
+                               [0.0, 0.1, 0.0, 0.0],   # g
+                               [0.2, 0.0, 0.0, 0.1],   # h
+                               [0.1, 0.2, 0.0, 0.0],   # i
+                               [0.2, 0.0, 0.2, 0.0],   # j
+                               [0.0, 0.0, 0.0, 0.0],   # k
+                               [0.0, 0.1, 0.0, 0.0],   # l
+                               [0.0, 0.0, 0.0, 0.0],   # m
+                               [0.0, 0.0, 0.1, 0.0],   # n
+                               [0.0, 0.2, 0.1, 0.2],   # o
+                               [0.1, 0.0, 0.1, 0.4]])  # p
+        # Sanity check: each column of the profile must sum to 1
+        self.assertTrue(numpy.allclose(profile.sum(axis=0),
+                                       numpy.ones((profile.shape[1],),
+                                                  dtype=profile.dtype)))
+        reference_sequences = (('abcd', 0.9), ('hipo', 0.7), ('bcda', 0.0),
+                               ('aaaa', 0.4), ('kkkk', 0.0), ('oooo', 0.5),)
+        for sequence, reference_compatibility in reference_sequences:
+            compatibility = kmeans.compatibility(profile, sequence)
+            self.assertAlmostEqual(compatibility, reference_compatibility)
+
+    def test_argmax(self):
+        reference = (([0, 1, 2, 3, 4], 4),  # Ordered
+                     ([4, 3, 2, 1, 0], 0),  # Reverse ordered
+                     ([1, 3, 2, 4, 0], 3),  # Random order
+                     ([0, 0, 3, 4, 4], 3),  # Duplicates
+                     ([4, 4, 4, 4, 4], 0),  # All the same
+                     )
+        for test_case, expectation in reference:
+            self.assertEqual(kmeans._argmax(test_case), expectation)
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/setup.py b/setup.py
index e30c176..0b338d8 100644
--- a/setup.py
+++ b/setup.py
@@ -73,6 +73,7 @@
             'PBclust = pbxplore.scripts.PBclust:pbclust_cli',
             'PBcount = pbxplore.scripts.PBcount:pbcount_cli',
             'PBstat = pbxplore.scripts.PBstat:pbstat_cli',
+            'PBkmeans = pbxplore.scripts.PBkmeans:pbkmeans_cli',
             'PBdata = pbxplore.scripts.PBdata:pbdata_cli',
         ],
     },
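
For reference, a minimal usage sketch of the `pbxplore.analysis.kmeans` module introduced by this patch; it assumes the patch is applied as-is, the fasta file name is only a placeholder, and error handling is omitted::

    import sys

    import pbxplore as pbx
    from pbxplore.analysis import kmeans

    # Read PB sequences from a fasta file produced beforehand
    # ('sequences.PB.fasta' is a placeholder name).
    headers, sequences = pbx.io.read_several_fasta(['sequences.PB.fasta'])

    # Cluster the sequences into 4 groups with at most 100 iterations.
    # k_means returns one cluster identifier per sequence, the frequency
    # profile of each cluster, and whether the clustering converged.
    groups, centers, converged = kmeans.k_means(sequences, ngroups=4,
                                                max_iter=100,
                                                logfile=sys.stderr)

    # One cluster identifier per input sequence, in the same order as headers.
    for header, group in zip(headers, groups):
        print('{0}\t{1}'.format(header, group))

The same clustering is exposed on the command line through the new `PBkmeans` entry point, e.g. `PBkmeans -f sequences.PB.fasta -o results --clusters 4` (again, the fasta file name is a placeholder).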