Skip to content

Commit

Permalink
Merge branch 'emm_iters_exps'
Browse files Browse the repository at this point in the history
- Publish final implementations used for ICML camera ready
  • Loading branch information
pdpiech committed May 26, 2016
2 parents 78e51ee + db77fe6 commit 648c3b0
Show file tree
Hide file tree
Showing 19 changed files with 321 additions and 1,122 deletions.
60 changes: 59 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,62 @@
# ignore compilation files
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints
69 changes: 35 additions & 34 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,34 +1,35 @@
PrefPy
======

Rank aggregation algorithms in the computer science field of computational social choice


What's New
==========

- Generalized method of moments algorithm for mixtures of Plackett-Luce models
- Implementation of EMM algorithm for mixtures of Plackett-Luce by Gormley & Murphy


Work In Progress
================

- This is an initial version of the Python package form, further structural changes will be coming
- Module naming conventions will be changed; currently the algorithm files take the initials of the names of the papers from which they originate (e.g. "gmmra" for Generalized Method of Moments for Rank Aggregation)
- Mixture Model for Plackett-Luce EMM algorithm by Gormley & Murphy is forthcoming pending verification and testing of the method
- Random utility model algorithms (verification of the implementation needs to be completed)


Installation
============

- Use of MATLAB optimization in this package requires Python 3.4 due to lack of support yet for Python 3.5 by the MATLAB Engine

Install by running setup.py with Python 3.4 (or greater) with the command

python3 setup.py install

Symlink install while developing to keep changes in the code instead with the command

python3 setup.py develop
PrefPy
======

Rank aggregation algorithms in the computer science field of computational social choice


What's New
==========

- Experiments and tests have been factored out of the repository (now located at https://github.com/pdpiech/prefpy-experiments)
- Generalized method of moments algorithm for mixtures of Plackett-Luce models
- Implementation of EMM algorithm for mixtures of Plackett-Luce by Gormley & Murphy


Work In Progress
================

- This is an initial version of the Python package form, further structural changes will be coming
- Module naming conventions will be changed; currently the algorithm files take the initials of the names of the papers from which they originate (e.g. "gmmra" for Generalized Method of Moments for Rank Aggregation)
- Mixture Model for Plackett-Luce EMM algorithm by Gormley & Murphy is forthcoming pending verification and testing of the method
- Random utility model algorithms (verification of the implementation needs to be completed)


Installation
============

- Use of MATLAB optimization in this package requires Python 3.4 due to lack of support yet for Python 3.5 by the MATLAB Engine

Install by running setup.py with Python 3.4 (or greater) with the command

python3 setup.py install

Symlink install while developing to keep changes in the code instead with the command

python3 setup.py develop
15 changes: 15 additions & 0 deletions prefpy/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from . import aggregate
from . import distributions
from . import evbwie
from . import gmm_mixpl_moments
from . import gmm_mixpl_objectives
from . import gmm_mixpl
from . import gmmra
from . import mmgbtl
from . import plackettluce
from . import stats
from . import util

__all__ = ["aggregate", "distributions", "evbwie", "gmm_mixpl_moments",
"gmm_mixpl_objectives", "gmm_mixpl", "gmmra", "mmgbtl",
"plackettluce", "stats", "util"]
20 changes: 12 additions & 8 deletions prefpy/evbwie.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
# A Mixture Modeling Approach by Gormley and Murphy, 2008

import numpy as np
import aggregate
import plackettluce as pl
import stats
from . import aggregate
from . import plackettluce as pl
from . import stats


class EMMMixPLResult:
Expand Down Expand Up @@ -107,11 +107,12 @@ def aggregate(self, rankings, K, epsilon, max_iters, epsilon_mm, max_iters_mm):
z_h1[i][k] = (pi_h[k] * EMMMixPLAggregator.f(x[i], p_h[k])) / denom_sum

# M-Step:
#for l in range(max_iters_mm):
for l in range(int(g/50) + 5):
for l in range(max_iters_mm):
#for l in range(int(g/50) + 5):
for k in range(K):
normconst = 0
pi_h1[k] = np.sum(z_h1.T[k]) / len(z_h1)
if l == 0: # only need to compute pi at first MM iteration
pi_h1[k] = np.sum(z_h1.T[k]) / len(z_h1)
for j in range(self.m):
omega_k_j = EMMMixPLAggregator.omega(k, j, z_h1, x) # numerator
denom_sum = 0
Expand All @@ -132,15 +133,18 @@ def aggregate(self, rankings, K, epsilon, max_iters, epsilon_mm, max_iters_mm):
p_h1[k][j] /= normconst

if (epsilon_mm != None and
np.all(np.absolute(p_h1 - p_h) < epsilon_mm) and
np.all(np.absolute(pi_h1 - pi_h) < epsilon_mm)):
np.all(np.absolute(p_h1 - p_h) < epsilon_mm)):
break

p_h = np.copy(p_h1) # deep copy p for next MM iteration
# pi does not change across MM iterations, no copy needed

if (epsilon != None and
np.all(np.absolute(p_h1 - p_h) < epsilon) and
np.all(np.absolute(pi_h1 - pi_h) < epsilon)):
break

# remember that assignments below are references only, not copies
p_h = p_h1
pi_h = pi_h1

Expand Down
184 changes: 184 additions & 0 deletions prefpy/evbwie1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
# Implementation of algorithm (2) from
# Exploring Voting Blocs Within the Irish Electorate:
# A Mixture Modeling Approach by Gormley and Murphy, 2008

import numpy as np
from . import aggregate
from . import plackettluce as pl
from . import stats


class EMMMixPLResult:
    """Record of a single EMM mixture-of-Plackett-Luce aggregation run.

    Plain value object: every constructor argument is stored verbatim as an
    attribute of the same name.
    """

    def __init__(self, num_alts, num_votes, num_mix, true_params, epsilon,
                 max_iters, epsilon_mm, max_iters_mm, init_guess, soln_params,
                 runtime):
        # Problem dimensions.
        self.num_alts = num_alts        # number of alternatives (m)
        self.num_votes = num_votes      # number of votes (n)
        self.num_mix = num_mix          # number of mixture components (K)
        # Ground truth and stopping criteria used for the run.
        self.true_params = true_params
        self.epsilon = epsilon          # outer EM convergence threshold
        self.max_iters = max_iters      # outer EM iteration cap
        self.epsilon_mm = epsilon_mm    # inner MM convergence threshold
        self.max_iters_mm = max_iters_mm  # inner MM iteration cap
        # Outcome of the run.
        self.init_guess = init_guess    # random starting point used
        self.soln_params = soln_params  # parameters at convergence
        self.runtime = runtime          # wall-clock duration of the run

class EMMMixPLAggregator(aggregate.RankAggregator):
    """EMM aggregator for a K-component mixture of Plackett-Luce models.

    Implements algorithm (2) from Gormley & Murphy (2008). The helper
    methods below take no ``self`` and are always invoked through the class
    (``EMMMixPLAggregator.c(...)`` etc.), i.e. they are used as static
    functions even though they are not decorated as such.
    """

    def c(x_i, j):
        """Return the alternative ranked at position j of vote x_i,
        or -1 when the (possibly partial) ranking has no position j.
        """
        try:
            return x_i[j]
        except IndexError:
            return -1

    def f(x_i, p):
        """Plackett-Luce probability of observing ranking x_i under
        parameter vector p: product over positions t of
        p[c(x_i, t)] / sum_{s >= t} p[c(x_i, s)].

        NOTE(review): when x_i is shorter than p, c() returns -1 and
        p[-1] silently selects the last alternative — presumably only
        full rankings are passed here; confirm against callers.
        """
        prod = 1
        for t in range(len(x_i)):
            denom_sum = 0
            # Normalizer: total weight of alternatives not yet ranked
            # before position t.
            for s in range(t, len(p)):
                denom_sum += p[EMMMixPLAggregator.c(x_i, s)]
            prod *= p[EMMMixPLAggregator.c(x_i, t)] / denom_sum
        return prod

    def indic(j, x_i, s):
        """0/1 indicator: 1 iff alternative j occupies position s of vote x_i."""
        flag = j == EMMMixPLAggregator.c(x_i, s)
        if flag:
            return 1
        else:
            return 0

    def delta(x_i, j, s, N):
        """ delta_i_j_s: 1 if alternative j is ranked at position s of
        vote x_i; also 1 at the sentinel position s == N when j does not
        appear anywhere in x_i (i.e. unranked alternatives are lumped
        into a virtual last position); 0 otherwise.
        """
        flag = j == EMMMixPLAggregator.c(x_i, s)
        if flag and s < len(x_i):
            return 1
        elif s == N:
            # j was not matched at s; check whether it appears at all.
            found_equal = False
            for l in range(len(x_i)):
                if j == EMMMixPLAggregator.c(x_i, l):
                    found_equal = True
                    break
            if not found_equal:
                return 1
        return 0

    def omega(k, j, z, x):
        """ omega_k_j: responsibility-weighted count of how often
        alternative j appears in the votes, weighted by membership
        probabilities z[i][k] of component k.
        """
        sum_out = 0
        for i in range(len(x)):
            sum_in = 0
            for t in range(len(x[i])):
                sum_in += z[i][k] * EMMMixPLAggregator.indic(j, x[i], t)
            sum_out += sum_in
        return sum_out

    def aggregate(self, rankings, K, epsilon, tot_iters, epsilon_mm, max_iters_em):
        """Fit a K-component mixture of Plackett-Luce models by EM, with an
        inner MM (minorize-maximize) loop for the M-step.

        Parameters:
            rankings    -- list of votes (each a sequence of alternative indices)
            K           -- number of mixture components
            epsilon     -- outer EM convergence threshold (or None to disable)
            tot_iters   -- total budget of MM iterations across all EM steps
            epsilon_mm  -- inner MM convergence threshold (or None to disable)
            max_iters_em -- maximum number of outer EM iterations

        Returns (pi_h1, p_h1, pi_h0, p_h0): final mixing proportions and
        component parameters, plus the random initial guess.
        """
        x = rankings # shorter pseudonym for voting data
        self.n = len(rankings) # number of votes

        # pre-compute the delta values (they depend only on the data)
        delta_i_j_s = np.empty((self.n, self.m, self.m + 1))
        for i in range(self.n):
            for j in range(self.m):
                for s in range(self.m + 1):
                    delta_i_j_s[i][j][s] = EMMMixPLAggregator.delta(x[i], j, s, self.m)

        # generate initial values for p and pi:
        # each component's p row and the pi vector are normalized to sum to 1
        p_h0 = np.random.rand(K, self.m)
        p_h0 /= np.sum(p_h0, axis=1, keepdims=True)

        pi_h0 = np.random.rand(K)
        pi_h0 /= np.sum(pi_h0)

        p_h = np.copy(p_h0)
        pi_h = np.copy(pi_h0)

        # Baseline number of inner MM iterations per EM step; the leftover
        # budget (tot_iters not divisible by max_iters_em) is spread across
        # the remaining EM steps below.
        inner = tot_iters // max_iters_em

        for g in range(max_iters_em):
        #for g in range(max_iters):

            p_h1 = np.empty((K, self.m))
            pi_h1 = np.empty(K)
            z_h1 = np.empty((self.n, K))

            # E-Step: posterior membership probability of vote i in component k
            for i in range(self.n):
                for k in range(K):
                    denom_sum = 0
                    for k2 in range(K):
                        denom_sum += pi_h[k2] * EMMMixPLAggregator.f(x[i], p_h[k2])
                    z_h1[i][k] = (pi_h[k] * EMMMixPLAggregator.f(x[i], p_h[k])) / denom_sum

            # M-Step:
            # If running one extra MM iteration now still fits within the
            # total budget (assuming remaining EM steps get inner+1 too),
            # grow the per-step allowance.
            test = (g + 1) * inner + (max_iters_em - g - 1) * (inner + 1)
            if test < tot_iters:
                inner += 1
            for l in range(inner):
            #for l in range(max_iters_mm):
            #for l in range(int(g/50) + 5):
                for k in range(K):
                    normconst = 0
                    if l == 0: # only need to compute pi at first MM iteration
                        pi_h1[k] = np.sum(z_h1.T[k]) / len(z_h1)
                    for j in range(self.m):
                        omega_k_j = EMMMixPLAggregator.omega(k, j, z_h1, x) # numerator
                        denom_sum = 0
                        for i in range(self.n):
                            sum1 = 0
                            for t in range(len(x[i])):
                                sum2 = 0
                                sum3 = 0
                                # sum2: normalizer over alternatives still
                                # available at position t; sum3: delta terms
                                for s in range(t, self.m):
                                    sum2 += p_h[k][EMMMixPLAggregator.c(x[i], s)]
                                for s in range(t, self.m + 1):
                                    sum3 += delta_i_j_s[i][j][s]
                                sum1 += z_h1[i][k] * (sum2 ** -1) * sum3
                            denom_sum += sum1
                        p_h1[k][j] = omega_k_j / denom_sum
                        normconst += p_h1[k][j]
                    # renormalize component k's parameters to sum to 1
                    for j in range(self.m):
                        p_h1[k][j] /= normconst

                # MM convergence check on p only (pi is fixed within MM)
                if (epsilon_mm != None and
                    np.all(np.absolute(p_h1 - p_h) < epsilon_mm)):
                    break

                p_h = np.copy(p_h1) # deep copy p for next MM iteration
                # pi does not change across MM iterations, no copy needed

            # outer EM convergence check on both p and pi
            if (epsilon != None and
                np.all(np.absolute(p_h1 - p_h) < epsilon) and
                np.all(np.absolute(pi_h1 - pi_h) < epsilon)):
                break

            # remember that assignments below are references only, not copies
            p_h = p_h1
            pi_h = pi_h1

        return (pi_h1, p_h1, pi_h0, p_h0)

def main():
    """Demo driver: generate a synthetic 2-component mixture-of-Plackett-Luce
    dataset, fit it with the EMM algorithm, and report the weighted SSE
    against the ground truth.
    """
    n = 100  # number of votes
    m = 4    # number of alternatives
    k = 2    # number of mixture components
    cand_set = np.arange(m)
    #np.random.seed(0)
    params, votes = pl.generate_mix2pl_dataset(n, m, useDirichlet=True)
    print("Ground-Truth Parameters:\n" + str(params))
    print("EMM Algorithm:")

    emmagg = EMMMixPLAggregator(cand_set)
    # Fixed: keyword names now match aggregate()'s signature (tot_iters /
    # max_iters_em, not the old max_iters / max_iters_mm, which raised a
    # TypeError), all four returned values are unpacked (aggregate() also
    # returns the random initial guess), and K uses the local k constant.
    pi, p, pi0, p0 = emmagg.aggregate(votes, K=k, epsilon=1e-8,
                                      tot_iters=1000, epsilon_mm=1e-8,
                                      max_iters_em=10)

    # Flatten the solution into [alpha, p_1 (m values), p_2 (m values)].
    sol_params = np.empty(2*m+1)
    sol_params[0] = pi[0]
    sol_params[1:m+1] = p[0]
    sol_params[m+1:] = p[1]

    print("Ground-Truth Parameters:\n" + str(params))
    print("Final Solution:\n" + str(sol_params))
    print("\t\"1 - alpha\" = " + str(pi[1]))
    print("WSSE:\n" + str(stats.mix2PL_wsse(params, sol_params, m)))

# Run the demo only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Loading

0 comments on commit 648c3b0

Please sign in to comment.