Skip to content

Commit

Permalink
Merge branch 'emm_iters_exps'
Browse files Browse the repository at this point in the history
- Publish final implementations used for ICML camera ready
  • Loading branch information
pdpiech committed May 26, 2016
2 parents 78e51ee + db77fe6 commit 648c3b0
Show file tree
Hide file tree
Showing 19 changed files with 321 additions and 1,122 deletions.
60 changes: 59 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,62 @@
# ignore compilation files
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints
69 changes: 35 additions & 34 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,34 +1,35 @@
PrefPy
======

Rank aggregation algorithms in the computer science field of computational social choice


What's New
==========

- Generalized method of moments algorithm for mixtures of Plackett-Luce models
- Implementation of EMM algorithm for mixtures of Plackett-Luce by Gormley & Murphy


Work In Progress
================

- This is an initial version of the Python package form, further structural changes will be coming
- Module naming conventions will be changed; currently the algorithm files take the initials of the names of the papers from which they originate (e.g. "gmmra" for Generalized Method of Moments for Rank Aggregation)
- Mixture Model for Plackett-Luce EMM algorithm by Gormley & Murphy is forthcoming pending verification and testing of the method
- Random utility model algorithms (verification of the implementation needs to be completed)


Installation
============

- Use of MATLAB optimization in this package requires Python 3.4 due to lack of support yet for Python 3.5 by the MATLAB Engine

Install by running setup.py with Python 3.4 (or greater) with the command

python3 setup.py install

Symlink install while developing to keep changes in the code instead with the command

python3 setup.py develop
PrefPy
======

Rank aggregation algorithms in the computer science field of computational social choice


What's New
==========

- Experiments and tests have been factored out of the repository (now located at https://github.com/pdpiech/prefpy-experiments)
- Generalized method of moments algorithm for mixtures of Plackett-Luce models
- Implementation of EMM algorithm for mixtures of Plackett-Luce by Gormley & Murphy


Work In Progress
================

- This is an initial version of the Python package form, further structural changes will be coming
- Module naming conventions will be changed; currently the algorithm files take the initials of the names of the papers from which they originate (e.g. "gmmra" for Generalized Method of Moments for Rank Aggregation)
- Mixture Model for Plackett-Luce EMM algorithm by Gormley & Murphy is forthcoming pending verification and testing of the method
- Random utility model algorithms (verification of the implementation needs to be completed)


Installation
============

- Use of MATLAB optimization in this package requires Python 3.4 due to lack of support yet for Python 3.5 by the MATLAB Engine

Install by running setup.py with Python 3.4 (or greater) with the command

python3 setup.py install

Symlink install while developing to keep changes in the code instead with the command

python3 setup.py develop
15 changes: 15 additions & 0 deletions prefpy/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from . import aggregate
from . import distributions
from . import evbwie
from . import gmm_mixpl_moments
from . import gmm_mixpl_objectives
from . import gmm_mixpl
from . import gmmra
from . import mmgbtl
from . import plackettluce
from . import stats
from . import util

__all__ = ["aggregate", "distributions", "evbwie", "gmm_mixpl_moments",
"gmm_mixpl_objectives", "gmm_mixpl", "gmmra", "mmgbtl",
"plackettluce", "stats", "util"]
20 changes: 12 additions & 8 deletions prefpy/evbwie.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
# A Mixture Modeling Approach by Gormley and Murphy, 2008

import numpy as np
import aggregate
import plackettluce as pl
import stats
from . import aggregate
from . import plackettluce as pl
from . import stats


class EMMMixPLResult:
Expand Down Expand Up @@ -107,11 +107,12 @@ def aggregate(self, rankings, K, epsilon, max_iters, epsilon_mm, max_iters_mm):
z_h1[i][k] = (pi_h[k] * EMMMixPLAggregator.f(x[i], p_h[k])) / denom_sum

# M-Step:
#for l in range(max_iters_mm):
for l in range(int(g/50) + 5):
for l in range(max_iters_mm):
#for l in range(int(g/50) + 5):
for k in range(K):
normconst = 0
pi_h1[k] = np.sum(z_h1.T[k]) / len(z_h1)
if l == 0: # only need to compute pi at first MM iteration
pi_h1[k] = np.sum(z_h1.T[k]) / len(z_h1)
for j in range(self.m):
omega_k_j = EMMMixPLAggregator.omega(k, j, z_h1, x) # numerator
denom_sum = 0
Expand All @@ -132,15 +133,18 @@ def aggregate(self, rankings, K, epsilon, max_iters, epsilon_mm, max_iters_mm):
p_h1[k][j] /= normconst

if (epsilon_mm != None and
np.all(np.absolute(p_h1 - p_h) < epsilon_mm) and
np.all(np.absolute(pi_h1 - pi_h) < epsilon_mm)):
np.all(np.absolute(p_h1 - p_h) < epsilon_mm)):
break

p_h = np.copy(p_h1) # deep copy p for next MM iteration
# pi does not change across MM iterations, no copy needed

if (epsilon != None and
np.all(np.absolute(p_h1 - p_h) < epsilon) and
np.all(np.absolute(pi_h1 - pi_h) < epsilon)):
break

# remember that assignments below are references only, not copies
p_h = p_h1
pi_h = pi_h1

Expand Down
184 changes: 184 additions & 0 deletions prefpy/evbwie1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
# Implementation of algorithm (2) from
# Exploring Voting Blocs Within the Irish Electorate:
# A Mixture Modeling Approach by Gormley and Murphy, 2008

import numpy as np
from . import aggregate
from . import plackettluce as pl
from . import stats


class EMMMixPLResult:
    """Record of a single EMM mixture-of-Plackett-Luce aggregation run.

    Plain value object: every constructor argument is stored verbatim as an
    attribute of the same name.
    """

    def __init__(self, num_alts, num_votes, num_mix, true_params, epsilon,
                 max_iters, epsilon_mm, max_iters_mm, init_guess, soln_params,
                 runtime):
        # Problem dimensions.
        self.num_alts = num_alts        # number of alternatives (m)
        self.num_votes = num_votes      # number of votes (n)
        self.num_mix = num_mix          # number of mixture components (K)
        # Ground truth and stopping criteria used for the run.
        self.true_params = true_params
        self.epsilon = epsilon          # outer EM convergence threshold
        self.max_iters = max_iters      # outer EM iteration cap
        self.epsilon_mm = epsilon_mm    # inner MM convergence threshold
        self.max_iters_mm = max_iters_mm  # inner MM iteration cap
        # Outcome of the run.
        self.init_guess = init_guess    # random starting point used
        self.soln_params = soln_params  # parameters at convergence
        self.runtime = runtime          # wall-clock duration of the run

class EMMMixPLAggregator(aggregate.RankAggregator):
    """EMM aggregator for a K-component mixture of Plackett-Luce models.

    Implements algorithm (2) from Gormley & Murphy (2008). The helper
    methods below take no ``self`` and are always invoked through the class
    (``EMMMixPLAggregator.c(...)`` etc.), i.e. they are used as static
    functions even though they are not decorated as such.
    """

    def c(x_i, j):
        """Return the alternative ranked at position j of vote x_i,
        or -1 when the (possibly partial) ranking has no position j.
        """
        try:
            return x_i[j]
        except IndexError:
            return -1

    def f(x_i, p):
        """Plackett-Luce probability of observing ranking x_i under
        parameter vector p: product over positions t of
        p[c(x_i, t)] / sum_{s >= t} p[c(x_i, s)].

        NOTE(review): when x_i is shorter than p, c() returns -1 and
        p[-1] silently selects the last alternative — presumably only
        full rankings are passed here; confirm against callers.
        """
        prod = 1
        for t in range(len(x_i)):
            denom_sum = 0
            # Normalizer: total weight of alternatives not yet ranked
            # before position t.
            for s in range(t, len(p)):
                denom_sum += p[EMMMixPLAggregator.c(x_i, s)]
            prod *= p[EMMMixPLAggregator.c(x_i, t)] / denom_sum
        return prod

    def indic(j, x_i, s):
        """0/1 indicator: 1 iff alternative j occupies position s of vote x_i."""
        flag = j == EMMMixPLAggregator.c(x_i, s)
        if flag:
            return 1
        else:
            return 0

    def delta(x_i, j, s, N):
        """ delta_i_j_s: 1 if alternative j is ranked at position s of
        vote x_i; also 1 at the sentinel position s == N when j does not
        appear anywhere in x_i (i.e. unranked alternatives are lumped
        into a virtual last position); 0 otherwise.
        """
        flag = j == EMMMixPLAggregator.c(x_i, s)
        if flag and s < len(x_i):
            return 1
        elif s == N:
            # j was not matched at s; check whether it appears at all.
            found_equal = False
            for l in range(len(x_i)):
                if j == EMMMixPLAggregator.c(x_i, l):
                    found_equal = True
                    break
            if not found_equal:
                return 1
        return 0

    def omega(k, j, z, x):
        """ omega_k_j: responsibility-weighted count of how often
        alternative j appears in the votes, weighted by membership
        probabilities z[i][k] of component k.
        """
        sum_out = 0
        for i in range(len(x)):
            sum_in = 0
            for t in range(len(x[i])):
                sum_in += z[i][k] * EMMMixPLAggregator.indic(j, x[i], t)
            sum_out += sum_in
        return sum_out

    def aggregate(self, rankings, K, epsilon, tot_iters, epsilon_mm, max_iters_em):
        """Fit a K-component mixture of Plackett-Luce models by EM, with an
        inner MM (minorize-maximize) loop for the M-step.

        Parameters:
            rankings    -- list of votes (each a sequence of alternative indices)
            K           -- number of mixture components
            epsilon     -- outer EM convergence threshold (or None to disable)
            tot_iters   -- total budget of MM iterations across all EM steps
            epsilon_mm  -- inner MM convergence threshold (or None to disable)
            max_iters_em -- maximum number of outer EM iterations

        Returns (pi_h1, p_h1, pi_h0, p_h0): final mixing proportions and
        component parameters, plus the random initial guess.
        """
        x = rankings # shorter pseudonym for voting data
        self.n = len(rankings) # number of votes

        # pre-compute the delta values (they depend only on the data)
        delta_i_j_s = np.empty((self.n, self.m, self.m + 1))
        for i in range(self.n):
            for j in range(self.m):
                for s in range(self.m + 1):
                    delta_i_j_s[i][j][s] = EMMMixPLAggregator.delta(x[i], j, s, self.m)

        # generate initial values for p and pi:
        # each component's p row and the pi vector are normalized to sum to 1
        p_h0 = np.random.rand(K, self.m)
        p_h0 /= np.sum(p_h0, axis=1, keepdims=True)

        pi_h0 = np.random.rand(K)
        pi_h0 /= np.sum(pi_h0)

        p_h = np.copy(p_h0)
        pi_h = np.copy(pi_h0)

        # Baseline number of inner MM iterations per EM step; the leftover
        # budget (tot_iters not divisible by max_iters_em) is spread across
        # the remaining EM steps below.
        inner = tot_iters // max_iters_em

        for g in range(max_iters_em):
        #for g in range(max_iters):

            p_h1 = np.empty((K, self.m))
            pi_h1 = np.empty(K)
            z_h1 = np.empty((self.n, K))

            # E-Step: posterior membership probability of vote i in component k
            for i in range(self.n):
                for k in range(K):
                    denom_sum = 0
                    for k2 in range(K):
                        denom_sum += pi_h[k2] * EMMMixPLAggregator.f(x[i], p_h[k2])
                    z_h1[i][k] = (pi_h[k] * EMMMixPLAggregator.f(x[i], p_h[k])) / denom_sum

            # M-Step:
            # If running one extra MM iteration now still fits within the
            # total budget (assuming remaining EM steps get inner+1 too),
            # grow the per-step allowance.
            test = (g + 1) * inner + (max_iters_em - g - 1) * (inner + 1)
            if test < tot_iters:
                inner += 1
            for l in range(inner):
            #for l in range(max_iters_mm):
            #for l in range(int(g/50) + 5):
                for k in range(K):
                    normconst = 0
                    if l == 0: # only need to compute pi at first MM iteration
                        pi_h1[k] = np.sum(z_h1.T[k]) / len(z_h1)
                    for j in range(self.m):
                        omega_k_j = EMMMixPLAggregator.omega(k, j, z_h1, x) # numerator
                        denom_sum = 0
                        for i in range(self.n):
                            sum1 = 0
                            for t in range(len(x[i])):
                                sum2 = 0
                                sum3 = 0
                                # sum2: normalizer over alternatives still
                                # available at position t; sum3: delta terms
                                for s in range(t, self.m):
                                    sum2 += p_h[k][EMMMixPLAggregator.c(x[i], s)]
                                for s in range(t, self.m + 1):
                                    sum3 += delta_i_j_s[i][j][s]
                                sum1 += z_h1[i][k] * (sum2 ** -1) * sum3
                            denom_sum += sum1
                        p_h1[k][j] = omega_k_j / denom_sum
                        normconst += p_h1[k][j]
                    # renormalize component k's parameters to sum to 1
                    for j in range(self.m):
                        p_h1[k][j] /= normconst

                # MM convergence check on p only (pi is fixed within MM)
                if (epsilon_mm != None and
                    np.all(np.absolute(p_h1 - p_h) < epsilon_mm)):
                    break

                p_h = np.copy(p_h1) # deep copy p for next MM iteration
                # pi does not change across MM iterations, no copy needed

            # outer EM convergence check on both p and pi
            if (epsilon != None and
                np.all(np.absolute(p_h1 - p_h) < epsilon) and
                np.all(np.absolute(pi_h1 - pi_h) < epsilon)):
                break

            # remember that assignments below are references only, not copies
            p_h = p_h1
            pi_h = pi_h1

        return (pi_h1, p_h1, pi_h0, p_h0)

def main():
    """Demo driver: generate a synthetic 2-component mixture-of-Plackett-Luce
    dataset, fit it with the EMM algorithm, and report the weighted SSE
    against the ground truth.
    """
    n = 100  # number of votes
    m = 4    # number of alternatives
    k = 2    # number of mixture components
    cand_set = np.arange(m)
    #np.random.seed(0)
    params, votes = pl.generate_mix2pl_dataset(n, m, useDirichlet=True)
    print("Ground-Truth Parameters:\n" + str(params))
    print("EMM Algorithm:")

    emmagg = EMMMixPLAggregator(cand_set)
    # Fixed: keyword names now match aggregate()'s signature (tot_iters /
    # max_iters_em, not the old max_iters / max_iters_mm, which raised a
    # TypeError), all four returned values are unpacked (aggregate() also
    # returns the random initial guess), and K uses the local k constant.
    pi, p, pi0, p0 = emmagg.aggregate(votes, K=k, epsilon=1e-8,
                                      tot_iters=1000, epsilon_mm=1e-8,
                                      max_iters_em=10)

    # Flatten the solution into [alpha, p_1 (m values), p_2 (m values)].
    sol_params = np.empty(2*m+1)
    sol_params[0] = pi[0]
    sol_params[1:m+1] = p[0]
    sol_params[m+1:] = p[1]

    print("Ground-Truth Parameters:\n" + str(params))
    print("Final Solution:\n" + str(sol_params))
    print("\t\"1 - alpha\" = " + str(pi[1]))
    print("WSSE:\n" + str(stats.mix2PL_wsse(params, sol_params, m)))

# Run the demo only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Loading

0 comments on commit 648c3b0

Please sign in to comment.