Skip to content

Commit

Permalink
restructuring release
Browse files Browse the repository at this point in the history
  • Loading branch information
lukedeo committed Mar 19, 2017
1 parent a43b2a6 commit 886afc6
Show file tree
Hide file tree
Showing 10 changed files with 722 additions and 79 deletions.
62 changes: 0 additions & 62 deletions models/lagan/generator.py

This file was deleted.

150 changes: 150 additions & 0 deletions models/manifolds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
from joblib import Parallel, delayed
import numpy as np
import math


# form the grids you need to represent the eta, phi coordinates
grid = 0.5 * (np.linspace(-1.25, 1.25, 26)[:-1] + np.linspace(-1.25, 1.25, 26)[1:])
eta = np.tile(grid, (25, 1))
phi = np.tile(grid[::-1].reshape(-1, 1), (1, 25))


def discrete_mass(jet_image):
'''
Calculates the jet mass from a pixelated jet image
Args:
-----
jet_image: numpy ndarray of dim (1, 25, 25)
Returns:
--------
M: float, jet mass
'''
Px = np.sum(jet_image * np.cos(phi), axis=(1, 2))
Py = np.sum(jet_image * np.sin(phi), axis=(1, 2))
Pz = np.sum(jet_image * np.sinh(eta), axis=(1, 2))
E = np.sum(jet_image * np.cosh(eta), axis=(1, 2))
PT2 = np.square(Px) + np.square(Py)
M2 = np.square(E) - (PT2 + np.square(Pz))
M = np.sqrt(M2)
return M


def discrete_pt(jet_image):
'''
Calculates the jet transverse momentum from a pixelated jet image
Args:
-----
jet_image: numpy ndarray of dim (1, 25, 25)
Returns:
--------
float, jet transverse momentum
'''
Px = np.sum(jet_image * np.cos(phi), axis=(1, 2))
Py = np.sum(jet_image * np.sin(phi), axis=(1, 2))
return np.sqrt(np.square(Px) + np.square(Py))


def dphi(phi1, phi2):
'''
Calculates the difference between two angles avoiding |phi1 - phi2| > 180 degrees
'''
import math
return math.acos(math.cos(abs(phi1 - phi2)))


def _tau1(jet_image):
'''
Calculates the normalized tau1 from a pixelated jet image
Args:
-----
jet_image: numpy ndarray of dim (1, 25, 25)
Returns:
--------
float, normalized jet tau1
'''
# find coordinate of most energetic pixel, then use formula to compute tau1
tau1_axis_eta = eta.ravel()[np.argmax(jet_image)]
tau1_axis_phi = phi.ravel()[np.argmax(jet_image)]
tau1 = np.sum(jet_image *
np.sqrt(np.square(tau1_axis_eta - eta) +
np.square([dphi(tau1_axis_phi, p) for p in phi.ravel()]).reshape(25, 25))
)
return tau1 / np.sum(jet_image) # normalize by the total intensity


def _tau2(jet_image):
'''
Calculates the normalized tau2 from a pixelated jet image
Args:
-----
jet_image: numpy ndarray of dim (1, 25, 25)
Returns:
--------
float, normalized jet tau2
Notes:
------
slow implementation
'''
proto = np.array(zip(jet_image[jet_image != 0],
eta[jet_image != 0],
phi[jet_image != 0]))
while len(proto) > 2:
candidates = [
(
(i, j),
(min(pt1, pt2) ** 2) * ((eta1 - eta2) ** 2 + (phi1 - phi2) ** 2)
)
for i, (pt1, eta1, phi1) in enumerate(proto)
for j, (pt2, eta2, phi2) in enumerate(proto)
if j > i
]
index, value = zip(*candidates)
pix1, pix2 = index[np.argmin(value)]
if pix1 > pix2:
# swap
pix1, pix2 = pix2, pix1
(pt1, eta1, phi1) = proto[pix1]
(pt2, eta2, phi2) = proto[pix2]
e1 = pt1 / np.cosh(eta1)
e2 = pt2 / np.cosh(eta2)
choice = e1 > e2
eta_add = (eta1 if choice else eta2)
phi_add = (phi1 if choice else phi2)
pt_add = (e1 + e2) * np.cosh(eta_add)
proto[pix1] = (pt_add, eta_add, phi_add)
proto = np.delete(proto, pix2, axis=0).tolist()

(_, eta1, phi1), (_, eta2, phi2) = proto
np.sqrt(np.square(eta - eta1) + np.square(phi - phi1))
grid = np.array([
np.sqrt(np.square(eta - eta1) + np.square(phi - phi1)),
np.sqrt(np.square(eta - eta2) + np.square(phi - phi2))
]).min(axis=0)
# normalize by the total intensity
return np.sum(jet_image * grid) / np.sum(jet_image)


def tau21(jet_image, nb_jobs=1, verbose=False):
'''
Calculates the tau21 from a pixelated jet image using the functions above
Args:
-----
jet_image: numpy ndarray of dim (1, 25, 25)
Returns:
--------
float, jet tau21
Notes:
------
slow implementation
'''
if len(jet_image.shape) == 2:
tau1 = _tau1(jet_image)
if tau1 <= 0:
return 0
else:
tau2 = _tau2(jet_image)
return tau2 / tau1

return np.array(Parallel(n_jobs=nb_jobs, verbose=verbose)(
delayed(tau21)(im) for im in jet_image
))
111 changes: 111 additions & 0 deletions models/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import numpy as np
from scipy.spatial.distance import cdist as distance
from pyemd import emd
from scipy.linalg import toeplitz


class AnnoyingError(Exception):
pass


def _calculate_emd_2D(D1, D2, bins=(40, 40)):
"""
Args:
-----
D1, D2: two np arrays with potentially differing
numbers of rows, but two columns. The empirical
distributions you want a similarity over
bins: number of bins in each dim
"""

try:
_, bx, by = np.histogram2d(*np.concatenate((D1, D2), axis=0).T, bins=bins)
except ValueError, e:
print '[ERROR] found here'

raise AnnoyingError('Fuck this')

H1, _, _ = np.histogram2d(*D1.T, bins=(bx, by))
H2, _, _ = np.histogram2d(*D2.T, bins=(bx, by))

H1 /= H1.sum()
H2 /= H2.sum()

_x, _y = np.indices(H1.shape)

coords = np.array(zip(_x.ravel(), _y.ravel()))

D = distance(coords, coords)

return emd(H1.ravel(), H2.ravel(), D)


def _calculate_emd_1D(D1, D2, bins=40):
"""
Args:
-----
D1, D2: two np arrays with potentially differing
numbers of rows, but two columns. The empirical
distributions you want a similarity over
bins: number of bins in each dim
"""

D1 = D1[np.isnan(D1).sum(axis=-1) < 1]
D2 = D2[np.isnan(D2).sum(axis=-1) < 1]

try:
_, bx = np.histogram(np.concatenate((D1, D2), axis=0), bins=bins)
except ValueError, e:
print '[ERROR] found here'

raise AnnoyingError('Fuck this')

H1, _ = np.histogram(D1, bins=bx, normed=True)
H2, _ = np.histogram(D2, bins=bx, normed=True)

H1 /= H1.sum()
H2 /= H2.sum()

# _x, _y = np.indices(H1.shape)

D = toeplitz(range(len(H1))).astype(float)

# print coords

# D = distance(coords, coords)

return emd(H1, H2, D)


def calculate_metric(D1, signal1, D2, signal2, bins=(40, 40)):
"""
Args:
-----
D1: (nb_rows, 2) array of observations from first distribution
signal1: (nb_rows, ) array of 1 or 0, indicating class of
first distribution
D2: (nb_rows, 2) array of observations from second distribution
signal2: (nb_rows, ) array of 1 or 0, indicating class of
second distribution
bins: number of bins in each dim
"""

try:
if len(D1.shape) == 2:
sig_cond = _calculate_emd_2D(D1[signal1 == True], D2[
signal2 == True], bins=bins)
bkg_cond = _calculate_emd_2D(D1[signal1 == False], D2[
signal2 == False], bins=bins)

else:
if not isinstance(bins, int):
bins = bins[0]
sig_cond = _calculate_emd_1D(D1[signal1 == True], D2[
signal2 == True], bins=bins)
bkg_cond = _calculate_emd_1D(D1[signal1 == False], D2[
signal2 == False], bins=bins)

return max(sig_cond, bkg_cond)
except AnnoyingError, a:
return 999
File renamed without changes.
Loading

0 comments on commit 886afc6

Please sign in to comment.