-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
722 additions
and
79 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
from joblib import Parallel, delayed | ||
import numpy as np | ||
import math | ||
|
||
|
||
# form the grids you need to represent the eta, phi coordinates | ||
grid = 0.5 * (np.linspace(-1.25, 1.25, 26)[:-1] + np.linspace(-1.25, 1.25, 26)[1:]) | ||
eta = np.tile(grid, (25, 1)) | ||
phi = np.tile(grid[::-1].reshape(-1, 1), (1, 25)) | ||
|
||
|
||
def discrete_mass(jet_image): | ||
''' | ||
Calculates the jet mass from a pixelated jet image | ||
Args: | ||
----- | ||
jet_image: numpy ndarray of dim (1, 25, 25) | ||
Returns: | ||
-------- | ||
M: float, jet mass | ||
''' | ||
Px = np.sum(jet_image * np.cos(phi), axis=(1, 2)) | ||
Py = np.sum(jet_image * np.sin(phi), axis=(1, 2)) | ||
Pz = np.sum(jet_image * np.sinh(eta), axis=(1, 2)) | ||
E = np.sum(jet_image * np.cosh(eta), axis=(1, 2)) | ||
PT2 = np.square(Px) + np.square(Py) | ||
M2 = np.square(E) - (PT2 + np.square(Pz)) | ||
M = np.sqrt(M2) | ||
return M | ||
|
||
|
||
def discrete_pt(jet_image): | ||
''' | ||
Calculates the jet transverse momentum from a pixelated jet image | ||
Args: | ||
----- | ||
jet_image: numpy ndarray of dim (1, 25, 25) | ||
Returns: | ||
-------- | ||
float, jet transverse momentum | ||
''' | ||
Px = np.sum(jet_image * np.cos(phi), axis=(1, 2)) | ||
Py = np.sum(jet_image * np.sin(phi), axis=(1, 2)) | ||
return np.sqrt(np.square(Px) + np.square(Py)) | ||
|
||
|
||
def dphi(phi1, phi2): | ||
''' | ||
Calculates the difference between two angles avoiding |phi1 - phi2| > 180 degrees | ||
''' | ||
import math | ||
return math.acos(math.cos(abs(phi1 - phi2))) | ||
|
||
|
||
def _tau1(jet_image): | ||
''' | ||
Calculates the normalized tau1 from a pixelated jet image | ||
Args: | ||
----- | ||
jet_image: numpy ndarray of dim (1, 25, 25) | ||
Returns: | ||
-------- | ||
float, normalized jet tau1 | ||
''' | ||
# find coordinate of most energetic pixel, then use formula to compute tau1 | ||
tau1_axis_eta = eta.ravel()[np.argmax(jet_image)] | ||
tau1_axis_phi = phi.ravel()[np.argmax(jet_image)] | ||
tau1 = np.sum(jet_image * | ||
np.sqrt(np.square(tau1_axis_eta - eta) + | ||
np.square([dphi(tau1_axis_phi, p) for p in phi.ravel()]).reshape(25, 25)) | ||
) | ||
return tau1 / np.sum(jet_image) # normalize by the total intensity | ||
|
||
|
||
def _tau2(jet_image): | ||
''' | ||
Calculates the normalized tau2 from a pixelated jet image | ||
Args: | ||
----- | ||
jet_image: numpy ndarray of dim (1, 25, 25) | ||
Returns: | ||
-------- | ||
float, normalized jet tau2 | ||
Notes: | ||
------ | ||
slow implementation | ||
''' | ||
proto = np.array(zip(jet_image[jet_image != 0], | ||
eta[jet_image != 0], | ||
phi[jet_image != 0])) | ||
while len(proto) > 2: | ||
candidates = [ | ||
( | ||
(i, j), | ||
(min(pt1, pt2) ** 2) * ((eta1 - eta2) ** 2 + (phi1 - phi2) ** 2) | ||
) | ||
for i, (pt1, eta1, phi1) in enumerate(proto) | ||
for j, (pt2, eta2, phi2) in enumerate(proto) | ||
if j > i | ||
] | ||
index, value = zip(*candidates) | ||
pix1, pix2 = index[np.argmin(value)] | ||
if pix1 > pix2: | ||
# swap | ||
pix1, pix2 = pix2, pix1 | ||
(pt1, eta1, phi1) = proto[pix1] | ||
(pt2, eta2, phi2) = proto[pix2] | ||
e1 = pt1 / np.cosh(eta1) | ||
e2 = pt2 / np.cosh(eta2) | ||
choice = e1 > e2 | ||
eta_add = (eta1 if choice else eta2) | ||
phi_add = (phi1 if choice else phi2) | ||
pt_add = (e1 + e2) * np.cosh(eta_add) | ||
proto[pix1] = (pt_add, eta_add, phi_add) | ||
proto = np.delete(proto, pix2, axis=0).tolist() | ||
|
||
(_, eta1, phi1), (_, eta2, phi2) = proto | ||
np.sqrt(np.square(eta - eta1) + np.square(phi - phi1)) | ||
grid = np.array([ | ||
np.sqrt(np.square(eta - eta1) + np.square(phi - phi1)), | ||
np.sqrt(np.square(eta - eta2) + np.square(phi - phi2)) | ||
]).min(axis=0) | ||
# normalize by the total intensity | ||
return np.sum(jet_image * grid) / np.sum(jet_image) | ||
|
||
|
||
def tau21(jet_image, nb_jobs=1, verbose=False): | ||
''' | ||
Calculates the tau21 from a pixelated jet image using the functions above | ||
Args: | ||
----- | ||
jet_image: numpy ndarray of dim (1, 25, 25) | ||
Returns: | ||
-------- | ||
float, jet tau21 | ||
Notes: | ||
------ | ||
slow implementation | ||
''' | ||
if len(jet_image.shape) == 2: | ||
tau1 = _tau1(jet_image) | ||
if tau1 <= 0: | ||
return 0 | ||
else: | ||
tau2 = _tau2(jet_image) | ||
return tau2 / tau1 | ||
|
||
return np.array(Parallel(n_jobs=nb_jobs, verbose=verbose)( | ||
delayed(tau21)(im) for im in jet_image | ||
)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
import numpy as np | ||
from scipy.spatial.distance import cdist as distance | ||
from pyemd import emd | ||
from scipy.linalg import toeplitz | ||
|
||
|
||
class AnnoyingError(Exception): | ||
pass | ||
|
||
|
||
def _calculate_emd_2D(D1, D2, bins=(40, 40)): | ||
""" | ||
Args: | ||
----- | ||
D1, D2: two np arrays with potentially differing | ||
numbers of rows, but two columns. The empirical | ||
distributions you want a similarity over | ||
bins: number of bins in each dim | ||
""" | ||
|
||
try: | ||
_, bx, by = np.histogram2d(*np.concatenate((D1, D2), axis=0).T, bins=bins) | ||
except ValueError, e: | ||
print '[ERROR] found here' | ||
|
||
raise AnnoyingError('Fuck this') | ||
|
||
H1, _, _ = np.histogram2d(*D1.T, bins=(bx, by)) | ||
H2, _, _ = np.histogram2d(*D2.T, bins=(bx, by)) | ||
|
||
H1 /= H1.sum() | ||
H2 /= H2.sum() | ||
|
||
_x, _y = np.indices(H1.shape) | ||
|
||
coords = np.array(zip(_x.ravel(), _y.ravel())) | ||
|
||
D = distance(coords, coords) | ||
|
||
return emd(H1.ravel(), H2.ravel(), D) | ||
|
||
|
||
def _calculate_emd_1D(D1, D2, bins=40): | ||
""" | ||
Args: | ||
----- | ||
D1, D2: two np arrays with potentially differing | ||
numbers of rows, but two columns. The empirical | ||
distributions you want a similarity over | ||
bins: number of bins in each dim | ||
""" | ||
|
||
D1 = D1[np.isnan(D1).sum(axis=-1) < 1] | ||
D2 = D2[np.isnan(D2).sum(axis=-1) < 1] | ||
|
||
try: | ||
_, bx = np.histogram(np.concatenate((D1, D2), axis=0), bins=bins) | ||
except ValueError, e: | ||
print '[ERROR] found here' | ||
|
||
raise AnnoyingError('Fuck this') | ||
|
||
H1, _ = np.histogram(D1, bins=bx, normed=True) | ||
H2, _ = np.histogram(D2, bins=bx, normed=True) | ||
|
||
H1 /= H1.sum() | ||
H2 /= H2.sum() | ||
|
||
# _x, _y = np.indices(H1.shape) | ||
|
||
D = toeplitz(range(len(H1))).astype(float) | ||
|
||
# print coords | ||
|
||
# D = distance(coords, coords) | ||
|
||
return emd(H1, H2, D) | ||
|
||
|
||
def calculate_metric(D1, signal1, D2, signal2, bins=(40, 40)): | ||
""" | ||
Args: | ||
----- | ||
D1: (nb_rows, 2) array of observations from first distribution | ||
signal1: (nb_rows, ) array of 1 or 0, indicating class of | ||
first distribution | ||
D2: (nb_rows, 2) array of observations from second distribution | ||
signal2: (nb_rows, ) array of 1 or 0, indicating class of | ||
second distribution | ||
bins: number of bins in each dim | ||
""" | ||
|
||
try: | ||
if len(D1.shape) == 2: | ||
sig_cond = _calculate_emd_2D(D1[signal1 == True], D2[ | ||
signal2 == True], bins=bins) | ||
bkg_cond = _calculate_emd_2D(D1[signal1 == False], D2[ | ||
signal2 == False], bins=bins) | ||
|
||
else: | ||
if not isinstance(bins, int): | ||
bins = bins[0] | ||
sig_cond = _calculate_emd_1D(D1[signal1 == True], D2[ | ||
signal2 == True], bins=bins) | ||
bkg_cond = _calculate_emd_1D(D1[signal1 == False], D2[ | ||
signal2 == False], bins=bins) | ||
|
||
return max(sig_cond, bkg_cond) | ||
except AnnoyingError, a: | ||
return 999 |
File renamed without changes.
Oops, something went wrong.