From f061e0c72e1768fd75573a5cb4aca2ee2b231c78 Mon Sep 17 00:00:00 2001 From: Mogeng Yin Date: Fri, 7 Jul 2017 10:47:37 -0700 Subject: [PATCH 1/2] First take on refactoring IOHMM to a package. --- .gitignore | 1 + auxiliary/HMM.py => IOHMM/HMM_utils.py | 0 {main => IOHMM}/IOHMM.py | 125 +++++++- IOHMM/__init__.py | 20 ++ {auxiliary => IOHMM}/family.py | 0 .../linear_models.py | 291 +++++++++++++++--- auxiliary/.DS_Store | Bin 6148 -> 0 bytes auxiliary/HMM.pyc | Bin 3439 -> 0 bytes auxiliary/family.pyc | Bin 62509 -> 0 bytes data/.DS_Store | Bin 6148 -> 0 bytes {data => examples/data}/speed.csv | 0 .../notebooks/SemiSupervisedIOHMM.ipynb | 145 +++------ .../notebooks/SupervisedIOHMM.ipynb | 58 ++-- .../notebooks/UnSupervisedIOHMM.ipynb | 239 +++++++------- main/.DS_Store | Bin 6148 -> 0 bytes setup.py | 32 ++ 16 files changed, 596 insertions(+), 315 deletions(-) rename auxiliary/HMM.py => IOHMM/HMM_utils.py (100%) rename {main => IOHMM}/IOHMM.py (83%) create mode 100644 IOHMM/__init__.py rename {auxiliary => IOHMM}/family.py (100%) rename auxiliary/SupervisedModels.py => IOHMM/linear_models.py (72%) delete mode 100644 auxiliary/.DS_Store delete mode 100644 auxiliary/HMM.pyc delete mode 100644 auxiliary/family.pyc delete mode 100644 data/.DS_Store rename {data => examples/data}/speed.csv (100%) rename SemiSupervisedIOHMM.ipynb => examples/notebooks/SemiSupervisedIOHMM.ipynb (80%) rename SupervisedIOHMM.ipynb => examples/notebooks/SupervisedIOHMM.ipynb (93%) rename UnSupervisedIOHMM.ipynb => examples/notebooks/UnSupervisedIOHMM.ipynb (78%) delete mode 100644 main/.DS_Store create mode 100644 setup.py diff --git a/.gitignore b/.gitignore index 91cde52..5691573 100644 --- a/.gitignore +++ b/.gitignore @@ -143,6 +143,7 @@ nose_stub.py # metastore metastore_db/* +examples/notebooks/metastore_db/* diff --git a/auxiliary/HMM.py b/IOHMM/HMM_utils.py similarity index 100% rename from auxiliary/HMM.py rename to IOHMM/HMM_utils.py diff --git a/main/IOHMM.py b/IOHMM/IOHMM.py similarity index 83% rename from main/IOHMM.py rename to IOHMM/IOHMM.py index 600d7c3..3af1d38 100644 --- a/main/IOHMM.py +++ b/IOHMM/IOHMM.py @@ -1,21 +1,36 @@ from __future__ import division -import numpy as np from copy import deepcopy -import sys +import logging +import os import warnings -sys.path.append('../auxiliary') -try: - from SupervisedModels import MNLP, GLM, LM, MNLD, LabelBinarizer - from HMM import calHMM -except: - raise -warnings.simplefilter("ignore") +import numpy as np + + +from HMM_utils import calHMM +from linear_models import MNLP, GLM, LM, MNLD, LabelBinarizer + + +warnings.simplefilter("ignore") + # example: +class LinearModelLoader(object): + """The mapping from data_type of a linear model + ('LM', 'GLM', 'MNLD', 'MNLP') + to the correct class. 
+ + """ + LM = LM + GLM = GLM + MNLD = MNLD + MNLP = MNLP + + class UnSupervisedIOHMM(object): + def __init__(self, num_states=2, EM_tol=1e-4, max_EM_iter=100): self.num_states = num_states self.EM_tol = EM_tol @@ -27,7 +42,6 @@ def setModels(self, model_emissions, model_initial=MNLP(), model_transition=MNLP self.model_initial = model_initial self.model_transition = [deepcopy(model_transition) for i in range(self.num_states)] self.model_emissions = [deepcopy(model_emissions) for i in range(self.num_states)] - self.num_emissions = len(model_emissions) def setInputs(self, covariates_initial, covariates_transition, covariates_emissions): self.covariates_initial = covariates_initial @@ -38,6 +52,7 @@ def setInputs(self, covariates_initial, covariates_transition, covariates_emissi def setOutputs(self, responses_emissions): # output should be a list inidicating the columns of the dataframe self.responses_emissions = responses_emissions + self.num_emissions = len(responses_emissions) def setParams(self, model_initial_coef, model_transition_coef, model_emissions_coef, model_emissions_dispersion): @@ -53,10 +68,10 @@ def setParams(self, model_initial_coef, model_transition_coef, pass self.has_params = True - def setData(self, dfs): - self.num_seqs = len(dfs) - self.dfs = dfs - self.dfs_logStates = map(lambda x: [x, {}], dfs) + def setData(self, df_list): + self.num_seqs = len(df_list) + self.dfs = df_list + self.dfs_logStates = map(lambda x: [x, {}], df_list) self.initIO() def initIO(self): @@ -174,14 +189,90 @@ def train(self): prev_ll = self.ll self.MStep() self.EStep() - print self.ll + logging.info('log likelihood of iteration {0}: {1:.4f}'.format(it, self.ll)) if abs(self.ll - prev_ll) < self.EM_tol: break self.converged = it < self.max_EM_iter + @classmethod + def from_config(cls, config): + model = cls( + num_states=config['properties']['num_states'], + EM_tol=config['properties']['EM_tol'], + max_EM_iter=config['properties']['max_EM_iter'] + ) + model.setModels( + model_initial=getattr( + LinearModelLoader, config['properties']['model_initial']['data_type'])( + **config['properties']['model_initial']['properties']), + model_transition=getattr( + LinearModelLoader, config['properties']['model_transition']['data_type'])( + **config['properties']['model_transition']['properties']), + model_emissions=[getattr( + LinearModelLoader, model_emission['data_type'])(**model_emission['properties']) + for model_emission in config['properties']['model_emissions']] + ) + model.setInputs(covariates_initial=config['properties']['covariates_initial'], + covariates_transition=config['properties']['covariates_transition'], + covariates_emissions=config['properties']['covariates_emissions']) + model.setOutputs(config['properties']['responses_emissions']) + return model + + def to_json(self, path): + json_dict = { + 'data_type': self.__class__.__name__, + 'properties': { + 'num_states': self.num_states, + 'EM_tol': self.EM_tol, + 'max_EM_iter': self.max_EM_iter, + 'covariates_initial': self.covariates_initial, + 'covariates_transition': self.covariates_transition, + 'covariates_emissions': self.covariates_emissions, + 'responses_emissions': self.responses_emissions, + 'model_initial': self.model_initial.to_json( + path=os.path.join(path, 'model_initial')), + 'model_transition': [self.model_transition[st].to_json( + path=os.path.join(path, 'model_transition', 'state_{}'.format(st))) for + st in range(self.num_states)], + 'model_emissions': [[self.model_emissions[st][emis].to_json( + path=os.path.join( + path, 
'model_emissions', 'state_{}'.format(st), 'emission_{}'.format(emis)) + ) for emis in range(self.num_emissions)] for st in range(self.num_states)] + + } + } + return json_dict + + @classmethod + def from_json(cls, json_dict): + model = cls( + num_states=json_dict['properties']['num_states'], + EM_tol=json_dict['properties']['EM_tol'], + max_EM_iter=json_dict['properties']['max_EM_iter'] + ) + model.model_initial = getattr( + LinearModelLoader, json_dict['properties']['model_initial']['data_type']).from_json( + json_dict['properties']['model_initial']) + model.model_transition = [getattr( + LinearModelLoader, model_transition_json['data_type'] + ).from_json(model_transition_json) for + model_transition_json in json_dict['properties']['model_transition']] + + model.model_emissions = [[getattr( + LinearModelLoader, model_emission_json['data_type'] + ).from_json(model_emission_json) for model_emission_json in model_emissions_json] for + model_emissions_json in json_dict['properties']['model_emissions']] + model.has_params = True + model.setInputs(covariates_initial=json_dict['properties']['covariates_initial'], + covariates_transition=json_dict['properties']['covariates_transition'], + covariates_emissions=json_dict['properties']['covariates_emissions']) + model.setOutputs(json_dict['properties']['responses_emissions']) + return model + class SemiSupervisedIOHMM(UnSupervisedIOHMM): + def setData(self, dfs_states): self.num_seqs = len(dfs_states) self.dfs = [df for df, state in dfs_states] @@ -190,6 +281,7 @@ def setData(self, dfs_states): class SupervisedIOHMM(SemiSupervisedIOHMM): + def __init__(self, num_states=2): self.num_states = num_states @@ -277,6 +369,7 @@ def train(self): class UnSupervisedIOHMMMapReduce(UnSupervisedIOHMM): + def setData(self, rdd_dfs): self.num_seqs = rdd_dfs.count() self.dfs = rdd_dfs @@ -328,6 +421,7 @@ def EStep(self): class SemiSupervisedIOHMMMapReduce(UnSupervisedIOHMMMapReduce): + def setData(self, rdd_dfs_states): self.num_seqs = rdd_dfs_states.count() self.dfs = rdd_dfs_states.mapValues(lambda v: v[0]) @@ -337,6 +431,7 @@ def setData(self, rdd_dfs_states): class SupervisedIOHMMMapReduce(SemiSupervisedIOHMMMapReduce, SupervisedIOHMM): + def __init__(self, num_states=2): self.num_states = num_states diff --git a/IOHMM/__init__.py b/IOHMM/__init__.py new file mode 100644 index 0000000..a161cdd --- /dev/null +++ b/IOHMM/__init__.py @@ -0,0 +1,20 @@ +from .IOHMM import (UnSupervisedIOHMM, + SemiSupervisedIOHMM, + SupervisedIOHMM, + UnSupervisedIOHMMMapReduce, + SemiSupervisedIOHMMMapReduce, + SupervisedIOHMMMapReduce) +from .HMM_utils import calHMM +from .linear_models import ( + LM, MNLP, MNLD, GLM, LabelBinarizer +) + + +# Enumerate exports, to make the linter happy. 
+__all__ = [
+    'UnSupervisedIOHMM', 'SemiSupervisedIOHMM', 'SupervisedIOHMM',
+    'UnSupervisedIOHMMMapReduce',
+    'SemiSupervisedIOHMMMapReduce',
+    'SupervisedIOHMMMapReduce',
+    'LM', 'MNLP', 'MNLD', 'GLM', 'LabelBinarizer', 'calHMM'
+]
diff --git a/auxiliary/family.py b/IOHMM/family.py
similarity index 100%
rename from auxiliary/family.py
rename to IOHMM/family.py
diff --git a/auxiliary/SupervisedModels.py b/IOHMM/linear_models.py
similarity index 72%
rename from auxiliary/SupervisedModels.py
rename to IOHMM/linear_models.py
index 410b54e..b922970 100644
--- a/auxiliary/SupervisedModels.py
+++ b/IOHMM/linear_models.py
@@ -1,15 +1,22 @@
 from __future__ import division
+
+import cPickle as pickle
+import os
+import sys
+import warnings
+
+
 import numpy as np
-import family
+from scipy import optimize
+from scipy.misc import logsumexp
 from sklearn import linear_model
 from sklearn.preprocessing import LabelBinarizer
 from sklearn.utils.optimize import newton_cg
-from scipy import optimize
-from scipy.misc import logsumexp
 import statsmodels.regression.linear_model as lim
 from statsmodels.tools.sm_exceptions import PerfectSeparationError
-import sys
-import warnings
+
+
+import family
 
 warnings.simplefilter("ignore")
 
@@ -34,7 +41,9 @@ class BaseModel(object):
     """
     def __init__(self, fam, solver, fit_intercept=True, est_sd=False,
-                 penalty=None, reg=0, l1_ratio=0, tol=1e-4, max_iter=100):
+                 penalty=None, reg=0, l1_ratio=0, tol=1e-4, max_iter=100,
+                 n_features=None, n_targets=None,
+                 coef=None, sd=None, dispersion=None, converged=None):
         """
         Constructor
         Parameters
@@ -57,6 +66,12 @@ def __init__(self, fam, solver, fit_intercept=True, est_sd=False,
         self.tol = tol
         self.max_iter = max_iter
         self.est_sd = est_sd
+        self.n_features = n_features
+        self.n_targets = n_targets
+        self.coef = coef
+        self.sd = sd
+        self.dispersion = dispersion
+        self.converged = converged
 
     def fit(self, X, Y, sample_weight=None):
         """
@@ -109,16 +124,80 @@ def estimate_sd(self):
     def estimate_loglikelihood(self):
         raise NotImplementedError
 
+    def to_json(self, path):
+        json_dict = {
+            'data_type': self.__class__.__name__,
+            'properties': {
+                'fit_intercept': self.fit_intercept,
+                'penalty': self.penalty,
+                'reg': self.reg,
+                'l1_ratio': self.l1_ratio,
+                'solver': self.solver,
+                'tol': self.tol,
+                'max_iter': self.max_iter,
+                'est_sd': self.est_sd,
+                'n_features': self.n_features,
+                'n_targets': self.n_targets,
+                'coef': {
+                    'data_type': 'numpy.ndarray',
+                    'path': os.path.join(path, 'coef.npy')
+                },
+                'sd': {
+                    'data_type': 'numpy.ndarray',
+                    'path': os.path.join(path, 'sd.npy')
+                },
+                'dispersion': {
+                    'data_type': 'numpy.ndarray',
+                    'path': os.path.join(path, 'dispersion.npy')
+                },
+                'converged': self.converged
+            }
+        }
+        if not os.path.exists(os.path.dirname(json_dict['properties']['coef']['path'])):
+            os.makedirs(os.path.dirname(json_dict['properties']['coef']['path']))
+        np.save(json_dict['properties']['coef']['path'], self.coef)
+        if not os.path.exists(os.path.dirname(json_dict['properties']['sd']['path'])):
+            os.makedirs(os.path.dirname(json_dict['properties']['sd']['path']))
+        np.save(json_dict['properties']['sd']['path'], self.sd)
+        if not os.path.exists(os.path.dirname(json_dict['properties']['dispersion']['path'])):
+            os.makedirs(os.path.dirname(json_dict['properties']['dispersion']['path']))
+        np.save(json_dict['properties']['dispersion']['path'], self.dispersion)
+        return json_dict
+
+    @classmethod
+    def from_json(cls, json_dict):
+        return cls(
+            solver=json_dict['properties']['solver'],
+            fit_intercept=json_dict['properties']['fit_intercept'],
+            
est_sd=json_dict['properties']['est_sd'], + penalty=json_dict['properties']['penalty'], + reg=json_dict['properties']['reg'], + l1_ratio=json_dict['properties']['l1_ratio'], + tol=json_dict['properties']['tol'], + max_iter=json_dict['properties']['max_iter'], + n_features=json_dict['properties']['n_features'], + n_targets=json_dict['properties']['n_targets'], + coef=np.load(json_dict['properties']['coef']['path']), + sd=np.load(json_dict['properties']['sd']['path']), + dispersion=np.load(json_dict['properties']['dispersion']['path']), + converged=json_dict['properties']['converged'] + ) + class GLM(BaseModel): """ A Generalized linear model for data with input and output. """ + def __init__(self, fam, solver='pinv', fit_intercept=True, est_sd=False, penalty=None, - reg=0, l1_ratio=0, tol=1e-4, max_iter=100): + reg=0, l1_ratio=0, tol=1e-4, max_iter=100, + n_features=None, n_targets=None, + coef=None, sd=None, dispersion=None, converged=None): super(GLM, self).__init__(fam=fam, solver=solver, fit_intercept=fit_intercept, est_sd=est_sd, penalty=penalty, reg=reg, l1_ratio=l1_ratio, - tol=tol, max_iter=max_iter) + tol=tol, max_iter=max_iter, + n_features=n_features, n_targets=n_targets, + coef=coef, sd=sd, dispersion=dispersion, converged=converged) def fit(self, X, Y, sample_weight=None): """ @@ -161,7 +240,7 @@ def fit(self, X, Y, sample_weight=None): for iteration in range(self.max_iter): weights = sample_weight * self.fam.weights(mu) - wlsendog = lin_pred + self.fam.link.deriv(mu) * (Y-mu) + wlsendog = lin_pred + self.fam.link.deriv(mu) * (Y - mu) if self.penalty is None: wls_results = lim.WLS(wlsendog, X, weights).fit(method=self.solver) @@ -250,16 +329,88 @@ def estimate_loglikelihood(self, Y, mu, w): else: return self.fam.loglike_weighted(Y, mu, w, scale=self.dispersion) + def to_json(self, path): + json_dict = { + 'data_type': 'GLM', + 'properties': { + 'fit_intercept': self.fit_intercept, + 'penalty': self.penalty, + 'reg': self.reg, + 'l1_ratio': self.l1_ratio, + 'fam': { + 'data_type': self.fam.__class__.__name__, + 'path': os.path.join(path, 'fam.p') + }, + 'solver': self.solver, + 'tol': self.tol, + 'max_iter': self.max_iter, + 'est_sd': self.est_sd, + 'n_features': self.n_features, + 'n_targets': self.n_targets, + 'coef': { + 'data_type': 'numpy.ndarray', + 'path': os.path.join(path, 'coef.npy') + }, + 'sd': { + 'data_type': 'numpy.ndarray', + 'path': os.path.join(path, 'sd.npy') + }, + 'dispersion': { + 'data_type': 'numpy.ndarray', + 'path': os.path.join(path, 'dispersion.npy') + }, + 'converged': self.converged + } + } + if not os.path.exists(os.path.dirname(json_dict['properties']['fam']['path'])): + os.makedirs(os.path.dirname(json_dict['properties']['fam']['path'])) + pickle.dump(self.fam, open(json_dict['properties']['fam']['path'], 'wb')) + if not os.path.exists(os.path.dirname(json_dict['properties']['coef']['path'])): + os.makedirs(os.path.dirname(json_dict['properties']['coef']['path'])) + np.save(json_dict['properties']['coef']['path'], self.coef) + if not os.path.exists(os.path.dirname(json_dict['properties']['sd']['path'])): + os.makedirs(os.path.dirname(json_dict['properties']['sd']['path'])) + np.save(json_dict['properties']['sd']['path'], self.sd) + if not os.path.exists(os.path.dirname(json_dict['properties']['dispersion']['path'])): + os.makedirs(os.path.dirname(json_dict['properties']['dispersion']['path'])) + np.save(json_dict['properties']['dispersion']['path'], self.dispersion) + return json_dict + + @classmethod + def from_json(cls, json_dict): + assert 
json_dict['data_type'] == 'GLM'
+        return cls(
+            fam=pickle.load(open(json_dict['properties']['fam']['path'], 'rb')),
+            solver=json_dict['properties']['solver'],
+            fit_intercept=json_dict['properties']['fit_intercept'],
+            est_sd=json_dict['properties']['est_sd'],
+            penalty=json_dict['properties']['penalty'],
+            reg=json_dict['properties']['reg'],
+            l1_ratio=json_dict['properties']['l1_ratio'],
+            tol=json_dict['properties']['tol'],
+            max_iter=json_dict['properties']['max_iter'],
+            n_features=json_dict['properties']['n_features'],
+            n_targets=json_dict['properties']['n_targets'],
+            coef=np.load(json_dict['properties']['coef']['path']),
+            sd=np.load(json_dict['properties']['sd']['path']),
+            dispersion=np.load(json_dict['properties']['dispersion']['path']),
+            converged=json_dict['properties']['converged'])
+
 
 class LM(BaseModel):
     """
     A Generalized linear model for data with input and output.
     """
+
     def __init__(self, solver='svd', fit_intercept=True, penalty=None, est_sd=False,
-                 reg=0, l1_ratio=0, tol=1e-4, max_iter=100):
+                 reg=0, l1_ratio=0, tol=1e-4, max_iter=100,
+                 n_features=None, n_targets=None,
+                 coef=None, sd=None, dispersion=None, converged=None):
         super(LM, self).__init__(fam='LM', solver=solver,
                                  fit_intercept=fit_intercept, est_sd=est_sd, penalty=penalty,
-                                 reg=reg, l1_ratio=l1_ratio, tol=tol, max_iter=max_iter)
+                                 reg=reg, l1_ratio=l1_ratio, tol=tol, max_iter=max_iter,
+                                 n_features=n_features, n_targets=n_targets,
+                                 coef=coef, sd=sd, dispersion=dispersion, converged=converged)
 
     def fit(self, X, Y, sample_weight=None):
         """
@@ -350,8 +501,8 @@ def log_probability(self, X, Y):
 
         if Y.shape[1] == 1:
             if self.dispersion > 0:
-                logP = (Y * pred - pred**2/2)/self.dispersion - Y**2/(2 * self.dispersion) - \
-                    .5*np.log(2 * np.pi * self.dispersion)
+                logP = (Y * pred - pred**2 / 2) / self.dispersion - Y**2 / (2 * self.dispersion) - \
+                    .5 * np.log(2 * np.pi * self.dispersion)
                 logP = logP.reshape(-1,)
             else:
                 logP = np.zeros((Y.shape[0],))
@@ -359,23 +510,23 @@ def log_probability(self, X, Y):
                 logP = logP.reshape(-1,)
         else:
             if np.linalg.det(self.dispersion) > 0:
-                logP = -1/2*((Y.shape[1] * np.log(2 * np.pi) +
-                              np.log(np.linalg.det(self.dispersion))) +
-                             np.diag(np.dot(np.dot(Y - pred, np.linalg.inv(self.dispersion)),
-                                            (Y-pred).T)))
+                logP = -1 / 2 * ((Y.shape[1] * np.log(2 * np.pi) +
+                                  np.log(np.linalg.det(self.dispersion))) +
+                                 np.diag(np.dot(np.dot(Y - pred, np.linalg.inv(self.dispersion)),
+                                                (Y - pred).T)))
                 logP = logP.reshape(-1,)
             else:
                 if (np.diag(self.dispersion) > 0).all():
                     new_dispersion = np.diag(np.diag(self.dispersion))
-                    logP = -1/2*((Y.shape[1] * np.log(2 * np.pi) +
-                                  np.log(np.linalg.det(self.dispersion))) +
-                                 np.diag(np.dot(np.dot(Y-pred, np.linalg.inv(new_dispersion)),
-                                                (Y-pred).T)))
+                    logP = -1 / 2 * ((Y.shape[1] * np.log(2 * np.pi) +
+                                      np.log(np.linalg.det(self.dispersion))) +
+                                     np.diag(np.dot(np.dot(Y - pred, np.linalg.inv(new_dispersion)),
+                                                    (Y - pred).T)))
                     logP = logP.reshape(-1,)
                 else:
                     logP = np.zeros((Y.shape[0],))
-                    logP[np.linalg.norm(Y-pred, axis=1) != 0] = -np.Infinity
+                    logP[np.linalg.norm(Y - pred, axis=1) != 0] = -np.Infinity
                     logP = logP.reshape(-1,)
 
         return logP
 
@@ -446,6 +597,7 @@ class MNL(BaseModel):
     """
     A MNL for data with input and output.
""" + def fit(self, X, Y, sample_weight=None): """ fit the weighted model @@ -520,17 +672,18 @@ def estimate_sd(self, X, sample_weight): # calculate hessian p = self.n_features q = self.n_targets - h = np.zeros((p*(q-1), p*(q-1))) - for e in range(q-1): - for f in range(q-1): - h[e*p: (e+1)*p, f*p: (f+1)*p] = -np.dot(np.dot(X.T, np.diag( - np.multiply(np.multiply(o_normalized[:, f+1], - (e == f) - o_normalized[:, e+1]), + h = np.zeros((p * (q - 1), p * (q - 1))) + for e in range(q - 1): + for f in range(q - 1): + h[e * p: (e + 1) * p, f * p: (f + 1) * p] = -np.dot(np.dot(X.T, np.diag( + np.multiply(np.multiply(o_normalized[:, f + 1], + (e == f) - o_normalized[:, e + 1]), sample_weight))), X) if np.sum(sample_weight) > 0: h = h / np.sum(sample_weight) * X.shape[0] - if np.all(np.linalg.eigvals(-h) > 0) and np.linalg.cond(-h) < 1/sys.float_info.epsilon: - sd = np.sqrt(np.diag(np.linalg.inv(-h))).reshape(p, q-1, order='F') + if np.all(np.linalg.eigvals(-h) > 0) and \ + np.linalg.cond(-h) < 1 / sys.float_info.epsilon: + sd = np.sqrt(np.diag(np.linalg.inv(-h))).reshape(p, q - 1, order='F') sd = np.hstack((np.zeros((p, 1)), sd)) else: sd = None @@ -541,16 +694,45 @@ def estimate_sd(self, X, sample_weight): def estimate_loglikelihood(self, X, Y, sample_weight): return NotImplementedError + def label_to_column(self, Y): + unique_label = np.unique(Y) + output_map = {} + for column_id, label in enumerate(unique_label): + output_map[label] = column_id + return output_map + + def log_prob_exclude_set(self, log_prob, exclude_set): + if exclude_set is not None: + for exclude_label in exclude_set: + if exclude_label not in self.label_to_column_map: + continue + exclude_column = self.label_to_column_map[exclude_label] + log_prob[:, exclude_column] = -float('inf') + return log_prob + class MNLD(MNL): """ A MNL for discrete data with input and output. """ + def __init__(self, solver='newton-cg', fit_intercept=True, est_sd=False, penalty=None, - reg=0, l1_ratio=0, tol=1e-4, max_iter=100): + reg=0, l1_ratio=0, tol=1e-4, max_iter=100, + n_features=None, n_targets=None, + coef=None, sd=None, dispersion=None, converged=None): super(MNLD, self).__init__(fam='MNLD', solver=solver, fit_intercept=fit_intercept, est_sd=est_sd, penalty=penalty, reg=reg, - l1_ratio=l1_ratio, tol=tol, max_iter=max_iter) + l1_ratio=l1_ratio, tol=tol, max_iter=max_iter, + n_features=n_features, n_targets=n_targets, + coef=coef, sd=sd, dispersion=dispersion, converged=converged) + + @staticmethod + def from_json_file(config): + # TODO clean up, no need to have this function anymore + """Create an MNLD object from json config. + """ + # TODO: Load the config file. 
+ return MNLD() def fit(self, X, Y, sample_weight=None): """ @@ -574,7 +756,7 @@ def fit(self, X, Y, sample_weight=None): c = 1e200 else: penalty1 = self.penalty - c = 1/self.reg + c = 1 / self.reg if X.ndim == 1: X = X.reshape(-1, 1) if self.fit_intercept: @@ -594,6 +776,7 @@ def fit(self, X, Y, sample_weight=None): return self.lb = LabelBinarizer().fit(Y) + self.label_to_column_map = self.label_to_column(Y) model = linear_model.LogisticRegression(fit_intercept=False, penalty=penalty1, C=c, multi_class='multinomial', solver=self.solver, @@ -602,7 +785,7 @@ def fit(self, X, Y, sample_weight=None): model.fit(X, Y, sample_weight=sample_weight) w0 = model.coef_ if self.n_targets == 2: - w0 = np.vstack((np.zeros((1, self.n_features)), w0*2)) + w0 = np.vstack((np.zeros((1, self.n_features)), w0 * 2)) w1 = w0.reshape(self.n_targets, -1) w1 = w1.T - w1.T[:, 0].reshape(-1, 1) self.coef = w1 @@ -612,20 +795,41 @@ def fit(self, X, Y, sample_weight=None): self.sd = self.estimate_sd(X, sample_weight) self.ll = self.estimate_loglikelihood(X, Y, sample_weight) - def predict(self, X): + def predict(self, X, exclude_set=None): """ predict the Y value based on the model ---------- X : design matrix + exclude_set : a set of excluded choices. Returns ------- predicted value """ - index = np.argmax(self.predict_log_probability(X), axis=1) + log_prob = self.predict_log_probability(X) + log_prob = self.log_prob_exclude_set(log_prob, exclude_set=exclude_set) + + index = np.argmax(log_prob, axis=1) zero = np.zeros((X.shape[0], self.n_targets)) zero[np.arange(X.shape[0]), index] = 1 return self.lb.inverse_transform(zero) + def predict_stochastic(self, X, exclude_set=None): + """ + sample the Y value based on the model + ---------- + X : design matrix + + Returns + ------- + predicted value + """ + log_prob = self.predict_log_probability(X) + log_prob = self.log_prob_exclude_set(log_prob, exclude_set=exclude_set) + log_prob = np.exp(log_prob) + log_prob /= np.sum(log_prob, axis=1)[:, np.newaxis] + samples = np.array([np.random.multinomial(1, pvals=prob, size=1)[0] for prob in log_prob]) + return self.lb.inverse_transform(samples) + def log_probability(self, X, Y): """ Given a set of X and Y, calculate the probability of @@ -643,7 +847,7 @@ def log_probability(self, X, Y): Y_aug[np.arange(X.shape[0]), Y_transformed.reshape(-1,)] = 1 else: Y_aug = Y_transformed - logP = np.sum(p*Y_aug, axis=1) + logP = np.sum(p * Y_aug, axis=1) return logP @@ -661,11 +865,16 @@ class MNLP(MNL): """ A MNL with probability response for data with input and output. 
""" + def __init__(self, solver='newton-cg', fit_intercept=True, est_sd=False, penalty=None, - reg=0, l1_ratio=0, tol=1e-4, max_iter=100): + reg=0, l1_ratio=0, tol=1e-4, max_iter=100, + n_features=None, n_targets=None, + coef=None, sd=None, dispersion=None, converged=None): super(MNL, self).__init__(fam='MNLP', solver=solver, fit_intercept=fit_intercept, est_sd=est_sd, penalty=penalty, reg=reg, - l1_ratio=l1_ratio, tol=tol, max_iter=max_iter) + l1_ratio=l1_ratio, tol=tol, max_iter=max_iter, + n_features=n_features, n_targets=n_targets, + coef=coef, sd=sd, dispersion=dispersion, converged=converged) def fit(self, X, Y, sample_weight=None): """ @@ -697,7 +906,7 @@ def fit(self, X, Y, sample_weight=None): self.sd = None self.ll = 1 return - w0 = np.zeros((self.n_targets*self.n_features, )) + w0 = np.zeros((self.n_targets * self.n_features, )) if self.solver == 'lbfgs': def func(x, *args): return _multinomial_loss_grad(x, *args)[0:2] @@ -765,7 +974,7 @@ def log_probability(self, X, Y): assert X.shape[0] == Y.shape[0] p = self.predict_log_probability(X) - logP = np.sum(p*Y, axis=1) + logP = np.sum(p * Y, axis=1) return logP def estimate_loglikelihood(self, X, Y, sample_weight): diff --git a/auxiliary/.DS_Store b/auxiliary/.DS_Store deleted file mode 100644 index 9e103103544d63553e901e48520e47637913acfb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKO;5r=5Phq}5WR5p*k549i`PmpdO$A?;Ytx=h=2{W#@_nVeKWfn8+ta4nI$uC zc0TswCG2(qxKX(r0s{bjil7strHQ*WbP|#UosN;C%8SLMJQVwufx76DwV&Y;PyAh> zx&8-Cali_D>KI$R-i(oq^XFO-(=PGJ*oKlZRX9di~-jhr{fVf74Fuf?a5smQRWnBl2?ntM!WG5z>b_Fmm2i(NNnaM0c(#k TiukphXdePfh$&;>4;c6Yq?R}Z diff --git a/auxiliary/HMM.pyc b/auxiliary/HMM.pyc deleted file mode 100644 index 00a7c093075f352afb8a51806c22af5115dda88a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3439 zcma)8TW=dx5T3KXCUx4hmE)LjiMYusq7@Gykiw-jfj+bWt&ySzMb_5dxZAGR&hDBz z$ax{z)4_)i+LHY_ z`2B2MB>5Rc2OlG$NYBWw5yNLkLPvIUx|fqMFTH~77N&be*)57+l*c9M6GM|$nK&t%gVf@^n%hAr5BZ6Qu>n8mzAz4eTC*U8%r=X_!oZ-11c@k z^5DB53Hs5tWi5ae_Pfcj=Z^+yk@DTBAN%b*NXnP+y|WItf5#DHk;gc~$P-7Bs-#AK zFfumEbd-5x!gQ3GQ|700Oo!grkMc5x1Re|?3XXzmT9F zl<>o7i~Q(Nv|bylyI7T-1|+AN;1aBUjBtaTyP0E5L%tT zs18FRg&mmTsKja0s5QsiIQUC=9P~~dH1_|J`266vkD~N(L@WS-^JTrx20)9VITvB-JFwFql zN>VTN;RtNg^~rR{3kQ!pn z77E2~*;@G^+p>IQr+x%1sZdrUSy_OapOi&GM*k46EC>f^1R4r*oC9qCV9nTKAQ)LU ze=F7iOf8XG9o3bQGqh<50ghJWB%koMDkmU3)??uibS4{9Vjq5-a}QB})-^S>K6p$_ zpLyF3gI72iyu{eJg5PW~5!VaqHcwc zY@jY7?_S|T&}tVY7$5qn=Mq*5FkQ|=*&?JY%07qE8+b}qu}EpntYr{;#Vng0=QZ;+ zIBmk3Us|Z(dI^LD7hp9DHFAd(H5;tE?OCI@Uhn|-Dwd1wqhHp(d%+Vw3?B9SosYmv zc(9mu9xFq}uiEo52zFQ_gxV_F`KUoiZtkrM+Bth-0Wi>hK5y>EK~;9ZZtC>eww7r! 
zXsBDoqD>?WHR{uv?s~o6iw1jE(qB@GrY2=9VlE@ruUc;=9;r7k^ID8HR4Hmj-C}di z$*~UViLnHa;zePEdFe7O4IZU&R`XpZR*kZQM1Uy)FpR*2&3S!$I3R+#kE!fdN4<)4s@nRTV*y0o~LS7rv`&+fX3?4 zf{ebk_BPdCwXy~W*wUWs^=JkN0)R$G^an@P>bQEmpl+SjmNpKa3qCs#X~LeSW-c3k zh<5g-qzyqKIc*I({n8-xQ?ereDd(bs!0U8$#`TA(A2*n@xxO)l^b8ZWgRrc1_G$aj z8N^3_kOW~rx)?X?O150Ltjb+5k`*lCESc+O#VG>m?>Hl@%jULe(;a>ZI7R&HAkIX* zE!pFuf+s7|Dem1hsJU!k`rh#Wu3qd$))g}!9^lwX0pvlm9Zw5jfCtMvQeh>n`?pa>4H>#`do21^t rGIK$x^M)Sp+UWH=!_fbPvq(qYGQ8k-DOXvmtSsa!rAoeXwUYl2b4}u zMfP-$y1Ge@B?j1tiy$w?B0zvV?7lhiBG`ujYaj^XWb=}TIJ*gQ!D2VS2D=O73M8-* zAR7ZgobUVpbE)bZn>R_6d!#O&I(6#Qx%}rp|NV^quRXi|$FG0wN+~dZqxkm?{F48R zOF0Ox;+hM}BSAG6Tpf}0XiyyuHns&O&IQ-@23Ma9%G-kIsbCGYZx8MS!N);x9+T3xM zI~kOB+1%YOcPc33V_L%=mwO;6KVftCy4-_7`AM7ml*^rFcbD?}I0~n5*>!n63hT8f z+$=VV8&NZAgwd@{WYn5*u^O%wH{$A6SdNosBVKJa<9aO#TS-(7SGU49FMexbSNse7 zUCQTBS91qz-Hb|jvpI(Jd3oCn|8``ZW|2R@CBpDybZ{LBqS4^mwqUpZQyB@aZVxJW zFvh{g;9cFpMl*qd8V@2|ku%CM{&*yJb(b0AtGhYAEH)98Wqhyh;nFK0j8R2N%n91P2Li}+^ zF`O8}uv9H3Nw`*TaE#~sjBy+#3-aU zYo$oaun8AgE>6NGnu4NHu@N>K#ae=vY!u}c`>uYK?<_@;l#grW_(oiA+382qqzA=J)8!^8ujdC}(a@U@3Ma87mKt*hjD^^U<6I(W{ugTMms8|a(8}-%V zYFv$*ZeRv!B5N;4YsFTz>Azw>pgm4 zm;4ScALW9Nf?yfojzlhKV(E_p4ufPiXl^6K0tfu{Tu?)S?JNn<#nZ+Q%!|M!*T;g} zBU~*2auh($*?b4bBzXIIl5JfHWl!6&IB3(Znb(dbYQX&hL`G1!c?*80D&_UNhKRmk`yaZxDi*@ zoATzWmIX8$Fuf6lt5K;4d=tiKXjH8x;a0tss;ro5f3jY0Re_jLxF)??E#gJL0?k@L zN6oVFog`3Hsv8TyXl59yz<7kn;wC_G6EKdB2x!Rjs90LJxDi;9t@mq>{ne4ERt-?2 z9l!@+snsA>07CVHZ5|_6& zb^Zmy2o$A)qLCNmN~vgnW~=O2g%jpInxG0|2s- zR5%`iT#_lI7LFBLw}26fjjdzay@ky!tOmRUFlG(UBb{((3-*rZ_T~1KK&xJ^uKPbnIy)fD%5I?g32!c@SU8UaMWblxN|>9$aMnEI!V&o&uXK z&kB=B4c>v}xE42;m(Sy+gm4q&CUSy7`7zfzP7g6C=p>{Q~tb zAOTR)6@5(r3>zZ$-u=wN5|Dj)5}-yPPgc^P_HM*2NQoSQtfU!j`e7anr~sS>s^+Wl zb-x=}&`Qs+kT@OCWr4es{&sVGH?X^D+u+`?pTC*vBPX~^Kl=g3%FUJV_3(IjEX)&2 z9m%h3v{q(jXBWd+T^%3+HE+g!+JM4OPsW@yMJ|GKxObJKMtnmtj0X*-Hd+J02GO_a z)B8v&TZjc$*ZbS`WIYWPQe*I442`usun~kU(mzwCTEPKNP5XO>H5sbl!aIIP|Hs`{+ zkcY*pYXr-js=0hqzSyX=Hb4=%Vs`^@H~zgaxb7C{O4&}QrS{xBtwz=QO+Oj^z}|fh z(`}~NHFaSj1S_?E;)KgV7vD=-8_RKi(awKD=$c2r~ zYQ&j|%0lwzxt4@95Ry@vRCtCV@a9-h?w#lK%_1)g{eEc(d?D zKF05^?8{RqV;AAFLQ7Qso5<9~{mfu{@qbBEWTeGymE74x8g;J}S!Vxq$dyya>Qh*? 
zgA#Lv{nqKS?E9a}u4(s6%5FqRr^JREAxdSld&$5Xs;k2<^nN>I_Zbo?hoy(^vehX3 zxqYSJ;BRoqqx?YHy$oRayAI-%Js_P=f>Nlz14y*-1}axG%>Qqsq3?X$^dwG5DN>ny z8VI8l4ODL=%vmN$6i`(jse{X1lM!5KQyTIQ=Zf1_TGs{X?S%W}P$|L)vs z)GW@Bmnf?2iv)med)rGrfWx=55BzuMVUn;C--v32LsG$A7D;9yS=!9^BPE(3iEk?K zk_i!5*x?gN(puaU(b-+`6s5Q*jH&%cNU4nCiG5vU6l78qSd?otF!J3rj9lyh1_viP z0vNh>8o4iUxN*7nF?X-_m{0fIj$2o->(e7nWsmuWke8@Hy6V0|VD#YI$xYZ1N=-ZW z9FaOH0EiHZ^yNT15Fy2|CO<(^_nke1?K}S!IKnlYT>*tIw2)!lEQkts9ziZ?d#0e2 z9T(rB!RC5JcA=H7^YLxNUUKmJZC3u9rbvE25dAkU@N%A=YZvDO zo|SGW*2*E4BE|MKh|ICgKm#hQZ&P(gY^a?-7M9URG3Y;=z=72*RCONQmKCYb2}X2O zjfE@Maq?+NxLR+n+wK}}M%ptI@*9?Wlq?ZXfDkaEX;)Eixr3@{AHv4V1!vmwCvL$V z7>tX1I?crm7hDekv*XL$wAzJ{Wy{>-;R^k{tx(EC+E}wf$h?6$vnNs7EK|pr{S0|l z>N1d!^w&Sl4N`G&PZ{Vr>N*}o{&`+W{qDp~_qEkSxY=bNoC6Olwg}j2>)~;~KRu#- z(QYYWdV_SGApUUju{l8(*i@Wc6MFN3#iCX0+pn8le>Q~3Ge3{5@~G~dvw*lfCFk>D zrMR(CtQsxU(c?2-H=rU;^V|wL@uI%s#Luqr4{hh#t4jiGn>8@m+6~%MSHzUlrji^L z#<82);NRk*YZ>SLR4pmhHzF^o@Ka68f}%Ay;S^YXq^kfLu4W>PS+XEHU0S>t1^0Pl zeg!2ks1GPZNd%A}T2Zza7CrdzYP8~NBh$?Sy^MG^>L6&GfknOMTbdqXY|-tgXCOlS z3A~qW*FbW zTo!|a0%cAG>XM~CnrK9g=9a3hX)I8n&b;s}FJI#25HE*$d6AbRys$@w8D3_2If{$# zq@;S=9TW&4M6b#7xQsn9HnM+YY-IfD@f}km$tI?GDe@wTXb{ zaxgta$tQvejMVlTYdTuYsdd|^wQhrT5{wa%2?IBzVJn9;4BU{0xg63ka6@{hq+#HO zw3^Przz}IQor77S*dtz@rr{g;wB)+oAx!tWSP&|hwJ}9@Da@^=_S*K?zG22$!EC`A zcA#b?xjlgOX*U&Iv87fxV6eTjCBW#q2BZf78!lFC2d=VTSlCbltqkU~-IfXms7w)` zli)&tg|Zuw<1E9n*IM~pQc2RwC@ae6c98=`KOPD6 zS^q>bZxOeVhRT?_Fhd$dnWJftmwHD@Ingdq!`)PlU7c6W9ZdsQJDsW%9$^AdZ9s&(83N+Un^;Tsanr#~9aj$`F z%{6heo+L5NKcyYuQ(;OdD~$%C#)^-hf!uLDD(R{el#K;tgeI=8a($y1r#2teFCOEF zar{cvcvEc7&R=}{)aB)K?<^HwLbjI;i51DML1sz}%9cx2Si@Z8d=XxP@d~d&@Y9nb zBqF8lY&;9Nane5+Pffv&gm#y(SQ0Z5WKAQB_b@>gOOr|7Y@=N!9!zQ;i%qiNB zb>rS|5&5rx@Qo{mrUltMGs|rvzc?%NA;hnwj^WY#;;d5KEaw-i+QSHY_f-H}!GS$1 zRx?RdDWnf+>27==SwJCuKjLEqpIe+bo3kF6vr zA)YIYKS$eq1vel^3{^T3>;`=EMqEq?+6S@gkdhHA$%Dud3dWdA4kdvG$orc)l38oh zhz%uy4^0vn1H#SzK)T5G|5#qTu&qq`IpgJ4JGU0Cofs=zn%*8N9szAg?KQqCQ8Jtzn5A~c4u<5 znAw?9iwK-GJSSyO5*N$Vo-~Oqif)xOhHt!QB)Q`Ni8TY1OD8Cn!^n^87z}2t{m}}pW2TlWs}pByaBs2 z7EJAjfQlPivNz4Yt6B^Gv|e`7E@|)dRm#K)aI^6^G5UAo&{Y&?B#{qP||`opPkL~4t|2T zdH=)X$6j5y?cR(DRz_YLydkW+x!d`MY#!!r`VHmFS-g##8Hi!0{_W2lMDNejkF`K3 zF3j_?zzd;UcqWm3S#3>$jJ!G`jReD}mb|DSZ`O}M!ybU;I4 z`QtdA#xMEXxJ37&8$@&r#KKVJAyB{_X!Z~hLsmZ12#SI9Hpxd&3_$|SM^Fr;RV#>~ z7)YyD5OFb(R;?g3hZ3U%YC~xMs2oo|uqea!po{u1gA_g)U7%}CRvBnvM$~CuH0Cse zlHXG!Xv~rQdsK3z)mi#6uN(~TM=5BVe&1R_TY*#M$zry-UDSfvf+WoQX$8CW*lGW) z3{gJ$$aH`Yrv5tvxP5T--;)4*r~WfY&?uBFdCLza#3|TJe zBxe^B=fF2fX$Td^3MUF zu6v!JX|)lq$m2xv4I+j}YcU@S-A)D^t%8QoNP|7EmxL34yK0h9B3en2u00s&k(iow zpts-2(TE}i^eCG%peOnk;z>duC1sGWS=IRv{2thO-ogNIUw5AiSHLdj2T?6PH1HFm zzfFuA0%!)8J{>~olPMS^?CrqM=|%n)08DHp)ZjX5y&0Mk4Y zJPk&AiiRN)9FQA4-bQdPkxNa3zTQvfns>R|+ocp@c8L;kH^6%G_E_+d1i0h-UB(nq2O+~YdHXRGllVmrkh%{RZ3kE*TvsC6ConB>*~B;XZNzeFb4*p?-F&`Q zUrmlJLS{iTf~BRx42;b4$8q8P;_PO8j3s7Lwl!*4j-uIpj>ePk=jV^l&@!EIYy6g% z>a942(c6m+%y!xKlwbDX;QDRBqKB1)5k_*~HI(2i-Noj4L1WOxV=F>atUw@ROCTS8 zH2E~APygkQi!jHIgDd9yC9)Z@l{CDG5#ES_QSnxsnCw40c`46GO9f&@wV)=(6^rY4 z>NjCm&D%|v65*w%4%DEUVoovH-i$?%*d_oHR4w&zz7|CegE>+Yr|Q~3@*x(wOe9XT z>)_cVBTwaC%uVJF=eCU^z8l*(jvvvwkQGmXTTl8f7p)n#1>|Nfr_F>peFMV1Dc|ZJ*);er}rAC2S2w$U8*+ z{h|~NQU4J28#e&m0v}h@FI?PN;KTGL&;#2S&Y@i4JTKm=;zisx>C$s5Nw5td(6GdU z5&?YSE#&8kaobUS$UNGo7QTrKa1u~9f@0m&R9x987Je0Z1L6D`6#S4lpRPW^^G8Nz zAwb`k+m(9_Sbsd{qp672tQEQHS|*Yy_8V#SkA?Od zY1M>7`;D|}!lC^}S~cPDn?+jvW96_8RyakiIGO2$3QC!9>CI#L0vrC4HUGo_D_JY9cN|fO~S-d(xqj{V9h% zr>3JZdIy|#IP3vTr&FLRDmM{C5= zvQII#N9A;9Q$y;9Pd4Mjj?KN zEuAqI1;>wxnjLG0#E&I&j6nex73>S71WW~dEDvU9{zxN9F+P#RY{huEnlGd%rvYQ# 
zdG6cDA%FV&Y4?hso=kZay~f3wbuD@@sU#VS%WknA*fZkn^e7)X!&Owog^LxsXR}zX zxnA3P72mw>>s12e(7MU5S5znK)$DM?1*?FjV0w2)^WlPhY!@sk#hzs7KpZ~RqaSe9 zlHd#u?8%LKm8hdZQ=4pRdlen2**xv`iab3fO}1CrmrV&ZiA-;=;({y7*&H-F+++IQ<#lj1VPTh84`OBob?V1}H$IFKKM``I> z9}VF^$_Pg)_Z8E*QW*l*<#HE0_qk9FrgSC)>jm8~WOag&y1i`tZ{c!#oG!PB3}=&+ zzuB~_K)i3$bl}d1;AZ%!BcsP=liP<3vaUsix=L&MVx27ena&Kr}*?VFW#5d zD-I9y0ipW7eQBww;7J|HGe}tNq4A_W01e{w#24`2)1u9^XP1^xRQ|XXvTeyf$0h3P zNlQH;qU_>|L*{WE4=Q5}x2v&sU7%e)+AE=VpOj+aA8-d+MDdlqgHMR6kO_^it9nI^ zud8}RjjyYEMUAhkdPT(7MQy5AoR+l4-F@1|-F?Qz-Tjvs5CIh1O-=6lhfysD2#em@ho&$0B82#=2pgU%q}xP$phM5J2UdFC!`V ztFXk>Brg)}7mLV{;c|;RRj>7lEl3>qC-{ug-J|29(_K!HqQ?@qa0sZ-XzbK%SMoz# z9CGJQ4e#;PIoEm!aTEF3Bd%TS2sg0C)7;f{Dy;CN+0uHQN6^|h9e{+)InJ?<+-N_H z;Gx);@>`H(EWCn~qFs&N9EwAv42d7CgDww=pwD1268m3pu=s8;CbP_;;O`86B3Qsc zLHmfYy1p%FT;aIE5ev6Zr?W7O`2RPA_F01i7Ef?fpb)mjfCC=G?wvpaa%l+#mhh5c zQhuDKlP>59*75fu@%O-1-RrdPPlOQaVdmu3p>R~O21O9h=^CGdN3)znZXdbX%a$&N ziwMljGtcE1B0Y3Gu5Gp?u4gfUG>q||>s+h!`~m<1gb>@))t66Nr4mjokCHiF|i96Lxa>c7FwRavI6l#UYF-M#OK>j=7cNgjl_8!h1fCcj25i6wg5u|Ah zEdc$mkT4=Lu`31>4Pzp@58e{w1qfAd!dp^IP#AJP9$}nm04v`W%?}fBC=q}cfi_A( zcqXznQF~HZ1g=1ysl=OpfBL$78sKj#!JEKBwlFI~a7TS+Zuk%Cb7hRu2{uv2Z{%NN z6x?igs`-d>q8Ok9^dB{FD0Q_Ohv~hbj5y|gF7IPeb)He%kHrY{#cCremT?qe1F>=l z&kEy3AufumaTO*Ru0!NlVno5+Imk!k0Xtn6VLmMpj)j8*q>D(BS|%>vR#Kf~d_)b9FTG_U;%` z)Z}@i3>_IMD_z2&pE%wnWg7xqZYhQZkIsB^1);{&2UdRfFE35~t-rQ@Kq5R{((Rs{ zlRAV5TD?rfMlnSM+qUWe)*s^FhpE=u@M1qgf&3!F)heQW_6X4I$qk;7jbFtho9_a@A|F0;NOz#v?P<)IMzN}-@HiG#NDCee&6-+= zcs7I;(h$!&G&97rzr=X9@Gh`i3dIU4_Jl0uV1ULjFQ15n!c6Z{rP`CdRiFWE1K_-AzSF_YDZuKY!A#e>* zE3yS;<3|+v=>vmMfl5_QKny9Q>m+0jt{xEg#L@BqZ?LGz64cK7>2w(iBTP94rRZn_ zG50~OuDuz?LJtB7Bb)n&X)Fl zLz?IoDDB~Ti3xbTx{=V!Eckdgp51;#N6cDH0(;sYZLgpQ63mW~nnb7|c*vYS3$!B0 z`6$s%n>eAP-Zn(oS+lk6tKke=;4*u=mA$RaOGcZiU>I(-wF#2Vnq*14+>8upCMbOV z79{rC-CB4PA~(HpRdMHlD8S2hQeptePU2<^f28k45W`+xhm0bLH#VzV;YRT~=sFLwwpl-O*^Qp zRy501N7FucB=;*Ldk|{*+g^0wXxjhX6S=N=v`1WvTg7KUuJ?#vm7Px^SkS?#y3+VF zi(^Hk+lgZ#jc_=4Nk0V4|Z5Xs_z`Ps)G$@Z5X z42)@KfIfdSjcHE;(@qf6B2!$Si1mAtvD;w`4l(Qy!-CfQ43n{932X!g-Dp`MV;K*~ zAMexK3wmG-JBtc`l*X{6Wk-o&!5(K1bG(2IV8uH@aN8Z}D4aWxmeQ;yp%ICRS@Az@ zi19B;M|(RtWj*ES_2X!N^5C73hTP*vuD^F*@=Bpj1AxIa@OR^eRR-t|)aj<`?>-u7 zCQHum@l267kmE}CMsmCpVOkAwZAty7nhkUn z6}V9n2HY<)*IxlgXxQ}}nuM>$8{p0Dv>FR7n6E?_KUiWxB4kFy)nOVztCv<4gIurS zYp3gw71$}Hl)#iSR+?+|7V=PgQi?aXJSZ2UwWz_-_tt{~&sm_hxMD6m|IG!C`YSKb zgon@6Yv}iCi!Gp02BD@LCncVCgd7MI-dYIX!c@Ros8%`Rzrb~}iF48#3x~sR)>{oq zAyAs{7wTKMS!$wN3BZTxdMS>YTjA-2@c1i$=f$tjznqeN10d$YvjqEl2^_{SK?#1y zPrQI1{%h_q^LyKa80=JNyudXaHx2%C_JtoE^p)o%*UC8@5fH8YaZS zVNceT<>`wMbq=dD3^)!=JjJc^WCo7ue>sM48 z{@~8;afmmoRr5;26LfMQ=kCqRv9p4~SeQY?)~7&E+0agpZD<@=F)zzxigquq)RZ-% zI&W6^rqn%;$WT2P`J}~^WvtLdDYC@r^ShEI)He-8mK;XGLxg#*xnRGDJ-OqO0uexe z0f`KtgOF=a!81y-bkYl9}cocm|3oL$k+HRvj+TL5-_z8e$Y*ZjTHT_L3&9sK0o z75fnZPIr#l>jcib0V6@iN7sKaI3(TqY6=bsfBZRwOG&+fLs4IL!?9EjLNjS`><|{Y z1K=->Mt?bAk)YGAgAbWEFk(D}(!ZJlNFqi4gn`1p38kdKg<0H)18mHu&y#k57bk5j zApu$lE9DqK6YdM#17Fue8mK3?V#|7(Iv^D`G#xTzfJ2^y>olVSO7wGdVwu%b&zobh zd}LE08`?1QtSo9Twd2^xpPnlwPiK{^n1u+-pEj1FFAS~wXTL9mhD2#lj4l>0uL0QA z&gCtn9t_o!E?MEq;&KY6aOw$HD}Ouj6QYBn2*0*GzHeJzJSU0^2t+q5NFZS9x4hJH zM4Fxq*VjNg&EfijeKg3RPkeoGF+AAzMRMtuUt|+GN7*}y%4}e|Ur~3~b?X8})wy+v zELT?^mlX^LU`UV-o;6%x!v!YRte@%z_Nb4&`AxsT5U6exOV0je7bC5iCJ!kCGTW7C zu)(KLewF;EG|x<&YZXt3FJSaFy+ycCJvnuhBaZrV)2>uT4&-u4INYNMDD&=M(x1EW zzS8sM`%z%`l$NDODukXp(`u`Uya&!X+eGru;tL|9O7D?lBHTg3eJCZq5n~IgmNYNx z2RD%9h6<(MApnZxW?#>^SE5`oqC+d7*9cfAQG*h7!pXmZTZuPi15!m4Z!u8s%ME_&=49TBuca+D4Wh;s;b#5*JAOr1NZ8QLN2drmuqw9^hD 
z?X*Ks4QZ4t=!K9->4nM&e1)6??o6FO2D%X7u-$XM?Vp;8sAdM5;aJ)N?TC09jOYOR zAe2I&D_kXERX=lKO==abwH({zMc4PK3@TPP*NfKgqlYRCT9~+6Mf@me-y5*pfQ^Ae zr=8tRV+r59^N7q)tKSS){L@xzcFL*vi=NE*)?5*f@YXr+)?L1^95hEle)BUGTiHqtJk1V?k{qr5MAZU@m5u zYl9d72jICA{gi*u_d}E+(iJ`>)Bex~0z=PbL+%!D0 zr$?M_kDob%-|!eEpp;MT7^VNr84|)_To^-2UT7I3GxINyaC9pG6`-eaoFHYHz76M} zl%A6`Ee{UIjtMa)-;&b}SVWw9)GJbNT|#cxjGpZDO{8jNRA(UPcZ4M9!(-?2PW+w`Nq9|gKbz0ujpYXS9tQt%>3dp{bJsJ zI=mu>rGU7C=ecPP^57$CZsdFv*QgjPAke@_T=L@9p?bS_aa;H*FT#(=w(uH@5FfM) zpoE>J^e9dk*-Grsu=z1v;2A>_8>zXmInXvi4&rA-LUIuHGdu!&e{LrCOz!F2Bz__i zCPH#Z=@0n@EN<+Zptcn$N$?|}lWAfgJhR3Zf|M+x>A)Yv7(_S*+uTNU4g8ONGlP@_ zgFFE}$dve6P=oi(h%ni{r= z8G=tNE?rGZ2K7JgB8{D_K@aU6A1Hi~ryo_O2s)I8zKm|`8b{Pf*|6l&WYiI>@(zvq zU0H9-$+x^cC+}9=Cpyj15Y0wpmJbD$kz%@60!yPv998;?BGlg0RUFY z`0EbQJ=;Nc5z(=6{0T4k50D_CgsbQ*zzC|eLa8u5f>A5!sdP5)r;Sttct#5`58NY! z8syMKTL`+1x4=Gl9|_QGQWn&h1Wt1|%HnAeicb6k)N1OV1|`T3n^PtCq|p~w4t6C$ zGu3#VfyJ#9M@*F=j7zLDjzJU~QL*OGj+X>J6y$b~`jt-X$o_O|rx7JO*v+6cmrH17 z4f@ncMcwneDQ7;Tv36Yh-BEb5+!dQdw>IIk3YdcKSF{l++iS~5DAihpBP9{4lt0MqfC*JVj$uEGDv7g8VTX1I}+l9 zG!hzvIm8ePA>fv$M}}Cag3#^IRLy1(2<0_hvVZBZP~ke3d7Yjx8rG4q2!ST*4R_1RL?+~K}O7o0##z86L zAr|``iGxDxOQC*t16Po+NM%5hjSQM@k`@vZOg&oBB&QLpRCQ@WJfXyXsLmiYA4AL( zSf3HsC4VEW8Y5TG$NH?(5L-I#61PLQSd4F_BOd|=QU|PY?3>-D0uz}M5hAWqK1-S~ZLGXW0W+PR|H_bD|kdROC zwyVDcs~m`0p`qW8`E8OV@Ru0*axTyCOWh(zs2-07CK3`tI;<8mg>%f5p?HI-NnTFz za+;S%bIRo!s{SXKgX9?`#-16=J&(}*6C?X}j_;Pp4=;>=X*?YNQvPW+VF4F0R6%Ig z%`N3eIe!wnMd$AjZ&R-Fm8b?0u6oSJQ6d&jiF8_@h&iL6C_L59v&JTA#(wo!-+OuA z-iX1BG!P+G*6Nz7){9N$nm3ceE3BKdRA0Rom6`}xhHc|xv7zTqX&KweDT0f4*cxEzeDQWfq(k{0f@mK AL;wH) diff --git a/data/.DS_Store b/data/.DS_Store deleted file mode 100644 index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0\n", + "\n", "\n", " \n", " \n", @@ -135,7 +153,7 @@ } ], "source": [ - "speed = pd.read_csv('data/speed.csv')\n", + "speed = pd.read_csv('../data/speed.csv')\n", "speed.head()" ] }, @@ -277,37 +295,7 @@ "collapsed": false, "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-164.959670354\n", - "-162.025020679\n", - "-160.684435566\n", - "-160.290398374\n", - "-160.05216391\n", - "-159.007237916\n", - "-143.002425466\n", - "-97.0842474111\n", - "-69.2584066599\n", - "-66.486410055\n", - "-61.8979553772\n", - "-61.6762352018\n", - "-57.1299413718\n", - "-49.7340004002\n", - "-44.2543645717\n", - "-44.220820683\n", - "-44.2210747114\n", - "-44.35206149\n", - "-44.3903753351\n", - "-44.3985865598\n", - "-44.4003312251\n", - "-44.4007000757\n", - "-44.4007779689\n" - ] - } - ], + "outputs": [], "source": [ "SHMM.train()" ] @@ -331,7 +319,7 @@ "output_type": "stream", "text": [ "[[ 0.42176108 0.5332247 0.02250711 0.02250711]]\n", - "[[ 0.15113613 0.81203853 0.01918793 0.01763742]]\n" + "[[ 0.15113613 0.81203853 0.01763742 0.01918793]]\n" ] } ], @@ -351,10 +339,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "[-0.]\n", + "[ 0.]\n", "[ 1.]\n", - "[ 5.47279257]\n", - "[ 6.39096276]\n" + "[ 6.3909629]\n", + "[ 5.47279282]\n" ] } ], @@ -377,9 +365,9 @@ "output_type": "stream", "text": [ "0.0\n", - "0.0\n", - "0.182276997053\n", - "0.224620703134\n" + "1.66533453694e-15\n", + "0.224620610731\n", + "0.182277323178\n" ] } ], @@ -408,11 +396,7 @@ "outputs": [], "source": [ "sc.stop()\n", - "sc = 
SparkContext(appName=\"Python_UnSupervised_IOHMM_MapReduce\", pyFiles=[\n", - " './auxiliary/HMM.py',\n", - " './auxiliary/SupervisedModels.py',\n", - " './auxiliary/family.py',\n", - " './main/IOHMM.py'])" + "sc = SparkContext(appName=\"\")" ] }, { @@ -423,7 +407,7 @@ }, "outputs": [], "source": [ - "speed = pd.read_csv('data/speed.csv')\n", + "speed = pd.read_csv('../data/speed.csv')\n", "states = {}\n", "corr = np.array(speed['corr'])\n", "for i in range(int(len(corr)/2)):\n", @@ -451,7 +435,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": { "collapsed": false, "scrolled": true @@ -461,51 +445,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "-323.817629492\n", - "-323.655376194\n", - "-322.264357484\n", - "-322.36577288\n", - "-322.460203662\n", - "-322.36518968\n", - "-323.067586601\n", - "-323.160001222\n", - "-322.430550335\n", - "-322.923114532\n", - "-322.532788688\n", - "-323.236472578\n", - "-323.2748324\n", - "-322.876876983\n", - "-323.060060092\n", - "-322.793643075\n", - "-322.707212423\n", - "-322.668641465\n", - "-322.543763646\n", - "-322.447603007\n", - "-322.090366888\n", - "-321.572216489\n", - "-321.555608227\n", - "-320.941027603\n", - "-321.563730232\n", - "-320.680582288\n", - "-318.642259708\n", - "-313.296724295\n", - "-296.739459247\n", - "-269.902260126\n", - "-168.533162104\n", - "-95.2275142182\n", - "-92.4586323464\n", - "-91.1610817588\n", - "-90.934065195\n", - "-90.8776724129\n", - "-90.8626762541\n", - "-90.8585893727\n", - "-90.8574677359\n", - "-90.8571593068\n", - "-90.8570744492\n", - "-90.857051099\n", - "-90.8570446735\n", - "-90.8570429053\n", - "-90.8570424187\n", "done\n" ] } @@ -522,7 +461,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": { "collapsed": false }, @@ -531,10 +470,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "[-0.]\n", + "[ 0.]\n", "[ 1.]\n", - "[ 6.39057687]\n", - "[ 5.47205369]\n" + "[ 5.50454627]\n", + "[ 6.02543362]\n" ] } ], @@ -547,7 +486,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": { "collapsed": true }, diff --git a/SupervisedIOHMM.ipynb b/examples/notebooks/SupervisedIOHMM.ipynb similarity index 93% rename from SupervisedIOHMM.ipynb rename to examples/notebooks/SupervisedIOHMM.ipynb index 8e5e3c0..841e0ab 100644 --- a/SupervisedIOHMM.ipynb +++ b/examples/notebooks/SupervisedIOHMM.ipynb @@ -26,16 +26,21 @@ "%autoreload 2\n", "\n", "from __future__ import division\n", - "import numpy as np\n", + "\n", "from copy import deepcopy\n", "import sys\n", - "sys.path.append('./main')\n", - "sys.path.append('./auxiliary/')\n", - "from IOHMM import SupervisedIOHMM, SupervisedIOHMMMapReduce\n", - "from SupervisedModels import LM, MNLP\n", - "from scipy.misc import logsumexp\n", - "import pandas as pd\n", "import warnings\n", + "\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "from scipy.misc import logsumexp\n", + "\n", + "\n", + "from IOHMM import SupervisedIOHMM, SupervisedIOHMMMapReduce\n", + "from IOHMM import LM, MNLP, MNLD\n", + "\n", + "\n", "warnings.simplefilter(\"ignore\")" ] }, @@ -64,6 +69,19 @@ "data": { "text/html": [ "
\n", + "\n", "
\n", " \n", " \n", @@ -135,7 +153,7 @@ } ], "source": [ - "speed = pd.read_csv('data/speed.csv')\n", + "speed = pd.read_csv('../data/speed.csv')\n", "speed.head()" ] }, @@ -300,22 +318,18 @@ "outputs": [], "source": [ "sc.stop()\n", - "sc = SparkContext(appName=\"Python_UnSupervised_IOHMM_MapReduce\", pyFiles=[\n", - " './auxiliary/HMM.py',\n", - " './auxiliary/SupervisedModels.py',\n", - " './auxiliary/family.py',\n", - " './main/IOHMM.py'])" + "sc = SparkContext(appName=\"\")" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": { - "collapsed": true + "collapsed": false }, "outputs": [], "source": [ - "speed = pd.read_csv('data/speed.csv')\n", + "speed = pd.read_csv('../data/speed.csv')\n", "states = {}\n", "corr = np.array(speed['corr'])\n", "for i in range(len(corr)):\n", @@ -328,7 +342,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": { "collapsed": true }, @@ -341,7 +355,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": { "collapsed": false }, @@ -366,7 +380,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": { "collapsed": false }, @@ -387,7 +401,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": { "collapsed": false }, @@ -408,7 +422,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": { "collapsed": false }, @@ -429,7 +443,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": { "collapsed": true }, diff --git a/UnSupervisedIOHMM.ipynb b/examples/notebooks/UnSupervisedIOHMM.ipynb similarity index 78% rename from UnSupervisedIOHMM.ipynb rename to examples/notebooks/UnSupervisedIOHMM.ipynb index 28eecd5..5a945f4 100644 --- a/UnSupervisedIOHMM.ipynb +++ b/examples/notebooks/UnSupervisedIOHMM.ipynb @@ -45,7 +45,7 @@ "cell_type": "code", "execution_count": 1, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -53,16 +53,21 @@ "%autoreload 2\n", "\n", "from __future__ import division\n", - "import numpy as np\n", + "\n", "from copy import deepcopy\n", "import sys\n", - "sys.path.append('./main')\n", - "sys.path.append('./auxiliary')\n", - "from IOHMM import UnSupervisedIOHMM, UnSupervisedIOHMMMapReduce\n", - "from SupervisedModels import LM, MNLP, MNLD\n", - "from scipy.misc import logsumexp\n", - "import pandas as pd\n", "import warnings\n", + "\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "from scipy.misc import logsumexp\n", + "\n", + "\n", + "from IOHMM import UnSupervisedIOHMM, UnSupervisedIOHMMMapReduce\n", + "from IOHMM import LM, MNLP, MNLD\n", + "\n", + "\n", "warnings.simplefilter(\"ignore\")" ] }, @@ -84,6 +89,19 @@ "data": { "text/html": [ "
\n", + "\n", "
\n", " \n", " \n", @@ -155,7 +173,7 @@ } ], "source": [ - "speed = pd.read_csv('data/speed.csv')\n", + "speed = pd.read_csv('../data/speed.csv')\n", "speed.head()" ] }, @@ -170,7 +188,7 @@ "cell_type": "code", "execution_count": 3, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -185,39 +203,10 @@ "cell_type": "code", "execution_count": 4, "metadata": { - "collapsed": false, + "collapsed": true, "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-305.300566605\n", - "-305.216162094\n", - "-305.162269755\n", - "-305.084300162\n", - "-304.95789699\n", - "-304.752239908\n", - "-304.39123161\n", - "-303.600197211\n", - "-301.73925858\n", - "-295.148884087\n", - "-273.016305927\n", - "-199.390609561\n", - "-110.701760328\n", - "-93.0289858768\n", - "-92.4321980066\n", - "-92.404432368\n", - "-92.4174791498\n", - "-92.4356121371\n", - "-92.4559823475\n", - "-92.4720757993\n", - "-92.4847667687\n", - "-92.4945053257\n" - ] - } - ], + "outputs": [], "source": [ "SHMM.train()" ] @@ -240,8 +229,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "[[ 0.91035447 0.08964553]]\n", - "[[ 0.19955373 0.80044627]]\n" + "[[ 0.80043352 0.19956648]]\n", + "[[ 0.08965284 0.91034716]]\n" ] } ], @@ -261,8 +250,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "[ 6.38013614]\n", - "[ 5.5021878]\n" + "[ 5.50216013]\n", + "[ 6.38011574]\n" ] } ], @@ -282,8 +271,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "0.247034141013\n", - "0.182740623875\n" + "0.182711715962\n", + "0.247049173969\n" ] } ], @@ -303,7 +292,7 @@ "cell_type": "code", "execution_count": 8, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -318,47 +307,10 @@ "cell_type": "code", "execution_count": 9, "metadata": { - "collapsed": false, + "collapsed": true, "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-530.302930865\n", - "-530.239310475\n", - "-530.19737629\n", - "-530.158889512\n", - "-530.080562677\n", - "-530.01226267\n", - "-529.935550104\n", - "-529.836044223\n", - "-529.710366291\n", - "-529.55033268\n", - "-529.349383019\n", - "-529.093506793\n", - "-528.768858181\n", - "-528.334012945\n", - "-527.745305345\n", - "-526.3924036\n", - "-524.998526809\n", - "-521.857759258\n", - "-515.613507632\n", - "-504.1744234\n", - "-474.563032703\n", - "-398.004513767\n", - "-323.779208179\n", - "-302.338755714\n", - "-301.538015872\n", - "-300.449032716\n", - "-302.679593328\n", - "-302.569697692\n", - "-302.542928588\n", - "-302.543635379\n" - ] - } - ], + "outputs": [], "source": [ "SHMM.train()" ] @@ -374,8 +326,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "[[ 0.9082254 0.0917746]]\n", - "[[ 0.19758471 0.80241529]]\n" + "[[ 0.90699325 0.09300675]]\n", + "[[ 0.19975045 0.80024955]]\n" ] } ], @@ -395,12 +347,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "[ 6.38524675]\n", - "[[ 0. -1.07886847]\n", - " [ 0. -2.2852737 ]]\n", - "[ 5.50910848]\n", - "[[ 0. -0.22113384]\n", - " [ 0. 0.62337172]]\n" + "[ 6.38141004]\n", + "[[ 0. -0.99623318]\n", + " [ 0. -2.42428868]]\n", + "[ 5.50367124]\n", + "[[ 0. -0.2204467 ]\n", + " [ 0. 0.62768677]]\n" ] } ], @@ -422,12 +374,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "[ 0.01499167]\n", - "[[ 0. 0.38522395]\n", - " [ 0. 0.81187156]]\n", - "[ 0.01397556]\n", - "[[ 0. 0.15889428]\n", - " [ 0. 0.73585989]]\n" + "[ 0.0150907]\n", + "[[ 0. 0.37302218]\n", + " [ 0. 
 0.79613534]]\n",
+      "[ 0.01363804]\n",
+      "[[ 0.          0.15889117]\n",
+      " [ 0.          0.73583209]]\n"
     ]
    }
   ],
@@ -449,29 +401,25 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "sc.stop()\n",
-    "sc = SparkContext(appName=\"\", pyFiles=[\n",
-    "    './auxiliary/HMM.py',\n",
-    "    './auxiliary/SupervisedModels.py',\n",
-    "    './auxiliary/family.py',\n",
-    "    './main/IOHMM.py'])"
+    "sc = SparkContext(appName=\"\")"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 15,
   "metadata": {
-    "collapsed": false
+    "collapsed": true
   },
   "outputs": [],
   "source": [
-    "speed = pd.read_csv('data/speed.csv', index_col=0)\n",
+    "speed = pd.read_csv('../data/speed.csv', index_col=0)\n",
    "indexes = [(1,1), (2,1)]\n",
    "RDD = sc.parallelize(indexes)\n",
    "dfs = RDD.mapValues(lambda v: speed)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 16,
   "metadata": {
-    "collapsed": false
+    "collapsed": true
   },
   "outputs": [],
   "source": [
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 17,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "-607.883265242\n",
-      "-595.478995517\n",
-      "-473.565132683\n",
-      "-256.505414778\n",
-      "-188.506919208\n",
-      "-180.14018665\n",
-      "-178.433063289\n",
-      "-177.919713522\n",
-      "-177.675977268\n",
-      "-177.57622844\n",
-      "-177.538977247\n",
-      "-177.525250495\n",
-      "-177.519969055\n",
-      "-177.517794445\n",
-      "-177.516827105\n",
-      "-177.516365078\n",
-      "-177.516131813\n",
-      "-177.516009449\n",
-      "-177.515943685\n",
      "done\n"
     ]
    }
   ],
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "sc.stop()"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[ 5.51005132]\n",
+      "[ 6.38494108]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print SHMM.model_emissions[0][0].coef\n",
+    "print SHMM.model_emissions[1][0].coef"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[ 0.88162185  0.11837815]]\n",
+      "[[ 0.08845407  0.91154593]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print np.exp(SHMM.model_transition[0].coef - logsumexp(SHMM.model_transition[0].coef))\n",
+    "print np.exp(SHMM.model_transition[1].coef - logsumexp(SHMM.model_transition[1].coef))"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -573,5 +544,5 @@
  }
 },
 "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
}
diff --git a/main/.DS_Store b/main/.DS_Store
deleted file mode 100644
index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..0000000000000000000000000000000000000000
Binary files a/main/.DS_Store and /dev/null differ
diff --git a/setup.py b/setup.py
new file mode 100644
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,32 @@
+from setuptools import setup
+
+setup(
+    name='IOHMM',
+    install_requires=[
+        'numpy>=1.11.0',
+        'pandas>=0.19.0',
+        'scikit-learn>=0.18.0',
+        'scipy>=0.19.0',
+        'statsmodels>=0.8.0'
+    ],
+    extras_require={
+        'tests': [
+            'flake8>=2.5.4',
+            'mock>=2.0.0',
+            'nose>=1.3.4',
+            'coveralls>=1.1',
+            'pytest',
+        ]
+    },
+    zip_safe=True,
+    keywords='sequence learning',
+)

From 499ccd40cb2035f20a674d1a0a2183b0418a9672 Mon Sep 17 00:00:00 2001
From: Mogeng Yin
Date: Fri, 7 
Jul 2017 14:43:30 -0700 Subject: [PATCH 2/2] added a few dummy tests. --- tests/__init__.py | 0 tests/test_HMM_utils.py | 13 +++++++++++++ tests/test_IOHMM.py | 13 +++++++++++++ tests/test_family.py | 13 +++++++++++++ tests/test_linear_models.py | 13 +++++++++++++ 5 files changed, 52 insertions(+) create mode 100644 tests/__init__.py create mode 100644 tests/test_HMM_utils.py create mode 100644 tests/test_IOHMM.py create mode 100644 tests/test_family.py create mode 100644 tests/test_linear_models.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_HMM_utils.py b/tests/test_HMM_utils.py new file mode 100644 index 0000000..7ae4a36 --- /dev/null +++ b/tests/test_HMM_utils.py @@ -0,0 +1,13 @@ +import unittest + + +class HMMUtilsTests(unittest.TestCase): + + def setUp(self): + pass + + def _mock_(self): + pass + + def test_(self): + pass diff --git a/tests/test_IOHMM.py b/tests/test_IOHMM.py new file mode 100644 index 0000000..8a2ab97 --- /dev/null +++ b/tests/test_IOHMM.py @@ -0,0 +1,13 @@ +import unittest + + +class IOHMMTests(unittest.TestCase): + + def setUp(self): + pass + + def _mock_(self): + pass + + def test_(self): + pass diff --git a/tests/test_family.py b/tests/test_family.py new file mode 100644 index 0000000..c6bd769 --- /dev/null +++ b/tests/test_family.py @@ -0,0 +1,13 @@ +import unittest + + +class FamilyTests(unittest.TestCase): + + def setUp(self): + pass + + def _mock_(self): + pass + + def test_(self): + pass diff --git a/tests/test_linear_models.py b/tests/test_linear_models.py new file mode 100644 index 0000000..33eb5aa --- /dev/null +++ b/tests/test_linear_models.py @@ -0,0 +1,13 @@ +import unittest + + +class LinearModelTests(unittest.TestCase): + + def setUp(self): + pass + + def _mock_(self): + pass + + def test_(self): + pass
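
For anyone trying the refactored package end to end, here is a minimal usage
sketch of the API that patch 1/2 introduces: an UnSupervisedIOHMM with one LM
emission model, trained on the speed dataset, then serialized with to_json and
restored with from_json. This is a sketch under assumptions, not part of the
patch: the 'rt' response column, the empty covariate lists, and the output
directory 'models/speed_iohmm' are illustrative choices (only the 'corr'
column of speed.csv is visible in the notebooks above).

    # Minimal sketch of the refactored API (Python 2, matching the package).
    # Assumed: 'rt' is a numeric response column in speed.csv; the output
    # directory 'models/speed_iohmm' is arbitrary.
    import json

    import pandas as pd

    from IOHMM import UnSupervisedIOHMM, LM, MNLP

    speed = pd.read_csv('examples/data/speed.csv')

    SHMM = UnSupervisedIOHMM(num_states=2, EM_tol=1e-4, max_EM_iter=100)
    # One emission model per response list; setModels deep-copies the
    # transition and emission models once per hidden state.
    SHMM.setModels(model_emissions=[LM()],
                   model_initial=MNLP(),
                   model_transition=MNLP())
    # Empty covariate lists make every sub-model intercept-only.
    SHMM.setInputs(covariates_initial=[],
                   covariates_transition=[],
                   covariates_emissions=[[]])
    SHMM.setOutputs([['rt']])   # one list of response columns per emission model
    SHMM.setData([speed])       # a list of sequence dataframes
    SHMM.train()

    # to_json(path) saves coef/sd/dispersion arrays as .npy files under the
    # given path and returns a JSON-serializable dict of model properties.
    json_dict = SHMM.to_json(path='models/speed_iohmm')
    with open('models/speed_iohmm/model.json', 'w') as f:
        json.dump(json_dict, f, indent=4)

    # from_json rebuilds the model, fitted coefficients included, without
    # re-running EM.
    SHMM_restored = UnSupervisedIOHMM.from_json(json_dict)
    print SHMM_restored.model_emissions[0][0].coef

Round-tripping through from_json rather than pickling the whole object keeps
the on-disk format inspectable: the JSON carries the scalar properties, and
each sub-model's arrays live in their own .npy files under the chosen path.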