"""The NGBoost mixture of K Normal distributions and scores."""

import numpy as np
from scipy.stats import norm
from sklearn.cluster import KMeans

from ngboost.distns.distn import RegressionDistn
from ngboost.scores import LogScore


class NormalMixtureLogScore(LogScore):
    """LogScore (negative log-likelihood) for a K-component Normal mixture.

    Internal parameter layout (3K - 1 parameters): K locations, K log-scales,
    and K - 1 unconstrained softmax logits for the mixture proportions
    (the K-th logit is fixed at 0).
    """

    def score(self, Y):
        """Return the negative log-likelihood of Y under the mixture."""
        # Mixture density per observation: sum_k pi_k * N(Y | mu_k, sigma_k).
        return -np.log(
            np.sum(norm.pdf(Y, self.loc, self.scale) * self.mixprop, axis=0)
        )

    def d_score(self, Y):
        """Return the gradient of the score w.r.t. the internal parameters.

        Output shape is (len(Y), 3K - 1); columns ordered as
        [d/d loc_k, d/d logscale_k, d/d logit_k].
        """
        K = self.K_
        D = np.zeros((len(Y), 3 * K - 1))

        # Hoist the repeated density evaluations out of each gradient term.
        pdf = norm.pdf(Y, self.loc, self.scale)        # (K, n) component densities
        mix_pdf = np.sum(pdf * self.mixprop, axis=0)   # (n,) mixture density

        # d(-log f)/d loc_k = -(pi_k * phi_k / f) * (y - mu_k) / sigma_k^2
        D[:, range(K)] = np.transpose(
            -1 / mix_pdf * self.mixprop * ((Y - self.loc) / self.scale ** 2) * pdf
        )

        # d(-log f)/d logscale_k = -(pi_k * phi_k / f)
        #                          * ((y - mu_k)^2 - sigma_k^2) / sigma_k^2
        D[:, range(K, 2 * K)] = np.transpose(
            -1
            / mix_pdf
            * self.mixprop
            * (((Y - self.loc) ** 2 - self.scale ** 2) / self.scale ** 2)
            * pdf
        )

        # Gradient w.r.t. the proportions themselves (K-th component is the
        # softmax reference, so only the first K-1 enter).
        D_alpha = np.transpose(-1 / mix_pdf * (pdf[range(K - 1)] - pdf[K - 1]))

        # m[j] = outer(pi, pi) for observation j; after the diagonal update
        # below, -m[j] is the softmax Jacobian d pi / d logit.
        # BUG FIX: the original referenced the undefined bare name `mixprop`
        # (must be `self.mixprop`).
        m = np.einsum(
            "ij, kj -> jik", self.mixprop[range(K - 1)], self.mixprop[range(K - 1)]
        )
        # NOTE: `d` is a writeable view of m's diagonal; the in-place
        # subtraction intentionally mutates `m` itself.
        d = np.einsum("ijj -> ij", m)
        # BUG FIX: the original subtrahend np.einsum("i...", ...) had shape
        # (K-1, n) while `d` is (n, K-1); transpose so the in-place broadcast
        # is well-defined for any number of observations.
        d -= self.mixprop[range(K - 1)].T

        # Chain rule: d(-log f)/d logit = D_alpha @ (d pi / d logit).
        D[:, range(2 * K, 3 * K - 1)] = np.einsum("ij, ijl -> il", D_alpha, -m)
        return D


def k_normal_mixture(K):
    """Build a RegressionDistn subclass for a mixture of K Normal distributions."""

    class NormalMixture(RegressionDistn):
        K_ = K
        n_params = 3 * K - 1
        scores = [NormalMixtureLogScore]

        def __init__(self, params):
            # save the raw (internal-coordinate) parameters
            self._params = params

            # derived objects used by the score and sampling code
            self.loc = params[0:K]
            self.logscale = params[K : (2 * K)]
            self.scale = np.exp(self.logscale)

            # Softmax over K-1 free logits; the K-th logit is fixed at 0.
            mix_params = np.zeros((K, params.shape[1]))
            mix_params[0 : (K - 1), :] = params[(2 * K) : (3 * K - 1)]
            exp_mixprop = np.exp(mix_params)
            self.mixprop = exp_mixprop / np.sum(exp_mixprop, axis=0)

        def fit(Y):
            # ngboost convention: called on the class itself, hence no `self`.
            # Initialize the K components from a K-means clustering of Y.
            kmeans = KMeans(n_clusters=K).fit(Y.reshape(-1, 1))
            pred = kmeans.predict(Y.reshape(-1, 1))
            loc = []
            scale = []
            prop = []
            for i in range(K):
                obs = Y[pred == i]
                loc = np.append(loc, np.mean(obs))
                scale = np.append(scale, np.std(obs))
                prop = np.append(prop, len(obs) / len(Y))
            # Map proportions back to softmax logits (reference logit = 0).
            return np.concatenate(
                [
                    loc,
                    np.log(scale),
                    np.log(prop[range(K - 1)] / (1 - sum(prop[range(K - 1)]))),
                ]
            )

        def sample(self, m):
            """Draw m samples per fitted observation."""
            component = np.array(
                [  # no fast vectorized multinomial in numpy, so loop per column
                    np.random.multinomial(n=1, pvals=self.mixprop[:, i], size=m)
                    for i in range(self.mixprop.shape[1])
                ]
            ).transpose(1, 2, 0)
            samples = norm.rvs(self.loc, self.scale, size=(m,) + self.loc.shape)
            # One-hot `component` selects exactly one component draw per sample.
            return np.sum(component * samples, axis=1)

        def mean(self,):
            # BUG FIX: the original computed the mixture mean but never
            # returned it, so the method always returned None.
            return np.sum(self.mixprop * self.loc, axis=0)

        @property
        def params(self):
            return {"loc": self.loc, "scale": self.scale, "mix_prop": self.mixprop}

    return NormalMixture
class NormalMixtureLogScore(LogScore):
    """LogScore (negative log-likelihood) for a K-component Normal mixture.

    Internal parameter layout (3K - 1 parameters): K locations, K log-scales,
    and K - 1 unconstrained softmax logits for the mixture proportions
    (the K-th logit is fixed at 0).
    """

    def score(self, Y):
        """Return the negative log-likelihood of Y under the mixture."""
        # Mixture density per observation: sum_k pi_k * N(Y | mu_k, sigma_k).
        return -np.log(
            np.sum(norm.pdf(Y, self.loc, self.scale) * self.mixprop, axis=0)
        )

    def d_score(self, Y):
        """Return the gradient of the score w.r.t. the internal parameters.

        Output shape is (len(Y), 3K - 1); columns ordered as
        [d/d loc_k, d/d logscale_k, d/d logit_k].
        """
        K = self.K_
        D = np.zeros((len(Y), 3 * K - 1))

        # Evaluate the densities once instead of five separate times.
        pdf = norm.pdf(Y, self.loc, self.scale)        # (K, n) component densities
        mix_pdf = np.sum(pdf * self.mixprop, axis=0)   # (n,) mixture density

        # d(-log f)/d loc_k
        D[:, range(K)] = np.transpose(
            -1 / mix_pdf * self.mixprop * ((Y - self.loc) / self.scale ** 2) * pdf
        )

        # d(-log f)/d logscale_k
        D[:, range(K, 2 * K)] = np.transpose(
            -1
            / mix_pdf
            * self.mixprop
            * (((Y - self.loc) ** 2 - self.scale ** 2) / self.scale ** 2)
            * pdf
        )

        # Gradient w.r.t. the proportions (K-th component is the reference).
        D_alpha = np.transpose(-1 / mix_pdf * (pdf[range(K - 1)] - pdf[K - 1]))

        # BUG FIX: bare `mixprop` was an undefined name; must be `self.mixprop`.
        m = np.einsum(
            "ij, kj -> jik", self.mixprop[range(K - 1)], self.mixprop[range(K - 1)]
        )
        # NOTE: `d` is a writeable view of m's diagonal; the in-place
        # subtraction intentionally mutates `m`, making -m the softmax
        # Jacobian d pi / d logit.
        d = np.einsum("ijj -> ij", m)
        # BUG FIX: the original np.einsum("i...", ...) subtrahend had shape
        # (K-1, n) while `d` is (n, K-1); transpose for a valid broadcast.
        d -= self.mixprop[range(K - 1)].T

        # Chain rule: d(-log f)/d logit = D_alpha @ (d pi / d logit).
        D[:, range(2 * K, 3 * K - 1)] = np.einsum("ij, ijl -> il", D_alpha, -m)
        return D
def mean(self,):
    """Return the mixture mean, sum_k pi_k * mu_k, for each observation."""
    # BUG FIX: the original computed this expression but never returned it,
    # so the method always returned None.
    return np.sum(self.mixprop * self.loc, axis=0)