# -*- coding: utf-8 -*-
"""
Created on Tue Mar 29, 2016
Module:
poibin - Poisson Binomial distribution
Author:
Mika Straka, 2016
Description:
Implementation of the Poisson Binomial distribution as described in the
reference.
Implemented method:
- pmf: probability mass function
- cdf: cumulative density function
- pval: p-value (1 - cdf)
Usage:
Be p a list / numpy array of success probabilities for n non-identically
distributed Bernoulli random variables.
Create an instance of the distribution with
>>> pb = PoiBin(p)
Be x a list or numpy array of different number of successes.
To obtain:
- probability mass function of x, use
>>> pb.pmf(x)
- cumulative density function of x, use
>>> pb.cdf(x)
- p-values of x, use
>>> pb.pval(x)
The functions are applied component-wise and a numpy array of the same
lenth as x is returned.
Reference:
Yili Hong, On computing the distribution function for the Poisson binomial
distribution,
Computational Statistics & Data Analysis, Volume 59, March 2013,
Pages 41-51, ISSN 0167-9473,
http://dx.doi.org/10.1016/j.csda.2012.10.006.
"""
import numpy as np
import math
from scipy.stats import binom
[docs]
class PoiBin:
def __init__(self, p):
self.p = np.array(p)
self.n = self.p.size
self.check_input_prob()
self.omega = 2 * np.pi / (self.n + 1)
self.pmf_list = self.get_pmf_xi()
self.cdf_list = self.get_cdf(self.pmf_list)
# ------------------------------------------------------------------------------
# Methods for the Poisson Binomial Distribution
# ------------------------------------------------------------------------------
[docs]
def pmf(self, kk):
"""Calculate the probability mass function for the input values kk."""
self.check_rv_input(kk)
return self.pmf_list[kk]
[docs]
def cdf(self, kk):
"""Calculate the cumulative density function for the input values kk."""
self.check_rv_input(kk)
return self.cdf_list[kk]
[docs]
def pval(self, k):
"""Return the p-value corresponding to k, defined as 1 - cdf(k)."""
if np.array_equal(self.p, self.p[0] * np.ones(self.p.shape)):
# I all probabilities are equal, it returns the Binomial pvalue (as it should...)
return binom.sf(k - 1, self.n, self.p[0])
elif k > 0:
return 1. - self.cdf(k - 1)
else:
return 1.
# ------------------------------------------------------------------------------
# Methods to obtain pmf and cdf
# ------------------------------------------------------------------------------
[docs]
def get_cdf(self, xx):
"""Return a list which contains all the values of the cumulative
density function for i = 0, 1, ..., n.
"""
c = np.empty(self.n + 1)
c[0] = xx[0]
for i in range(1, self.n + 1):
c[i] = c[i - 1] + xx[i]
return c
[docs]
def get_pmf_xi(self):
"""Return the values of xi, which are the components that make up the
cumulative density function.
"""
chi = np.empty(self.n + 1, dtype=complex)
half_n = int(math.ceil(self.n / 2))
# set first half of chis:
for i in range(0, half_n + 1):
chi[i] = self.get_chi(i)
# set second half of chis:
chi[half_n + 1:] = np.conjugate(chi[1:self.n - half_n + 1][:: - 1])
chi /= self.n + 1
xi = np.fft.fft(chi)
if self.check_xi_are_real(xi):
xi = xi.real
else:
raise TypeError("pmf / xi values have to be real.")
return xi
[docs]
def get_chi(self, idx):
"""Return the value of chi_idx."""
argz_sum = self.get_argz_sum(idx)
d = self.get_d(idx)
chi = d * (np.cos(argz_sum) + 1j * np.sin(argz_sum))
return chi
[docs]
def get_argz_sum(self, idx):
"""Sum over all the principal values of z_j(l) for j = 1, ..., n,
keeping idx fixed.
"""
y = self.p * np.sin(self.omega * idx)
x = 1 - self.p + self.p * np.cos(self.omega * idx)
argz = np.arctan2(y, x).sum()
return argz
[docs]
def get_d(self, idx):
"""Return coefficient d_idx of the chi value
chi_idx = d_idx * (Real + j * Imag).
"""
dum = self.get_z(range(self.n), idx)
exparg = np.log(np.abs(dum)).sum()
return np.exp(exparg)
[docs]
def get_z(self, j, idx):
"""Return z_j(l)."""
z = 1 - self.p[j] + self.p[j] * np.cos(self.omega * idx) + \
1j * self.p[j] * np.sin(self.omega * idx)
return z
# ------------------------------------------------------------------------------
# Auxiliary functions
# ------------------------------------------------------------------------------
[docs]
@staticmethod
def check_xi_are_real(xx):
"""Check whether all the xis have imaginary part equal to 0, i.e.
whether probabilities pmf are positive.
"""
eps = 1e-3 # account for machine precision
return np.all(xx.imag <= eps)