Source code for bicm.poibin

# -*- coding: utf-8 -*-
"""
Created on Tue Mar 29, 2016

Module:
    poibin - Poisson Binomial distribution

Author:
    Mika Straka, 2016

Description:
    Implementation of the Poisson Binomial distribution as described in the
    reference.

    Implemented method:
        - pmf: probability mass function
        - cdf: cumulative density function
        - pval: p-value (1 - cdf)

Usage:
    Be p a list / numpy array of success probabilities for n non-identically
    distributed Bernoulli random variables.
    Create an instance of the distribution with
    
    >>> pb = PoiBin(p)
    
    Be x a list or numpy array of different number of successes.
    To obtain:
    
    - probability mass function of x, use

    >>> pb.pmf(x)

    - cumulative density function of x, use

    >>> pb.cdf(x)

    - p-values of x, use

    >>> pb.pval(x)
        
    The functions are applied component-wise and a numpy array of the same
    lenth as x is returned.

Reference:
    Yili Hong, On computing the distribution function for the Poisson binomial
    distribution,
    Computational Statistics & Data Analysis, Volume 59, March 2013,
    Pages 41-51, ISSN 0167-9473,
    http://dx.doi.org/10.1016/j.csda.2012.10.006.
"""

import numpy as np
import math
from scipy.stats import binom



[docs]
class PoiBin:
    def __init__(self, p):
        self.p = np.array(p)
        self.n = self.p.size
        self.check_input_prob()
        self.omega = 2 * np.pi / (self.n + 1)
        self.pmf_list = self.get_pmf_xi()
        self.cdf_list = self.get_cdf(self.pmf_list)
    
# ------------------------------------------------------------------------------
# Methods for the Poisson Binomial Distribution
# ------------------------------------------------------------------------------
    

[docs]
    def pmf(self, kk):
        """Calculate the probability mass function for the input values kk."""
        self.check_rv_input(kk)
        return self.pmf_list[kk]

    

[docs]
    def cdf(self, kk):
        """Calculate the cumulative density function for the input values kk."""
        self.check_rv_input(kk)
        return self.cdf_list[kk]

    

[docs]
    def pval(self, k):
        """Return the p-value corresponding to k, defined as 1 - cdf(k)."""
        if np.array_equal(self.p, self.p[0] * np.ones(self.p.shape)):
            # I all probabilities are equal, it returns the Binomial pvalue (as it should...)
            return binom.sf(k - 1, self.n, self.p[0]) 
        elif k > 0:
            return 1. - self.cdf(k - 1)
        else:
            return 1.

    
# ------------------------------------------------------------------------------
# Methods to obtain pmf and cdf
# ------------------------------------------------------------------------------
    

[docs]
    def get_cdf(self, xx):
        """Return a list which contains all the values of the cumulative
        density function for i = 0, 1, ..., n.
        """
        c = np.empty(self.n + 1)
        c[0] = xx[0]
        for i in range(1, self.n + 1):
            c[i] = c[i - 1] + xx[i]
        return c

    

[docs]
    def get_pmf_xi(self):
        """Return the values of xi, which are the components that make up the
        cumulative density function.
        """
        chi = np.empty(self.n + 1, dtype=complex)
        half_n = int(math.ceil(self.n / 2))
        # set first half of chis:
        for i in range(0, half_n + 1):
            chi[i] = self.get_chi(i)
        # set second half of chis:
        chi[half_n + 1:] = np.conjugate(chi[1:self.n - half_n + 1][:: - 1])
        chi /= self.n + 1
        xi = np.fft.fft(chi)
        if self.check_xi_are_real(xi):
            xi = xi.real
        else:
            raise TypeError("pmf / xi values have to be real.")
        return xi

    

[docs]
    def get_chi(self, idx):
        """Return the value of chi_idx."""
        argz_sum = self.get_argz_sum(idx)
        d = self.get_d(idx)
        chi = d * (np.cos(argz_sum) + 1j * np.sin(argz_sum))
        return chi

    

[docs]
    def get_argz_sum(self, idx):
        """Sum over all the principal values of z_j(l) for j = 1, ..., n,
        keeping idx fixed.
        """
        y = self.p * np.sin(self.omega * idx)
        x = 1 - self.p + self.p * np.cos(self.omega * idx)
        argz = np.arctan2(y, x).sum()
        return argz

    

[docs]
    def get_d(self, idx):
        """Return coefficient d_idx of the chi value
        chi_idx = d_idx * (Real + j * Imag).
        """
        dum = self.get_z(range(self.n), idx)
        exparg = np.log(np.abs(dum)).sum()
        return np.exp(exparg)

    

[docs]
    def get_z(self, j, idx):
        """Return z_j(l)."""
        z = 1 - self.p[j] + self.p[j] * np.cos(self.omega * idx) + \
            1j * self.p[j] * np.sin(self.omega * idx)
        return z


# ------------------------------------------------------------------------------
# Auxiliary functions
# ------------------------------------------------------------------------------
    

[docs]
    def check_rv_input(self, kk):
        """Check whether input values kk for the random variable are >=0,
        integers and <= n.
        """
        try:
            for k in kk:
                assert (type(k) == int) and (k >= 0), \
                    'Input list must contain positive integers.'
                assert k <= self.n, \
                    'Values in input list must be smaller or equal to the ' \
                    'number of input probabilities "n"'
        except TypeError:
            assert (type(kk) == int) and (kk >= 0), \
                'Input must be a positive integer.'
            assert kk <= self.n, \
                'Input value cannot be greater than' + str(self.n)
        return True

    

[docs]
    @staticmethod
    def check_xi_are_real(xx):
        """Check whether all the xis have imaginary part equal to 0, i.e.
         whether probabilities pmf are positive.
        """
        eps = 1e-3  # account for machine precision
        return np.all(xx.imag <= eps)

    

[docs]
    def check_input_prob(self):
        """Check that all the input probabilities are in the interval [0, 1]."""
        if self.p.shape != (self.n, ):
            raise ValueError("Input must be an one-dimensional array ora list.")
        if not np.all(self.p >= 0):
            raise ValueError("Input probabilites have to be non negative.")
        if not np.all(self.p <= 1):
            raise ValueError("Input probabilites have to be smaller than 1.")