Source code for upxo.statops.distr_01

import numpy as np
from dataclasses import dataclass
from collections import deque
from scipy import stats
import matplotlib.pyplot as plt
from colorama import init as colorama_init
from colorama import Fore, Back, Style
from upxo._sup.console_formats import console_seperator
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------

[docs]
class distribution():
    '''
    Histogram and summary statistics of a single data set.

    RULES:
        An instance should not store more than one data set.
        If data is updated, then the update operations (calc_histogram,
        update_summary) must be performed again.
    VARIABLES:
        data_name: Data Name
        data: Data
        S: container of the summary statistics (based on SUMMARY)
        K: container of the kernel density data (based on KDE)
        H: container of the histogram (based on HISTOGRAM)
        H.hv: histogram values
        H.be: edges of the bins
    '''
    def __init__(self,
                 data_name = None,
                 data = None,
                 nbins = [None],
                 be_estimator = 'auto',
                 ):
        """
        Initialise the instance.

        Parameters
        ----------
        data_name:
            Label of the data. It is used as the x-axis label by
            plot_histogram. A list is converted to a numpy array.
        data:
            The data to describe. A list or tuple is converted to a numpy
            array, because the summary methods call array methods such as
            ``.min()`` on it. When None, no histogram or summary is
            calculated; assign ``self.data`` and call calc_histogram and
            update_summary afterwards.
        nbins:
            Currently unused. Kept so that existing calls keep working.
        be_estimator:
            Bin edge estimator forwarded to calc_histogram.
        """
        colorama_init()
        #*******************************
        # Kept from the original implementation: a list-valued name is
        # converted to a numpy array.
        if isinstance(data_name, list):
            data_name = np.array(data_name)
        # Plain Python sequences have no .min() / .mean() / .std() methods.
        if isinstance(data, (list, tuple)):
            data = np.asarray(data)
        #*******************************
        # Every instance gets private containers. Binding the SUMMARY, KDE
        # and HISTOGRAM classes directly would make all instances write
        # their results into the same class attributes.
        self.S = self._private_namespace(SUMMARY)
        self.K = self._private_namespace(KDE)
        self.H = self._private_namespace(HISTOGRAM)
        self.H.data = data
        #*******************************
        self.data_name = data_name
        self.data = data
        #*******************************
        # Without data there is nothing to bin or to summarise.
        if data is not None:
            self.calc_histogram(be_estimator = be_estimator)
            self.update_summary()
        #*******************************
        console_seperator(seperator = '-*', repetitions = 25)
    #-----------------------------------------

    @staticmethod
    def _private_namespace(template):
        """
        Return a new, empty subclass of ``template``.

        The subclass is used as an attribute namespace: reading an
        attribute falls back to the default declared on ``template``,
        while writing an attribute only affects the subclass. This works
        even when ``template`` is a frozen dataclass, because no instance
        is created.
        """
        return type(template.__name__, (template,), {})

    #-----------------------------------------

    def update_summary(self):
        """Set or update the summary stored in ``self.S``."""
        self.find_min()
        self.find_mean()
        self.find_median(axis = None)
        self.find_max()
        self.find_total()
        self.find_skewness()
        self.find_kurtosis()
        self.find_std_dev(axis = 0)
        self.find_variance(limits = None,
                           inclusive = (True, True),
                           axis = 0
                           )
        self.find_percentiles(percentile_list = [0, 10, 50, 90, 100],
                              throw_format = 'list',
                              see = False
                              )

    #-----------------------------------------

    def find_min(self):
        """Store the minimum of the data in ``self.S.minimum``."""
        self.S.minimum = self.data.min()

    def find_mean(self):
        """Store the mean of the data in ``self.S.mean``."""
        self.S.mean = self.data.mean()

    def find_median(self,
                    axis = None
                    ):
        """
        Store the median of the data in ``self.S.median``.

        ``axis`` is forwarded to ``np.median``.
        """
        self.S.median = np.median(a = self.data,
                                  axis = axis
                                  )

    def find_max(self):
        """Store the maximum of the data in ``self.S.maximum``."""
        self.S.maximum = self.data.max()

    def find_total(self):
        """Store the sum of the data in ``self.S.total``."""
        self.S.total = self.data.sum()

    def find_std_dev(self,
                     axis = 0):
        """
        Store the standard deviation of the data in ``self.S.sdev``.

        ``axis`` is forwarded to the ``std`` method of the data. For 1D
        data the default ``axis = 0`` gives the same scalar as not passing
        an axis at all.
        """
        self.S.sdev = self.data.std(axis = axis)

    def find_skewness(self):
        """Store the skewness (``stats.skew``) in ``self.S.skew``."""
        self.S.skew = stats.skew(self.data, bias = True)

    def find_kurtosis(self):
        """Store the kurtosis (``stats.kurtosis``) in ``self.S.kurt``."""
        self.S.kurt = stats.kurtosis(self.data, bias = True)

    def find_variance(self,
                      limits = None,
                      inclusive = (True, True),
                      axis = 0
                      ):
        '''
        Store the (trimmed) variance of the data in ``self.S.variance``.

        limits: None or (lower limit, upper limit). Values outside the
            limits are ignored. None means that all values are used.
        inclusive: (lower flag, upper flag). Whether values equal to a
            limit are kept. Only relevant when limits are given.
        axis: axis along which the variance is calculated.

        REF: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.tvar.html#scipy.stats.tvar
        '''
        # stats.tvar treats limits = None as "no trimming" by itself, so
        # no branching on limits is needed here. This also avoids the
        # comparison "limits == None", which fails for numpy array limits.
        self.S.variance = stats.tvar(self.data,
                                     limits = limits,
                                     inclusive = inclusive,
                                     axis = axis
                                     )

    def find_percentiles(self,
                         percentile_list = [0, 10, 50, 90, 100],
                         throw_format = 'list',
                         see = False
                         ):
        """
        Store percentiles of the data in ``self.S.percentiles``.

        percentile_list: the percentiles to calculate, each in [0, 100].
        throw_format: 'list' stores the values in the order of
            percentile_list. 'dict' stores a mapping of percentile to
            value.
        see: when True, the calculated values are printed.

        Raises ValueError for any other throw_format.
        """
        values = [np.percentile(self.data, p) for p in percentile_list]
        if throw_format == 'dict':
            self.S.percentiles = dict(zip(percentile_list, values))
        elif throw_format == 'list':
            self.S.percentiles = values
        else:
            raise ValueError("throw_format must be 'list' or 'dict', "
                             f"got {throw_format!r}")
        if see:
            print(values)

    #-----------------------------------------

    def calc_histogram(self,
                       be_estimator = 'auto'
                       ):
        '''
        Calculate the histogram of the data.

        The bin edges are stored in ``self.H.be`` and the counts per bin
        in ``self.H.hv``.

        "be_estimator" options:
            1. 'auto'
            2. 'fd' (Freedman Diaconis Estimator)
            3. 'doane'
            4. For more, refer: https://numpy.org/doc/stable/reference/generated/numpy.histogram_bin_edges.html
        '''
        self.H.be = np.histogram_bin_edges(self.data, bins = be_estimator)
        self.H.hv, _ = np.histogram(self.data, bins = self.H.be)

    #-----------------------------------------

    def calc_rv_histogram(self):
        """Return the rv histogram. Not implemented yet."""
        # Resample from existing histogram to yield a similar histogram
        raise NotImplementedError("calc_rv_histogram is not yet implemented.")

    #-----------------------------------------

    def plot_histogram(self,
                       be_estimator = 'auto'):
        """
        Visualise the histogram as a Matplotlib bar chart.

        The histogram is recalculated with ``be_estimator`` before it is
        plotted, which also updates ``self.H.be`` and ``self.H.hv``.
        """
        # First calculate the histogram
        self.calc_histogram(be_estimator = be_estimator)
        # Then plot using "plt.bar". Every bar starts at the left edge of
        # its bin and is as wide as the bin. The width must come from the
        # distance between the edges and not from the edge values, which
        # may be zero or negative.
        edges = self.H.be
        plt.bar(edges[:-1],
                self.H.hv,
                width = np.diff(edges),
                align = 'edge',
                facecolor = 'gray',
                edgecolor = 'black',
                linewidth = 1)
        plt.xlabel(self.data_name)
        plt.ylabel('Count')

    #-----------------------------------------
    #-----------------------------------------
    #-----------------------------------------
    #-----------------------------------------
###############################################################################

[docs]
@dataclass(repr = False)
class SUMMARY():
    """
    Container of the summary statistics of a data set.

    The values are assigned after construction by the ``find_*`` methods
    of ``distribution``, so the container is not frozen. Every field is
    None until it has been calculated.
    """
    minimum: object = None      # smallest value
    percentiles: object = None  # list or dict of percentile values
    maximum: object = None      # largest value
    total: object = None        # sum of the values
    mean: object = None
    median: object = None
    sdev: object = None         # standard deviation
    variance: object = None
    skew: object = None         # skewness
    kurt: object = None         # kurtosis

# .. .. .. .. .. .. .. .. .. ..

[docs]
@dataclass(repr = False)
class KDE():
    """
    Container of kernel density estimation data.

    Both fields are None until a value is assigned.
    """
    bw: object = None  # presumably the bandwidth - TODO confirm
    kd: object = None  # presumably the kernel density - TODO confirm

# .. .. .. .. .. .. .. .. .. ..

[docs]
@dataclass(repr = False)
class HISTOGRAM():
    """
    Container of a histogram and of the data it was calculated from.

    Every field is None until a value is assigned.
    """
    hv: object = None     # histogram values
    be: object = None     # Bin edges
    data: object = None   # data from which the histogram is calculated
    nbins: object = None  # presumably the number of bins - TODO confirm