Source code for EDAspy.optimization.custom.probabilistic_models.univariate_categorical

#!/usr/bin/env python
# coding: utf-8

import numpy as np
import pandas as pd

from ._probabilistic_model import ProbabilisticModel


def obtain_probabilities(array) -> dict:
    """Returns a dictionary mapping each distinct value in the array to its relative frequency."""
    res = {}
    unique, counts = np.unique(array, return_counts=True)
    for i in range(len(unique)):
        res[unique[i]] = counts[i] / array.size
    # for label in list(set(labels) - set(unique)):
    #     res[label] = 0
    return res
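
# For example, obtain_probabilities(np.array(['a', 'a', 'b'])) returns
# {'a': 0.666..., 'b': 0.333...}: each distinct value is mapped to its
# relative frequency in the array.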

class UniCategorical(ProbabilisticModel):
    """
    This probabilistic model is discrete and univariate.
    """

    def __init__(self, variables: list):
        super().__init__(variables)
        self.prob_table = {}  # dictionary with variable: {value: prob}

    def learn(self, dataset: np.array, *args, **kwargs):
        """
        Estimates the independent categorical probability distribution for each variable.

        :param dataset: dataset from which to learn the probabilistic model.
        """
        for i in range(self.len_variables):
            label = self.variables[i]
            probs = obtain_probabilities(dataset[:, i])
            self.prob_table[label] = probs

    def sample(self, size: int) -> np.array:
        """
        Samples new solutions from the probabilistic model. In each solution, each variable is
        sampled from its respective categorical distribution.

        :param size: number of samplings of the probabilistic model.
        :return: array with the sampled dataset
        :rtype: np.array
        """
        result = pd.DataFrame(columns=self.variables)
        for i in range(self.len_variables):
            label = self.variables[i]
            result[label] = np.random.choice(list(self.prob_table[label].keys()), size=size,
                                             p=list(self.prob_table[label].values())).tolist()

        return result.to_numpy()
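
    # Note: each column is drawn independently from its own categorical
    # distribution, so dependencies between variables present in the training
    # data are not reproduced in the sampled solutions.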

    def print_structure(self) -> list:
        """
        Prints the arcs between the nodes that represent the variables in the dataset. This
        function must be used after the learning process. Univariate approaches produce graphs
        with no edges, so an empty list is returned.

        :return: list of arcs between variables
        :rtype: list
        """
        return list()
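

# Usage sketch (illustrative, not part of the original module): learns a
# categorical distribution per column from a toy dataset and samples new
# solutions. The dataset and variable names are invented for the example.
# Because of the relative import above, this block only runs when the module
# is executed as part of the package (e.g. with python -m ...).
if __name__ == '__main__':
    toy_data = np.array([['red', 'small'],
                         ['red', 'large'],
                         ['blue', 'small'],
                         ['red', 'small']])

    model = UniCategorical(variables=['colour', 'size'])
    model.learn(toy_data)
    # model.prob_table now holds, e.g.:
    # {'colour': {'blue': 0.25, 'red': 0.75}, 'size': {'large': 0.25, 'small': 0.75}}

    new_solutions = model.sample(size=5)  # ndarray of shape (5, 2)
    print(new_solutions)
    print(model.print_structure())        # [] -> no arcs in a univariate model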