#!/usr/bin/env python
# coding: utf-8
import numpy as np
from typing import List, Union
from ..eda import EDA
from ..custom.probabilistic_models import BN
from ..custom.initialization_models import CategoricalSampling
class BOA(EDA):
    """
    Bayesian Optimization Algorithm. This type of Estimation-of-Distribution Algorithm uses a Discrete Bayesian
    Network from where new solutions are sampled. This multivariate probabilistic model is updated in each
    iteration with the best individuals of the previous generation. The main difference towards EBNA is that a
    Bayesian Dirichlet score is used for the structure learning process.

    Example:

        This example uses a toy problem to show how to use the BOA implementation.

        .. code-block:: python

            import numpy as np
            from EDAspy.optimization import BOA

            def categorical_cost_function(solution: np.ndarray):
                cost_dict = {
                    'Color': {'Red': 0.1, 'Green': 0.5, 'Blue': 0.3},
                    'Shape': {'Circle': 0.3, 'Square': 0.2, 'Triangle': 0.4},
                    'Size': {'Small': 0.4, 'Medium': 0.2, 'Large': 0.1}
                }
                keys = list(cost_dict.keys())
                choices = {keys[i]: solution[i] for i in range(len(solution))}

                total_cost = 0.0
                for variable, choice in choices.items():
                    total_cost += cost_dict[variable][choice]

                return total_cost

            variables = ['Color', 'Shape', 'Size']
            possible_values = np.array([
                ['Red', 'Green', 'Blue'],
                ['Circle', 'Square', 'Triangle'],
                ['Small', 'Medium', 'Large']], dtype=object
            )

            frequency = np.array([
                [.33, .33, .33],
                [.33, .33, .33],
                [.33, .33, .33]], dtype=object
            )

            n_variables = len(variables)

            boa = BOA(size_gen=10, max_iter=10, dead_iter=10, n_variables=n_variables, alpha=0.5,
                      possible_values=possible_values, frequency=frequency)

            boa_result = boa.minimize(categorical_cost_function, True)

    References:

        [1]: Larrañaga P, Lozano JA (2001) Estimation of Distribution Algorithms: A New Tool for Evolutionary
        Computation. Kluwer Academic Publishers
    """

    def __init__(self,
                 size_gen: int,
                 max_iter: int,
                 dead_iter: int,
                 n_variables: int,
                 possible_values: Union[List, np.ndarray],
                 frequency: Union[List, np.ndarray],
                 alpha: float = 0.5,
                 elite_factor: float = 0.4,
                 disp: bool = True,
                 parallelize: bool = False,
                 init_data: Union[np.ndarray, None] = None):
        r"""
        :param size_gen: Population size. Number of individuals in each generation.
        :param max_iter: Maximum number of iterations during runtime.
        :param dead_iter: Stopping criteria. Number of iterations with no improvement after which the algorithm
            finishes.
        :param n_variables: Number of variables to be optimized.
        :param possible_values: 2D structure where each row represents the possible values that each dimension
            can take.
        :param frequency: 2D structure with the same size as possible_values that represents the frequency of
            each element.
        :param alpha: Percentage of population selected to update the probabilistic model.
        :param elite_factor: Percentage of previous population selected to add to new generation (elite approach).
        :param disp: Set to True to print convergence messages.
        :param parallelize: True if the evaluation of the solutions is desired to be parallelized in multiple cores.
        :param init_data: Numpy array containing the data the EDA is desired to be initialized from. By default, an
            initializer is used.
        """
        # NOTE(review): w_noise=-1 is forwarded as-is to the EDA base class; presumably a sentinel
        # meaning "no noise" for categorical variables -- confirm against EDA.__init__.
        super().__init__(size_gen=size_gen, max_iter=max_iter, dead_iter=dead_iter,
                         n_variables=n_variables, alpha=alpha, elite_factor=elite_factor, disp=disp,
                         parallelize=parallelize, init_data=init_data, w_noise=-1)

        # Variables are identified by their positional index rendered as a string: "0", "1", ...
        self.vars = [str(i) for i in range(n_variables)]

        # Probabilistic model: Bayesian network defined over the variable names above.
        self.pm = BN(self.vars)

        # Initializer built from the user-supplied categories and their frequencies.
        # self.n_variables is not assigned in this class; it is expected to be set by EDA.__init__.
        self.init = CategoricalSampling(self.n_variables, possible_values=possible_values, frequency=frequency)

    def _update_pm(self):
        """
        Learn the probabilistic model from the best individuals of previous generation.

        The structure is learned from ``self.generation`` using the ``"bdeuscore"`` score.
        """
        self.pm.learn(dataset=self.generation, score="bdeuscore")