# Source code for slim_gsgp.algorithms.SLIM_GSGP.representations.individual

# MIT License
#
# Copyright (c) 2024 DALabNOVA
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""
Individual Class and Utility Functions for SLIM GSGP.
"""

import torch
from slim_gsgp.algorithms.GSGP.representations.tree_utils import apply_tree
from slim_gsgp.utils.utils import check_slim_version

class Individual:
    """
    Individual of the SLIM_GSGP algorithm. Composed of 'blocks' of trees.

    Attributes
    ----------
    collection : list
        The list of trees representing the individual. Only set when the
        individual is built with a collection and ``reconstruct=True``.
    structure : list
        The structure of each tree in the collection.
    size : int
        The amount of trees in the collection.
    train_semantics : torch.Tensor or None
        Training semantics associated with the Individual.
    test_semantics : torch.Tensor or None
        Testing semantics associated with the Individual. Can be None if not applicable.
    fitness : float or None
        The fitness value of the Individual. Defaults to None.
    test_fitness : float or None
        The fitness value of the Individual during testing. Defaults to None.
    nodes_collection : list
        The number of nodes in each tree of the collection.
    nodes_count : int
        The total amount of nodes in the individual.
    depth_collection : list
        The maximum depth of each tree in the collection.
    depth : int
        The maximum depth of the individual.

    Notes
    -----
    ``predict`` and ``get_tree_representation`` read ``self.version``, which is
    not assigned anywhere in this class; it is expected to be attached to the
    individual by the caller before those methods are used.
    """

    # symmetric bound applied to combined semantics before they are used
    _SEMANTICS_BOUND = 1e12

    def __init__(self, collection, train_semantics, test_semantics, reconstruct):
        """
        Initialize an Individual with a collection of trees and their associated semantics.

        Parameters
        ----------
        collection : list
            The list of trees representing the individual.
        train_semantics : torch.Tensor
            Training semantics associated with the individual.
        test_semantics : torch.Tensor or None
            Testing semantics associated with the individual. Can be None if not applicable.
        reconstruct : bool
            Boolean indicating if the structure of the individual should be stored.
        """
        # setting the Individual attributes based on the collection, if existent.
        # Otherwise, those are added to the individual after it is created (during mutation).
        if collection is not None and reconstruct:
            self.collection = collection
            self.structure = [tree.structure for tree in collection]
            self.size = len(collection)

            self.nodes_collection = [tree.nodes for tree in collection]
            # one extra node per junction between consecutive blocks
            self.nodes_count = sum(self.nodes_collection) + (self.size - 1)
            self.depth_collection = [tree.depth for tree in collection]
            self.depth = max(
                [
                    depth - (i - 1) if i != 0 else depth
                    for i, depth in enumerate(self.depth_collection)
                ]
            ) + (self.size - 1)

        # setting the semantics and fitness related attributes
        self.train_semantics = train_semantics
        self.test_semantics = test_semantics
        self.fitness = None
        self.test_fitness = None

    def _stack_tree_semantics(self, inputs, testing):
        """Compute every tree's semantics and stack them into a single tensor."""
        attribute = "test_semantics" if testing else "train_semantics"

        # getting the semantics for every tree in the collection
        for tree in self.collection:
            tree.calculate_semantics(inputs, testing)

        n_obs = len(inputs)
        rows = []
        for tree in self.collection:
            tree_semantics = getattr(tree, attribute)
            # a 0-dimensional (constant) result is repeated to match the number of observations
            if tree_semantics.shape == torch.Size([]):
                tree_semantics = tree_semantics.repeat(n_obs)
            rows.append(tree_semantics)
        return torch.stack(rows)

    def calculate_semantics(self, inputs, testing=False):
        """
        Calculate the semantics for the Individual.

        Result is stored as an attribute associated with the object. Semantics
        that already exist are never recomputed.

        Parameters
        ----------
        inputs : torch.Tensor
            Input data for calculating semantics.
        testing : bool, optional
            Boolean indicating if the calculation is for testing semantics. Default is False.

        Returns
        -------
        None
        """
        if testing:
            # computing the testing semantics, if not existent. A testing request
            # must never fall through to the training branch: that would derive
            # "training" semantics while asking the trees for testing ones.
            if self.test_semantics is None:
                self.test_semantics = self._stack_tree_semantics(inputs, True)
        elif self.train_semantics is None:
            # computing the training semantics, if not existent
            self.train_semantics = self._stack_tree_semantics(inputs, False)

    def __len__(self):
        """
        Return the size of the individual.

        Returns
        -------
        int
            Size of the individual.
        """
        return self.size

    def __getitem__(self, item):
        """
        Get a tree from the individual by index.

        Parameters
        ----------
        item : int
            Index of the tree to retrieve.

        Returns
        -------
        Tree
            The tree at the specified index.
        """
        return self.collection[item]

    def evaluate(self, ffunction, y, testing=False, operator="sum"):
        """
        Evaluate the Individual using a fitness function.

        The stored semantics are combined along dimension 0 with the chosen
        operator, clamped to [-1e12, 1e12] and passed to ``ffunction`` together
        with ``y``. The result is stored in ``test_fitness`` when ``testing`` is
        True and in ``fitness`` otherwise.

        Parameters
        ----------
        ffunction : Callable
            Fitness function to evaluate the Individual. Called as ``ffunction(y, prediction)``.
        y : torch.Tensor
            Expected output (target) values.
        testing : bool, optional
            Boolean indicating if the evaluation is for testing semantics (default is False).
        operator : str, optional
            Operator to apply to the semantics (default is "sum"). Any value other
            than "sum" selects the product.

        Returns
        -------
        None
        """
        # getting the correct torch operator based on the slim_gsgp version
        combine = torch.sum if operator == "sum" else torch.prod
        semantics = self.test_semantics if testing else self.train_semantics

        score = ffunction(
            y,
            torch.clamp(
                combine(semantics, dim=0),
                -self._SEMANTICS_BOUND,
                self._SEMANTICS_BOUND,
            ),
        )

        if testing:
            self.test_fitness = score
        else:
            self.fitness = score

    @staticmethod
    def _refresh_random_tree(block, random_tree, data, use_sigmoid):
        """Overwrite a random tree's training semantics with its output on ``data``."""
        # saving the block's semantics on the random tree, for safekeeping
        # (they are not restored by this class)
        random_tree.previous_training = block.train_semantics
        # getting the new training semantics based on the provided data
        output = apply_tree(random_tree, data)
        random_tree.train_semantics = torch.sigmoid(output) if use_sigmoid else output

    def predict(self, data):
        """
        Predict the output for the given input data using the model's collection
        of trees and the specified slim_gsgp version.

        Parameters
        ----------
        data : array-like or DataFrame
            The input data to predict. It should be an array-like structure
            (e.g., list, numpy array) or a pandas DataFrame, where each row
            represents a different observation and each column represents a feature.

        Returns
        -------
        Tensor
            The predicted output for the input data. The output is a PyTorch
            Tensor whose values are clamped between -1e12 and 1e12.

        Raises
        ------
        Exception
            If the individual has no ``collection`` attribute (reconstruct was
            set to False).

        Notes
        -----
        The prediction involves several steps:

        1. The `check_slim_version` function is called with ``self.version`` to
           determine the appropriate operator (`sum` or `prod`) and whether to
           apply a sigmoid function (`sig`).
        2. For each tree in ``self.collection``:
            - If the tree structure is a tuple, predictions are made using the
              `apply_tree` function.
            - If the tree structure has length 3 (one-tree mutation), the random
              tree's training semantics are replaced by its output on ``data``
              (passed through a sigmoid when `sig` is set).
            - If the tree structure has length 4 (two-tree mutation), both random
              trees' training semantics are replaced by the sigmoid of their
              output on ``data``.
            - The block's operator (``structure[0]``) is then called on the
              remaining structure elements with ``testing=False``.
        3. The semantics of all trees are combined using the specified operator
           and the final output is clamped to the range -1e12 to 1e12.

        As a side effect, the random trees inside mutation blocks keep the
        training semantics computed from ``data`` after this call; the block's
        semantics are stored in their ``previous_training`` attribute.
        """
        # seeing if the tree has the structure attribute
        if not hasattr(self, "collection"):
            raise Exception("If reconstruct was set to False, .predict() is not available")

        # getting the relevant variables based on the used slim_gsgp version
        operator, sig, _ = check_slim_version(slim_version=self.version)

        # getting the semantics for each tree in the collection
        semantics = []
        for t in self.collection:
            # if it's a base (gp) tree
            if isinstance(t.structure, tuple):
                semantics.append(apply_tree(t, data))
                continue

            if len(t.structure) == 3:
                # one tree mutation: the logistic function is used only if requested
                self._refresh_random_tree(t, t.structure[1], data, sig)
            elif len(t.structure) == 4:
                # two tree mutation: the logistic function is always used
                self._refresh_random_tree(t, t.structure[1], data, True)
                self._refresh_random_tree(t, t.structure[2], data, True)

            # getting the semantics by calling the corresponding operator on the structure
            semantics.append(t.structure[0](*t.structure[1:], testing=False))

        # getting the correct torch function based on the used operator (mul or sum)
        combine = torch.sum if operator == "sum" else torch.prod

        # making sure that if the semantics of the collection is solely a constant,
        # the constant value is repeated len(data) number of times to match the
        # remaining semantics' shapes.
        n_obs = len(data)
        semantics = [
            ten if ten.numel() == n_obs else ten.repeat(n_obs) for ten in semantics
        ]

        # clamping the semantics
        return torch.clamp(
            combine(torch.stack(semantics), dim=0),
            -self._SEMANTICS_BOUND,
            self._SEMANTICS_BOUND,
        )

    def get_tree_representation(self):
        """
        Returns a string representation of the trees in the Individual.

        The blocks are joined with " + " when ``self.version`` contains "+",
        and with " * " otherwise. Base trees are rendered as their structure,
        one-tree mutation blocks as ``f(tree)`` and the remaining blocks as
        ``f(tree_1 - tree_2)``.

        Returns
        -------
        str
            A string representing the structure of the trees in the individual.

        Raises
        ------
        Exception
            If reconstruct was set to False, indicating that the
            .get_tree_representation() method is not available.
        """
        # seeing if the tree has the structure attribute
        if not hasattr(self, "collection"):
            raise Exception("If reconstruct was set to False, .get_tree_representation() is not available")

        # finding out the used operator based on the slim_gsgp version
        op = "+" if "+" in self.version else "*"

        pieces = []
        for t in self.collection:
            if isinstance(t.structure, tuple):
                pieces.append(str(t.structure))
            elif len(t.structure) == 3:
                pieces.append(f'f({t.structure[1].structure})')
            else:
                pieces.append(
                    f'f({t.structure[1].structure} - {t.structure[2].structure})'
                )

        return f" {op} ".join(pieces)

    def print_tree_representation(self):
        """
        Prints a string representation of the trees in the Individual.

        Returns
        -------
        None
            Prints a string representing the structure of the trees in the individual.
        """
        print(self.get_tree_representation())