# Source code for slim_gsgp.algorithms.SLIM_GSGP.representations.individual

# MIT License
#
# Copyright (c) 2024 DALabNOVA
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""
Individual Class and Utility Functions for SLIM GSGP.
"""

import torch
from slim_gsgp.algorithms.GSGP.representations.tree_utils import apply_tree
from slim_gsgp.utils.utils import check_slim_version


class Individual:
    """
    Individual of the SLIM_GSGP algorithm. Composed of 'blocks' of trees.

    Attributes
    ----------
    collection : list
        The list of trees representing the individual.
    structure : list
        The structure of each tree in the collection.
    size : int
        The amount of trees in the collection.
    train_semantics : torch.Tensor or None
        Training semantics associated with the Individual.
    test_semantics : torch.Tensor or None
        Testing semantics associated with the Individual. Can be None if not applicable.
    fitness : float or None
        The fitness value of the Individual. Defaults to None.
    test_fitness : float or None
        The fitness value of the Individual during testing. Defaults to None.
    nodes_collection : list
        The number of nodes in each tree of the collection.
    nodes_count : int
        The total amount of nodes in the individual.
    depth_collection : list
        The depth of each tree in the collection.
    depth : int
        The depth of the individual.

    Notes
    -----
    ``collection``, ``structure``, ``size``, ``nodes_collection``, ``nodes_count``,
    ``depth_collection`` and ``depth`` are only set by ``__init__`` when a collection is
    given and ``reconstruct`` is True.

    ``predict`` and ``get_tree_representation`` read ``self.version``, which is NOT set by
    ``__init__``; it has to be assigned to the instance before calling them.
    """

    # symmetric bound applied to the combined semantics before they are returned / evaluated
    _CLAMP_LIMIT = 1000000000000.0

    def __init__(self, collection, train_semantics, test_semantics, reconstruct):
        """
        Initialize an Individual with a collection of trees and their associated semantics.

        Parameters
        ----------
        collection : list or None
            The list of trees representing the individual.
        train_semantics : torch.Tensor or None
            Training semantics associated with the individual.
        test_semantics : torch.Tensor or None
            Testing semantics associated with the individual. Can be None if not applicable.
        reconstruct : bool
            Boolean indicating if the structure of the individual should be stored.
        """
        # setting the Individual attributes based on the collection, if existent.
        # Otherwise, those are added to the individual after its created (during mutation).
        if collection is not None and reconstruct:
            self.collection = collection
            self.structure = [tree.structure for tree in collection]
            self.size = len(collection)

            self.nodes_collection = [tree.nodes for tree in collection]
            # one extra node per junction between consecutive blocks
            self.nodes_count = sum(self.nodes_collection) + (self.size - 1)

            self.depth_collection = [tree.depth for tree in collection]
            # every block but the first has its depth offset by its position (i - 1)
            self.depth = max(
                depth - (i - 1) if i != 0 else depth
                for i, depth in enumerate(self.depth_collection)
            ) + (self.size - 1)

        # setting the semantics and fitness related attributes
        self.train_semantics = train_semantics
        self.test_semantics = test_semantics
        self.fitness = None
        self.test_fitness = None

    def _stack_tree_semantics(self, inputs, testing):
        """
        Evaluate every tree of the collection on ``inputs`` and stack the results row-wise.

        Parameters
        ----------
        inputs : torch.Tensor
            Input data handed to each tree's ``calculate_semantics``.
        testing : bool
            Whether the trees' ``test_semantics`` (True) or ``train_semantics`` (False) are used.

        Returns
        -------
        torch.Tensor
            One row per tree of the collection.
        """
        # first make every tree compute (or reuse) its own semantics ...
        for tree in self.collection:
            tree.calculate_semantics(inputs, testing)

        # ... then gather them, expanding 0-dim (constant) outputs to one value per input row
        attribute = "test_semantics" if testing else "train_semantics"
        rows = []
        for tree in self.collection:
            tree_semantics = getattr(tree, attribute)
            if tree_semantics.shape == torch.Size([]):
                tree_semantics = tree_semantics.repeat(len(inputs))
            rows.append(tree_semantics)
        return torch.stack(rows)

    def calculate_semantics(self, inputs, testing=False):
        """
        Calculate the semantics for the Individual. Result is stored as an attribute associated with the object.

        Semantics that already exist are never recomputed.

        Parameters
        ----------
        inputs : torch.Tensor
            Input data for calculating semantics.
        testing : bool, optional
            Boolean indicating if the calculation is for testing semantics. Default is False.

        Returns
        -------
        None
        """
        if testing:
            # computing the testing semantics, if not existent. When they are already cached
            # nothing happens: the training semantics must never be derived from testing inputs.
            if self.test_semantics is None:
                self.test_semantics = self._stack_tree_semantics(inputs, testing=True)
        # computing the training semantics, if not existent
        elif self.train_semantics is None:
            self.train_semantics = self._stack_tree_semantics(inputs, testing=False)

    def __len__(self):
        """
        Return the size of the individual.

        Returns
        -------
        int
            Size of the individual.
        """
        return self.size

    def __getitem__(self, item):
        """
        Get a tree from the individual by index.

        Parameters
        ----------
        item : int
            Index of the tree to retrieve.

        Returns
        -------
        Tree
            The tree at the specified index.
        """
        return self.collection[item]

    def evaluate(self, ffunction, y, testing=False, operator="sum"):
        """
        Evaluate the Individual using a fitness function.

        The per-tree semantics are combined along dimension 0 with the chosen operator, clamped
        to [-1e12, 1e12] and passed to ``ffunction`` together with ``y``. The result is stored in
        ``test_fitness`` (when ``testing``) or ``fitness`` (otherwise).

        Parameters
        ----------
        ffunction : Callable
            Fitness function to evaluate the Individual. Called as ``ffunction(y, prediction)``.
        y : torch.Tensor
            Expected output (target) values.
        testing : bool, optional
            Boolean indicating if the evaluation is for testing semantics (default is False).
        operator : str, optional
            Operator to apply to the semantics (default is "sum"). Any value other than "sum"
            selects the product.

        Returns
        -------
        None
        """
        # getting the correct torch operator based on the slim_gsgp version
        combine = torch.sum if operator == "sum" else torch.prod

        semantics = self.test_semantics if testing else self.train_semantics
        score = ffunction(
            y,
            torch.clamp(combine(semantics, dim=0), -self._CLAMP_LIMIT, self._CLAMP_LIMIT),
        )

        if testing:
            self.test_fitness = score
        else:
            self.fitness = score

    def predict(self, data):
        """
        Predict the output for the given input data using the individual's collection of trees.

        Parameters
        ----------
        data : torch.Tensor or array-like
            The input data to predict, one observation per row. It is forwarded to
            ``apply_tree`` and must support ``len()``.

        Returns
        -------
        torch.Tensor
            The predicted output for the input data, clamped between -1e12 and 1e12.

        Raises
        ------
        Exception
            If the individual has no ``collection`` (reconstruct was set to False).

        Notes
        -----
        ``self.version`` must have been assigned to the instance beforehand; it is passed to
        ``check_slim_version`` to obtain the combining operator (``sum`` or ``prod``) and the
        flag telling whether a sigmoid is applied to one-tree mutation blocks.

        For each block of the collection:

        - If the block's structure is a tuple, it is a base tree and is evaluated directly with
          ``apply_tree``.
        - Otherwise the structure is ``[operator, tree(s)..., ...]``. With length 3 (one-tree
          mutation) the tree output is passed through a sigmoid only if the version asks for it;
          with length 4 (two-tree mutation) both tree outputs always go through a sigmoid. The
          block's operator is then called on the rest of the structure.

        Side effect: the sub-trees' ``train_semantics`` are overwritten with the semantics
        computed on ``data`` (the block's own ``train_semantics`` is stored on each sub-tree as
        ``previous_training``); they are not restored afterwards.
        """
        # seeing if the tree has the structure attribute
        if not hasattr(self, "collection"):
            raise Exception("If reconstruct was set to False, .predict() is not available")

        # getting the relevant variables based on the used slim_gsgp version
        operator, sig, _ = check_slim_version(slim_version=self.version)

        # getting the semantics for each tree in the collection
        semantics = []
        for block in self.collection:
            structure = block.structure

            if isinstance(structure, tuple):  # if it's a base (gp) tree
                semantics.append(apply_tree(block, data))
                continue

            if len(structure) == 3:  # one tree mutation: sigmoid only if the version uses it
                subtrees, use_sigmoid = structure[1:2], bool(sig)
            elif len(structure) == 4:  # two tree mutation: sigmoid always applied
                subtrees, use_sigmoid = structure[1:3], True
            else:  # any other layout is left untouched
                subtrees, use_sigmoid = [], False

            for subtree in subtrees:
                # saving the previous semantics, for safekeeping
                subtree.previous_training = block.train_semantics
                # getting the new training semantics based on the provided data
                output = apply_tree(subtree, data)
                subtree.train_semantics = torch.sigmoid(output) if use_sigmoid else output

            # getting the semantics by calling the corresponding operator on the structure
            semantics.append(structure[0](*structure[1:], testing=False))

        # getting the correct torch function based on the used operator (mul or sum)
        combine = torch.sum if operator == "sum" else torch.prod

        # making sure that if the semantics of the collection is solely a constant,
        # the constant value is repeated len(data) number of times to match the remaining semantics' shapes.
        n_rows = len(data)
        semantics = [ten if ten.numel() == n_rows else ten.repeat(n_rows) for ten in semantics]

        # clamping the semantics
        return torch.clamp(
            combine(torch.stack(semantics), dim=0), -self._CLAMP_LIMIT, self._CLAMP_LIMIT
        )

    @staticmethod
    def _block_representation(tree):
        """
        Return the string form of a single block of the individual.

        Base trees (tuple structure) are rendered as the tuple itself, one-tree mutation blocks
        (structure of length 3) as ``f(tree)`` and every other block as ``f(tree1 - tree2)``.
        """
        structure = tree.structure
        if isinstance(structure, tuple):
            return str(structure)
        if len(structure) == 3:
            return f'f({structure[1].structure})'
        return f'f({structure[1].structure} - {structure[2].structure})'

    def get_tree_representation(self):
        """
        Returns a string representation of the trees in the Individual.

        The blocks are joined with " + " when ``self.version`` contains "+", and with " * "
        otherwise. ``self.version`` must have been assigned to the instance beforehand.

        Returns
        -------
        str
            A string representing the structure of the trees in the individual.

        Raises
        ------
        Exception
            If reconstruct was set to False, indicating that the .get_tree_representation() method is not available.
        """
        # seeing if the tree has the structure attribute
        if not hasattr(self, "collection"):
            raise Exception("If reconstruct was set to False, .get_tree_representation() is not available")

        # finding out the used operator based on the slim_gsgp version
        op = "+" if "+" in self.version else "*"

        return f" {op} ".join(self._block_representation(t) for t in self.collection)

    def print_tree_representation(self):
        """
        Prints a string representation of the trees in the Individual.

        Returns
        -------
        None
            Prints a string representing the structure of the trees in the individual.
        """
        print(self.get_tree_representation())