Source code for simba.data_processors.boolean_conditional_calculator

import os
from copy import deepcopy
from typing import Dict, Optional, Union

import numpy as np
import pandas as pd

from simba.mixins.config_reader import ConfigReader
from simba.utils.checks import (
    check_all_file_names_are_represented_in_video_log,
    check_if_df_field_is_boolean, check_if_dir_exists, check_instance,
    check_valid_boolean, check_valid_dataframe, check_valid_dict)
from simba.utils.data import detect_bouts
from simba.utils.enums import Formats
from simba.utils.errors import NoDataError
from simba.utils.printing import SimbaTimer, stdout_success
from simba.utils.read_write import (find_files_of_filetypes_in_directory,
                                    get_fn_ext, read_df, read_video_info,
                                    str_2_bool)


class BooleanConditionalCalculator(ConfigReader):
    r"""
    Compute descriptive statistics (e.g., the time in seconds and number of frames) of multiple Boolean fields fulfilling user-defined conditions.

    For example, compute descriptive statistics for when Animal 1 is inside the shape Rectangle_1 while at the same time directing towards shape Polygon_1,
    while at the same time Animal 2 is outside shape Rectangle_1 and directing towards Polygon_1.

    :param Union[str, os.PathLike] config_path: path to SimBA project config file in Configparser format.
    :param Dict[str, Union[bool, str]] rules: Rules with field names as keys and bools (or string representations of bools) as values. At least two rules are required.
    :param Optional[Union[str, os.PathLike, None]] data_path: Optional data paths to be processed. Can be a directory or file path. If None, all files of the project file type inside the project features directory (``self.features_dir``) are analysed.
    :param Optional[Union[str, os.PathLike]] agg_save_path: Optional location where to save the aggregate results as CSV file. If None, then results are saved in project logs folder under the ``Conditional_aggregate_statistics_{self.datetime}.csv`` filename.
    :param Optional[Union[str, os.PathLike]] detailed_save_path: Optional location where to save the detailed results as CSV file (bout level data). If None, then results are saved in project logs folder under the ``Detailed_conditional_aggregate_statistics_{self.datetime}.csv`` filename.
    :param bool verbose: If True, prints the name of each analysed file together with a progress counter. Default True.

    .. note::
       `Example expected aggregate output table <https://github.com/sgoldenlab/simba/blob/master/misc/Conditional_aggregate_statistics_20231004130314.csv>`__.
       `Example expected detailed output table <https://github.com/sgoldenlab/simba/blob/master/misc/Detailed_conditional_aggregate_statistics_20241011123409.csv>`__.

    :example I:

    >>> rules = {'Rectangle_1 Simon in zone': 'TRUE', 'Polygon_1 JJ in zone': 'TRUE'} #  OR {'Rectangle_1 Simon in zone': True, 'Polygon_1 JJ in zone': True}
    >>> conditional_bool_rule_calculator = BooleanConditionalCalculator(rules=rules, config_path='/Users/simon/Desktop/envs/troubleshooting/two_animals_16bp_032023/project_folder/project_config.ini')
    >>> conditional_bool_rule_calculator.run()
    >>> conditional_bool_rule_calculator.save()


    :example II:

    >>> rules = {'Stimulus 2 Animal_1 in zone': True, 'Stimulus 6 Animal_1 in zone': 'falsE'}
    >>> runner = BooleanConditionalCalculator(rules=rules, config_path=r"C:\troubleshooting\RAT_NOR\project_folder\project_config.ini", data_path=r'C:\troubleshooting\RAT_NOR\project_folder\csv\features_extracted')
    >>> runner.run()
    >>> runner.save()


    References
    ----------
    .. [1] Shonka, S., & Hylin, M. J. (2025). Younger is better but only for males: social behavioral development following juvenile traumatic brain injury to the prefrontal cortex.
           `bioRxiv <https://doi.org/10.1101/2025.05.24.655898>`_.

    """

    def __init__(self,
                 config_path: Union[str, os.PathLike],
                 rules: Dict[str, Union[bool, str]],
                 data_path: Optional[Union[str, os.PathLike, None]] = None,
                 agg_save_path: Optional[Union[str, os.PathLike]] = None,
                 detailed_save_path: Optional[Union[str, os.PathLike]] = None,
                 verbose: bool = True):

        ConfigReader.__init__(self, config_path=config_path)
        check_instance(source=self.__class__.__name__, instance=rules, accepted_types=(dict,))
        check_valid_dict(x=rules, valid_key_dtypes=(str,), valid_values_dtypes=(str, bool,), min_len_keys=2, source=f'{self.__class__.__name__} rules')
        check_valid_boolean(value=verbose, source=f'{self.__class__.__name__} verbose', raise_error=True)
        # Resolve the list of data files: project features directory by default, else the passed directory or single file.
        if data_path is None:
            data_path = self.features_dir
            self.data_paths = find_files_of_filetypes_in_directory(directory=data_path, extensions=[f'.{self.file_type}'], as_dict=False, raise_error=False, raise_warning=True)
        elif os.path.isdir(data_path):
            self.data_paths = find_files_of_filetypes_in_directory(directory=data_path, extensions=[f'.{self.file_type}'], as_dict=False, raise_error=False, raise_warning=True)
        elif os.path.isfile(data_path):
            self.data_paths = [data_path]
        else:
            raise NoDataError(msg=f'The data_path {data_path} is not a valid file-path or directory', source=self.__class__.__name__)
        if len(self.data_paths) == 0:
            raise NoDataError(msg=f'The data_path {data_path} has no valid data files', source=self.__class__.__name__)
        if agg_save_path is not None:
            check_if_dir_exists(in_dir=os.path.dirname(agg_save_path))
        else:
            agg_save_path = os.path.join(self.logs_path, f"Conditional_aggregate_statistics_{self.datetime}.csv")
        if detailed_save_path is not None:
            check_if_dir_exists(in_dir=os.path.dirname(detailed_save_path))
        else:
            detailed_save_path = os.path.join(self.logs_path, f"Detailed_conditional_aggregate_statistics_{self.datetime}.csv")
        self.agg_save_path, self.detailed_save_path = agg_save_path, detailed_save_path
        self.rule_cols = list(rules.keys())
        self.output_df = pd.DataFrame(columns=["VIDEO"] + self.rule_cols + ["TIME (s)", "FRAMES (count)"])
        self.bout_df_cols = ["VIDEO"] + self.rule_cols + ["START FRAME", "END FRAME", "START TIME", "END TIME", "BOUT TIME"]
        self.bout_dfs, self.verbose = [], verbose
        # Normalise the rule values through str_2_bool so the string and bool forms are handled alike downstream.
        self.rules = {k: str_2_bool(v) for k, v in rules.items()}

    @staticmethod
    def _get_rule_mask(df: pd.DataFrame, rules: dict) -> np.ndarray:
        """
        Build a positional boolean mask with one entry per row of ``df`` that is True where all rules are satisfied.

        :param pd.DataFrame df: Dataframe holding one column per rule key.
        :param dict rules: Column names as keys; truthy values require the column to equal 1, falsy values require it to equal 0.
        :return: Boolean array of length ``len(df)``.
        :rtype: np.ndarray
        """
        mask = np.ones(len(df), dtype=bool)
        for field, expected in rules.items():
            # Compare on the raw values so the mask is positional and independent of the dataframe index labels.
            mask &= df[field].to_numpy() == (1 if expected else 0)
        return mask

    def _slice_df(self, df: pd.DataFrame, rules: dict) -> pd.DataFrame:
        """
        Return the rows of ``df`` where every rule is satisfied.

        :param pd.DataFrame df: Dataframe holding one column per rule key.
        :param dict rules: Column names as keys; truthy values keep rows where the column equals 1, falsy values keep rows where it equals 0.
        :return: New dataframe with the matching rows (original index labels retained).
        :rtype: pd.DataFrame
        """
        # A single combined mask avoids deep-copying the full frame and re-slicing it once per rule.
        return df[self._get_rule_mask(df=df, rules=rules)]

    def run(self):
        """
        Analyse every file in ``self.data_paths``.

        For each file, appends one row to ``self.output_df`` (video name, rule values, time in seconds and frame count
        where all rules hold) and, when at least one frame matches, appends the bout-level table to ``self.bout_dfs``.
        """
        check_all_file_names_are_represented_in_video_log(video_info_df=self.video_info_df, data_paths=self.data_paths)
        for file_cnt, file_path in enumerate(self.data_paths):
            _, self.video_name, _ = get_fn_ext(filepath=file_path)
            if self.verbose:
                print(f'Analyzing conditional boolean statistics in {self.video_name}...({file_cnt+1}/{len(self.data_paths)})')
            _, _, self.fps = read_video_info(vid_info_df=self.video_info_df, video_name=self.video_name)
            self.df = read_df(file_path=file_path, file_type=self.file_type)
            check_valid_dataframe(df=self.df, source=file_path, valid_dtypes=Formats.NUMERIC_DTYPES.value, required_fields=self.rule_cols)
            for rule_col in self.rule_cols:
                check_if_df_field_is_boolean(df=self.df, field=rule_col, df_name=file_path)
            rule_mask = self._get_rule_mask(df=self.df, rules=self.rules)
            self.sliced_df = self.df[rule_mask]
            time_s = round(len(self.sliced_df) / self.fps, 4)
            if len(self.sliced_df) > 0:
                # Use the positional mask rather than index labels: labels are only valid positions when the
                # file index happens to be 0..n-1.
                bout_df = pd.DataFrame(data=rule_mask.astype(np.float64), columns=['behavior'])
                bout_df = detect_bouts(data_df=bout_df, target_lst=['behavior'], fps=self.fps)
                bout_df = bout_df.assign(**self.rules)
                bout_df['VIDEO'] = self.video_name
                bout_df = bout_df.rename(columns={'Start_time': 'START TIME', 'End Time': 'END TIME', 'Start_frame': 'START FRAME', 'End_frame': 'END FRAME', 'Bout_time': 'BOUT TIME'})
                self.bout_dfs.append(bout_df[self.bout_df_cols])
            self.output_df.loc[len(self.output_df)] = ([self.video_name] + list(self.rules.values()) + [time_s] + [len(self.sliced_df)])

    def save(self):
        """
        Write the aggregate results to ``self.agg_save_path`` and, if any bouts were detected, the bout-level
        results to ``self.detailed_save_path``.

        After the first call ``self.bout_dfs`` holds the concatenated bout dataframe; subsequent calls re-use it.
        """
        self.output_df.to_csv(self.agg_save_path, index=False)
        self.timer.stop_timer()
        stdout_success(msg=f"Boolean conditional data saved at at {self.agg_save_path}!", elapsed_time=self.timer.elapsed_time_str, source=self.__class__.__name__)
        if len(self.bout_dfs) > 0:
            # Only concatenate while bout_dfs is still the per-video list; a repeated save() would otherwise
            # pass a DataFrame to pd.concat and fail.
            if not isinstance(self.bout_dfs, pd.DataFrame):
                self.bout_dfs = pd.concat(self.bout_dfs, axis=0).reset_index(drop=True)
            self.bout_dfs.to_csv(self.detailed_save_path, index=False)
            stdout_success(msg=f"Detailed boolean conditional data saved at at {self.detailed_save_path}!", elapsed_time=self.timer.elapsed_time_str, source=self.__class__.__name__)


#'Stimulus 2 Animal_1 in zone', 'Stimulus 2 Animal_1 facing'
# rules = {'Stimulus 2 Animal_1 in zone': True, 'Stimulus 6 Animal_1 in zone': 'falsE'}
# runner = BooleanConditionalCalculator(rules=rules, config_path=r"C:\troubleshooting\RAT_NOR\project_folder\project_config.ini", data_path=r'C:\troubleshooting\RAT_NOR\project_folder\csv\features_extracted')
# runner.run()
# runner.save()
#