Source code for simba.data_processors.boolean_conditional_calculator

import os
from copy import deepcopy
from typing import Dict, Optional, Union

import numpy as np
import pandas as pd

from simba.mixins.config_reader import ConfigReader
from simba.utils.checks import (
    check_all_file_names_are_represented_in_video_log,
    check_if_df_field_is_boolean, check_if_dir_exists, check_instance,
    check_valid_boolean, check_valid_dataframe, check_valid_dict)
from simba.utils.data import detect_bouts
from simba.utils.enums import Formats
from simba.utils.errors import NoDataError
from simba.utils.printing import SimbaTimer, stdout_success
from simba.utils.read_write import (find_files_of_filetypes_in_directory,
                                    get_fn_ext, read_df, read_video_info,
                                    str_2_bool)


class BooleanConditionalCalculator(ConfigReader):
    r"""
    Compute descriptive statistics (e.g., the time in seconds and number of frames) of multiple Boolean fields
    fulfilling user-defined conditions.

    For example, compute descriptive statistics for when Animal 1 is inside the shape Rectangle_1 while at the same
    time directing towards shape Polygon_1, while at the same time Animal 2 is outside shape Rectangle_1 and
    directing towards Polygon_1.

    :param Union[str, os.PathLike] config_path: path to SimBA project config file in Configparser format.
    :param Dict[str, Union[bool, str]] rules: Rules with field names as keys and bools (or string representations of bools) as values. Validated with ``min_len_keys=2``.
    :param Optional[Union[str, os.PathLike, None]] data_path: Optional data paths to be processed. Can be a directory or file path. If None, all files of the project file type inside the project features directory (``self.features_dir``) are analysed.
    :param Optional[Union[str, os.PathLike]] agg_save_path: Optional location where to save the aggregate results as CSV file. If None, then results are saved in project logs folder under the ``Conditional_aggregate_statistics_{self.datetime}.csv`` filename.
    :param Optional[Union[str, os.PathLike]] detailed_save_path: Optional location where to save the detailed results as CSV file (bout level data). If None, then results are saved in project logs folder under the ``Detailed_conditional_aggregate_statistics_{self.datetime}.csv`` filename.
    :param bool verbose: If True, prints a progress message for each analysed file. Default True.

    .. note::
       `Example expected aggregate output table <https://github.com/sgoldenlab/simba/blob/master/misc/Conditional_aggregate_statistics_20231004130314.csv>`__.
       `Example expected detailed output table <https://github.com/sgoldenlab/simba/blob/master/misc/Detailed_conditional_aggregate_statistics_20241011123409.csv>`__.

    :example I:
    >>> rules = {'Rectangle_1 Simon in zone': 'TRUE', 'Polygon_1 JJ in zone': 'TRUE'} # OR {'Rectangle_1 Simon in zone': True, 'Polygon_1 JJ in zone': True}
    >>> conditional_bool_rule_calculator = BooleanConditionalCalculator(rules=rules, config_path='/Users/simon/Desktop/envs/troubleshooting/two_animals_16bp_032023/project_folder/project_config.ini')
    >>> conditional_bool_rule_calculator.run()
    >>> conditional_bool_rule_calculator.save()

    :example II:
    >>> rules = {'Stimulus 2 Animal_1 in zone': True, 'Stimulus 6 Animal_1 in zone': 'falsE'}
    >>> runner = BooleanConditionalCalculator(rules=rules, config_path=r"C:\troubleshooting\RAT_NOR\project_folder\project_config.ini", data_path=r'C:\troubleshooting\RAT_NOR\project_folder\csv\features_extracted')
    >>> runner.run()
    >>> runner.save()

    References
    ----------
    .. [1] Shonka, S., & Hylin, M. J. (2025). Younger is better but only for males: social behavioral development following juvenile traumatic brain injury to the prefrontal cortex. `bioRxiv <https://doi.org/10.1101/2025.05.24.655898>`_.
    """

    def __init__(self,
                 config_path: Union[str, os.PathLike],
                 rules: Dict[str, Union[bool, str]],
                 data_path: Optional[Union[str, os.PathLike]] = None,
                 agg_save_path: Optional[Union[str, os.PathLike]] = None,
                 detailed_save_path: Optional[Union[str, os.PathLike]] = None,
                 verbose: bool = True):

        ConfigReader.__init__(self, config_path=config_path)
        check_instance(source=self.__class__.__name__, instance=rules, accepted_types=(dict,))
        check_valid_dict(x=rules, valid_key_dtypes=(str,), valid_values_dtypes=(str, bool,), min_len_keys=2, source=f'{self.__class__.__name__} rules')
        check_valid_boolean(value=verbose, source=f'{self.__class__.__name__} verbose', raise_error=True)

        # Resolve the input files: an explicit directory, an explicit single file,
        # or (when nothing is passed) the project's features directory.
        if data_path is not None:
            if os.path.isdir(data_path):
                self.data_paths = find_files_of_filetypes_in_directory(directory=data_path, extensions=[f'.{self.file_type}'], as_dict=False, raise_error=False, raise_warning=True)
            elif os.path.isfile(data_path):
                self.data_paths = [data_path]
            else:
                raise NoDataError(msg=f'The data_path {data_path} is not a valid file-path or directory', source=self.__class__.__name__)
        else:
            data_path = self.features_dir
            self.data_paths = find_files_of_filetypes_in_directory(directory=data_path, extensions=[f'.{self.file_type}'], as_dict=False, raise_error=False, raise_warning=True)
        if len(self.data_paths) == 0:
            raise NoDataError(msg=f'The data_path {data_path} has no valid data files', source=self.__class__.__name__)

        # Resolve the output locations, falling back to time-stamped files in the project logs folder.
        if agg_save_path is not None:
            check_if_dir_exists(in_dir=os.path.dirname(agg_save_path))
        else:
            agg_save_path = os.path.join(self.logs_path, f"Conditional_aggregate_statistics_{self.datetime}.csv")
        if detailed_save_path is not None:
            check_if_dir_exists(in_dir=os.path.dirname(detailed_save_path))
        else:
            detailed_save_path = os.path.join(self.logs_path, f"Detailed_conditional_aggregate_statistics_{self.datetime}.csv")

        self.agg_save_path, self.detailed_save_path, self.rules = agg_save_path, detailed_save_path, rules
        self.output_df = pd.DataFrame(columns=["VIDEO"] + list(self.rules.keys()) + ["TIME (s)", "FRAMES (count)"])
        self.bout_df_cols = ["VIDEO"] + list(self.rules.keys()) + ["START FRAME", "END FRAME", "START TIME", "END TIME", "BOUT TIME"]
        self.bout_dfs, self.rule_cols, self.verbose = [], list(self.rules.keys()), verbose
        # Normalise the rule values (bools or their string representations) through str_2_bool.
        self.rules = {k: str_2_bool(v) for k, v in self.rules.items()}

    def _slice_df(self, df: pd.DataFrame, rules: dict) -> pd.DataFrame:
        """
        Return the rows of ``df`` where every rule field holds its requested value.

        :param pd.DataFrame df: Data containing one column per key in ``rules``.
        :param dict rules: Field names as keys; a truthy value keeps rows where the field equals 1, a falsy value keeps rows where the field equals 0.
        :return: Filtered copy of ``df`` with the original index labels of the surviving rows.
        :rtype: pd.DataFrame
        """
        # Accumulate a single row mask instead of deep-copying the full frame and
        # re-filtering it once per rule; the selected rows are identical.
        keep = np.ones(len(df), dtype=bool)
        for field, wanted in rules.items():
            keep &= (df[field] == (1 if wanted else 0)).to_numpy()
        return df[keep]

    def run(self):
        """
        Analyse every file in ``self.data_paths``.

        For each file, one row is appended to ``self.output_df`` (video name, rule values, time in seconds and
        frame count where all rules are satisfied). When at least one frame satisfies the rules, the bout-level
        table for that file is appended to ``self.bout_dfs``.
        """
        check_all_file_names_are_represented_in_video_log(video_info_df=self.video_info_df, data_paths=self.data_paths)
        for file_cnt, file_path in enumerate(self.data_paths):
            _, self.video_name, _ = get_fn_ext(filepath=file_path)
            if self.verbose:
                print(f'Analyzing conditional boolean statistics in {self.video_name}...({file_cnt+1}/{len(self.data_paths)})')
            _, _, self.fps = read_video_info(vid_info_df=self.video_info_df, video_name=self.video_name)
            self.df = read_df(file_path=file_path, file_type=self.file_type)
            check_valid_dataframe(df=self.df, source=file_path, valid_dtypes=Formats.NUMERIC_DTYPES.value, required_fields=self.rule_cols)
            for rule_col in self.rule_cols:
                check_if_df_field_is_boolean(df=self.df, field=rule_col, df_name=file_path)
            self.sliced_df = self._slice_df(df=self.df, rules=self.rules)
            time_s = round(len(self.sliced_df) / self.fps, 4)
            if len(self.sliced_df) > 0:
                # Build a per-frame 0/1 vector marking the frames where all rules hold.
                bout_df = pd.DataFrame(data=np.zeros((len(self.df))), columns=['behavior'])
                # NOTE(review): the surviving index labels are used as positions here, which
                # assumes read_df returns a 0..N-1 integer index - confirm.
                bout_df.iloc[self.sliced_df.index] = 1
                bout_df = detect_bouts(data_df=bout_df, target_lst=['behavior'], fps=self.fps)
                bout_df = bout_df.assign(**{k: v for k, v in self.rules.items()})
                bout_df['VIDEO'] = self.video_name
                bout_df = bout_df.rename(columns={'Start_time': 'START TIME', 'End Time': 'END TIME', 'Start_frame': 'START FRAME', 'End_frame': 'END FRAME', 'Bout_time': 'BOUT TIME'})
                self.bout_dfs.append(bout_df[self.bout_df_cols])
            self.output_df.loc[len(self.output_df)] = ([self.video_name] + list(self.rules.values()) + [time_s] + [len(self.sliced_df)])

    def save(self):
        """
        Write the aggregate table to ``self.agg_save_path`` and, when bout data exists, the bout-level table to
        ``self.detailed_save_path``.

        After the first call with bout data, ``self.bout_dfs`` holds the concatenated bout DataFrame rather than a
        list. Repeated calls are safe and re-write the same tables.
        """
        self.output_df.to_csv(self.agg_save_path, index=False)
        self.timer.stop_timer()
        stdout_success(msg=f"Boolean conditional data saved at at {self.agg_save_path}!", elapsed_time=self.timer.elapsed_time_str, source=self.__class__.__name__)
        if isinstance(self.bout_dfs, list):
            if len(self.bout_dfs) == 0:
                return
            # Only concatenate while bout_dfs is still the list built by run(); pd.concat
            # rejects a bare DataFrame, which previously made a second save() call raise.
            self.bout_dfs = pd.concat(self.bout_dfs, axis=0).reset_index(drop=True)
        self.bout_dfs.to_csv(self.detailed_save_path, index=False)
        stdout_success(msg=f"Detailed boolean conditional data saved at at {self.detailed_save_path}!", elapsed_time=self.timer.elapsed_time_str, source=self.__class__.__name__)
# Example usage (kept commented out):
# 'Stimulus 2 Animal_1 in zone', 'Stimulus 2 Animal_1 facing'
# rules = {'Stimulus 2 Animal_1 in zone': True, 'Stimulus 6 Animal_1 in zone': 'falsE'}
# runner = BooleanConditionalCalculator(rules=rules, config_path=r"C:\troubleshooting\RAT_NOR\project_folder\project_config.ini", data_path=r'C:\troubleshooting\RAT_NOR\project_folder\csv\features_extracted')
# runner.run()
# runner.save()