Source code for simba.labelling.extract_labelling_meta
import os
from typing import Optional, Union
import pandas as pd
from simba.mixins.config_reader import ConfigReader
from simba.utils.checks import (
check_all_file_names_are_represented_in_video_log,
check_if_df_field_is_boolean, check_that_column_exist, check_valid_boolean)
from simba.utils.data import detect_bouts
from simba.utils.errors import CountError
from simba.utils.printing import SimbaTimer, stdout_success
from simba.utils.read_write import get_fn_ext, read_df
class AnnotationMetaDataExtractor(ConfigReader):
    """
    Extract annotation statistics (number of annotated frames, seconds, bouts etc.) for all classifiers
    in a SimBA project and save them to MS Excel format.

    The workbook is written to the project logs directory as ``ANNOTATION_STATISTICS_<datetime>.xlsx`` and
    always contains a ``TOTAL ANNOTATION COUNTS`` sheet. Two further sheets are optional (see parameters).

    .. note::
       `Example expected output <https://github.com/sgoldenlab/simba/blob/master/misc/ANNOTATION_STATISTICS_20240713132805.xlsx>`__.

    :param Union[str, os.PathLike] config_path: Path to SimBA configparser.ConfigParser project_config.ini.
    :param Optional[bool] split_by_video: If True, includes a worksheet where the annotation counts are split by video. Default True.
    :param Optional[bool] annotated_bouts: If True, includes a worksheet with information on annotated bouts (start and stop time and bout length). Default True.
    :raises CountError: If the project has no classifier names, or no data files exist in the targets folder.

    :example:
    >>> annotation_meta_extractor = AnnotationMetaDataExtractor(config_path='/Users/simon/Desktop/envs/simba/troubleshooting/two_black_animals_14bp/project_folder/project_config.ini')
    >>> annotation_meta_extractor.run()
    >>> annotation_meta_extractor.save()
    """

    # Headers assigned, in order, to the six columns of the frame returned by detect_bouts().
    _BOUT_COLUMNS = ['ANNOTATED CLASSIFIER', 'ANNOTATED START TIME', 'ANNOTATED END TIME',
                     'ANNOTATED START FRAME', 'ANNOTATED END FRAME', 'ANNOTATED BOUT TIME (S)']

    def __init__(self,
                 config_path: Union[str, os.PathLike],
                 split_by_video: Optional[bool] = True,
                 annotated_bouts: Optional[bool] = True):
        ConfigReader.__init__(self, config_path=config_path, read_video_info=True)
        if len(self.clf_names) == 0:
            raise CountError(msg=f'No classifier names associated with SimBA project {config_path}', source=self.__class__.__name__)
        if len(self.target_file_paths) == 0:
            raise CountError(msg=f'No data files found inside the {self.targets_folder} directory', source=self.__class__.__name__)
        self.save_path = os.path.join(self.logs_path, f'ANNOTATION_STATISTICS_{self.datetime}.xlsx')
        check_valid_boolean(value=[annotated_bouts, split_by_video])
        self.annotated_bouts, self.split_by_video = annotated_bouts, split_by_video

    def run(self):
        """
        Read every annotated data file and collect per-video, per-classifier statistics.

        Populates ``self.results`` (``{video_name: {classifier: {measurement: value}}}``) and
        ``self.bout_data`` (a list holding one bout dataframe per video). Calling ``run()``
        again discards the results of any earlier call.
        """
        check_all_file_names_are_represented_in_video_log(video_info_df=self.video_info_df, data_paths=self.target_file_paths)
        self.results, self.bout_data = {}, []
        print(f'Analyzing annotations in {len(self.target_file_paths)} data file(s)...')
        for file_path in self.target_file_paths:
            _, video_name, _ = get_fn_ext(filepath=file_path)
            self.results[video_name] = {}
            _, _, fps = self.read_video_info(video_name=video_name)
            print(f'Analyzing annotations in {video_name}... ')
            df = read_df(file_path=file_path, file_type=self.file_type)
            check_that_column_exist(df=df, column_name=self.clf_names, file_name=file_path)

            bouts = detect_bouts(data_df=df, target_lst=self.clf_names, fps=fps)
            bouts.columns = self._BOUT_COLUMNS
            # Put the video name first so every row is identifiable once all videos are stacked.
            bouts.insert(0, 'VIDEO', video_name)
            self.bout_data.append(bouts)

            for clf in self.clf_names:
                check_if_df_field_is_boolean(df=df, field=clf, df_name=file_path)
                # Count matching rows directly instead of materialising filtered copies of the frame.
                present_cnt = int((df[clf] == 1).sum())
                absent_cnt = int((df[clf] == 0).sum())
                self.results[video_name][clf] = {'ANNOTATED PRESENT FRAME COUNT': present_cnt,
                                                 'ANNOTATED PRESENT TIME (S)': round((present_cnt / fps), 4),
                                                 'ANNOTATED ABSENT FRAMES COUNT': absent_cnt,
                                                 'ANNOTATED ABSENT TIME (S)': round((absent_cnt / fps), 4)}

    def __aggregates(self):
        """
        Reshape ``self.results`` into long-format tables.

        Sets ``self.by_video`` (one row per video / classifier / measurement, indexed by
        ``VIDEO`` and ``CLASSIFIER``) and ``self.aggregates`` (each measurement summed
        across videos for every classifier).
        """
        self.by_video = pd.DataFrame(columns=['VIDEO', 'CLASSIFIER', 'ANNOTATION MEASUREMENT', 'ANNOTATION STATISTIC'])
        for video_name, video_data in self.results.items():
            for clf in self.clf_names:
                for measurement, statistic in video_data[clf].items():
                    # Row-wise enlargement is kept deliberately: it leaves the column dtypes
                    # (and therefore the values written to the sheets) exactly as before.
                    self.by_video.loc[len(self.by_video)] = [video_name, clf, measurement, statistic]
        grouped = self.by_video.drop(['VIDEO'], axis=1).groupby(by=['CLASSIFIER', 'ANNOTATION MEASUREMENT'])
        self.aggregates = pd.DataFrame(grouped['ANNOTATION STATISTIC'].sum())
        self.by_video = self.by_video.set_index(['VIDEO', 'CLASSIFIER'])

    def save(self):
        """
        Write the collected statistics to the Excel workbook at ``self.save_path``.

        Must be called after ``run()``. After the first call ``self.bout_data`` holds a single
        concatenated dataframe rather than a list; repeated calls re-use that dataframe.
        """
        self.__aggregates()
        # pd.concat() rejects a bare DataFrame, so only concatenate while bout_data is still the
        # list built by run(). This keeps a second save() call from raising TypeError.
        if isinstance(self.bout_data, list):
            self.bout_data = pd.concat(self.bout_data, axis=0)
        with pd.ExcelWriter(self.save_path) as writer:
            self.aggregates.to_excel(writer, sheet_name='TOTAL ANNOTATION COUNTS', index=True)
            if self.split_by_video:
                self.by_video.to_excel(writer, sheet_name='VIDEO ANNOTATION COUNTS', index=True)
            if self.annotated_bouts:
                self.bout_data.to_excel(writer, sheet_name='VIDEO ANNOTATION BOUT DATA', index=False)
        # NOTE(review): self.timer is not created in this class - presumably set by ConfigReader.__init__; confirm.
        self.timer.stop_timer()
        stdout_success(msg=f'Annotation data for {len(self.target_file_paths)} video(s) saved at {self.save_path}', source=self.__class__.__name__, elapsed_time=self.timer.elapsed_time)
# annotation_meta_extractor = AnnotationMetaDataExtractor(config_path='/Users/simon/Desktop/envs/simba/troubleshooting/two_black_animals_14bp/project_folder/project_config.ini')
# annotation_meta_extractor.run()
# annotation_meta_extractor.save()