Source code for simba.third_party_label_appenders.transform.labelme_to_dlc

import itertools
import json
import os
from datetime import datetime
from typing import Optional, Union

import pandas as pd

try:
    from typing import Literal
except:
    from typing_extensions import Literal

import cv2

from simba.mixins.image_mixin import ImageMixin
from simba.third_party_label_appenders.transform.utils import b64_to_arr
from simba.utils.checks import (check_if_dir_exists,
                                check_if_keys_exist_in_dict, check_str,
                                check_valid_boolean)
from simba.utils.printing import SimbaTimer, stdout_success
from simba.utils.read_write import (find_files_of_filetypes_in_directory,
                                    img_array_to_clahe, read_json)


[docs]class Labelme2DLC: """ Convert labels from labelme format to DLC annotation format. .. note:: See `labelme GitHub repo <https://github.com/wkentaro/labelme>`__. .. seealso:: For DLC -> Labelme annotation conversion, see :func:`simba.third_party_label_appenders.transform.dlc_to_labelme.DLC2Labelme` :param Union[str, os.PathLike] labelme_dir: Directory with labelme json files. :param Optional[str] scorer: Name of the scorer (anticipated by DLC as header) :param bool greyscale: If True, convert images to grayscale. :param bool clahe: If True, apply CLAHE (Contrast Limited Adaptive Histogram Equalization). :param bool verbose: If True, prints progress. :param Optional[Union[str, os.PathLike]] save_dir: Directory where to save the DLC annotations. If None, then same directory as labelme_dir with `_dlc_annotations` suffix. :return: None :example: >>> labelme_dir = r"D:\platea\ts_annotations" >>> runner = Labelme2DLC(labelme_dir=labelme_dir) >>> runner.run() """ def __init__(self, labelme_dir: Union[str, os.PathLike], scorer: str = 'SN', greyscale: bool = False, clahe: bool = False, verbose: bool = True, save_dir: Optional[Union[str, os.PathLike]] = None) -> None: check_if_dir_exists(in_dir=labelme_dir) check_str(name=f'{self.__class__.__name__} scorer', value=scorer) self.annotation_paths = find_files_of_filetypes_in_directory(directory=labelme_dir, extensions=['.json'], raise_error=True, as_dict=True) if save_dir is None: self.save_dir = os.path.join(os.path.dirname(labelme_dir), os.path.basename(labelme_dir) + f'_dlc_annotations_{datetime.now().strftime("%Y%m%d%H%M%S")}') if not os.path.isdir(self.save_dir): os.makedirs(self.save_dir) else: check_if_dir_exists(in_dir=save_dir) self.save_dir = save_dir check_valid_boolean(value=greyscale, source=f'{self.__class__.__name__} greyscale', raise_error=True) check_valid_boolean(value=clahe, source=f'{self.__class__.__name__} clahe', raise_error=True) check_valid_boolean(value=verbose, source=f'{self.__class__.__name__} verbose', raise_error=True) self.clahe, self.greyscale, self.scorer, self.labelme_dir, self.verbose = clahe, greyscale, scorer, labelme_dir, verbose self.file_cnt = len(list(self.annotation_paths.keys())) def run(self): timer = SimbaTimer(start=True) results_dict, images = {}, {} for file_cnt, (file_name, annot_path) in enumerate(self.annotation_paths.items()): if self.verbose: print(f'Reading labelme file {file_cnt+1}/{self.file_cnt}...') annot_data = read_json(x=annot_path) check_if_keys_exist_in_dict(data=annot_data, key=['shapes', 'imageData', 'imagePath'], name=annot_path) img_name = os.path.basename(annot_data['imagePath']) images[img_name] = b64_to_arr(annot_data['imageData']) if self.greyscale: images[img_name] = ImageMixin.img_to_greyscale(img=images[img_name]) if self.clahe: images[img_name] = img_array_to_clahe(img=images[img_name]) for bp_data in annot_data['shapes']: check_if_keys_exist_in_dict(data=bp_data, key=['label', 'points'], name=annot_path) point_x, point_y = bp_data['points'][0][0], bp_data['points'][0][1] lbl = bp_data['label'] id = os.path.join('labeled-data', os.path.basename(self.labelme_dir), img_name) if id not in results_dict.keys(): results_dict[id] = {f'{lbl}': {'x': point_x, 'y': point_y}} else: results_dict[id].update({f'{lbl}': {'x': point_x, 'y': point_y}}) bp_names = set() for img, bp in results_dict.items(): bp_names.update(set(bp.keys())) col_names = list(itertools.product(*[[self.scorer], bp_names, ['x', 'y']])) columns = pd.MultiIndex.from_tuples(col_names) results = pd.DataFrame(columns=columns) results.columns.names = ['scorer', 'bodyparts', 'coords'] for img, bp_data in results_dict.items(): for bp_name, bp_cords in bp_data.items(): results.at[img, (self.scorer, bp_name, 'x')] = bp_cords['x'] results.at[img, (self.scorer, bp_name, 'y')] = bp_cords['y'] for img_cnt, (img_name, img) in enumerate(images.items()): if self.verbose: print(f'Saving DLC file {img_cnt+1}/{self.file_cnt}...') img_save_path = os.path.join(self.save_dir, img_name) cv2.imwrite(img_save_path, img) save_path = os.path.join(self.save_dir, f'CollectedData_{self.scorer}.csv') results.to_csv(save_path) timer.stop_timer() if self.verbose: stdout_success(msg=f'DLC annotations for {self.file_cnt} images saved in directory {self.save_dir}', elapsed_time=timer.elapsed_time_str, source=self.__class__.__name__)
# labelme_dir = r"D:\platea\ts_annotations" # runner = Labelme2DLC(labelme_dir=labelme_dir) # runner.run()