# Source code for simba.third_party_label_appenders.transform.labelme_to_dlc

import itertools
import json
import os
from datetime import datetime
from typing import Optional, Union

import pandas as pd

try:
    from typing import Literal
except:
    from typing_extensions import Literal

import cv2

from simba.mixins.image_mixin import ImageMixin
from simba.third_party_label_appenders.transform.utils import b64_to_arr
from simba.utils.checks import (check_if_dir_exists,
                                check_if_keys_exist_in_dict, check_str,
                                check_valid_boolean)
from simba.utils.printing import SimbaTimer, stdout_success
from simba.utils.read_write import (find_files_of_filetypes_in_directory,
                                    img_array_to_clahe, read_json)


class Labelme2DLC:

    r"""
    Convert labels from labelme format to DLC annotation format.

    Every labelme json file found in ``labelme_dir`` is read, its embedded image is decoded (and optionally
    converted to greyscale and/or CLAHE enhanced) and written to the save directory, and the first point of
    every shape is collected into a single ``CollectedData_<scorer>.csv`` file with the three-level
    (scorer, bodyparts, coords) header.

    .. note::
        See `labelme GitHub repo <https://github.com/wkentaro/labelme>`__.

        Body-part columns appear in the order the labels are first encountered while reading the files.

    .. seealso::
       For DLC -> Labelme annotation conversion, see :func:`simba.third_party_label_appenders.transform.dlc_to_labelme.DLC2Labelme`

    :param Union[str, os.PathLike] labelme_dir: Directory with labelme json files.
    :param str scorer: Name of the scorer (anticipated by DLC as header). Default: 'SN'.
    :param bool greyscale: If True, convert images to grayscale.
    :param bool clahe: If True, apply CLAHE (Contrast Limited Adaptive Histogram Equalization).
    :param bool verbose: If True, prints progress.
    :param Optional[Union[str, os.PathLike]] save_dir: Directory where to save the DLC annotations. If None, a new directory is created next to ``labelme_dir``, named after it with a `_dlc_annotations_<timestamp>` suffix.
    :return: None


    :example:

    >>> labelme_dir = r"D:\platea\ts_annotations"
    >>> runner = Labelme2DLC(labelme_dir=labelme_dir)
    >>> runner.run()
    """

    def __init__(self,
                 labelme_dir: Union[str, os.PathLike],
                 scorer: str = 'SN',
                 greyscale: bool = False,
                 clahe: bool = False,
                 verbose: bool = True,
                 save_dir: Optional[Union[str, os.PathLike]] = None) -> None:

        # Validate every argument before touching the file system, so that invalid input cannot
        # leave an empty, timestamped output directory behind.
        check_if_dir_exists(in_dir=labelme_dir)
        check_str(name=f'{self.__class__.__name__} scorer', value=scorer)
        check_valid_boolean(value=greyscale, source=f'{self.__class__.__name__} greyscale', raise_error=True)
        check_valid_boolean(value=clahe, source=f'{self.__class__.__name__} clahe', raise_error=True)
        check_valid_boolean(value=verbose, source=f'{self.__class__.__name__} verbose', raise_error=True)
        self.annotation_paths = find_files_of_filetypes_in_directory(directory=labelme_dir, extensions=['.json'], raise_error=True, as_dict=True)
        if save_dir is None:
            # normpath drops trailing separators; without it "dir/" would place the output INSIDE labelme_dir.
            parent_dir = os.path.dirname(os.path.normpath(labelme_dir))
            timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
            self.save_dir = os.path.join(parent_dir, f'{self._dataset_name(labelme_dir)}_dlc_annotations_{timestamp}')
            os.makedirs(self.save_dir, exist_ok=True)
        else:
            check_if_dir_exists(in_dir=save_dir)
            self.save_dir = save_dir
        self.clahe, self.greyscale, self.scorer, self.labelme_dir, self.verbose = clahe, greyscale, scorer, labelme_dir, verbose
        self.file_cnt = len(self.annotation_paths)

    @staticmethod
    def _dataset_name(labelme_dir: Union[str, os.PathLike]) -> str:
        """Return the last path component of ``labelme_dir``, ignoring any trailing path separators."""
        return os.path.basename(os.path.normpath(labelme_dir))

    @staticmethod
    def _to_dlc_dataframe(annotations: dict, scorer: str) -> pd.DataFrame:
        """
        Build the DLC style annotation table from the collected key-points.

        :param dict annotations: Maps each row id (image path) to a dict of ``{bodypart: {'x': ..., 'y': ...}}``.
        :param str scorer: Value used for the top (scorer) level of the column header.
        :return: DataFrame with one row per key of ``annotations`` and (scorer, bodyparts, coords) MultiIndex columns. Body-parts missing from an image are NaN.
        :rtype: pd.DataFrame
        """
        # First-seen order (instead of a set) keeps the column layout identical from run to run.
        bp_names = list(dict.fromkeys(itertools.chain.from_iterable(annotations.values())))
        # from_product (unlike from_tuples) also copes with the case where no body-part was found at all.
        columns = pd.MultiIndex.from_product([[scorer], bp_names, ['x', 'y']], names=['scorer', 'bodyparts', 'coords'])
        missing = float('nan')
        rows = []
        for img_bps in annotations.values():
            row = []
            for bp_name in bp_names:
                bp_cords = img_bps.get(bp_name)
                if bp_cords is None:
                    row.extend((missing, missing))
                else:
                    row.extend((bp_cords['x'], bp_cords['y']))
            rows.append(row)
        return pd.DataFrame(rows, index=list(annotations.keys()), columns=columns)

    def run(self) -> None:
        """
        Read all labelme files, then write the images and the ``CollectedData_<scorer>.csv`` file to ``self.save_dir``.

        :return: None. Results are written to disk.
        """
        timer = SimbaTimer(start=True)
        dataset_name = self._dataset_name(self.labelme_dir)
        annotations, images = {}, {}
        for file_cnt, annot_path in enumerate(self.annotation_paths.values()):
            if self.verbose:
                print(f'Reading labelme file {file_cnt+1}/{self.file_cnt}...')
            annot_data = read_json(x=annot_path)
            check_if_keys_exist_in_dict(data=annot_data, key=['shapes', 'imageData', 'imagePath'], name=annot_path)
            img_name = os.path.basename(annot_data['imagePath'])
            img = b64_to_arr(annot_data['imageData'])
            if self.greyscale:
                img = ImageMixin.img_to_greyscale(img=img)
            if self.clahe:
                img = img_array_to_clahe(img=img)
            # Keyed on image file name: a later file referencing the same image name replaces the earlier image.
            images[img_name] = img
            row_id = os.path.join('labeled-data', dataset_name, img_name)
            for bp_data in annot_data['shapes']:
                check_if_keys_exist_in_dict(data=bp_data, key=['label', 'points'], name=annot_path)
                # Only the first point of each shape is used as the body-part location.
                point_x, point_y = bp_data['points'][0][0], bp_data['points'][0][1]
                # A row is only created once an image has at least one shape; repeated labels overwrite.
                annotations.setdefault(row_id, {})[str(bp_data['label'])] = {'x': point_x, 'y': point_y}

        results = self._to_dlc_dataframe(annotations=annotations, scorer=self.scorer)

        img_total = len(images)
        for img_cnt, (img_name, img) in enumerate(images.items()):
            if self.verbose:
                print(f'Saving DLC file {img_cnt+1}/{img_total}...')
            img_save_path = os.path.join(self.save_dir, img_name)
            cv2.imwrite(img_save_path, img)
        save_path = os.path.join(self.save_dir, f'CollectedData_{self.scorer}.csv')
        results.to_csv(save_path)
        timer.stop_timer()
        if self.verbose:
            stdout_success(msg=f'DLC annotations for {self.file_cnt} images saved in directory {self.save_dir}', elapsed_time=timer.elapsed_time_str, source=self.__class__.__name__)


# labelme_dir = r"D:\platea\ts_annotations"
# runner = Labelme2DLC(labelme_dir=labelme_dir)
# runner.run()