# Source code for simba.third_party_label_appenders.transform.labelme_to_yolo

import json
import os
import random
from typing import Union

try:
    from typing import Literal
except:
    from typing_extensions import Literal

import cv2

from simba.mixins.image_mixin import ImageMixin
from simba.third_party_label_appenders.transform.utils import b64_to_arr
from simba.utils.checks import (check_float, check_if_dir_exists,
                                check_if_keys_exist_in_dict,
                                check_valid_boolean)
from simba.utils.printing import SimbaTimer, stdout_success
from simba.utils.read_write import (create_directory,
                                    find_files_of_filetypes_in_directory,
                                    get_fn_ext, img_array_to_clahe)
from simba.utils.warnings import ROIWarning


class LabelmeBoundingBoxes2YoloBoundingBoxes:
    r"""
    Convert LabelMe annotations in json to YOLO format and save the corresponding images and labels in txt format.

    .. note::
       For more information on the LabelMe annotation tool, see the `LabelMe GitHub repository <https://github.com/wkentaro/labelme>`_.
       The Labelme json files **have to** contain an `imageData` key holding the image as a b64 string.
       For an expected Labelme json format, see `THIS FILE <https://github.com/sgoldenlab/simba/blob/master/misc/labelme_ex.json>`_.

    .. seealso::
       To split YOLO data into train, test, and validation sets (expected by e.g., UltraLytics), see :func:`simba.third_party_label_appenders.converters.split_yolo_train_test_val`.
       To convert Labelme points annotations to YOLO keypoint training data, see :func:`simba.third_party_label_appenders.transform.labelme_to_yolo_keypoints.LabelmeKeypoints2YoloKeypoints`.

    .. important::
       Produces YOLO bounding boxes (not YOLO keypoint data!). Only Labelme shapes of type ``rectangle`` are converted;
       shapes of any other type are skipped with a warning.

    :param Union[str, os.PathLike] labelme_dir: Path to the directory containing LabelMe annotation `.json` files.
    :param Union[str, os.PathLike] save_dir: Directory where the YOLO-format images and labels will be saved. Its parent directory has to exist. Will create 'images/train', 'images/val', 'labels/train', 'labels/val', and 'map.json' inside this directory.
    :param bool obb: If True, saves annotations as oriented bounding boxes (8 coordinates). If False, uses standard YOLO format (x_center, y_center, width, height). Default False.
    :param bool verbose: If True, prints progress messages during conversion. Default True.
    :param bool clahe: If True, passes each image through ``img_array_to_clahe`` before saving. Default False.
    :param float train_size: Fraction (0.0-1.0) of the annotation files randomly assigned to the ``train`` sub-directories; the remaining files go to ``val``. Default 0.7.
    :param bool greyscale: If True, converts each image with ``ImageMixin.img_to_greyscale`` before saving. Default False.

    :example:
    >>> LABELME_DIR = r'D:\platea\ts_annotations'
    >>> SAVE_DIR = r"D:\platea\yolo"
    >>> runner = LabelmeBoundingBoxes2YoloBoundingBoxes(labelme_dir=LABELME_DIR, save_dir=SAVE_DIR)
    >>> runner.run()
    """

    def __init__(self,
                 labelme_dir: Union[str, os.PathLike],
                 save_dir: Union[str, os.PathLike],
                 obb: bool = False,
                 verbose: bool = True,
                 clahe: bool = False,
                 train_size: float = 0.7,
                 greyscale: bool = False) -> None:

        # Validate every argument BEFORE touching the file system, so a bad
        # argument does not leave half-created output directories behind.
        check_if_dir_exists(in_dir=os.path.dirname(save_dir), source=f'{self.__class__.__name__} save_dir', raise_error=True)
        check_if_dir_exists(in_dir=labelme_dir, source=f'{self.__class__.__name__} labelme_dir', raise_error=True)
        check_valid_boolean(value=[verbose], source=f'{self.__class__.__name__} verbose', raise_error=True)
        check_valid_boolean(value=[obb], source=f'{self.__class__.__name__} obb', raise_error=True)
        # The flags below were previously "validated" by re-checking `verbose`.
        check_valid_boolean(value=[clahe], source=f'{self.__class__.__name__} clahe', raise_error=True)
        check_valid_boolean(value=[greyscale], source=f'{self.__class__.__name__} greyscale', raise_error=True)
        check_float(name=f'{self.__class__.__name__} train_size', value=train_size, min_value=0.0, max_value=1.0)

        self.labelme_file_paths = find_files_of_filetypes_in_directory(directory=labelme_dir, extensions=['.json'], raise_error=True)
        self.map_path = os.path.join(save_dir, 'map.json')
        self.img_dir, self.lbl_dir = os.path.join(save_dir, 'images'), os.path.join(save_dir, 'labels')
        self.img_train_dir, self.img_val_dir = os.path.join(self.img_dir, 'train'), os.path.join(self.img_dir, 'val')
        self.lbl_train_dir, self.lbl_val_dir = os.path.join(self.lbl_dir, 'train'), os.path.join(self.lbl_dir, 'val')
        create_directory(paths=[self.img_train_dir, self.img_val_dir, self.lbl_train_dir, self.lbl_val_dir], overwrite=False)
        self.obb, self.verbose, self.save_dir = obb, verbose, save_dir
        self.clahe, self.greyscale, self.train_size = clahe, greyscale, train_size

    @staticmethod
    def _rectangle_to_yolo_row(label_id: int, points: list, img_w: int, img_h: int, obb: bool) -> str:
        """
        Format one Labelme rectangle as a single newline-terminated YOLO label row.

        :param int label_id: Integer class id written as the first field of the row.
        :param list points: The Labelme ``points`` entry; the first two items are read as opposite ``(x, y)`` corners in pixels, in any order.
        :param int img_w: Image width in pixels, used to normalize x-coordinates.
        :param int img_h: Image height in pixels, used to normalize y-coordinates.
        :param bool obb: If True, emit the four corners (top-left, top-right, bottom-right, bottom-left). If False, emit ``x_center y_center width height``.
        :return: Space-separated row ending in a newline.
        :rtype: str
        """
        (x1, y1), (x2, y2) = points[0], points[1]
        # Labelme rectangles can be drawn from any corner; sort so min/max are well defined.
        x_min, x_max = sorted([x1, x2])
        y_min, y_max = sorted([y1, y2])
        if not obb:
            w = (x_max - x_min) / img_w
            h = (y_max - y_min) / img_h
            x_center = (x_min + (x_max - x_min) / 2) / img_w
            y_center = (y_min + (y_max - y_min) / 2) / img_h
            values = [x_center, y_center, w, h]
        else:
            left, right = x_min / img_w, x_max / img_w
            top, bottom = y_min / img_h, y_max / img_h
            # Axis-aligned box written as four corners, starting top-left and continuing via top-right.
            values = [left, top, right, top, right, bottom, left, bottom]
        return ' '.join([f"{label_id}"] + [str(v) for v in values]) + '\n'

    def run(self) -> None:
        """
        Convert every Labelme json file found in ``labelme_dir``.

        For each file: decodes the embedded image, optionally applies greyscale/CLAHE, writes the image as ``.png`` and
        the rectangle annotations as ``.txt`` to either the train or the val sub-directories. Class ids are assigned to
        label names in order of first appearance, and the name -> id mapping is written to ``map.json`` after all files
        have been processed.
        """
        file_cnt_total = len(self.labelme_file_paths)
        # A set makes the per-file train/val membership test O(1); the sampling call itself is unchanged.
        train_idx = set(random.sample(range(0, file_cnt_total), int(file_cnt_total * self.train_size)))
        timer = SimbaTimer(start=True)
        labels = {}
        for file_cnt, file_path in enumerate(self.labelme_file_paths):
            if self.verbose:
                print(f'Labelme to YOLO file {file_cnt + 1}/{file_cnt_total}...')
            with open(file_path) as f:
                annot_data = json.load(f)
            check_if_keys_exist_in_dict(data=annot_data, key=['shapes', 'imageData', 'imagePath'], name=file_path)
            img_name = get_fn_ext(filepath=annot_data['imagePath'])[1]
            img = b64_to_arr(annot_data['imageData'])
            if img.ndim == 3:
                # NOTE(review): swaps the first and third channel before saving; presumably b64_to_arr
                # returns the opposite channel order from what cv2.imwrite expects - confirm.
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            if self.greyscale:
                img = ImageMixin.img_to_greyscale(img=img)
            if self.clahe:
                img = img_array_to_clahe(img=img)
            img_h, img_w = img.shape[:2]
            if file_cnt in train_idx:
                lbl_dir, img_dir = self.lbl_train_dir, self.img_train_dir
            else:
                lbl_dir, img_dir = self.lbl_val_dir, self.img_val_dir
            label_save_path = os.path.join(lbl_dir, f'{img_name}.txt')
            img_save_path = os.path.join(img_dir, f'{img_name}.png')
            rows = []
            for bp_data in annot_data['shapes']:
                check_if_keys_exist_in_dict(data=bp_data, key=['label', 'points', 'shape_type'], name=file_path)
                if bp_data['shape_type'] != 'rectangle':
                    ROIWarning(msg=f'Only Labelme shape type rectangle recognized for YOLO bounding box transformation. Got {bp_data["shape_type"]}. Skipping annotation...', source=self.__class__.__name__)
                    continue
                # First time a label name is seen it gets the next free integer id.
                label_id = labels.setdefault(bp_data['label'], len(labels))
                rows.append(self._rectangle_to_yolo_row(label_id=label_id, points=bp_data['points'], img_w=img_w, img_h=img_h, obb=self.obb))
            # The label file is written even when it is empty (image without rectangles).
            with open(label_save_path, mode='wt', encoding='utf-8') as f:
                f.write(''.join(rows))
            cv2.imwrite(img_save_path, img)
        with open(self.map_path, 'w') as f:
            json.dump(labels, f, indent=4)
        timer.stop_timer()
        if self.verbose:
            stdout_success(msg=f'Labelme to YOLO conversion complete. Data saved in directory {self.save_dir}.', elapsed_time=timer.elapsed_time_str)
# Example usage:
# LABELME_DIR = r'D:\platea\ts_annotations'
# SAVE_DIR = r"D:\platea\yolo"
# runner = LabelmeBoundingBoxes2YoloBoundingBoxes(labelme_dir=LABELME_DIR, save_dir=SAVE_DIR)
# runner.run()