diff --git a/darwin/dataset/download_manager.py b/darwin/dataset/download_manager.py index c54e9b126..e6cc50cbb 100644 --- a/darwin/dataset/download_manager.py +++ b/darwin/dataset/download_manager.py @@ -9,7 +9,6 @@ from tempfile import TemporaryDirectory from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple -import deprecation import numpy as np import orjson as json import requests @@ -28,18 +27,10 @@ is_image_extension_allowed, parse_darwin_json, ) -from darwin.version import __version__ -@deprecation.deprecated( - deprecated_in="0.7.5", - removed_in="0.8.0", - current_version=__version__, - details="The api_url parameter will be removed.", -) def download_all_images_from_annotations( api_key: str, - api_url: str, annotations_path: Path, images_path: Path, force_replace: bool = False, @@ -57,8 +48,6 @@ def download_all_images_from_annotations( ---------- api_key : str API Key of the current team - api_url : str - Url of the darwin API (e.g. 'https://darwin.v7labs.com/api/') annotations_path : Path Path where the annotations are located images_path : Path @@ -152,70 +141,6 @@ def download_all_images_from_annotations( return lambda: download_functions, len(download_functions) -@deprecation.deprecated( - deprecated_in="0.7.5", - removed_in="0.8.0", - current_version=__version__, - details="The api_url parameter will be removed.", -) -def download_image_from_annotation( - api_key: str, - api_url: str, - annotation_path: Path, - images_path: Path, - annotation_format: str, - use_folders: bool, - video_frames: bool, - force_slots: bool, - ignore_slots: bool = False, -) -> None: - """ - Dispatches functions to download an image given an annotation. - - Parameters - ---------- - api_key : str - API Key of the current team - api_url : str - Url of the darwin API (e.g. 
'https://darwin.v7labs.com/api/') - annotation_path : Path - Path where the annotation is located - images_path : Path - Path where to download the image - annotation_format : str - Format of the annotations. Currently only JSON is supported - use_folders : bool - Recreate folder structure - video_frames : bool - Pulls video frames images instead of video files - force_slots: bool - Pulls all slots of items into deeper file structure ({prefix}/{item_name}/{slot_name}/{file_name}) - - Raises - ------ - NotImplementedError - If the format of the annotation is not supported. - """ - - console = Console() - - if annotation_format == "json": - downloadables = _download_image_from_json_annotation( - api_key, - annotation_path, - images_path, - use_folders, - video_frames, - force_slots, - ignore_slots, - ) - for downloadable in downloadables: - downloadable() - else: - console.print("[bold red]Unsupported file format. Please use 'json'.") - raise NotImplementedError - - def lazy_download_image_from_annotation( api_key: str, annotation_path: Path, @@ -454,103 +379,6 @@ def _update_local_path(annotation: AnnotationFile, url, local_path): file.write(op) -@deprecation.deprecated( - deprecated_in="0.7.5", - removed_in="0.8.0", - current_version=__version__, - details="Use the ``download_image_from_annotation`` instead.", -) -def download_image_from_json_annotation( - api_key: str, - api_url: str, - annotation_path: Path, - image_path: Path, - use_folders: bool, - video_frames: bool, -) -> None: - """ - Downloads an image given a ``.json`` annotation path and renames the json after the image's - filename. - - Parameters - ---------- - api_key : str - API Key of the current team - api_url : str - Url of the darwin API (e.g. 
'https://darwin.v7labs.com/api/') - annotation_path : Path - Path where the annotation is located - image_path : Path - Path where to download the image - use_folders : bool - Recreate folders - video_frames : bool - Pulls video frames images instead of video files - """ - annotation = attempt_decode(annotation_path) - - # If we are using folders, extract the path for the image and create the folder if needed - sub_path = annotation["image"].get("path", "/") if use_folders else "/" - parent_path = Path(image_path) / Path(sub_path).relative_to(Path(sub_path).anchor) - parent_path.mkdir(exist_ok=True, parents=True) - - if video_frames and "frame_urls" in annotation["image"]: - video_path: Path = parent_path / annotation_path.stem - video_path.mkdir(exist_ok=True, parents=True) - for i, frame_url in enumerate(annotation["image"]["frame_urls"]): - path = video_path / f"{i:07d}.png" - _download_image(frame_url, path, api_key) - else: - image_url = annotation["image"]["url"] - image_path = parent_path / sanitize_filename(annotation["image"]["filename"]) - _download_image(image_url, image_path, api_key) - - -@deprecation.deprecated( - deprecated_in="0.7.5", - removed_in="0.8.0", - current_version=__version__, - details="Use the ``download_image_from_annotation`` instead.", -) -def download_image(url: str, path: Path, api_key: str) -> None: - """ - Helper function: downloads one image from url. 
- - Parameters - ---------- - url : str - Url of the image to download - path : Path - Path where to download the image, with filename - api_key : str - API Key of the current team - """ - if path.exists(): - return - TIMEOUT: int = 60 - start: float = time.time() - while True: - if "token" in url: - response: requests.Response = requests.get(url, stream=True) - else: - response = requests.get( - url, headers={"Authorization": f"ApiKey {api_key}"}, stream=True - ) - # Correct status: download image - if response.ok: - with open(str(path), "wb") as file: - for chunk in response: - file.write(chunk) - return - # Fatal-error status: fail - if 400 <= response.status_code <= 499: - raise Exception(response.status_code, response.json()) - # Timeout - if time.time() - start > TIMEOUT: - raise Exception(f"Timeout url request ({url}) after {TIMEOUT} seconds.") - time.sleep(1) - - def _download_image( url: str, path: Path, api_key: str, slot: Optional[dt.Slot] = None ) -> None: diff --git a/darwin/dataset/local_dataset.py b/darwin/dataset/local_dataset.py index 5f22320e1..cff6fedc8 100644 --- a/darwin/dataset/local_dataset.py +++ b/darwin/dataset/local_dataset.py @@ -335,12 +335,11 @@ def annotation_type_supported(self, annotation) -> bool: elif self.annotation_type == "bounding_box": is_bounding_box = annotation_type == "bounding_box" is_supported_polygon = ( - annotation_type in ["polygon", "complex_polygon"] - and "bounding_box" in annotation.data + annotation_type == "polygon" and "bounding_box" in annotation.data ) return is_bounding_box or is_supported_polygon elif self.annotation_type == "polygon": - return annotation_type in ["polygon", "complex_polygon"] + return annotation_type == "polygon" else: raise ValueError( "annotation_type should be either 'tag', 'bounding_box', or 'polygon'" diff --git a/darwin/dataset/remote_dataset.py b/darwin/dataset/remote_dataset.py index 0be301c22..db9db9689 100644 --- a/darwin/dataset/remote_dataset.py +++ 
b/darwin/dataset/remote_dataset.py @@ -345,7 +345,6 @@ def pull( # Create the generator with the download instructions progress, count = download_all_images_from_annotations( api_key=api_key, - api_url=self.client.url, annotations_path=annotations_dir, images_path=self.local_images_path, force_replace=force_replace, diff --git a/darwin/dataset/utils.py b/darwin/dataset/utils.py index 3070c6912..4d31de604 100644 --- a/darwin/dataset/utils.py +++ b/darwin/dataset/utils.py @@ -705,7 +705,7 @@ def convert_to_rgb(pic: PILImage.Image) -> PILImage.Image: def compute_max_density(annotations_dir: Path) -> int: """ Calculates the maximum density of all of the annotations in the given folder. - Density is calculated as the number of polygons / complex_polygons present in an annotation + Density is calculated as the number of polygons present in an annotation file. Parameters diff --git a/darwin/datatypes.py b/darwin/datatypes.py index e922b17c6..30a02bb00 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -89,7 +89,6 @@ def from_dict(cls, json: JSONFreeForm) -> "JSONType": AnnotationType = Literal[ # NB: Some of these are not supported yet "bounding_box", "polygon", - "complex_polygon", "ellipse", "cuboid", "segmentation", @@ -645,7 +644,7 @@ def make_tag( def make_polygon( class_name: str, - point_path: List[Point], + point_paths: List[List[Point]] | List[Point], bounding_box: Optional[Dict] = None, subs: Optional[List[SubAnnotation]] = None, slot_names: Optional[List[str]] = None, @@ -655,55 +654,22 @@ def make_polygon( Parameters ---------- - class_name : str + class_name: str The name of the class for this ``Annotation``. - point_path : List[Point] - A list of points that comprises the polygon. The list should have a format similar to: + point_paths: List[List[Point]] | List[Point] + Either a list of points that comprises a polygon or a list of lists of points that comprises a complex polygon. + A complex polygon is a polygon that is defined by >1 path. - .. 
code-block:: python + A polygon should be defined by a List[Point] and have a format similar to: + + .. code-block:: python [ {"x": 1, "y": 0}, {"x": 2, "y": 1} ] - bounding_box : Optional[Dict], default: None - The bounding box that encompasses the polyong. - subs : Optional[List[SubAnnotation]], default: None - List of ``SubAnnotation``s for this ``Annotation``. - - Returns - ------- - Annotation - A polygon ``Annotation``. - """ - return Annotation( - AnnotationClass(class_name, "polygon"), - _maybe_add_bounding_box_data({"path": point_path}, bounding_box), - subs or [], - slot_names=slot_names or [], - ) - - -def make_complex_polygon( - class_name: str, - point_paths: List[List[Point]], - bounding_box: Optional[Dict] = None, - subs: Optional[List[SubAnnotation]] = None, - slot_names: Optional[List[str]] = None, -) -> Annotation: - """ - Creates and returns a complex polygon annotation. Complex polygons are those who have holes - and/or disform shapes. - - Parameters - ---------- - class_name: str - The name of the class for this ``Annotation``. - point_paths: List[List[Point]] - A list of lists points that comprises the complex polygon. This is needed as a complex - polygon can be effectively seen as a sum of multiple simple polygons. The list should have - a format similar to: + A complex polygon should be defined by a List[List[Point]] and have a format similar to: .. code-block:: python @@ -727,10 +693,20 @@ def make_complex_polygon( Returns ------- Annotation - A complex polygon ``Annotation``. 
""" + + # Check if point_paths is List[Point] and convert to List[List[Point]] + if ( + len(point_paths) > 1 + and isinstance(point_paths[0], dict) + and "x" in point_paths[0] + and "y" in point_paths[0] + ): + point_paths = [point_paths] + return Annotation( - AnnotationClass(class_name, "complex_polygon", "polygon"), + AnnotationClass(class_name, "polygon", "polygon"), _maybe_add_bounding_box_data({"paths": point_paths}, bounding_box), subs or [], slot_names=slot_names or [], diff --git a/darwin/exporter/formats/coco.py b/darwin/exporter/formats/coco.py index fbba4d504..a66c1b53a 100644 --- a/darwin/exporter/formats/coco.py +++ b/darwin/exporter/formats/coco.py @@ -1,16 +1,14 @@ from datetime import date +from operator import itemgetter from pathlib import Path from typing import Any, Dict, Iterator, List, Optional from zlib import crc32 -import deprecation import numpy as np import orjson as json -from upolygon import draw_polygon, rle_encode import darwin.datatypes as dt from darwin.utils import convert_polygons_to_sequences -from darwin.version import __version__ DEPRECATION_MESSAGE = """ @@ -41,321 +39,6 @@ def export(annotation_files: Iterator[dt.AnnotationFile], output_dir: Path) -> N f.write(op) -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_json(annotation_files: List[dt.AnnotationFile]) -> Dict[str, Any]: - categories: Dict[str, int] = calculate_categories(annotation_files) - tag_categories: Dict[str, int] = calculate_tag_categories(annotation_files) - return { - "info": build_info(), - "licenses": build_licenses(), - "images": build_images(annotation_files, tag_categories), - "annotations": list(build_annotations(annotation_files, categories)), - "categories": list(build_categories(categories)), - "tag_categories": list(build_tag_categories(tag_categories)), - } - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - 
current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def calculate_categories(annotation_files: List[dt.AnnotationFile]) -> Dict[str, int]: - categories: Dict[str, int] = {} - for annotation_file in annotation_files: - for annotation_class in annotation_file.annotation_classes: - if ( - annotation_class.name not in categories - and annotation_class.annotation_type - in [ - "polygon", - "complex_polygon", - "bounding_box", - ] - ): - categories[annotation_class.name] = _calculate_category_id( - annotation_class - ) - return categories - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def calculate_tag_categories( - annotation_files: List[dt.AnnotationFile], -) -> Dict[str, int]: - categories: Dict[str, int] = {} - for annotation_file in annotation_files: - for annotation_class in annotation_file.annotation_classes: - if ( - annotation_class.name not in categories - and annotation_class.annotation_type == "tag" - ): - categories[annotation_class.name] = _calculate_category_id( - annotation_class - ) - return categories - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_info() -> Dict[str, Any]: - # TODO fill out these fields in a meaningful way - today = date.today() - return { - "description": "Exported from Darwin", - "url": "n/a", - "version": "n/a", - "year": today.year, - "contributor": "n/a", - "date_created": today.strftime("%Y/%m/%d"), - } - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_licenses() -> List[Dict[str, Any]]: - return [{"url": "n/a", "id": 0, "name": "placeholder license"}] - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_images( - 
annotation_files: List[dt.AnnotationFile], tag_categories: Dict[str, int] -) -> List[Dict[str, Any]]: - return [ - build_image(annotation_file, tag_categories) - for annotation_file in sorted(annotation_files, key=lambda x: x.seq) - ] - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_image( - annotation_file: dt.AnnotationFile, tag_categories: Dict[str, int] -) -> Dict[str, Any]: - tags = [ - annotation - for annotation in annotation_file.annotations - if annotation.annotation_class.annotation_type == "tag" - ] - return { - "license": 0, - "file_name": annotation_file.filename, - "coco_url": "n/a", - "height": annotation_file.image_height, - "width": annotation_file.image_width, - "date_captured": "", - "flickr_url": "n/a", - "darwin_url": annotation_file.image_url, - "darwin_workview_url": annotation_file.workview_url, - "id": _build_image_id(annotation_file), - "tag_ids": [tag_categories[tag.annotation_class.name] for tag in tags], - } - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_annotations( - annotation_files: List[dt.AnnotationFile], categories: Dict[str, int] -) -> Iterator[Optional[Dict[str, Any]]]: - annotation_id = 0 - for annotation_file in annotation_files: - for annotation in annotation_file.annotations: - annotation_id += 1 - annotation_data = build_annotation( - annotation_file, annotation_id, annotation, categories - ) - if annotation_data: - yield annotation_data - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_annotation( - annotation_file: dt.AnnotationFile, - annotation_id: int, - annotation: dt.Annotation, - categories: Dict[str, int], -) -> Optional[Dict[str, Any]]: - annotation_type = annotation.annotation_class.annotation_type - if 
annotation_type == "polygon": - sequences = convert_polygons_to_sequences( - annotation.data["path"], rounding=False - ) - x_coords = [s[0::2] for s in sequences] - y_coords = [s[1::2] for s in sequences] - min_x = np.min([np.min(x_coord) for x_coord in x_coords]) - min_y = np.min([np.min(y_coord) for y_coord in y_coords]) - max_x = np.max([np.max(x_coord) for x_coord in x_coords]) - max_y = np.max([np.max(y_coord) for y_coord in y_coords]) - w = max_x - min_x - h = max_y - min_y - # Compute the area of the polygon - poly_area = np.sum( - [ - polygon_area(x_coord, y_coord) - for x_coord, y_coord in zip(x_coords, y_coords) - ] - ) - - return { - "id": annotation_id, - "image_id": _build_image_id(annotation_file), - "category_id": categories[annotation.annotation_class.name], - "segmentation": sequences, - "area": poly_area, - "bbox": [min_x, min_y, w, h], - "iscrowd": 0, - "extra": build_extra(annotation), - } - elif annotation_type == "complex_polygon": - mask = np.zeros((annotation_file.image_height, annotation_file.image_width)) - sequences = convert_polygons_to_sequences(annotation.data["paths"]) - draw_polygon(mask, sequences, 1) - counts = rle_encode(mask) - - x_coords = [s[0::2] for s in sequences] - y_coords = [s[1::2] for s in sequences] - min_x = np.min([np.min(x_coord) for x_coord in x_coords]) - min_y = np.min([np.min(y_coord) for y_coord in y_coords]) - max_x = np.max([np.max(x_coord) for x_coord in x_coords]) - max_y = np.max([np.max(y_coord) for y_coord in y_coords]) - w = max_x - min_x + 1 - h = max_y - min_y + 1 - - return { - "id": annotation_id, - "image_id": _build_image_id(annotation_file), - "category_id": categories[annotation.annotation_class.name], - "segmentation": { - "counts": counts, - "size": [annotation_file.image_height, annotation_file.image_width], - }, - "area": np.sum(mask), - "bbox": [min_x, min_y, w, h], - "iscrowd": 1, - "extra": build_extra(annotation), - } - elif annotation_type == "tag": - pass - elif annotation_type == 
"bounding_box": - x = annotation.data["x"] - y = annotation.data["y"] - w = annotation.data["w"] - h = annotation.data["h"] - - return build_annotation( - annotation_file, - annotation_id, - dt.make_polygon( - annotation.annotation_class.name, - [ - {"x": x, "y": y}, - {"x": x + w, "y": y}, - {"x": x + w, "y": y + h}, - {"x": x, "y": y + h}, - ], - None, - annotation.subs, - ), - categories, - ) - else: - print(f"skipping unsupported annotation_type '{annotation_type}'") - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_extra(annotation: dt.Annotation) -> Dict[str, Any]: - data = {} - instance_id_sub = annotation.get_sub("instance_id") - attributes_sub = annotation.get_sub("attributes") - text_sub = annotation.get_sub("text") - - if instance_id_sub: - data["instance_id"] = instance_id_sub.data - if attributes_sub: - data["attributes"] = attributes_sub.data - if text_sub: - data["text"] = text_sub.data - return data - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_categories(categories: Dict[str, int]) -> Iterator[Dict[str, Any]]: - for name, id in categories.items(): - yield {"id": id, "name": name, "supercategory": "root"} - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_tag_categories(categories: Dict[str, int]) -> Iterator[Dict[str, Any]]: - for name, id in categories.items(): - yield {"id": id, "name": name} - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def polygon_area(x: np.ndarray, y: np.ndarray) -> float: - """ - Returns the area of the input polygon, represented with two numpy arrays - for x and y coordinates. 
- """ - return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))) - - def _build_json(annotation_files: List[dt.AnnotationFile]) -> Dict[str, Any]: categories: Dict[str, int] = _calculate_categories(annotation_files) tag_categories: Dict[str, int] = _calculate_tag_categories(annotation_files) @@ -378,14 +61,13 @@ def _calculate_categories(annotation_files: List[dt.AnnotationFile]) -> Dict[str and annotation_class.annotation_type in [ "polygon", - "complex_polygon", "bounding_box", ] ): categories[annotation_class.name] = _calculate_category_id( annotation_class ) - return categories + return dict(sorted(categories.items(), key=itemgetter(1))) def _calculate_tag_categories( @@ -401,7 +83,7 @@ def _calculate_tag_categories( categories[annotation_class.name] = _calculate_category_id( annotation_class ) - return categories + return dict(sorted(categories.items(), key=itemgetter(1))) def _calculate_category_id(annotation_class: dt.AnnotationClass) -> int: @@ -494,7 +176,7 @@ def _build_annotation( annotation_type = annotation.annotation_class.annotation_type if annotation_type == "polygon": sequences = convert_polygons_to_sequences( - annotation.data["path"], rounding=False + annotation.data["paths"], rounding=False ) x_coords = [s[0::2] for s in sequences] y_coords = [s[1::2] for s in sequences] @@ -522,34 +204,6 @@ def _build_annotation( "iscrowd": 0, "extra": _build_extra(annotation), } - elif annotation_type == "complex_polygon": - mask = np.zeros((annotation_file.image_height, annotation_file.image_width)) - sequences = convert_polygons_to_sequences(annotation.data["paths"]) - draw_polygon(mask, sequences, 1) - counts = rle_encode(mask) - - x_coords = [s[0::2] for s in sequences] - y_coords = [s[1::2] for s in sequences] - min_x = np.min([np.min(x_coord) for x_coord in x_coords]) - min_y = np.min([np.min(y_coord) for y_coord in y_coords]) - max_x = np.max([np.max(x_coord) for x_coord in x_coords]) - max_y = np.max([np.max(y_coord) for y_coord in 
y_coords]) - w = max_x - min_x + 1 - h = max_y - min_y + 1 - - return { - "id": annotation_id, - "image_id": _build_image_id(annotation_file), - "category_id": categories[annotation.annotation_class.name], - "segmentation": { - "counts": counts, - "size": [annotation_file.image_height, annotation_file.image_width], - }, - "area": np.sum(mask), - "bbox": [min_x, min_y, w, h], - "iscrowd": 1, - "extra": _build_extra(annotation), - } elif annotation_type == "tag": pass elif annotation_type == "bounding_box": diff --git a/darwin/exporter/formats/cvat.py b/darwin/exporter/formats/cvat.py index 20d6dcfa5..88e685dcd 100644 --- a/darwin/exporter/formats/cvat.py +++ b/darwin/exporter/formats/cvat.py @@ -3,10 +3,8 @@ from typing import Any, Dict, Iterator, List, Optional from xml.etree.ElementTree import Element, SubElement, tostring -import deprecation import darwin.datatypes as dt -from darwin.version import __version__ DEPRECATION_MESSAGE = """ @@ -35,192 +33,6 @@ def export(annotation_files: Iterator[dt.AnnotationFile], output_dir: Path) -> N f.write(tostring(output)) -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def add_subelement_text(parent: Element, name: str, value: Any) -> Element: - sub = SubElement(parent, name) - sub.text = str(value) - return sub - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_xml(annotation_files: List[dt.AnnotationFile]) -> Element: - label_lookup: Dict[str, int] = build_label_lookup(annotation_files) - root: Element = Element("annotations") - add_subelement_text(root, "version", "1.1") - build_meta(root, annotation_files, label_lookup) - build_images(root, annotation_files, label_lookup) - return root - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def 
build_images( - root: Element, - annotation_files: List[dt.AnnotationFile], - label_lookup: Dict[str, int], -) -> None: - for id, annotation_file in enumerate(annotation_files, 1): - image = SubElement(root, "image") - image.attrib["id"] = str(id) - image.attrib["name"] = annotation_file.filename - image.attrib["width"] = str(annotation_file.image_width) - image.attrib["height"] = str(annotation_file.image_height) - - for annotation in annotation_file.annotations: - build_annotation(image, annotation) - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_annotation(image: Element, annotation: dt.Annotation) -> None: - if annotation.annotation_class.annotation_type == "bounding_box": - box = SubElement(image, "box") - box.attrib["label"] = annotation.annotation_class.name - box.attrib["xtl"] = str(annotation.data["x"]) - box.attrib["ytl"] = str(annotation.data["y"]) - box.attrib["xbr"] = str(annotation.data["x"] + annotation.data["w"]) - box.attrib["ybr"] = str(annotation.data["y"] + annotation.data["h"]) - box.attrib["occluded"] = "0" - build_attributes(box, annotation) - else: - print(f"[warning] skipping {annotation.annotation_class.annotation_type}") - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_attributes(box: Element, annotation: dt.Annotation) -> None: - annotation_text: Optional[dt.SubAnnotation] = annotation.get_sub("text") - if annotation_text: - attribute = add_subelement_text(box, "attribute", annotation_text.data) - attribute.attrib["name"] = "__text" - - annotation_instance_id: Optional[dt.SubAnnotation] = annotation.get_sub( - "instance_id" - ) - if annotation_instance_id: - attribute = add_subelement_text( - box, "attribute", str(annotation_instance_id.data) - ) - attribute.attrib["name"] = "__instance_id" - - annotation_attributes: 
Optional[dt.SubAnnotation] = annotation.get_sub("attributes") - if annotation_attributes: - for attrib in annotation_attributes.data: - attribute = add_subelement_text(box, "attribute", "") - attribute.attrib["name"] = attrib - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_meta( - root: Element, - annotation_files: List[dt.AnnotationFile], - label_lookup: Dict[str, int], -) -> None: - meta: Element = SubElement(root, "meta") - add_subelement_text( - meta, "dumped", str(datetime.datetime.now(tz=datetime.timezone.utc)) - ) - - task: Element = SubElement(meta, "task") - add_subelement_text(task, "id", 1) - add_subelement_text(task, "name", "exported_task_from_darwin") - add_subelement_text(task, "size", len(annotation_files)) - add_subelement_text(task, "mode", "annotation") - add_subelement_text(task, "overlapp", 0) - add_subelement_text(task, "bugtracker", None) - add_subelement_text(task, "flipped", False) - add_subelement_text( - task, "created", str(datetime.datetime.now(tz=datetime.timezone.utc)) - ) - add_subelement_text( - task, "updated", str(datetime.datetime.now(tz=datetime.timezone.utc)) - ) - - labels: Element = SubElement(task, "labels") - build_labels(labels, label_lookup) - - segments: Element = SubElement(task, "segments") - build_segments(segments, annotation_files) - - owner: Element = SubElement(task, "owner") - add_subelement_text(owner, "username", "example_username") - add_subelement_text(owner, "email", "user@example.com") - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_segments( - segments: Element, annotation_files: List[dt.AnnotationFile] -) -> None: - segment: Element = SubElement(segments, "segment") - add_subelement_text(segment, "id", 1) - add_subelement_text(segment, "start", 1) - add_subelement_text(segment, "end", 
len(annotation_files)) - add_subelement_text(segment, "url", "not applicable") - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_labels(labels: Element, label_lookup: Dict[str, int]) -> None: - for key in label_lookup.keys(): - label: Element = SubElement(labels, "label") - add_subelement_text(label, "name", key) - SubElement(label, "attributes") - - -@deprecation.deprecated( - deprecated_in="0.7.7", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_label_lookup(annotation_files: List[dt.AnnotationFile]) -> Dict[str, int]: - labels: Dict[str, int] = {} - for annotation_file in annotation_files: - for annotation_class in annotation_file.annotation_classes: - if ( - annotation_class.name not in labels - and annotation_class.annotation_type == "bounding_box" - ): - labels[annotation_class.name] = len(labels) - return labels - - def _add_subelement_text(parent: Element, name: str, value: Any) -> Element: sub = SubElement(parent, name) sub.text = str(value) diff --git a/darwin/exporter/formats/darwin.py b/darwin/exporter/formats/darwin.py index 430f9ca67..becf4a179 100644 --- a/darwin/exporter/formats/darwin.py +++ b/darwin/exporter/formats/darwin.py @@ -1,11 +1,9 @@ from typing import Any, Dict, List -import deprecation import darwin.datatypes as dt # from darwin.datatypes import PolygonPath, PolygonPaths -from darwin.version import __version__ DEPRECATION_MESSAGE = """ @@ -58,10 +56,7 @@ def _build_v2_annotation_data(annotation: dt.Annotation) -> Dict[str, Any]: annotation_data["bounding_box"] = _build_bounding_box_data(annotation.data) elif annotation.annotation_class.annotation_type == "tag": annotation_data["tag"] = {} - elif ( - annotation.annotation_class.annotation_type == "polygon" - or annotation.annotation_class.annotation_type == "complex_polygon" - ): + elif annotation.annotation_class.annotation_type == 
"polygon": polygon_data = _build_polygon_data(annotation.data) annotation_data["polygon"] = polygon_data annotation_data["bounding_box"] = _build_bounding_box_data(annotation.data) @@ -94,12 +89,7 @@ def _build_polygon_data(data: Dict[str, Any]) -> Dict[str, Any]: Dict[str, List[List[Dict[str, float]]]] The polygon data in the format required for Darwin v2 annotations. """ - - # Complex polygon - if "paths" in data: - return {"paths": data["paths"]} - else: - return {"paths": [data["path"]]} + return {"paths": data["paths"]} def _build_item_data( @@ -167,33 +157,3 @@ def _build_slots_data(slots: List[dt.Slot]) -> List[Dict[str, Any]]: slots_data.append(slot_data) return slots_data - - -@deprecation.deprecated( - deprecated_in="0.7.8", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_annotation_data(annotation: dt.Annotation) -> Dict[str, Any]: - if annotation.annotation_class.annotation_type == "complex_polygon": - return {"path": annotation.data["paths"]} - - if annotation.annotation_class.annotation_type == "polygon": - return dict( - filter(lambda item: item[0] != "bounding_box", annotation.data.items()) - ) - - return dict(annotation.data) - - -def _build_annotation_data(annotation: dt.Annotation) -> Dict[str, Any]: - if annotation.annotation_class.annotation_type == "complex_polygon": - return {"path": annotation.data["paths"]} - - if annotation.annotation_class.annotation_type == "polygon": - return dict( - filter(lambda item: item[0] != "bounding_box", annotation.data.items()) - ) - - return dict(annotation.data) diff --git a/darwin/exporter/formats/dataloop.py b/darwin/exporter/formats/dataloop.py index 96fc47bdd..5f5bee622 100644 --- a/darwin/exporter/formats/dataloop.py +++ b/darwin/exporter/formats/dataloop.py @@ -1,11 +1,9 @@ from pathlib import Path from typing import Any, Dict, Iterable -import deprecation import orjson as json import darwin.datatypes as dt -from darwin.version import __version__ 
DEPRECATION_MESSAGE = """ @@ -31,87 +29,6 @@ def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path) -> N _export_file(annotation_file, id, output_dir) -@deprecation.deprecated( - deprecated_in="0.7.8", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def export_file(annotation_file: dt.AnnotationFile, id: int, output_dir: Path) -> None: - output: Dict[str, Any] = _build_json(annotation_file, id) - output_file_path: Path = (output_dir / annotation_file.filename).with_suffix( - ".json" - ) - with open(output_file_path, "w") as f: - op = json.dumps( - output, option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY - ).decode("utf-8") - f.write(op) - - -@deprecation.deprecated( - deprecated_in="0.7.8", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_json(annotation_file: dt.AnnotationFile, id: int) -> Dict[str, Any]: - return { - "_id": id, - "filename": annotation_file.filename, - "itemMetadata": [], - "annotations": _build_annotations(annotation_file, id), - } - - -@deprecation.deprecated( - deprecated_in="0.7.8", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_annotations( - annotation_file: dt.AnnotationFile, id: int -) -> Iterable[Dict[str, Any]]: - output = [] - for annotation_id, annotation in enumerate(annotation_file.annotations): - print(annotation) - if annotation.annotation_class.annotation_type == "bounding_box": - entry = { - "id": annotation_id, - "datasetId": "darwin", - "type": "box", - "label": annotation.annotation_class.name, - "attributes": [], - "coordinates": [ - {"x": annotation.data["x"], "y": annotation.data["y"], "z": 0}, - { - "x": annotation.data["x"] + annotation.data["w"], - "y": annotation.data["y"] + annotation.data["h"], - "z": 0, - }, - ], - "metadata": {}, - } - output.append(entry) - elif annotation.annotation_class.annotation_type == "polygon": - entry = { - "id": annotation_id, 
- "datasetId": "darwin", - "type": "segment", - "label": annotation.annotation_class.name, - "attributes": [], - "coordinates": [ - {"x": point["x"], "y": point["y"], "z": 0} - for point in annotation.data["path"] - ], - "metadata": {}, - } - output.append(entry) - - return output - - def _export_file(annotation_file: dt.AnnotationFile, id: int, output_dir: Path) -> None: output: Dict[str, Any] = _build_json(annotation_file, id) output_file_path: Path = (output_dir / annotation_file.filename).with_suffix( diff --git a/darwin/exporter/formats/helpers/yolo_class_builder.py b/darwin/exporter/formats/helpers/yolo_class_builder.py index ac20a495b..fd5c3e41f 100644 --- a/darwin/exporter/formats/helpers/yolo_class_builder.py +++ b/darwin/exporter/formats/helpers/yolo_class_builder.py @@ -8,7 +8,7 @@ def build_class_index( annotation_files: Iterable[AnnotationFile], - include_types: List[str] = ["bounding_box", "polygon", "complex_polygon"], + include_types: List[str] = ["bounding_box", "polygon"], ) -> ClassIndex: classes = set() for annotation_file in annotation_files: diff --git a/darwin/exporter/formats/instance_mask.py b/darwin/exporter/formats/instance_mask.py index 2991faaa9..e7a31b0d4 100644 --- a/darwin/exporter/formats/instance_mask.py +++ b/darwin/exporter/formats/instance_mask.py @@ -40,8 +40,6 @@ def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path) -> N for i, annotation in enumerate(annotations): cat = annotation.annotation_class.name if annotation.annotation_class.annotation_type == "polygon": - polygon = annotation.data["path"] - elif annotation.annotation_class.annotation_type == "complex_polygon": polygon = annotation.data["paths"] else: continue diff --git a/darwin/exporter/formats/mask.py b/darwin/exporter/formats/mask.py index 22d92d2ca..534755844 100644 --- a/darwin/exporter/formats/mask.py +++ b/darwin/exporter/formats/mask.py @@ -138,7 +138,7 @@ def get_render_mode(annotations: List[dt.AnnotationLike]) -> dt.MaskTypes.TypeOf 
types: Set[str] = set(list_of_types) is_raster_mask = ("mask" in types) and ("raster_layer" in types) - is_polygon = ("polygon" in types) or ("complex_polygon" in types) + is_polygon = "polygon" in types raster_layer_count = len([a for a in types if a == "raster_layer"]) @@ -260,8 +260,6 @@ def render_polygons( categories.append(cat) if a.annotation_class.annotation_type == "polygon": - polygon = a.data["path"] - elif a.annotation_class.annotation_type == "complex_polygon": polygon = a.data["paths"] else: raise ValueError( @@ -418,7 +416,7 @@ def export( masks_dir: Path = output_dir / "masks" masks_dir.mkdir(exist_ok=True, parents=True) annotation_files = list(annotation_files) - accepted_types = ["polygon", "complex_polygon", "raster_layer", "mask"] + accepted_types = ["polygon", "raster_layer", "mask"] all_classes_sets: List[Set[dt.AnnotationClass]] = [ a.annotation_classes for a in annotation_files ] @@ -570,13 +568,10 @@ def offset_polygon(polygon: List, offset_x: int, offset_y: int) -> List: Returns: List: polygon with offset applied """ - if isinstance(polygon[0], list): - return offset_complex_polygon(polygon, offset_x, offset_y) - else: - return offset_simple_polygon(polygon, offset_x, offset_y) + return offset_polygon_paths(polygon, offset_x, offset_y) -def offset_complex_polygon(polygons: List, offset_x: int, offset_y: int) -> List: +def offset_polygon_paths(polygons: List, offset_x: int, offset_y: int) -> List: new_polygons = [] for polygon in polygons: new_polygons.append(offset_simple_polygon(polygon, offset_x, offset_y)) diff --git a/darwin/exporter/formats/nifti.py b/darwin/exporter/formats/nifti.py index da4d1b8bd..c0baec33a 100644 --- a/darwin/exporter/formats/nifti.py +++ b/darwin/exporter/formats/nifti.py @@ -340,12 +340,6 @@ def populate_output_volumes_from_polygons( shift_polygon_coords(polygon_path, pixdims) for polygon_path in frame_data["paths"] ] - elif "path" in frame_data: - # Dealing with a simple polygon - polygons = 
shift_polygon_coords( - frame_data["path"], - pixdims, - ) else: continue im_mask = convert_polygons_to_mask(polygons, height=height, width=width) diff --git a/darwin/exporter/formats/pascalvoc.py b/darwin/exporter/formats/pascalvoc.py index aa7acd08b..8c698ed93 100644 --- a/darwin/exporter/formats/pascalvoc.py +++ b/darwin/exporter/formats/pascalvoc.py @@ -1,12 +1,9 @@ from pathlib import Path -from typing import Any, Dict, Iterable +from typing import Any, Iterable from xml.etree.ElementTree import Element, SubElement, tostring -import deprecation import darwin.datatypes as dt -from darwin.utils import attempt_decode -from darwin.version import __version__ DEPRECATION_MESSAGE = """ @@ -40,150 +37,6 @@ def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path) -> N _export_file(annotation_file, output_dir) -@deprecation.deprecated( - deprecated_in="0.7.10", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def export_file(annotation_file: dt.AnnotationFile, output_dir: Path) -> None: - xml = build_xml(annotation_file) - output_file_path = (output_dir / annotation_file.filename).with_suffix(".xml") - output_file_path.parent.mkdir(parents=True, exist_ok=True) - with open(output_file_path, "wb") as f: - f.write(tostring(xml)) - - -@deprecation.deprecated( - deprecated_in="0.7.10", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_xml(annotation_file: dt.AnnotationFile) -> Element: - root: Element = Element("annotation") - add_subelement_text(root, "folder", "images") - add_subelement_text(root, "filename", annotation_file.filename) - add_subelement_text(root, "path", f"images/{annotation_file.filename}") - - source = SubElement(root, "source") - add_subelement_text(source, "database", "darwin") - - size = SubElement(root, "size") - add_subelement_text(size, "width", str(annotation_file.image_width)) - add_subelement_text(size, "height", 
str(annotation_file.image_height)) - add_subelement_text(size, "depth", "3") - - add_subelement_text(root, "segmented", "0") - - for annotation in annotation_file.annotations: - annotation_type = annotation.annotation_class.annotation_type - if annotation_type not in ["bounding_box", "polygon", "complex_polygon"]: - continue - - data = annotation.data - sub_annotation = SubElement(root, "object") - add_subelement_text(sub_annotation, "name", annotation.annotation_class.name) - add_subelement_text(sub_annotation, "pose", "Unspecified") - add_subelement_text(sub_annotation, "truncated", "0") - add_subelement_text(sub_annotation, "difficult", "0") - bndbox = SubElement(sub_annotation, "bndbox") - - if annotation_type == "polygon" or annotation_type == "complex_polygon": - data = data.get("bounding_box") - - xmin = data.get("x") - ymin = data.get("y") - xmax = xmin + data.get("w") - ymax = ymin + data.get("h") - add_subelement_text(bndbox, "xmin", str(round(xmin))) - add_subelement_text(bndbox, "ymin", str(round(ymin))) - add_subelement_text(bndbox, "xmax", str(round(xmax))) - add_subelement_text(bndbox, "ymax", str(round(ymax))) - - return root - - -@deprecation.deprecated( - deprecated_in="0.7.10", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def add_subelement_text(parent: Element, name: str, value: Any) -> Element: - sub: Element = SubElement(parent, name) - sub.text = value - return sub - - -@deprecation.deprecated( - deprecated_in="0.7.10", - removed_in="0.8.0", - current_version=__version__, - details=REMOVAL_MESSAGE, -) -def convert_file(path: Path) -> Element: - data = attempt_decode(path) - return build_voc(data["image"], data["annotations"]) - - -@deprecation.deprecated( - deprecated_in="0.7.10", - removed_in="0.8.0", - current_version=__version__, - details=REMOVAL_MESSAGE, -) -def save_xml(xml: Element, path: Path) -> None: - with open(path, "wb") as f: - f.write(tostring(xml)) - - -@deprecation.deprecated( - 
deprecated_in="0.7.10", - removed_in="0.8.0", - current_version=__version__, - details=REMOVAL_MESSAGE, -) -def build_voc( - metadata: Dict[str, Any], annotations: Iterable[Dict[str, Any]] -) -> Element: - print(metadata) - root: Element = Element("annotation") - add_subelement_text(root, "folder", "images") - add_subelement_text(root, "filename", metadata["original_filename"]) - add_subelement_text(root, "path", f"images/{metadata['original_filename']}") - - source: Element = SubElement(root, "source") - add_subelement_text(source, "database", "darwin") - - size: Element = SubElement(root, "size") - add_subelement_text(size, "width", str(metadata["width"])) - add_subelement_text(size, "height", str(metadata["height"])) - add_subelement_text(size, "depth", "3") - - add_subelement_text(root, "segmented", "0") - - for annotation in annotations: - if "bounding_box" not in annotation: - continue - data = annotation["bounding_box"] - sub_annotation = SubElement(root, "object") - add_subelement_text(sub_annotation, "name", annotation["name"]) - add_subelement_text(sub_annotation, "pose", "Unspecified") - add_subelement_text(sub_annotation, "truncated", "0") - add_subelement_text(sub_annotation, "difficult", "0") - bndbox = SubElement(sub_annotation, "bndbox") - add_subelement_text(bndbox, "xmin", str(round(data["x"]))) - add_subelement_text(bndbox, "ymin", str(round(data["y"]))) - add_subelement_text(bndbox, "xmax", str(round(data["x"] + data["w"]))) - add_subelement_text(bndbox, "ymax", str(round(data["y"] + data["h"]))) - - return root - - -###################################### - - def _export_file(annotation_file: dt.AnnotationFile, output_dir: Path) -> None: xml = _build_xml(annotation_file) output_file_path = (output_dir / annotation_file.filename).with_suffix(".xml") @@ -210,7 +63,7 @@ def _build_xml(annotation_file: dt.AnnotationFile) -> Element: for annotation in annotation_file.annotations: annotation_type = annotation.annotation_class.annotation_type - if 
annotation_type not in ["bounding_box", "polygon", "complex_polygon"]: + if annotation_type not in ["bounding_box", "polygon"]: continue data = annotation.data @@ -221,7 +74,7 @@ def _build_xml(annotation_file: dt.AnnotationFile) -> Element: _add_subelement_text(sub_annotation, "difficult", "0") bndbox = SubElement(sub_annotation, "bndbox") - if annotation_type == "polygon" or annotation_type == "complex_polygon": + if annotation_type == "polygon": data = data.get("bounding_box") xmin = data.get("x") diff --git a/darwin/exporter/formats/yolo.py b/darwin/exporter/formats/yolo.py index 73ac5ddb3..3aeb993d6 100644 --- a/darwin/exporter/formats/yolo.py +++ b/darwin/exporter/formats/yolo.py @@ -45,7 +45,7 @@ def _build_txt(annotation_file: dt.AnnotationFile, class_index: ClassIndex) -> s if annotation_type == "bounding_box": data = annotation.data - elif annotation_type in ["polygon", "complex_polygon"]: + elif annotation_type == "polygon": data = annotation.data data = data.get("bounding_box") else: diff --git a/darwin/importer/formats/coco.py b/darwin/importer/formats/coco.py index 28d4f4f8f..616a4eeb5 100644 --- a/darwin/importer/formats/coco.py +++ b/darwin/importer/formats/coco.py @@ -2,14 +2,12 @@ from pathlib import Path from typing import Dict, Iterator, List, Optional -import deprecation import orjson as json from upolygon import find_contours, rle_decode import darwin.datatypes as dt from darwin.path_utils import deconstruct_full_path from darwin.utils import attempt_decode -from darwin.version import __version__ DEPRECATION_MESSAGE = """ @@ -169,7 +167,7 @@ def parse_annotation( except StopIteration: break paths.append(path) - return dt.make_complex_polygon(category["name"], paths) + return dt.make_polygon(category["name"], paths) elif isinstance(segmentation, list): path = [] points = iter( @@ -196,12 +194,6 @@ def _decode_file(current_encoding: str, path: Path): return list(parse_json(path, data)) -@deprecation.deprecated( - deprecated_in="0.7.12", - 
removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) def decode_binary_rle(data: str) -> List[int]: """ Decodes binary rle to integer list rle. diff --git a/darwin/importer/formats/dataloop.py b/darwin/importer/formats/dataloop.py index 4f546ab09..a075b4a2b 100644 --- a/darwin/importer/formats/dataloop.py +++ b/darwin/importer/formats/dataloop.py @@ -1,7 +1,6 @@ from pathlib import Path from typing import Any, Dict, List, Optional, Set - import darwin.datatypes as dt from darwin.exceptions import ( DataloopComplexPolygonsNotYetSupported, @@ -76,6 +75,6 @@ def _parse_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotation]: raise DataloopComplexPolygonsNotYetSupported() points: List[dt.Point] = [{"x": c["x"], "y": c["y"]} for c in coords[0]] - return dt.make_polygon(annotation_label, point_path=points) + return dt.make_polygon(annotation_label, point_paths=points) return None diff --git a/darwin/importer/formats/nifti.py b/darwin/importer/formats/nifti.py index 018690188..64e2ee097 100644 --- a/darwin/importer/formats/nifti.py +++ b/darwin/importer/formats/nifti.py @@ -350,11 +350,11 @@ def adjust_for_pixdims(x, y, pixdims): ] paths.append(path) if len(paths) > 1: - polygon = dt.make_complex_polygon(class_name, paths) + polygon = dt.make_polygon(class_name, paths) elif len(paths) == 1: polygon = dt.make_polygon( class_name, - point_path=paths[0], + point_paths=paths[0], ) else: return None @@ -364,7 +364,7 @@ def adjust_for_pixdims(x, y, pixdims): return None polygon = dt.make_polygon( class_name, - point_path=[ + point_paths=[ adjust_for_pixdims(x, y, pixdims) for x, y in zip(external_path[0::2], external_path[1::2]) ], diff --git a/darwin/importer/importer.py b/darwin/importer/importer.py index 5d398334f..785182f74 100644 --- a/darwin/importer/importer.py +++ b/darwin/importer/importer.py @@ -36,7 +36,6 @@ from darwin.client import Client from darwin.dataset.remote_dataset import RemoteDataset -import deprecation from 
rich.console import Console from rich.progress import track from rich.theme import Theme @@ -46,7 +45,6 @@ from darwin.exceptions import IncompatibleOptions, RequestEntitySizeExceeded from darwin.utils import secure_continue_request from darwin.utils.flatten_list import flatten_list -from darwin.version import __version__ logger = getLogger(__name__) @@ -72,13 +70,7 @@ """ -@deprecation.deprecated( # type:ignore - deprecated_in="0.7.12", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_main_annotations_lookup_table( +def _build_main_annotations_lookup_table( annotation_classes: List[Dict[str, Unknown]] ) -> Dict[str, Unknown]: MAIN_ANNOTATION_TYPES = [ @@ -109,13 +101,7 @@ def build_main_annotations_lookup_table( return lookup -@deprecation.deprecated( # type:ignore - deprecated_in="0.7.12", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def find_and_parse( # noqa: C901 +def _find_and_parse( # noqa: C901 importer: Callable[[Path], Union[List[dt.AnnotationFile], dt.AnnotationFile, None]], file_paths: List[PathLike], console: Optional[Console] = None, @@ -183,13 +169,7 @@ def _get_files_for_parsing(file_paths: List[PathLike]) -> List[Path]: return [file for files in packed_files for file in files] -@deprecation.deprecated( # type:ignore - deprecated_in="0.7.12", - removed_in="0.8.0", - current_version=__version__, - details=DEPRECATION_MESSAGE, -) -def build_attribute_lookup(dataset: "RemoteDataset") -> Dict[str, Unknown]: +def _build_attribute_lookup(dataset: "RemoteDataset") -> Dict[str, Unknown]: attributes: List[Dict[str, Unknown]] = dataset.fetch_remote_attributes() lookup: Dict[str, Unknown] = {} for attribute in attributes: @@ -200,13 +180,7 @@ def build_attribute_lookup(dataset: "RemoteDataset") -> Dict[str, Unknown]: return lookup -@deprecation.deprecated( # type:ignore - deprecated_in="0.7.12", - removed_in="0.8.0", - current_version=__version__, - 
details=DEPRECATION_MESSAGE, -) -def get_remote_files( +def _get_remote_files( dataset: "RemoteDataset", filenames: List[str], chunk_size: int = 100 ) -> Dict[str, Tuple[int, str]]: """ @@ -779,28 +753,28 @@ def import_annotations( # noqa: C901 if not team_classes: raise ValueError("Unable to fetch remote class list.") - classes_in_dataset: dt.DictFreeForm = build_main_annotations_lookup_table( + classes_in_dataset: dt.DictFreeForm = _build_main_annotations_lookup_table( [ cls for cls in team_classes if cls["available"] or cls["name"] in GLOBAL_CLASSES ] ) - classes_in_team: dt.DictFreeForm = build_main_annotations_lookup_table( + classes_in_team: dt.DictFreeForm = _build_main_annotations_lookup_table( [ cls for cls in team_classes if not cls["available"] and cls["name"] not in GLOBAL_CLASSES ] ) - attributes = build_attribute_lookup(dataset) + attributes = _build_attribute_lookup(dataset) console.print("Retrieving local annotations ...", style="info") local_files = [] local_files_missing_remotely = [] # ! 
Other place we can use multiprocessing - hard to pass in the importer though - maybe_parsed_files: Optional[Iterable[dt.AnnotationFile]] = find_and_parse( + maybe_parsed_files: Optional[Iterable[dt.AnnotationFile]] = _find_and_parse( importer, file_paths, console, use_multi_cpu, cpu_limit ) @@ -823,7 +797,7 @@ def import_annotations( # noqa: C901 chunk_size = 100 while chunk_size > 0: try: - remote_files = get_remote_files(dataset, filenames, chunk_size) + remote_files = _get_remote_files(dataset, filenames, chunk_size) break except RequestEntitySizeExceeded: chunk_size -= 8 @@ -912,9 +886,9 @@ def import_annotations( # noqa: C901 if not maybe_remote_classes: raise ValueError("Unable to fetch remote classes.") - remote_classes = build_main_annotations_lookup_table(maybe_remote_classes) + remote_classes = _build_main_annotations_lookup_table(maybe_remote_classes) else: - remote_classes = build_main_annotations_lookup_table(team_classes) + remote_classes = _build_main_annotations_lookup_table(team_classes) if delete_for_empty: console.print( @@ -1069,15 +1043,17 @@ def _handle_subs( return data -def _handle_complex_polygon( +def _format_polygon_for_import( annotation: dt.Annotation, data: dt.DictFreeForm ) -> dt.DictFreeForm: - if "complex_polygon" in data: - del data["complex_polygon"] - data["polygon"] = { - "path": annotation.data["paths"][0], - "additional_paths": annotation.data["paths"][1:], - } + if "polygon" in data: + if len(annotation.data["paths"]) > 1: + data["polygon"] = { + "path": annotation.data["paths"][0], + "additional_paths": annotation.data["paths"][1:], + } + elif len(annotation.data["paths"]) == 1: + data["polygon"] = {"path": annotation.data["paths"][0]} return data @@ -1145,14 +1121,14 @@ def _get_annotation_data( only_keyframes=True, post_processing=lambda annotation, data: _handle_subs( annotation, - _handle_complex_polygon(annotation, data), + _format_polygon_for_import(annotation, data), annotation_class_id, attributes, ), ) else: data = 
{annotation_class.annotation_type: annotation.data} - data = _handle_complex_polygon(annotation, data) + data = _format_polygon_for_import(annotation, data) data = _handle_subs(annotation, data, annotation_class_id, attributes) return data diff --git a/darwin/item.py b/darwin/item.py index 2b554e725..6b11fbffc 100644 --- a/darwin/item.py +++ b/darwin/item.py @@ -1,11 +1,9 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional -import deprecation from pydantic import BaseModel from darwin.path_utils import construct_full_path -from darwin.version import __version__ @dataclass(frozen=True, eq=True) @@ -114,39 +112,3 @@ def parse(cls, raw: Dict[str, Any], dataset_slug: str = "n/a") -> "DatasetItem": "slots": [], } return DatasetItem(**data) - - -@deprecation.deprecated( - deprecated_in="0.7.5", - removed_in="0.8.0", - current_version=__version__, - details="Use the ``DatasetItem.parse`` instead.", -) -def parse_dataset_item(raw: Dict[str, Any]) -> DatasetItem: - """ - Parses the given dictionary into a ``DatasetItem``. Performs no validations. - - Parameters - ---------- - raw : Dict[str, Any] - The dictionary to parse. - - Returns - ------- - DatasetItem - A dataset item with the parsed information. 
- """ - return DatasetItem( - id=raw["id"], - filename=raw["filename"], - status=raw["status"], - archived=raw["archived"], - filesize=raw["file_size"], - dataset_id=raw["dataset_id"], - dataset_slug="n/a", - seq=raw["seq"], - current_workflow_id=raw.get("current_workflow_id"), - path=raw["path"], - slots=[], - current_workflow=raw.get("current_workflow"), - ) diff --git a/darwin/torch/dataset.py b/darwin/torch/dataset.py index b98592e0c..256a519db 100644 --- a/darwin/torch/dataset.py +++ b/darwin/torch/dataset.py @@ -326,8 +326,6 @@ def get_target(self, index: int) -> Dict[str, Any]: annotations = [] for annotation in target["annotations"]: - annotation_type: str = annotation.annotation_class.annotation_type - path_key = "paths" if annotation_type == "complex_polygon" else "path" # Darwin V2 only has paths (TODO it might be more robust fixes) if "paths" in annotation.data: diff --git a/darwin/utils/utils.py b/darwin/utils/utils.py index 49bf6079f..05a36aa2c 100644 --- a/darwin/utils/utils.py +++ b/darwin/utils/utils.py @@ -19,7 +19,6 @@ cast, ) -import deprecation import json_stream import numpy as np import orjson as json @@ -39,7 +38,6 @@ UnsupportedFileType, ) from darwin.future.data_objects.properties import SelectedProperty -from darwin.version import __version__ if TYPE_CHECKING: from darwin.client import Client @@ -91,26 +89,6 @@ def is_extension_allowed_by_filename(filename: str) -> bool: return any(filename.lower().endswith(ext) for ext in SUPPORTED_EXTENSIONS) -@deprecation.deprecated(deprecated_in="0.8.4", current_version=__version__) -def is_extension_allowed(extension: str) -> bool: - """ - Returns whether or not the given extension is allowed. - @Deprecated. Use is_extension_allowed_by_filename instead, and pass full filename. - This is due to the fact that some extensions now include multiple dots, e.g. .nii.gz - - Parameters - ---------- - extension : str - The extension. - - Returns - ------- - bool - Whether or not the given extension is allowed. 
- """ - return extension.lower() in SUPPORTED_EXTENSIONS - - def is_image_extension_allowed_by_filename(filename: str) -> bool: """ Returns whether or not the given image extension is allowed. @@ -128,7 +106,6 @@ def is_image_extension_allowed_by_filename(filename: str) -> bool: return any(filename.lower().endswith(ext) for ext in SUPPORTED_IMAGE_EXTENSIONS) -@deprecation.deprecated(deprecated_in="0.8.4", current_version=__version__) def is_image_extension_allowed(extension: str) -> bool: """ Returns whether or not the given image extension is allowed. @@ -146,41 +123,6 @@ def is_image_extension_allowed(extension: str) -> bool: return extension.lower() in SUPPORTED_IMAGE_EXTENSIONS -def is_video_extension_allowed_by_filename(extension: str) -> bool: - """ - Returns whether or not the given image extension is allowed. - - Parameters - ---------- - extension : str - The image extension. - - Returns - ------- - bool - Whether or not the given extension is allowed. - """ - return any(extension.lower().endswith(ext) for ext in SUPPORTED_VIDEO_EXTENSIONS) - - -@deprecation.deprecated(deprecated_in="0.8.4", current_version=__version__) -def is_video_extension_allowed(extension: str) -> bool: - """ - Returns whether or not the given video extension is allowed. - - Parameters - ---------- - extension : str - The video extension. - - Returns - ------- - bool - Whether or not the given extension is allowed. - """ - return extension.lower() in SUPPORTED_VIDEO_EXTENSIONS - - def urljoin(*parts: str) -> str: """ Take as input an unpacked list of strings and joins them to form an URL. 
@@ -464,13 +406,7 @@ def parse_darwin_json( if "annotations" not in data: return None - if version.major == 2: - return _parse_darwin_v2(path, data) - else: - if "fps" in data["image"] or "frame_count" in data["image"]: - return _parse_darwin_video(path, data, count) - else: - return _parse_darwin_image(path, data, count) + return _parse_darwin_v2(path, data) def stream_darwin_json(path: Path) -> PersistentStreamingJSONObject: @@ -726,61 +662,31 @@ def _parse_darwin_video( return annotation_file -def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotation]: +def _parse_darwin_annotation( + annotation: Dict[str, Any], + only_keyframes: bool = False, + annotation_type: Optional[str] = None, + annotation_data: Optional[Dict] = None, +) -> Optional[dt.Annotation]: slot_names = parse_slot_names(annotation) name: str = annotation["name"] main_annotation: Optional[dt.Annotation] = None - # Darwin JSON 2.0 representation of complex polygons - if ( - "polygon" in annotation - and "paths" in annotation["polygon"] - and len(annotation["polygon"]["paths"]) > 1 - ): + # Darwin JSON 2.0 representation of polygons + if "polygon" in annotation and "paths" in annotation["polygon"]: bounding_box = annotation.get("bounding_box") paths = annotation["polygon"]["paths"] - main_annotation = dt.make_complex_polygon( + main_annotation = dt.make_polygon( name, paths, bounding_box, slot_names=slot_names ) - # Darwin JSON 2.0 representation of simple polygons - elif ( - "polygon" in annotation - and "paths" in annotation["polygon"] - and len(annotation["polygon"]["paths"]) == 1 - ): + + elif "polygon" in annotation and "path" in annotation["polygon"]: bounding_box = annotation.get("bounding_box") - paths = annotation["polygon"]["paths"] + path = annotation["polygon"]["path"] main_annotation = dt.make_polygon( - name, paths[0], bounding_box, slot_names=slot_names + name, path, bounding_box, slot_names=slot_names ) - # Darwin JSON 1.0 representation of complex and simple 
polygons - elif "polygon" in annotation: - bounding_box = annotation.get("bounding_box") - if "additional_paths" in annotation["polygon"]: - paths = [annotation["polygon"]["path"]] + annotation["polygon"][ - "additional_paths" - ] - main_annotation = dt.make_complex_polygon( - name, paths, bounding_box, slot_names=slot_names - ) - else: - main_annotation = dt.make_polygon( - name, annotation["polygon"]["path"], bounding_box, slot_names=slot_names - ) - # Darwin JSON 1.0 representation of complex polygons - elif "complex_polygon" in annotation: - bounding_box = annotation.get("bounding_box") - if isinstance(annotation["complex_polygon"]["path"][0], list): - paths = annotation["complex_polygon"]["path"] - else: - paths = [annotation["complex_polygon"]["path"]] - - if "additional_paths" in annotation["complex_polygon"]: - paths.extend(annotation["complex_polygon"]["additional_paths"]) - main_annotation = dt.make_complex_polygon( - name, paths, bounding_box, slot_names=slot_names - ) elif "bounding_box" in annotation: bounding_box = annotation["bounding_box"] main_annotation = dt.make_bounding_box( @@ -853,6 +759,10 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati raster_layer["dense_rle"], slot_names=slot_names, ) + elif only_keyframes: + main_annotation = make_keyframe_annotation( + annotation_type, annotation_data, name, slot_names + ) if not main_annotation: print(f"[WARNING] Unsupported annotation type: '{annotation.keys()}'") @@ -897,15 +807,155 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati return main_annotation +def make_keyframe_annotation( + annotation_type: Optional[str], + annotation_data: Optional[Dict], + name: str, + slot_names: List[str], +) -> dt.Annotation: + if annotation_type == "polygon": + return dt.make_polygon( + name, annotation_data["paths"], annotation_data["bounding_box"] + ) + elif annotation_type == "bounding_box": + return dt.make_bounding_box( + name, + 
annotation_data["x"], + annotation_data["y"], + annotation_data["w"], + annotation_data["h"], + ) + elif annotation_type == "tag": + return dt.make_tag(name) + elif annotation_type == "line": + return dt.make_line(name, annotation_data["path"]) + elif annotation_type == "keypoint": + return dt.make_keypoint(name, annotation_data["x"], annotation_data["y"]) + elif annotation_type == "ellipse": + return dt.make_ellipse(name, annotation_data) + elif annotation_type == "cuboid": + return dt.make_cuboid(name, annotation_data) + elif annotation_type == "skeleton": + return dt.make_skeleton(name, annotation_data["nodes"]) + elif annotation_type == "table": + return dt.make_table( + name, annotation_data["bounding_box"], annotation_data["cells"] + ) + elif annotation_type == "simple_table": + return dt.make_simple_table( + name, + annotation_data["bounding_box"], + annotation_data["col_offsets"], + annotation_data["row_offsets"], + ) + elif annotation_type == "string": + return dt.make_string(name, annotation_data["sources"]) + elif annotation_type == "graph": + return dt.make_graph(name, annotation_data["nodes"], annotation_data["edges"]) + elif annotation_type == "mask": + return dt.make_mask(name) + elif annotation_type == "raster_layer": + return dt.make_raster_layer( + name, + annotation_data["mask_annotation_ids_mapping"], + annotation_data["total_pixels"], + annotation_data["dense_rle"], + ) + else: + raise ValueError(f"Unsupported annotation type: '{annotation_type}'") + + +def update_annotation_data( + main_annotation_data: Dict[str, Any], + annotation_type: Optional[str], + annotation_data: Optional[Dict], +) -> Tuple[Optional[str], Optional[Dict]]: + if annotation_type == "polygon": + bounding_box = main_annotation_data.get("bounding_box") + paths = main_annotation_data["paths"] + annotation_data = {"paths": paths, "bounding_box": bounding_box} + elif annotation_type == "bounding_box": + annotation_data = { + "x": main_annotation_data["x"], + "y": 
main_annotation_data["y"], + "w": main_annotation_data["w"], + "h": main_annotation_data["h"], + } + elif annotation_type == "tag": + annotation_data = {} + elif annotation_type == "line": + annotation_data = {"path": main_annotation_data["path"]} + elif annotation_type == "keypoint": + annotation_data = { + "x": main_annotation_data["x"], + "y": main_annotation_data["y"], + } + elif annotation_type == "ellipse": + annotation_data = { + "angle": main_annotation_data["angle"], + "center": main_annotation_data["center"], + "radius": main_annotation_data["radius"], + } + elif annotation_type == "cuboid": + annotation_data = { + "back": main_annotation_data["back"], + "front": main_annotation_data["front"], + } + elif annotation_type == "skeleton": + annotation_data = {"nodes": main_annotation_data["nodes"]} + elif annotation_type == "table": + annotation_type = "table" + annotation_data = { + "bounding_box": main_annotation_data["table"]["bounding_box"], + "cells": main_annotation_data["table"]["cells"], + } + elif annotation_type == "string": + annotation_data = {"sources": main_annotation_data["string"]["sources"]} + elif annotation_type == "graph": + annotation_data = { + "nodes": main_annotation_data["graph"]["nodes"], + "edges": main_annotation_data["graph"]["edges"], + } + elif annotation_type == "mask": + annotation_data = {} + elif annotation_type == "raster_layer": + annotation_data = { + "dense_rle": main_annotation_data["dense_rle"], + "mask_annotation_ids_mapping": main_annotation_data[ + "mask_annotation_ids_mapping" + ], + "total_pixels": main_annotation_data["total_pixels"], + } + + return annotation_data + + def _parse_darwin_video_annotation(annotation: dict) -> Optional[dt.VideoAnnotation]: name = annotation["name"] frame_annotations = {} keyframes: Dict[int, bool] = {} frames = {**annotation.get("frames", {}), **annotation.get("sections", {})} + only_keyframes = annotation.get("only_keyframes", False) + annotation_type, annotation_data = None, None 
def get_annotation_type_and_data(
    frame: Dict, annotation_type: Optional[str], annotation_data: Optional[Dict]
) -> Tuple[Optional[str], Optional[Dict]]:
    """
    Returns the type of a given video annotation and its data.

    The frame is inspected for the first known annotation key, in the fixed
    priority order of the branches below (``polygon`` is checked before
    ``bounding_box`` because a polygon frame may also carry a bounding box).

    Parameters
    ----------
    frame : Dict
        A single frame entry of a video annotation.
    annotation_type : Optional[str]
        Fallback type, returned unchanged when ``frame`` holds no known
        annotation key.
    annotation_data : Optional[Dict]
        Fallback data, returned unchanged when ``frame`` holds no known
        annotation key.

    Returns
    -------
    Tuple[Optional[str], Optional[Dict]]
        The detected annotation type and its data, or the fallback values
        passed in when nothing is detected.

    Raises
    ------
    KeyError
        If a known annotation key is present but one of the sub-keys read for
        that type is missing.
    """
    if "polygon" in frame:
        # Empty and non-empty ``paths`` are handled identically, so no
        # branching on the paths value is needed.
        annotation_type = "polygon"
        annotation_data = {
            "paths": frame["polygon"]["paths"],
            "bounding_box": frame.get("bounding_box"),
        }
    elif "bounding_box" in frame:
        bounding_box = frame["bounding_box"]
        annotation_type = "bounding_box"
        annotation_data = {
            "x": bounding_box["x"],
            "y": bounding_box["y"],
            "w": bounding_box["w"],
            "h": bounding_box["h"],
        }
    elif "tag" in frame:
        annotation_type = "tag"
        annotation_data = {}
    elif "line" in frame:
        annotation_type = "line"
        annotation_data = {"path": frame["line"]["path"]}
    elif "keypoint" in frame:
        keypoint = frame["keypoint"]
        annotation_type = "keypoint"
        annotation_data = {"x": keypoint["x"], "y": keypoint["y"]}
    elif "ellipse" in frame:
        # The frame's own dict is returned as-is (not copied).
        annotation_type = "ellipse"
        annotation_data = frame["ellipse"]
    elif "cuboid" in frame:
        # The frame's own dict is returned as-is (not copied).
        annotation_type = "cuboid"
        annotation_data = frame["cuboid"]
    elif "skeleton" in frame:
        annotation_type = "skeleton"
        annotation_data = {"nodes": frame["skeleton"]["nodes"]}
    elif "table" in frame:
        table = frame["table"]
        annotation_type = "table"
        annotation_data = {
            "bounding_box": table["bounding_box"],
            "cells": table["cells"],
        }
    elif "string" in frame:
        annotation_type = "string"
        annotation_data = {"sources": frame["string"]["sources"]}
    elif "graph" in frame:
        graph = frame["graph"]
        annotation_type = "graph"
        # Fixed: the nodes/edges dict previously overwrote ``annotation_type``,
        # so the type came back as a dict and the data was never set.
        annotation_data = {
            "nodes": graph["nodes"],
            "edges": graph["edges"],
        }
    elif "mask" in frame:
        annotation_type = "mask"
        annotation_data = {}
    elif "raster_layer" in frame:
        raster_layer = frame["raster_layer"]
        annotation_type = "raster_layer"
        annotation_data = {
            "dense_rle": raster_layer["dense_rle"],
            "mask_annotation_ids_mapping": raster_layer[
                "mask_annotation_ids_mapping"
            ],
            "total_pixels": raster_layer["total_pixels"],
        }

    return annotation_type, annotation_data
- - Returns - ------- - Dict[str, List[dt.Polygon]] - Dictionary with the key ``path`` containing a list of coordinates in the format of - ``[[{x: x1, y:y1}, ..., {x: xn, y:yn}], ..., [{x: x1, y:y1}, ..., {x: xn, y:yn}]]``. - - Raises - ------ - ValueError - If sequences is a falsy value (such as ``[]``) or if it is in an incorrect format. - """ - if not sequences: - raise ValueError("No sequences provided") - # If there is a single sequences composing the instance then this is - # transformed to polygons = [[x1, y1, ..., xn, yn]] - if not isinstance(sequences[0], list): - sequences = [sequences] - - if not isinstance(sequences[0][0], (int, float)): - raise ValueError("Unknown input format") - - def grouped(iterable, n): - return zip(*[iter(iterable)] * n) - - polygons = [] - for sequence in sequences: - path = [] - for x, y in grouped(sequence, 2): - # Clip coordinates to the image size - x = max(min(x, width - 1) if width else x, 0) - y = max(min(y, height - 1) if height else y, 0) - path.append({"x": x, "y": y}) - polygons.append(path) - return {"path": polygons} - - -@deprecation.deprecated( - deprecated_in="0.7.5", - removed_in="0.8.0", - current_version=__version__, - details="Do not use.", -) -def convert_xyxy_to_bounding_box(box: List[Union[int, float]]) -> dt.BoundingBox: - """ - Converts a list of xy coordinates representing a bounding box into a dictionary. - - Parameters - ---------- - box : List[Union[int, float]] - List of arrays of coordinates in the format [x1, y1, x2, y2] - - Returns - ------- - BoundingBox - Bounding box in the format ``{x: x1, y: y1, h: height, w: width}``. - - Raises - ------ - ValueError - If ``box`` has an incorrect format. 
- """ - if not isinstance(box[0], float) and not isinstance(box[0], int): - raise ValueError("Unknown input format") - - x1, y1, x2, y2 = box - width = x2 - x1 - height = y2 - y1 - return {"x": x1, "y": y1, "w": width, "h": height} - - -@deprecation.deprecated( - deprecated_in="0.7.5", - removed_in="0.8.0", - current_version=__version__, - details="Do not use.", -) -def convert_bounding_box_to_xyxy(box: dt.BoundingBox) -> List[float]: - """ - Converts dictionary representing a bounding box into a list of xy coordinates. - - Parameters - ---------- - box : BoundingBox - Bounding box in the format ``{x: x1, y: y1, h: height, w: width}``. - - Returns - ------- - List[float] - List of arrays of coordinates in the format ``[x1, y1, x2, y2]``. - """ - - x2 = box["x"] + box["width"] - y2 = box["y"] + box["height"] - return [box["x"], box["y"], x2, y2] - - def convert_polygons_to_mask( polygons: List, height: int, width: int, value: Optional[int] = 1 ) -> np.ndarray: diff --git a/tests/darwin/dataset/dataset_utils_test.py b/tests/darwin/dataset/dataset_utils_test.py index 92f4fc18c..cee354aec 100644 --- a/tests/darwin/dataset/dataset_utils_test.py +++ b/tests/darwin/dataset/dataset_utils_test.py @@ -27,20 +27,58 @@ def open_resource_file(): def parsed_annotation_file(): return { + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "test.jpg", + "path": "/", + "slots": [ + { + "type": "image", + "slot_name": "0", + "width": 1920, + "height": 1080, + "source_files": [ + { + "file_name": "test.jpg", + "url": "https://darwin.v7labs.com/test.jpg", + } + ], + } + ], + }, "annotations": [ - {"name": "class_1", "polygon": {"path": []}}, - {"name": "class_1", "polygon": {"path": []}}, - {"name": "class_2", "polygon": {"path": []}}, - {"name": "class_2", "polygon": {"path": []}}, - {"name": "class_2", "polygon": {"path": []}}, - {"name": "class_3", "polygon": {"path": []}}, + { + "name": "class_1", 
+ "polygon": {"paths": [[{"x": 0, "y": 0}]]}, + "slot_names": ["0"], + }, + { + "name": "class_1", + "polygon": {"paths": [[{"x": 0, "y": 0}]]}, + "slot_names": ["0"], + }, + { + "name": "class_2", + "polygon": {"paths": [[{"x": 0, "y": 0}]]}, + "slot_names": ["0"], + }, + { + "name": "class_2", + "polygon": {"paths": [[{"x": 0, "y": 0}]]}, + "slot_names": ["0"], + }, + { + "name": "class_2", + "polygon": {"paths": [[{"x": 0, "y": 0}]]}, + "slot_names": ["0"], + }, + { + "name": "class_3", + "polygon": {"paths": [[{"x": 0, "y": 0}]]}, + "slot_names": ["0"], + }, ], - "image": { - "filename": "test.jpg", - "height": 1080, - "url": "https://darwin.v7labs.com/test.jpg", - "width": 1920, - }, } @@ -73,32 +111,60 @@ def annotations_path(self, tmp_path: Path): def test_builds_correct_mapping_dictionaries(self, annotations_path: Path): payload = { + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "0.jpg", + "path": "/", + "slots": [ + { + "type": "image", + "slot_name": "0", + "source_files": [ + {"file_name": "0.jpg", "url": "https://example.com/0.jpg"} + ], + } + ], + }, "annotations": [ - {"name": "class_1", "polygon": {"path": []}}, + {"name": "class_1", "polygon": {"paths": [[]]}}, { "name": "class_2", "bounding_box": {"x": 0, "y": 0, "w": 100, "h": 100}, }, - {"name": "class_3", "polygon": {"path": []}}, + {"name": "class_3", "polygon": {"paths": [[]]}}, {"name": "class_4", "tag": {}}, - {"name": "class_1", "polygon": {"path": []}}, + {"name": "class_1", "polygon": {"paths": [[]]}}, ], - "image": {"filename": "0.jpg"}, } _create_annotation_file(annotations_path, "0.json", payload) payload = { + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "1.jpg", + "path": "/", + "slots": [ + { + "type": "image", + "slot_name": "0", + "source_files": [ + {"file_name": "1.jpg", "url": 
"https://example.com/1.jpg"} + ], + } + ], + }, "annotations": [ - {"name": "class_5", "polygon": {"path": []}}, + {"name": "class_5", "polygon": {"paths": [[]]}}, { "name": "class_6", "bounding_box": {"x": 0, "y": 0, "w": 100, "h": 100}, }, - {"name": "class_1", "polygon": {"path": []}}, + {"name": "class_1", "polygon": {"paths": [[]]}}, {"name": "class_4", "tag": {}}, - {"name": "class_1", "polygon": {"path": []}}, + {"name": "class_1", "polygon": {"paths": [[]]}}, ], - "image": {"filename": "1.jpg"}, } _create_annotation_file(annotations_path, "1.json", payload) class_dict, index_dict = extract_classes(annotations_path, "polygon") @@ -123,34 +189,94 @@ def test_extract_multiple_annotation_types(self, annotations_path: Path): annotations_path, "0.json", { + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "0.jpg", + "path": "/", + "slots": [ + { + "type": "image", + "slot_name": "0", + "source_files": [ + { + "file_name": "0.jpg", + "url": "https://example.com/0.jpg", + } + ], + } + ], + }, "annotations": [ - {"name": "class_1", "polygon": {"path": []}}, + { + "name": "class_1", + "polygon": {"paths": [[]]}, + "slot_names": ["0"], + }, { "name": "class_2", "bounding_box": {"x": 0, "y": 0, "w": 100, "h": 100}, + "slot_names": ["0"], + }, + { + "name": "class_3", + "polygon": {"paths": [[]]}, + "slot_names": ["0"], + }, + {"name": "class_4", "slot_names": ["0"]}, + { + "name": "class_1", + "polygon": {"paths": [[]]}, + "slot_names": ["0"], }, - {"name": "class_3", "polygon": {"path": []}}, - {"name": "class_4", "tag": {}}, - {"name": "class_1", "polygon": {"path": []}}, ], - "image": {"filename": "0.jpg"}, }, ) _create_annotation_file( annotations_path, "1.json", { + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "1.jpg", + "path": "/", + "slots": [ + { + "type": "image", + "slot_name": "0", 
+ "source_files": [ + { + "file_name": "1.jpg", + "url": "https://example.com/1.jpg", + } + ], + } + ], + }, "annotations": [ - {"name": "class_5", "polygon": {"path": []}}, + { + "name": "class_5", + "polygon": {"paths": [[]]}, + "slot_names": ["0"], + }, { "name": "class_6", "bounding_box": {"x": 0, "y": 0, "w": 100, "h": 100}, + "slot_names": ["0"], + }, + { + "name": "class_1", + "polygon": {"paths": [[]]}, + "slot_names": ["0"], + }, + {"name": "class_4", "slot_names": ["0"]}, + { + "name": "class_1", + "polygon": {"paths": [[]]}, + "slot_names": ["0"], }, - {"name": "class_1", "polygon": {"path": []}}, - {"name": "class_4", "tag": {}}, - {"name": "class_1", "polygon": {"path": []}}, ], - "image": {"filename": "1.jpg"}, }, ) diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index 3df57d8a5..98adcb6a5 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -791,7 +791,7 @@ def fake_download_zip(self, path): with patch.object( RemoteDataset, "get_release", return_value=stub_release_response - ) as get_release_stub: + ): with patch.object(Release, "download_zip", new=fake_download_zip): remote_dataset.pull(only_annotations=True) metadata_path = ( diff --git a/tests/darwin/datatypes_test.py b/tests/darwin/datatypes_test.py index 13852c493..e2fb6d662 100644 --- a/tests/darwin/datatypes_test.py +++ b/tests/darwin/datatypes_test.py @@ -12,7 +12,6 @@ from darwin.datatypes import ( ObjectStore, Point, - make_complex_polygon, make_polygon, parse_property_classes, split_paths_by_metadata, @@ -22,24 +21,24 @@ class TestMakePolygon: def test_it_returns_annotation_with_default_params(self): class_name: str = "class_name" - points: List[Point] = [{"x": 1, "y": 2}, {"x": 3, "y": 4}, {"x": 1, "y": 2}] + points: List[Point] = [[{"x": 1, "y": 2}, {"x": 3, "y": 4}, {"x": 1, "y": 2}]] annotation = make_polygon(class_name, points) - assert_annotation_class(annotation, 
class_name, "polygon") + assert_annotation_class(annotation, class_name, "polygon", "polygon") - path = annotation.data.get("path") - assert path == points + paths = annotation.data.get("paths") + assert paths == points def test_it_returns_annotation_with_bounding_box(self): class_name: str = "class_name" - points: List[Point] = [{"x": 1, "y": 2}, {"x": 3, "y": 4}, {"x": 1, "y": 2}] + points: List[Point] = [[{"x": 1, "y": 2}, {"x": 3, "y": 4}, {"x": 1, "y": 2}]] bbox: Dict[str, float] = {"x": 1, "y": 2, "w": 2, "h": 2} annotation = make_polygon(class_name, points, bbox) - assert_annotation_class(annotation, class_name, "polygon") + assert_annotation_class(annotation, class_name, "polygon", "polygon") - path = annotation.data.get("path") - assert path == points + paths = annotation.data.get("paths") + assert paths == points class_bbox = annotation.data.get("bounding_box") assert class_bbox == bbox @@ -52,9 +51,9 @@ def test_it_returns_annotation_with_default_params(self): [{"x": 1, "y": 2}, {"x": 3, "y": 4}, {"x": 1, "y": 2}], [{"x": 4, "y": 5}, {"x": 6, "y": 7}, {"x": 4, "y": 5}], ] - annotation = make_complex_polygon(class_name, points) + annotation = make_polygon(class_name, points) - assert_annotation_class(annotation, class_name, "complex_polygon", "polygon") + assert_annotation_class(annotation, class_name, "polygon", "polygon") paths = annotation.data.get("paths") assert paths == points @@ -66,9 +65,9 @@ def test_it_returns_annotation_with_bounding_box(self): [{"x": 4, "y": 5}, {"x": 6, "y": 7}, {"x": 4, "y": 5}], ] bbox: Dict[str, float] = {"x": 1, "y": 2, "w": 2, "h": 2} - annotation = make_complex_polygon(class_name, points, bbox) + annotation = make_polygon(class_name, points, bbox) - assert_annotation_class(annotation, class_name, "complex_polygon", "polygon") + assert_annotation_class(annotation, class_name, "polygon", "polygon") paths = annotation.data.get("paths") assert paths == points diff --git a/tests/darwin/exporter/formats/export_coco_test.py 
b/tests/darwin/exporter/formats/export_coco_test.py index ecdac9aed..b3471ddf3 100644 --- a/tests/darwin/exporter/formats/export_coco_test.py +++ b/tests/darwin/exporter/formats/export_coco_test.py @@ -19,7 +19,7 @@ def annotation_file(self) -> dt.AnnotationFile: def test_polygon_include_extras(self, annotation_file: dt.AnnotationFile): polygon = dt.Annotation( dt.AnnotationClass("polygon_class", "polygon"), - {"path": [{"x": 1, "y": 1}, {"x": 2, "y": 2}, {"x": 1, "y": 2}]}, + {"paths": [{"x": 1, "y": 1}, {"x": 2, "y": 2}, {"x": 1, "y": 2}]}, [dt.make_instance_id(1)], ) diff --git a/tests/darwin/exporter/formats/export_mask_test.py b/tests/darwin/exporter/formats/export_mask_test.py index b0e01e3be..24bb61593 100644 --- a/tests/darwin/exporter/formats/export_mask_test.py +++ b/tests/darwin/exporter/formats/export_mask_test.py @@ -173,9 +173,7 @@ def annotations() -> List[dt.Annotation]: ), dt.Annotation(dt.AnnotationClass("class_2", "mask"), data={"sparse_rle": []}), dt.Annotation(dt.AnnotationClass("class_3", "polygon"), data={"path": "data"}), - dt.Annotation( - dt.AnnotationClass("class_4", "complex_polygon"), data={"paths": "data"} - ), + dt.Annotation(dt.AnnotationClass("class_4", "polygon"), data={"paths": "data"}), ] @@ -232,12 +230,14 @@ def test_beyond_polygon_beyond_window() -> None: dt.Annotation( dt.AnnotationClass("cat1", "polygon"), { - "path": [ - {"x": -1, "y": -1}, - {"x": -1, "y": 1}, - {"x": 1, "y": 1}, - {"x": 1, "y": -1}, - {"x": -1, "y": -1}, + "paths": [ + [ + {"x": -1, "y": -1}, + {"x": -1, "y": 1}, + {"x": 1, "y": 1}, + {"x": 1, "y": -1}, + {"x": -1, "y": -1}, + ] ], "bounding_box": {"x": -1, "y": -1, "w": 2, "h": 2}, }, @@ -268,13 +268,13 @@ def test_beyond_polygon_beyond_window() -> None: assert not errors -def test_beyond_complex_polygon() -> None: +def test_beyond_multi_path_polygons() -> None: mask = np.zeros((5, 5), dtype=np.uint8) colours: dt.MaskTypes.ColoursDict = {} categories: dt.MaskTypes.CategoryList = ["__background__"] 
annotations: List[dt.AnnotationLike] = [ dt.Annotation( - dt.AnnotationClass("cat3", "complex_polygon"), + dt.AnnotationClass("cat3", "polygon"), { "paths": [ [ @@ -333,7 +333,7 @@ def test_render_polygons() -> None: dt.Annotation( dt.AnnotationClass("cat1", "polygon"), { - "path": [ + "paths": [ {"x": 10, "y": 10}, {"x": 20, "y": 10}, {"x": 20, "y": 20}, @@ -345,7 +345,7 @@ def test_render_polygons() -> None: dt.Annotation( dt.AnnotationClass("cat2", "polygon"), { - "path": [ + "paths": [ {"x": 30, "y": 30}, {"x": 40, "y": 30}, {"x": 40, "y": 40}, @@ -357,7 +357,7 @@ def test_render_polygons() -> None: dt.Annotation( dt.AnnotationClass("cat1", "polygon"), { - "path": [ + "paths": [ {"x": 50, "y": 50}, {"x": 60, "y": 50}, {"x": 60, "y": 60}, @@ -369,12 +369,12 @@ def test_render_polygons() -> None: dt.Annotation( dt.AnnotationClass("cat1", "polygon"), { - "path": [{"x": 10, "y": 80}, {"x": 20, "y": 80}, {"x": 20, "y": 60}], + "paths": [{"x": 10, "y": 80}, {"x": 20, "y": 80}, {"x": 20, "y": 60}], "bounding_box": base_bb, }, ), dt.Annotation( - dt.AnnotationClass("cat3", "complex_polygon"), + dt.AnnotationClass("cat3", "polygon"), { "paths": [ [ @@ -736,7 +736,7 @@ def test_class_mappings_preserved_on_large_export(tmpdir) -> None: dt.Annotation( dt.AnnotationClass("cat1", "polygon"), { - "path": [ + "paths": [ {"x": 0, "y": 0}, {"x": 1, "y": 0}, {"x": 1, "y": 1}, @@ -748,7 +748,7 @@ def test_class_mappings_preserved_on_large_export(tmpdir) -> None: dt.Annotation( dt.AnnotationClass("cat2", "polygon"), { - "path": [ + "paths": [ {"x": 2, "y": 2}, {"x": 4, "y": 2}, {"x": 4, "y": 4}, @@ -760,7 +760,7 @@ def test_class_mappings_preserved_on_large_export(tmpdir) -> None: dt.Annotation( dt.AnnotationClass("cat3", "polygon"), { - "path": [ + "paths": [ {"x": 5, "y": 5}, {"x": 8, "y": 5}, {"x": 8, "y": 8}, @@ -772,7 +772,7 @@ def test_class_mappings_preserved_on_large_export(tmpdir) -> None: dt.Annotation( dt.AnnotationClass("cat1", "polygon"), { - "path": [ + "paths": [ 
{"x": 4, "y": 0}, {"x": 5, "y": 0}, {"x": 5, "y": 1}, @@ -782,7 +782,7 @@ def test_class_mappings_preserved_on_large_export(tmpdir) -> None: }, ), dt.Annotation( - dt.AnnotationClass("cat4", "complex_polygon"), + dt.AnnotationClass("cat4", "polygon"), { "paths": [ [ diff --git a/tests/darwin/exporter/formats/export_pascalvoc_test.py b/tests/darwin/exporter/formats/export_pascalvoc_test.py index d31251267..1bff42daf 100644 --- a/tests/darwin/exporter/formats/export_pascalvoc_test.py +++ b/tests/darwin/exporter/formats/export_pascalvoc_test.py @@ -80,10 +80,10 @@ def test_xml_has_bounding_boxes_of_polygons(self): assert_xml_element_text(bndbox, "xmax", "1803") assert_xml_element_text(bndbox, "ymax", "983") - def test_xml_has_bounding_boxes_of_complex_polygons(self): + def test_xml_has_bounding_boxes_of_multi_path_polygons(self): annotation_class = AnnotationClass( name="rubber", - annotation_type="complex_polygon", + annotation_type="polygon", annotation_internal_type="polygon", ) annotation = Annotation( diff --git a/tests/darwin/importer/formats/import_darwin_test.py b/tests/darwin/importer/formats/import_darwin_test.py index 6dd6c2335..d6db42aec 100644 --- a/tests/darwin/importer/formats/import_darwin_test.py +++ b/tests/darwin/importer/formats/import_darwin_test.py @@ -22,53 +22,148 @@ def test_it_parses_slot_names_properly_if_present_for_sequences( self, file_path: Path ): json: str = """ - { - "dataset": "test", - "image": { - "width": 2479, - "height": 3508, - "fps": 30.0, - "original_filename": "Invoice.pdf", - "filename": "Invoice.pdf", - "url": "https://staging.v7labs.com/api/v2/teams/rafals-team/files/1a46356d-005b-4095-98fc-fc4ea6d7294a/original", - "path": "/", - "workview_url": "https://staging.v7labs.com/teams/rafals-team/items/0182e9d2-d217-3260-52db-d7828422f86b/workview", - "frame_count": 2, - "frame_urls": [ - "https://staging.v7labs.com/api/v2/teams/rafals-team/files/1a46356d-005b-4095-98fc-fc4ea6d7294a/sections/0", - 
"https://staging.v7labs.com/api/v2/teams/rafals-team/files/1a46356d-005b-4095-98fc-fc4ea6d7294a/sections/1" - ] + { + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "Invoice.pdf", + "path": "/", + "source_info": { + "item_id": "018e3385-822c-fbab-e766-acd624a8a273", + "dataset": { + "name": "folder_test", + "slug": "folder_test", + "dataset_management_url": "https://darwin.v7labs.com/datasets/722603/dataset-management" + }, + "team": { + "name": "V7 John", + "slug": "v7-john" + }, + "workview_url": "https://darwin.v7labs.com/workview?dataset=722603&item=018e3385-822c-fbab-e766-acd624a8a273" }, - "annotations": [ + "slots": [ { - "frames": { - "0": { - "bounding_box": { - "h": 338.29, - "w": 444.87, - "x": 845.6, - "y": 1056.57 - }, - "keyframe": true, - "text": { - "text": "some weird text" - } - } - }, - "id": "d89a5895-c721-420b-9c7d-d71880e3679b", - "interpolate_algorithm": "linear-1.1", - "interpolated": true, - "name": "address", - "segments": [ - [0, 2] - ], - "slot_names": [ - "my_slot" - ] + "type": "video", + "slot_name": "0", + "width": 1920, + "height": 1080, + "fps": 1, + "thumbnail_url": "https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/thumbnail", + "source_files": [ + { + "file_name": "mini_uct.mp4", + "url": "https://darwin.v7labs.com/api/v2/teams/v7-john/uploads/db035ac4-4327-4b11-85b7-432c0e09c896" + } + ], + "frame_count": 8, + "frame_urls": [ + "https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/sections/0", + "https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/sections/1", + "https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/sections/2", + "https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/sections/3", + 
"https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/sections/4", + "https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/sections/5", + "https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/sections/6", + "https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/sections/7" + ] } ] + }, + "annotations": [ + { + "frames": { + "0": { + "bounding_box": { + "h": 152.502, + "w": 309.579, + "x": 466.6561, + "y": 338.5544 + }, + "keyframe": true + }, + "1": { + "bounding_box": { + "h": 152.502, + "w": 309.579, + "x": 466.6561, + "y": 338.5544 + }, + "keyframe": false + }, + "2": { + "bounding_box": { + "h": 152.502, + "w": 309.579, + "x": 466.6561, + "y": 338.5544 + }, + "keyframe": false + }, + "3": { + "bounding_box": { + "h": 152.502, + "w": 309.579, + "x": 466.6561, + "y": 338.5544 + }, + "keyframe": false + }, + "4": { + "bounding_box": { + "h": 152.502, + "w": 309.579, + "x": 466.6561, + "y": 338.5544 + }, + "keyframe": false + }, + "5": { + "bounding_box": { + "h": 152.502, + "w": 309.579, + "x": 466.6561, + "y": 338.5544 + }, + "keyframe": false + }, + "6": { + "bounding_box": { + "h": 152.502, + "w": 309.579, + "x": 466.6561, + "y": 338.5544 + }, + "keyframe": false + }, + "7": { + "bounding_box": { + "h": 152.502, + "w": 309.579, + "x": 466.6561, + "y": 338.5544 + }, + "keyframe": true + } + }, + "hidden_areas": [], + "id": "06865ac8-d2f8-4b8f-a653-9cd08df5b3f5", + "interpolate_algorithm": "linear-1.1", + "interpolated": true, + "name": "curia", + "properties": [], + "ranges": [ + [ + 0, + 8 + ] + ], + "slot_names": [ + "0" + ] } - """ + ] + } + """ file_path.write_text(json) @@ -83,37 +178,151 @@ def test_it_parses_slot_names_properly_if_present_for_sequences( assert annotation_file.annotations for annotation in annotation_file.annotations: - assert annotation.slot_names == ["my_slot"] + assert annotation.slot_names 
== ["0"] def test_it_parses_slot_names_properly_if_present_for_images(self, file_path: Path): json: str = """ { - "dataset": "test", - "image": { - "width": 500, - "height": 375, - "original_filename": "my_image.jpg", - "filename": "my_image.jpg", - "url": "https://staging.v7labs.com/api/v2/teams/rafals-team/files/d119a57f-bbbb-4b9b-a7a2-6dcb16a59e98/original", - "thumbnail_url": "https://staging.v7labs.com/api/v2/teams/rafals-team/files/d119a57f-bbbb-4b9b-a7a2-6dcb16a59e98/thumbnail", - "path": "/", - "workview_url": "https://staging.v7labs.com/teams/rafals-team/items/0182e9d2-d217-681d-2448-197904d2e05c/workview" + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "ferrari-laferrari.jpg", + "path": "/", + "source_info": { + "item_id": "018c4450-d91d-ff3e-b226-60d48b66f86e", + "dataset": { + "name": "bbox", + "slug": "bbox", + "dataset_management_url": "https://darwin.v7labs.com/datasets/623079/dataset-management" + }, + "team": { + "name": "V7 John", + "slug": "v7-john" + }, + "workview_url": "https://darwin.v7labs.com/workview?dataset=623079&item=018c4450-d91d-ff3e-b226-60d48b66f86e" }, - "annotations": [ + "slots": [ { - "bounding_box": { - "h": 151.76, - "w": 140.89, - "x": 252.09, - "y": 173.49 - }, - "id": "ab8035d0-61b8-4294-b348-085461555df8", - "name": "dog", - "slot_names": [ - "my_slot" - ] + "type": "image", + "slot_name": "0", + "width": 640, + "height": 425, + "thumbnail_url": "https://darwin.v7labs.com/api/v2/teams/v7-john/files/ddc5cbc2-8438-4e36-8ab6-43e2f3746bf1/thumbnail", + "source_files": [ + { + "file_name": "000000007751.jpg", + "url": "https://darwin.v7labs.com/api/v2/teams/v7-john/uploads/3395d29a-7539-4a51-a3ca-c7a95f460345" + } + ] } ] + }, + "annotations": [ + { + "bounding_box": { + "h": 53.963699999999996, + "w": 83.7195, + "x": 32.7817, + "y": 53.9638 + }, + "id": "8940a690-d8a9-4c83-9f59-38f0ef780246", + "name": "new-class-2", + "polygon": { + 
"paths": [ + [ + { + "x": 65.0591, + "y": 53.9638 + }, + { + "x": 32.7817, + "y": 107.9275 + }, + { + "x": 116.5012, + "y": 104.9015 + } + ] + ] + }, + "properties": [], + "slot_names": [ + "0" + ] + }, + { + "id": "782618fb-4c69-436e-80cb-71765d255dbf", + "name": "skeleton-test", + "properties": [], + "skeleton": { + "nodes": [ + { + "name": "node", + "occluded": false, + "x": 264.7754, + "y": 121.5445 + }, + { + "name": "2", + "occluded": false, + "x": 245.1335, + "y": 107.3425 + }, + { + "name": "3", + "occluded": false, + "x": 240.4646, + "y": 125.4178 + }, + { + "name": "4", + "occluded": false, + "x": 280.3923, + "y": 137.468 + } + ] + }, + "slot_names": [ + "0" + ] + }, + { + "id": "b6bea00c-c8a4-4d34-b72f-88567d9e8cd5", + "name": "skeleton-test", + "properties": [], + "skeleton": { + "nodes": [ + { + "name": "node", + "occluded": false, + "x": 136.1702, + "y": 306.1308 + }, + { + "name": "2", + "occluded": false, + "x": 145.1629, + "y": 291.263 + }, + { + "name": "3", + "occluded": false, + "x": 147.3005, + "y": 310.1857 + }, + { + "name": "4", + "occluded": false, + "x": 129.0203, + "y": 322.8007 + } + ] + }, + "slot_names": [ + "0" + ] + } + ] } """ @@ -123,64 +332,159 @@ def test_it_parses_slot_names_properly_if_present_for_images(self, file_path: Pa assert annotation_file is not None assert annotation_file.path == file_path - assert annotation_file.filename == "my_image.jpg" + assert annotation_file.filename == "ferrari-laferrari.jpg" assert annotation_file.annotation_classes assert annotation_file.remote_path == "/" assert annotation_file.annotations for annotation in annotation_file.annotations: - assert annotation.slot_names == ["my_slot"] + assert annotation.slot_names == ["0"] def test_it_skips_slot_names_when_no_slot_names_for_sequences( self, file_path: Path ): json: str = """ - { - "dataset": "test", - "image": { - "width": 2479, - "height": 3508, - "fps": 30.0, - "original_filename": "Invoice.pdf", - "filename": "Invoice.pdf", - "url": 
"https://staging.v7labs.com/api/v2/teams/rafals-team/files/1a46356d-005b-4095-98fc-fc4ea6d7294a/original", - "path": "/", - "workview_url": "https://staging.v7labs.com/teams/rafals-team/items/0182e9d2-d217-3260-52db-d7828422f86b/workview", - "frame_count": 2, - "frame_urls": [ - "https://staging.v7labs.com/api/v2/teams/rafals-team/files/1a46356d-005b-4095-98fc-fc4ea6d7294a/sections/0", - "https://staging.v7labs.com/api/v2/teams/rafals-team/files/1a46356d-005b-4095-98fc-fc4ea6d7294a/sections/1" - ] + { + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "Invoice.pdf", + "path": "/", + "source_info": { + "item_id": "018e3385-822c-fbab-e766-acd624a8a273", + "dataset": { + "name": "folder_test", + "slug": "folder_test", + "dataset_management_url": "https://darwin.v7labs.com/datasets/722603/dataset-management" + }, + "team": { + "name": "V7 John", + "slug": "v7-john" + }, + "workview_url": "https://darwin.v7labs.com/workview?dataset=722603&item=018e3385-822c-fbab-e766-acd624a8a273" }, - "annotations": [ + "slots": [ { - "frames": { - "0": { - "bounding_box": { - "h": 338.29, - "w": 444.87, - "x": 845.6, - "y": 1056.57 - }, - "keyframe": true, - "text": { - "text": "some weird text" - } - } - }, - "id": "d89a5895-c721-420b-9c7d-d71880e3679b", - "interpolate_algorithm": "linear-1.1", - "interpolated": true, - "name": "address", - "segments": [ - [0, 2] - ] + "type": "video", + "slot_name": "", + "width": 1920, + "height": 1080, + "fps": 1, + "thumbnail_url": "https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/thumbnail", + "source_files": [ + { + "file_name": "mini_uct.mp4", + "url": "https://darwin.v7labs.com/api/v2/teams/v7-john/uploads/db035ac4-4327-4b11-85b7-432c0e09c896" + } + ], + "frame_count": 8, + "frame_urls": [ + "https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/sections/0", + 
"https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/sections/1", + "https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/sections/2", + "https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/sections/3", + "https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/sections/4", + "https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/sections/5", + "https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/sections/6", + "https://darwin.v7labs.com/api/v2/teams/v7-john/files/926ee041-03c0-4354-aea2-8b9db422341d/sections/7" + ] } ] + }, + "annotations": [ + { + "frames": { + "0": { + "bounding_box": { + "h": 152.502, + "w": 309.579, + "x": 466.6561, + "y": 338.5544 + }, + "keyframe": true + }, + "1": { + "bounding_box": { + "h": 152.502, + "w": 309.579, + "x": 466.6561, + "y": 338.5544 + }, + "keyframe": false + }, + "2": { + "bounding_box": { + "h": 152.502, + "w": 309.579, + "x": 466.6561, + "y": 338.5544 + }, + "keyframe": false + }, + "3": { + "bounding_box": { + "h": 152.502, + "w": 309.579, + "x": 466.6561, + "y": 338.5544 + }, + "keyframe": false + }, + "4": { + "bounding_box": { + "h": 152.502, + "w": 309.579, + "x": 466.6561, + "y": 338.5544 + }, + "keyframe": false + }, + "5": { + "bounding_box": { + "h": 152.502, + "w": 309.579, + "x": 466.6561, + "y": 338.5544 + }, + "keyframe": false + }, + "6": { + "bounding_box": { + "h": 152.502, + "w": 309.579, + "x": 466.6561, + "y": 338.5544 + }, + "keyframe": false + }, + "7": { + "bounding_box": { + "h": 152.502, + "w": 309.579, + "x": 466.6561, + "y": 338.5544 + }, + "keyframe": true + } + }, + "hidden_areas": [], + "id": "06865ac8-d2f8-4b8f-a653-9cd08df5b3f5", + "interpolate_algorithm": "linear-1.1", + "interpolated": true, + "name": "curia", + "properties": [], + "ranges": [ + [ + 0, + 8 + ] + ], + 
"slot_names": [] } - """ - + ] + } + """ file_path.write_text(json) annotation_file: Optional[AnnotationFile] = parse_path(file_path) @@ -199,29 +503,54 @@ def test_it_skips_slot_names_when_no_slot_names_for_sequences( def test_it_skips_slot_names_when_no_slot_names_for_images(self, file_path: Path): json: str = """ { - "dataset": "test", - "image": { - "width": 500, - "height": 375, - "original_filename": "my_image.jpg", - "filename": "my_image.jpg", - "url": "https://staging.v7labs.com/api/v2/teams/rafals-team/files/d119a57f-bbbb-4b9b-a7a2-6dcb16a59e98/original", - "thumbnail_url": "https://staging.v7labs.com/api/v2/teams/rafals-team/files/d119a57f-bbbb-4b9b-a7a2-6dcb16a59e98/thumbnail", - "path": "/", + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "my_image.jpg", + "path": "/", + "source_info": { + "item_id": "0182e9d2-d217-681d-2448-197904d2e05c", + "dataset": { + "name": "test", + "slug": "test", + "dataset_management_url": "https://staging.v7labs.com/teams/rafals-team/items/0182e9d2-d217-681d-2448-197904d2e05c/workview" + }, + "team": { + "name": "rafals-team", + "slug": "rafals-team" + }, "workview_url": "https://staging.v7labs.com/teams/rafals-team/items/0182e9d2-d217-681d-2448-197904d2e05c/workview" }, - "annotations": [ + "slots": [ { - "bounding_box": { - "h": 151.76, - "w": 140.89, - "x": 252.09, - "y": 173.49 - }, - "id": "ab8035d0-61b8-4294-b348-085461555df8", - "name": "dog" + "type": "image", + "slot_name": "", + "width": 500, + "height": 375, + "thumbnail_url": "https://staging.v7labs.com/api/v2/teams/rafals-team/files/d119a57f-bbbb-4b9b-a7a2-6dcb16a59e98/thumbnail", + "source_files": [ + { + "file_name": "my_image.jpg", + "url": "https://staging.v7labs.com/api/v2/teams/rafals-team/files/d119a57f-bbbb-4b9b-a7a2-6dcb16a59e98/original" + } + ] } ] + }, + "annotations": [ + { + "bounding_box": { + "h": 151.76, + "w": 140.89, + "x": 252.09, + "y": 173.49 + }, + 
"id": "ab8035d0-61b8-4294-b348-085461555df8", + "name": "dog", + "properties": [], + "slot_names": [] + } + ] } """ diff --git a/tests/darwin/importer/formats/import_labelbox_test.py b/tests/darwin/importer/formats/import_labelbox_test.py index f01f7b320..755c0e93a 100644 --- a/tests/darwin/importer/formats/import_labelbox_test.py +++ b/tests/darwin/importer/formats/import_labelbox_test.py @@ -384,14 +384,16 @@ def test_it_imports_polygon_images(self, file_path: Path): assert_polygon( polygon_annotation, [ - {"x": 3665.814, "y": 351.628}, - {"x": 3762.93, "y": 810.419}, - {"x": 3042.93, "y": 914.233}, + [ + {"x": 3665.814, "y": 351.628}, + {"x": 3762.93, "y": 810.419}, + {"x": 3042.93, "y": 914.233}, + ], ], ) annotation_class = polygon_annotation.annotation_class - assert_annotation_class(annotation_class, "Fish", "polygon") + assert_annotation_class(annotation_class, "Fish", "polygon", "polygon") def test_it_imports_point_images(self, file_path: Path): json: str = """ @@ -728,8 +730,8 @@ def assert_bbox(annotation: Annotation, x: float, y: float, h: float, w: float) assert data.get("h") == h -def assert_polygon(annotation: Annotation, points: List[Point]) -> None: - actual_points = annotation.data.get("path") +def assert_polygon(annotation: Annotation, points: List[List[Point]]) -> None: + actual_points = annotation.data.get("paths") assert actual_points assert actual_points == points diff --git a/tests/darwin/importer/formats/import_superannotate_test.py b/tests/darwin/importer/formats/import_superannotate_test.py index 2b28ada40..5af2c5edd 100644 --- a/tests/darwin/importer/formats/import_superannotate_test.py +++ b/tests/darwin/importer/formats/import_superannotate_test.py @@ -471,14 +471,18 @@ def test_imports_polygon_vectors( assert_polygon( polygon_annotation, [ - {"x": 1053, "y": 587.2}, - {"x": 1053.1, "y": 586}, - {"x": 1053.8, "y": 585.4}, + [ + {"x": 1053, "y": 587.2}, + {"x": 1053.1, "y": 586}, + {"x": 1053.8, "y": 585.4}, + ], ], ) annotation_class = 
polygon_annotation.annotation_class - assert_annotation_class(annotation_class, "Person-polygon", "polygon") + assert_annotation_class( + annotation_class, "Person-polygon", "polygon", "polygon" + ) def test_raises_if_polyline_has_missing_points( self, annotations_file_path: Path, classes_file_path: Path @@ -890,8 +894,8 @@ def assert_bbox(annotation: Annotation, x: float, y: float, h: float, w: float) assert data.get("h") == h -def assert_polygon(annotation: Annotation, points: List[Point]) -> None: - actual_points = annotation.data.get("path") +def assert_polygon(annotation: Annotation, points: List[List[Point]]) -> None: + actual_points = annotation.data.get("paths") assert actual_points assert actual_points == points diff --git a/tests/darwin/importer/importer_mcpu_test.py b/tests/darwin/importer/importer_mcpu_test.py index ce7528325..9168fc5ea 100644 --- a/tests/darwin/importer/importer_mcpu_test.py +++ b/tests/darwin/importer/importer_mcpu_test.py @@ -99,7 +99,7 @@ def tearDown(self) -> None: def test_uses_mpire_if_use_multi_cpu_true( self, mock_wp: MagicMock, mock_gffp: MagicMock, mock_gmcus: MagicMock ) -> None: - from darwin.importer.importer import find_and_parse + from darwin.importer.importer import _find_and_parse mock_gmcus.return_value = (2, True) mock_gffp.return_value = [ @@ -123,7 +123,7 @@ def __exit__(self, *args) -> None: # type: ignore mock_wp.return_value = MockWorkerPool() mock_map.return_value = ["1", "2"] - result = find_and_parse( + result = _find_and_parse( mock_importer, [Path("example_dir")], self.mock_console, True, 2 ) @@ -138,7 +138,7 @@ def __exit__(self, *args) -> None: # type: ignore def test_runs_single_threaded_if_use_multi_cpu_false( self, mock_wp: MagicMock, mock_gffp: MagicMock ) -> None: - from darwin.importer.importer import find_and_parse + from darwin.importer.importer import _find_and_parse mock_gffp.return_value = [ Path("example_dir/file1.txt"), @@ -148,7 +148,7 @@ def test_runs_single_threaded_if_use_multi_cpu_false( 
mock_importer = MagicMock() mock_importer.side_effect = ["1", "2"] - result = find_and_parse( + result = _find_and_parse( mock_importer, [Path("example_dir")], self.mock_console, False ) @@ -163,7 +163,7 @@ def test_runs_single_threaded_if_use_multi_cpu_false( def test_returns_list_if_solo_value( self, mock_wp: MagicMock, mock_gffp: MagicMock, mock_gmcus: MagicMock ) -> None: - from darwin.importer.importer import find_and_parse + from darwin.importer.importer import _find_and_parse mock_gmcus.return_value = (2, True) mock_gffp.return_value = [ @@ -187,7 +187,7 @@ def __exit__(self, *args) -> None: # type: ignore mock_wp.return_value = MockWorkerPool() mock_map.return_value = "1" - result = find_and_parse( + result = _find_and_parse( mock_importer, [Path("example_dir")], self.mock_console, True, 2 ) @@ -203,7 +203,7 @@ def __exit__(self, *args) -> None: # type: ignore def test_returns_none_if_pool_raises_error( self, mock_wp: MagicMock, mock_gffp: MagicMock, mock_gmcus: MagicMock ) -> None: - from darwin.importer.importer import find_and_parse + from darwin.importer.importer import _find_and_parse mock_gmcus.return_value = (2, True) mock_gffp.return_value = [ @@ -227,7 +227,7 @@ def __exit__(self, *args) -> None: # type: ignore mock_wp.return_value = MockWorkerPool() mock_map.side_effect = Exception("Test") - result = find_and_parse( + result = _find_and_parse( mock_importer, [Path("example_dir")], self.mock_console, True, 2 ) diff --git a/tests/darwin/importer/importer_test.py b/tests/darwin/importer/importer_test.py index 15c87696f..b7822e19c 100644 --- a/tests/darwin/importer/importer_test.py +++ b/tests/darwin/importer/importer_test.py @@ -106,29 +106,35 @@ def test_handle_subs() -> None: assert result == expected_result -def test__handle_complex_polygon() -> None: - from darwin.importer.importer import _handle_complex_polygon +def test__format_polygon_for_import() -> None: + from darwin.importer.importer import _format_polygon_for_import - assert 
_handle_complex_polygon( - {}, - { - "example": "data", - "example2": "data2", - "example3": "data3", - }, - ) == { # type: ignore - "example": "data", - "example2": "data2", - "example3": "data3", - } - assert _handle_complex_polygon( + # Test case when "polygon" key is not in data + assert _format_polygon_for_import( dt.Annotation( - dt.AnnotationClass("Class", "bbox"), {"paths": [1, 2, 3, 4, 5]}, [], [] + dt.AnnotationClass("Class", "polygon"), {"paths": [1, 2, 3, 4, 5]}, [], [] ), - {"complex_polygon": "test_data"}, - ) == { - "polygon": {"path": 1, "additional_paths": [2, 3, 4, 5]}, - } + {"example": "data"}, + ) == {"example": "data"} + + # Test case when "polygon" key is in data and there is more than one path + assert _format_polygon_for_import( + dt.Annotation( + dt.AnnotationClass("Class", "polygon"), + {"paths": [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]}, + [], + [], + ), + {"polygon": {"paths": [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]}}, + ) == {"polygon": {"path": [1, 2, 3, 4, 5], "additional_paths": [[6, 7, 8, 9, 10]]}} + + # Test case when "polygon" key is in data and there is only one path + assert _format_polygon_for_import( + dt.Annotation( + dt.AnnotationClass("Class", "polygon"), {"paths": [[1, 2, 3, 4, 5]]}, [], [] + ), + {"polygon": {"paths": [[1, 2, 3, 4, 5]]}}, + ) == {"polygon": {"path": [1, 2, 3, 4, 5]}} def test__annotators_or_reviewers_to_payload() -> None: @@ -189,7 +195,7 @@ def test__get_annotation_data() -> None: annotation.data = "TEST DATA" - with patch_factory("_handle_complex_polygon") as mock_hcp, patch_factory( + with patch_factory("_format_polygon_for_import") as mock_hcp, patch_factory( "_handle_subs" ) as mock_hs, patch.object( dt.VideoAnnotation, "get_data", return_value="TEST VIDEO DATA" @@ -208,7 +214,7 @@ def test__get_annotation_data() -> None: assert mock_hcp.call_count == 1 assert mock_hs.call_count == 1 - with patch_factory("_handle_complex_polygon") as mock_hcp, patch_factory( + with patch_factory("_format_polygon_for_import") 
as mock_hcp, patch_factory( "_handle_subs" ) as mock_hs: from darwin.importer.importer import _get_annotation_data @@ -482,7 +488,7 @@ def test__parse_empty_masks_video(raster_layer_video_annotations) -> None: def test__import_annotations() -> None: - with patch_factory("_handle_complex_polygon") as mock_hcp, patch_factory( + with patch_factory("_format_polygon_for_import") as mock_hcp, patch_factory( "_handle_reviewers" ) as mock_hr, patch_factory("_handle_annotators") as mock_ha, patch_factory( "_handle_subs" diff --git a/tests/darwin/torch/dataset_test.py b/tests/darwin/torch/dataset_test.py index 21a60de4a..fac24bf6c 100644 --- a/tests/darwin/torch/dataset_test.py +++ b/tests/darwin/torch/dataset_test.py @@ -181,7 +181,7 @@ def test_loads_object_detection_dataset_from_polygon_annotations( "iscrowd": [0], } - def test_loads_object_detection_dataset_from_complex_polygon_annotations( + def test_loads_object_detection_dataset_from_multi_path_polygon_annotations( self, team_slug_darwin_json_v2: str, local_config_file: Config, @@ -260,7 +260,7 @@ def test_loads_instance_segmentation_dataset_from_polygon_annotations( assert label["image_path"] == str(dataset.dataset_path / "images" / "0.png") assert label["width"] == 50 - def test_loads_instance_segmentation_dataset_from_complex_polygon_annotations( + def test_loads_instance_segmentation_dataset_from_multi_path_polygon_annotations( self, team_slug_darwin_json_v2: str, local_config_file: Config, diff --git a/tests/darwin/utils/find_files_test.py b/tests/darwin/utils/find_files_test.py index 64c4dadce..d56fd352b 100644 --- a/tests/darwin/utils/find_files_test.py +++ b/tests/darwin/utils/find_files_test.py @@ -8,7 +8,6 @@ from darwin.utils import ( SUPPORTED_EXTENSIONS, SUPPORTED_IMAGE_EXTENSIONS, - SUPPORTED_VIDEO_EXTENSIONS, find_files, ) @@ -132,9 +131,8 @@ def dependency_factory(self) -> Dependencies: """ from darwin.utils import is_extension_allowed_by_filename as ieabf from darwin.utils import 
is_image_extension_allowed_by_filename as iieabf - from darwin.utils import is_video_extension_allowed_by_filename as iveabf - return self.Dependencies(ieabf=ieabf, iveabf=iveabf, iieabf=iieabf) + return self.Dependencies(ieabf=ieabf, iieabf=iieabf) def test_ieabf_returns_true_for_a_valid_extension(self): valid_extensions = [ @@ -152,21 +150,6 @@ def test_ieabf_returns_false_for_an_invalid_extension(self): self.assertFalse(all(results)) - def test_iveabf_returns_true_for_a_valid_extension(self): - results = [ - self.dependency_factory().iveabf(file) - for file in SUPPORTED_VIDEO_EXTENSIONS - ] - - self.assertTrue(all(results)) - - def test_iveabf_returns_false_for_an_invalid_extension(self): - results = [ - self.dependency_factory().iveabf(file) for file in self.fake_invalid_files - ] - - self.assertFalse(all(results)) - def test_iieabf_returns_true_for_a_valid_extension(self): results = [ self.dependency_factory().iieabf(file) diff --git a/tests/darwin/utils_test.py b/tests/darwin/utils_test.py index 58a8ccac5..6f7e16459 100644 --- a/tests/darwin/utils_test.py +++ b/tests/darwin/utils_test.py @@ -8,11 +8,9 @@ from darwin.utils import ( get_response_content, has_json_content_type, - is_extension_allowed, is_image_extension_allowed, is_project_dir, is_unix_like_os, - is_video_extension_allowed, parse_darwin_json, urljoin, validate_data_against_schema, @@ -46,24 +44,12 @@ def test_validates_correct_data(self): class TestExtensions: - def test_returns_true_for_allowed_extensions(self): - assert is_extension_allowed(".png") - - def test_returns_false_for_unknown_extensions(self): - assert not is_extension_allowed(".mkv") - def test_returns_true_for_allowed_image_extensions(self): assert is_image_extension_allowed(".png") def test_returns_false_for_unknown_image_extensions(self): assert not is_image_extension_allowed(".not_an_image") - def test_returns_true_for_allowed_video_extensions(self): - assert is_video_extension_allowed(".mp4") - - def 
test_returns_false_for_unknown_video_extensions(self): - assert not is_video_extension_allowed(".not_video") - class TestUrlJoin: def test_returns_an_url(self): @@ -111,31 +97,51 @@ class TestParseDarwinJson: def test_parses_darwin_images_correctly(self, tmp_path): content = """ { - "image": { - "width": 497, - "height": 778, - "original_filename": "P49-RediPad-ProPlayLEFTY_442.jpg", - "filename": "P49-RediPad-ProPlayLEFTY_442.jpg", - "url": "", - "path": "/tmp_files" + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "P49-RediPad-ProPlayLEFTY_442.jpg", + "path": "/tmp_files", + "slots": [ + { + "type": "image", + "slot_name": "0", + "width": 497, + "height": 778, + "source_files": [ + { + "file_name": "P49-RediPad-ProPlayLEFTY_442.jpg", + "url": "" + } + ] + } + ] }, "annotations": [ { - "keypoint": { - "x": 207.97048950195312, - "y": 449.39691162109375 - }, - "name": "left_knee" + "id": "unique_id_1", + "name": "left_knee", + "keypoint": { + "x": 207.97048950195312, + "y": 449.39691162109375 + }, + "slot_names": [ + "0" + ] }, { - "keypoint": { - "x": 302.9606018066406, - "y": 426.13946533203125 - }, - "name": "left_ankle" + "id": "unique_id_2", + "name": "left_ankle", + "keypoint": { + "x": 302.9606018066406, + "y": 426.13946533203125 + }, + "slot_names": [ + "0" + ] } ] - } + } """ directory = tmp_path / "imports" @@ -149,7 +155,7 @@ def test_parses_darwin_images_correctly(self, tmp_path): assert annotation_file.filename == "P49-RediPad-ProPlayLEFTY_442.jpg" assert annotation_file.dataset_name is None assert annotation_file.version == dt.AnnotationFileVersion( - major=1, minor=0, suffix="" + major=2, minor=0, suffix="" ) assert len(annotation_file.annotations) == 2 @@ -166,68 +172,65 @@ def test_parses_darwin_images_correctly(self, tmp_path): def test_parses_darwin_videos_correctly(self, tmp_path): content = """ { - "dataset": "my-dataset", - "image": { - "width": 3840, - 
"height": 2160, - "fps": 0.0, - "original_filename": "above tractor.mp4", - "filename": "above tractor.mp4", - "url": "https://my-website.com/api/videos/209/original", + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json_2_0.schema.json", + "item": { + "name": "above tractor.mp4", "path": "/", - "workview_url": "https://my-website.com/workview?dataset=102&image=530", - "frame_count": 343, - "frame_urls": [ - "https://my-website.com/api/videos/209/frames/0" + "source_info": { + "item_id": "018a4ad2-41cb-5b6a-8141-fe1afeb65746", + "team": {"name": "Test Team", "slug": "test-team"}, + "dataset": { + "name": "My dataset", + "slug": "my-dataset", + "dataset_management_url": "https://my-website.com/datasets/018a4ad2-41cb-5b6a-8141-fe1afeb65746/dataset-management" + }, + "workview_url": "https://my-website.com/workview?dataset=102&image=530" + }, + "slots": [ + { + "type": "video", + "slot_name": "0", + "width": 3840, + "height": 2160, + "fps": 0.0, + "thumbnail_url": "https://my-website.com/api/videos/209/thumbnail", + "source_files": [ + { + "file_name": "above tractor.mp4", + "url": "https://my-website.com/api/videos/209/original" + } + ], + "frame_count": 343, + "frame_urls": ["https://my-website.com/api/videos/209/frames/0"] + } ] }, "annotations": [ { "frames": { "3": { - "bounding_box": { - "h": 547.0, - "w": 400.0, - "x": 363.0, - "y": 701.0 - }, - "instance_id": { - "value": 119 - }, + "bounding_box": {"h": 547.0, "w": 400.0, "x": 363.0, "y": 701.0}, + "instance_id": {"value": 119}, "keyframe": true, "polygon": { - "path": [ - { - "x": 748.0, - "y": 732.0 - }, - { - "x": 751.0, - "y": 735.0 - }, - { - "x": 748.0, - "y": 733.0 - } + "paths": [ + [ + {"x": 748.0, "y": 732.0}, + {"x": 751.0, "y": 735.0}, + {"x": 748.0, "y": 733.0} + ] ] } } }, + "id": "f8f5f235-bd47-47be-b4fe-07d49e0177a7", "interpolate_algorithm": "linear-1.1", "interpolated": true, "name": "Hand", - "segments": [ - [ - 3, - 46 - ] - ], - "hidden_areas": 
[ - [ - 5, - 8 - ] - ] + "ranges": [[3, 46]], + "hidden_areas": [[5, 8]], + "slot_names": ["0"] } ] } @@ -238,13 +241,13 @@ def test_parses_darwin_videos_correctly(self, tmp_path): import_file = directory / "darwin-file.json" import_file.write_text(content) - annotation_file: dt.AnnotationFile = parse_darwin_json(import_file, None) + annotation_file: dt.AnnotationFile = parse_darwin_json(import_file) assert annotation_file.path == import_file assert annotation_file.filename == "above tractor.mp4" - assert annotation_file.dataset_name is None + assert annotation_file.dataset_name == "My dataset" assert annotation_file.version == dt.AnnotationFileVersion( - major=1, minor=0, suffix="" + major=2, minor=0, suffix="" ) assert len(annotation_file.annotations) == 1 @@ -271,20 +274,22 @@ def test_parses_darwin_videos_correctly(self, tmp_path): annotation_class=dt.AnnotationClass( name="Hand", annotation_type="polygon", - annotation_internal_type=None, + annotation_internal_type="polygon", ), frames={ 3: dt.Annotation( annotation_class=dt.AnnotationClass( name="Hand", annotation_type="polygon", - annotation_internal_type=None, + annotation_internal_type="polygon", ), data={ - "path": [ - {"x": 748.0, "y": 732.0}, - {"x": 751.0, "y": 735.0}, - {"x": 748.0, "y": 733.0}, + "paths": [ + [ + {"x": 748.0, "y": 732.0}, + {"x": 751.0, "y": 735.0}, + {"x": 748.0, "y": 733.0}, + ] ], "bounding_box": { "x": 363.0, @@ -296,12 +301,22 @@ def test_parses_darwin_videos_correctly(self, tmp_path): subs=[ dt.SubAnnotation(annotation_type="instance_id", data=119) ], + slot_names=[], + annotators=None, + reviewers=None, + id="f8f5f235-bd47-47be-b4fe-07d49e0177a7", + properties=None, ) }, keyframes={3: True}, segments=[[3, 46]], hidden_areas=[[5, 8]], interpolated=True, + slot_names=["0"], + annotators=None, + reviewers=None, + id="f8f5f235-bd47-47be-b4fe-07d49e0177a7", + properties=None, ) ] @@ -554,21 +569,56 @@ def test_returns_None_if_no_annotations_exist(self, tmp_path): def 
test_uses_a_default_path_if_one_is_missing(self, tmp_path): content = """ + { + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "P49-RediPad-ProPlayLEFTY_442.jpg", + "path": "/", + "source_info": { + "item_id": "unknown", + "dataset": { + "name": "unknown", + "slug": "unknown", + "dataset_management_url": "unknown" + }, + "team": { + "name": "unknown", + "slug": "unknown" + }, + "workview_url": "unknown" + }, + "slots": [ { - "image": { - "original_filename": "P49-RediPad-ProPlayLEFTY_442.jpg", - "filename": "P49-RediPad-ProPlayLEFTY_442.jpg" - }, - "annotations": [ - { - "keypoint": { - "x": 207.97048950195312, - "y": 449.39691162109375 - }, - "name": "left_knee" - } + "type": "image", + "slot_name": "0", + "width": 640, + "height": 425, + "thumbnail_url": "unknown", + "source_files": [ + { + "file_name": "P49-RediPad-ProPlayLEFTY_442.jpg", + "url": "unknown" + } ] } + ] + }, + "annotations": [ + { + "id": "unknown", + "name": "left_knee", + "properties": [], + "keypoint": { + "x": 207.97048950195312, + "y": 449.39691162109375 + }, + "slot_names": [ + "0" + ] + } + ] + } """ directory = tmp_path / "imports" @@ -582,58 +632,148 @@ def test_uses_a_default_path_if_one_is_missing(self, tmp_path): def test_imports_a_skeleton(self, tmp_path): content = """ + { + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "ferrari-laferrari.jpg", + "path": "/", + "source_info": { + "item_id": "018c4450-d91d-ff3e-b226-60d48b66f86e", + "dataset": { + "name": "bbox", + "slug": "bbox", + "dataset_management_url": "https://darwin.v7labs.com/datasets/623079/dataset-management" + }, + "team": { + "name": "V7 John", + "slug": "v7-john" + }, + "workview_url": "https://darwin.v7labs.com/workview?dataset=623079&item=018c4450-d91d-ff3e-b226-60d48b66f86e" + }, + "slots": [ { - "dataset": "cars", - "image": { - "filename": 
"ferrari-laferrari.jpg" - }, - "annotations": [ + "type": "image", + "slot_name": "0", + "width": 640, + "height": 425, + "thumbnail_url": "https://darwin.v7labs.com/api/v2/teams/v7-john/files/ddc5cbc2-8438-4e36-8ab6-43e2f3746bf1/thumbnail", + "source_files": [ + { + "file_name": "000000007751.jpg", + "url": "https://darwin.v7labs.com/api/v2/teams/v7-john/uploads/3395d29a-7539-4a51-a3ca-c7a95f460345" + } + ] + } + ] + }, + "annotations": [ + { + "bounding_box": { + "h": 53.963699999999996, + "w": 83.7195, + "x": 32.7817, + "y": 53.9638 + }, + "id": "8940a690-d8a9-4c83-9f59-38f0ef780246", + "name": "new-class-2", + "polygon": { + "paths": [ + [ { - "bounding_box": { - "h": 547.0, - "w": 1709.0, - "x": 96.0, - "y": 437.0 - }, - "name": "car", - "polygon": { - "path": [ - { - "x": 1805.0, - "y": 586.0 - }, - { - "x": 1802.0, - "y": 586.0 - }, - { - "x": 1805.0, - "y": 588.0 - } - ] - } + "x": 65.0591, + "y": 53.9638 }, { - "name": "wheels", - "skeleton": { - "nodes": [ - { - "name": "1", - "occluded": false, - "x": 829.56, - "y": 824.5 - }, - { - "name": "2", - "occluded": false, - "x": 1670.5, - "y": 741.76 - } - ] - } + "x": 32.7817, + "y": 107.9275 + }, + { + "x": 116.5012, + "y": 104.9015 } ] + ] + }, + "properties": [], + "slot_names": [ + "0" + ] + }, + { + "id": "782618fb-4c69-436e-80cb-71765d255dbf", + "name": "skeleton-test", + "properties": [], + "skeleton": { + "nodes": [ + { + "name": "node", + "occluded": false, + "x": 264.7754, + "y": 121.5445 + }, + { + "name": "2", + "occluded": false, + "x": 245.1335, + "y": 107.3425 + }, + { + "name": "3", + "occluded": false, + "x": 240.4646, + "y": 125.4178 + }, + { + "name": "4", + "occluded": false, + "x": 280.3923, + "y": 137.468 + } + ] + }, + "slot_names": [ + "0" + ] + }, + { + "id": "b6bea00c-c8a4-4d34-b72f-88567d9e8cd5", + "name": "skeleton-test", + "properties": [], + "skeleton": { + "nodes": [ + { + "name": "node", + "occluded": false, + "x": 136.1702, + "y": 306.1308 + }, + { + "name": "2", + "occluded": 
false, + "x": 145.1629, + "y": 291.263 + }, + { + "name": "3", + "occluded": false, + "x": 147.3005, + "y": 310.1857 + }, + { + "name": "4", + "occluded": false, + "x": 129.0203, + "y": 322.8007 + } + ] + }, + "slot_names": [ + "0" + ] } + ] + } """ directory = tmp_path / "imports" @@ -653,89 +793,148 @@ def test_imports_a_skeleton(self, tmp_path): def test_imports_multiple_skeletetons(self, tmp_path): content = """ + { + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "ferrari-laferrari.jpg", + "path": "/", + "source_info": { + "item_id": "018c4450-d91d-ff3e-b226-60d48b66f86e", + "dataset": { + "name": "bbox", + "slug": "bbox", + "dataset_management_url": "https://darwin.v7labs.com/datasets/623079/dataset-management" + }, + "team": { + "name": "V7 John", + "slug": "v7-john" + }, + "workview_url": "https://darwin.v7labs.com/workview?dataset=623079&item=018c4450-d91d-ff3e-b226-60d48b66f86e" + }, + "slots": [ { - "dataset":"cars", - "image":{ - "filename":"ferrari-laferrari.jpg" - }, - "annotations":[ + "type": "image", + "slot_name": "0", + "width": 640, + "height": 425, + "thumbnail_url": "https://darwin.v7labs.com/api/v2/teams/v7-john/files/ddc5cbc2-8438-4e36-8ab6-43e2f3746bf1/thumbnail", + "source_files": [ + { + "file_name": "000000007751.jpg", + "url": "https://darwin.v7labs.com/api/v2/teams/v7-john/uploads/3395d29a-7539-4a51-a3ca-c7a95f460345" + } + ] + } + ] + }, + "annotations": [ + { + "bounding_box": { + "h": 53.963699999999996, + "w": 83.7195, + "x": 32.7817, + "y": 53.9638 + }, + "id": "8940a690-d8a9-4c83-9f59-38f0ef780246", + "name": "new-class-2", + "polygon": { + "paths": [ + [ { - "bounding_box":{ - "h":547.0, - "w":1709.0, - "x":96.0, - "y":437.0 - }, - "name":"car", - "polygon":{ - "path":[ - { - "x":1805.0, - "y":586.0 - }, - { - "x":1802.0, - "y":586.0 - }, - { - "x":1805.0, - "y":588.0 - } - ] - } + "x": 65.0591, + "y": 53.9638 }, { - "name":"wheels", - 
"skeleton":{ - "nodes":[ - { - "name":"1", - "occluded":false, - "x":829.56, - "y":824.5 - }, - { - "name":"2", - "occluded":false, - "x":1670.5, - "y":741.76 - } - ] - } + "x": 32.7817, + "y": 107.9275 }, { - "name":"door", - "skeleton":{ - "nodes":[ - { - "name":"1", - "occluded":false, - "x":867.86, - "y":637.16 - }, - { - "name":"2", - "occluded":false, - "x":1100.21, - "y":810.09 - }, - { - "name":"3", - "occluded":false, - "x":1298.45, - "y":856.56 - }, - { - "name":"4", - "occluded":false, - "x":1234.63, - "y":492.12 - } - ] - } + "x": 116.5012, + "y": 104.9015 } ] + ] + }, + "properties": [], + "slot_names": [ + "0" + ] + }, + { + "id": "782618fb-4c69-436e-80cb-71765d255dbf", + "name": "skeleton-test", + "properties": [], + "skeleton": { + "nodes": [ + { + "name": "node", + "occluded": false, + "x": 264.7754, + "y": 121.5445 + }, + { + "name": "2", + "occluded": false, + "x": 245.1335, + "y": 107.3425 + }, + { + "name": "3", + "occluded": false, + "x": 240.4646, + "y": 125.4178 + }, + { + "name": "4", + "occluded": false, + "x": 280.3923, + "y": 137.468 + } + ] + }, + "slot_names": [ + "0" + ] + }, + { + "id": "b6bea00c-c8a4-4d34-b72f-88567d9e8cd5", + "name": "skeleton-test", + "properties": [], + "skeleton": { + "nodes": [ + { + "name": "node", + "occluded": false, + "x": 136.1702, + "y": 306.1308 + }, + { + "name": "2", + "occluded": false, + "x": 145.1629, + "y": 291.263 + }, + { + "name": "3", + "occluded": false, + "x": 147.3005, + "y": 310.1857 + }, + { + "name": "4", + "occluded": false, + "x": 129.0203, + "y": 322.8007 + } + ] + }, + "slot_names": [ + "0" + ] } + ] + } """ directory = tmp_path / "imports" diff --git a/tests/data.zip b/tests/data.zip index ce18d5d62..19825eeaa 100644 Binary files a/tests/data.zip and b/tests/data.zip differ