Skip to content

Commit

Permalink
[PLA-585][external] Allow imports where some annotation frames are mi…
Browse files Browse the repository at this point in the history
…ssing a main type (#788)

* WIP

* WIP

* WIP

* WIP

* WIP

* Ensure COCO categories are always in the same, ascending order to avoid E2E flakiness

* Ensure COCO categories are always in the same, ascending order to avoid E2E test flakiness

* WIP

* Removed deprecated functions & removed erroneous deprecation labels

* Fixed tests that broke when removing deprecated functions

* WIP

* Updated data.zip with v2 data structures

* Undo accidental changes

* 1 failing test remaining (Video annotation parsing) (WIP)

* Moved ObjectStore tests to datatypes_test

* Fixed video annotation import test

* Added support for simple table annotations in case exports are compressed to avoid OOM

* Small fix for importing the raster layer

* Removed 2 unused functions

* Removed deprecated api_url parameter

* Replace lambda with itemgetter() for improved performance

* Turned 4 functions private

* Removed deprecated api_url parameter from function call

* Fixed private function calls

* Removed debugging print statements

* Linting

* Update darwin/datatypes.py

Co-authored-by: saurbhc <sc@saurabhchopra.co.uk>

* docstring updates

---------

Co-authored-by: saurbhc <sc@saurabhchopra.co.uk>
  • Loading branch information
JBWilkie and saurbhc committed Apr 18, 2024
1 parent 8927be5 commit 973d9cc
Show file tree
Hide file tree
Showing 37 changed files with 1,428 additions and 1,860 deletions.
172 changes: 0 additions & 172 deletions darwin/dataset/download_manager.py
Expand Up @@ -9,7 +9,6 @@
from tempfile import TemporaryDirectory
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple

import deprecation
import numpy as np
import orjson as json
import requests
Expand All @@ -28,18 +27,10 @@
is_image_extension_allowed,
parse_darwin_json,
)
from darwin.version import __version__


@deprecation.deprecated(
deprecated_in="0.7.5",
removed_in="0.8.0",
current_version=__version__,
details="The api_url parameter will be removed.",
)
def download_all_images_from_annotations(
api_key: str,
api_url: str,
annotations_path: Path,
images_path: Path,
force_replace: bool = False,
Expand All @@ -57,8 +48,6 @@ def download_all_images_from_annotations(
----------
api_key : str
API Key of the current team
api_url : str
Url of the darwin API (e.g. 'https://darwin.v7labs.com/api/')
annotations_path : Path
Path where the annotations are located
images_path : Path
Expand Down Expand Up @@ -152,70 +141,6 @@ def download_all_images_from_annotations(
return lambda: download_functions, len(download_functions)


@deprecation.deprecated(
    deprecated_in="0.7.5",
    removed_in="0.8.0",
    current_version=__version__,
    details="The api_url parameter will be removed.",
)
def download_image_from_annotation(
    api_key: str,
    api_url: str,
    annotation_path: Path,
    images_path: Path,
    annotation_format: str,
    use_folders: bool,
    video_frames: bool,
    force_slots: bool,
    ignore_slots: bool = False,
) -> None:
    """
    Downloads the image(s) referenced by a single annotation file.

    Only the ``json`` annotation format is handled. For that format the download
    callables produced by ``_download_image_from_json_annotation`` are executed
    one after another, in the order they are yielded.

    Parameters
    ----------
    api_key : str
        API Key of the current team
    api_url : str
        Url of the darwin API (e.g. 'https://darwin.v7labs.com/api/').
        Deprecated: it is accepted but not used by this function.
    annotation_path : Path
        Path where the annotation is located
    images_path : Path
        Path where to download the image
    annotation_format : str
        Format of the annotations. Currently only JSON is supported
    use_folders : bool
        Recreate folder structure
    video_frames : bool
        Pulls video frames images instead of video files
    force_slots: bool
        Pulls all slots of items into deeper file structure ({prefix}/{item_name}/{slot_name}/{file_name})
    ignore_slots: bool, default: False
        Forwarded unchanged to ``_download_image_from_json_annotation``.

    Raises
    ------
    NotImplementedError
        If the format of the annotation is not supported.
    """

    console = Console()

    # Guard clause: reject anything that is not JSON before doing any work.
    if annotation_format != "json":
        console.print("[bold red]Unsupported file format. Please use 'json'.")
        raise NotImplementedError

    pending_downloads = _download_image_from_json_annotation(
        api_key,
        annotation_path,
        images_path,
        use_folders,
        video_frames,
        force_slots,
        ignore_slots,
    )
    for run_download in pending_downloads:
        run_download()


def lazy_download_image_from_annotation(
api_key: str,
annotation_path: Path,
Expand Down Expand Up @@ -454,103 +379,6 @@ def _update_local_path(annotation: AnnotationFile, url, local_path):
file.write(op)


@deprecation.deprecated(
    deprecated_in="0.7.5",
    removed_in="0.8.0",
    current_version=__version__,
    details="Use the ``download_image_from_annotation`` instead.",
)
def download_image_from_json_annotation(
    api_key: str,
    api_url: str,
    annotation_path: Path,
    image_path: Path,
    use_folders: bool,
    video_frames: bool,
) -> None:
    """
    Fetches the image (or the individual video frames) described by a ``.json``
    annotation file and stores it underneath ``image_path``.

    Parameters
    ----------
    api_key : str
        API Key of the current team
    api_url : str
        Url of the darwin API (e.g. 'https://darwin.v7labs.com/api/').
        Accepted but not used by this function.
    annotation_path : Path
        Path where the annotation is located
    image_path : Path
        Root directory where the image is downloaded to
    use_folders : bool
        Recreate the remote folder taken from the annotation's ``image.path``
    video_frames : bool
        Pulls video frames images instead of video files, when the annotation
        lists ``frame_urls``
    """
    annotation = attempt_decode(annotation_path)

    # Only honour the remote folder when asked to; otherwise everything lands
    # directly in ``image_path``.
    if use_folders:
        sub_path = annotation["image"].get("path", "/")
    else:
        sub_path = "/"

    # Strip the anchor so that an absolute remote path nests under image_path.
    relative_sub_path = Path(sub_path).relative_to(Path(sub_path).anchor)
    parent_path = Path(image_path) / relative_sub_path
    parent_path.mkdir(exist_ok=True, parents=True)

    image_info = annotation["image"]

    if video_frames and "frame_urls" in image_info:
        # One directory per video, named after the annotation file, with
        # zero-padded frame numbers as file names.
        frames_dir: Path = parent_path / annotation_path.stem
        frames_dir.mkdir(exist_ok=True, parents=True)
        for frame_index, frame_url in enumerate(image_info["frame_urls"]):
            frame_file = frames_dir / f"{frame_index:07d}.png"
            _download_image(frame_url, frame_file, api_key)
        return

    image_url = image_info["url"]
    destination = parent_path / sanitize_filename(image_info["filename"])
    _download_image(image_url, destination, api_key)


@deprecation.deprecated(
    deprecated_in="0.7.5",
    removed_in="0.8.0",
    current_version=__version__,
    details="Use the ``download_image_from_annotation`` instead.",
)
def download_image(url: str, path: Path, api_key: str) -> None:
    """
    Helper function: downloads one image from url.

    The request is retried once per second until it succeeds, fails with a
    client error (4xx), or the overall time budget of 60 seconds is exceeded.

    Parameters
    ----------
    url : str
        Url of the image to download
    path : Path
        Path where to download the image, with filename
    api_key : str
        API Key of the current team

    Raises
    ------
    Exception
        If the server answers with a 4xx status (the arguments are the status
        code and the decoded response body), or if no successful response was
        obtained within the time budget.
    """
    if path.exists():
        return

    timeout_seconds: int = 60
    chunk_size: int = 64 * 1024

    # URLs that already carry a token are requested without an
    # Authorization header.
    headers = None if "token" in url else {"Authorization": f"ApiKey {api_key}"}

    start: float = time.monotonic()
    while True:
        try:
            # Without an explicit timeout a stalled connection would block
            # forever and the retry budget below would never be checked.
            response = requests.get(
                url, headers=headers, stream=True, timeout=timeout_seconds
            )
        except requests.exceptions.Timeout:
            response = None

        if response is not None:
            # A streamed response keeps its connection open until it is closed;
            # the context manager releases it on every path, retries included.
            with response:
                # Correct status: download image
                if response.ok:
                    with open(str(path), "wb") as file:
                        for chunk in response.iter_content(chunk_size=chunk_size):
                            file.write(chunk)
                    return
                # Fatal-error status: fail
                if 400 <= response.status_code <= 499:
                    try:
                        details = response.json()
                    except ValueError:
                        # Error body is not JSON; keep the status code visible
                        # instead of surfacing a decoding error.
                        details = response.text
                    raise Exception(response.status_code, details)

        # Timeout
        if time.monotonic() - start > timeout_seconds:
            raise Exception(
                f"Timeout url request ({url}) after {timeout_seconds} seconds."
            )
        time.sleep(1)


def _download_image(
url: str, path: Path, api_key: str, slot: Optional[dt.Slot] = None
) -> None:
Expand Down
5 changes: 2 additions & 3 deletions darwin/dataset/local_dataset.py
Expand Up @@ -335,12 +335,11 @@ def annotation_type_supported(self, annotation) -> bool:
elif self.annotation_type == "bounding_box":
is_bounding_box = annotation_type == "bounding_box"
is_supported_polygon = (
annotation_type in ["polygon", "complex_polygon"]
and "bounding_box" in annotation.data
annotation_type == "polygon" and "bounding_box" in annotation.data
)
return is_bounding_box or is_supported_polygon
elif self.annotation_type == "polygon":
return annotation_type in ["polygon", "complex_polygon"]
return annotation_type == "polygon"
else:
raise ValueError(
"annotation_type should be either 'tag', 'bounding_box', or 'polygon'"
Expand Down
1 change: 0 additions & 1 deletion darwin/dataset/remote_dataset.py
Expand Up @@ -345,7 +345,6 @@ def pull(
# Create the generator with the download instructions
progress, count = download_all_images_from_annotations(
api_key=api_key,
api_url=self.client.url,
annotations_path=annotations_dir,
images_path=self.local_images_path,
force_replace=force_replace,
Expand Down
2 changes: 1 addition & 1 deletion darwin/dataset/utils.py
Expand Up @@ -705,7 +705,7 @@ def convert_to_rgb(pic: PILImage.Image) -> PILImage.Image:
def compute_max_density(annotations_dir: Path) -> int:
"""
Calculates the maximum density of all of the annotations in the given folder.
Density is calculated as the number of polygons / complex_polygons present in an annotation
Density is calculated as the number of polygons present in an annotation
file.
Parameters
Expand Down
66 changes: 21 additions & 45 deletions darwin/datatypes.py
Expand Up @@ -89,7 +89,6 @@ def from_dict(cls, json: JSONFreeForm) -> "JSONType":
AnnotationType = Literal[ # NB: Some of these are not supported yet
"bounding_box",
"polygon",
"complex_polygon",
"ellipse",
"cuboid",
"segmentation",
Expand Down Expand Up @@ -645,7 +644,7 @@ def make_tag(

def make_polygon(
class_name: str,
point_path: List[Point],
point_paths: List[List[Point]] | List[Point],
bounding_box: Optional[Dict] = None,
subs: Optional[List[SubAnnotation]] = None,
slot_names: Optional[List[str]] = None,
Expand All @@ -655,55 +654,22 @@ def make_polygon(
Parameters
----------
class_name : str
class_name: str
The name of the class for this ``Annotation``.
point_path : List[Point]
A list of points that comprises the polygon. The list should have a format similar to:
point_paths: List[List[Point]] | List[Point]
Either a list of points that comprises a polygon or a list of lists of points that comprises a complex polygon.
A complex polygon is a polygon that is defined by >1 path.
.. code-block:: python
A polygon should be defined by a List[Point] and have a format similar to:
.. code-block:: python
[
{"x": 1, "y": 0},
{"x": 2, "y": 1}
]
bounding_box : Optional[Dict], default: None
The bounding box that encompasses the polygon.
subs : Optional[List[SubAnnotation]], default: None
List of ``SubAnnotation``s for this ``Annotation``.
Returns
-------
Annotation
A polygon ``Annotation``.
"""
return Annotation(
AnnotationClass(class_name, "polygon"),
_maybe_add_bounding_box_data({"path": point_path}, bounding_box),
subs or [],
slot_names=slot_names or [],
)


def make_complex_polygon(
class_name: str,
point_paths: List[List[Point]],
bounding_box: Optional[Dict] = None,
subs: Optional[List[SubAnnotation]] = None,
slot_names: Optional[List[str]] = None,
) -> Annotation:
"""
Creates and returns a complex polygon annotation. Complex polygons are those who have holes
and/or disform shapes.
Parameters
----------
class_name: str
The name of the class for this ``Annotation``.
point_paths: List[List[Point]]
A list of lists points that comprises the complex polygon. This is needed as a complex
polygon can be effectively seen as a sum of multiple simple polygons. The list should have
a format similar to:
A complex polygon should be defined by a List[List[Point]] and have a format similar to:
.. code-block:: python
Expand All @@ -727,10 +693,20 @@ def make_complex_polygon(
Returns
-------
Annotation
A complex polygon ``Annotation``.
A polygon ``Annotation``.
"""

# Check if point_paths is List[Point] and convert to List[List[Point]]
if (
len(point_paths) > 1
and isinstance(point_paths[0], dict)
and "x" in point_paths[0]
and "y" in point_paths[0]
):
point_paths = [point_paths]

return Annotation(
AnnotationClass(class_name, "complex_polygon", "polygon"),
AnnotationClass(class_name, "polygon", "polygon"),
_maybe_add_bounding_box_data({"paths": point_paths}, bounding_box),
subs or [],
slot_names=slot_names or [],
Expand Down

0 comments on commit 973d9cc

Please sign in to comment.