From 0ed2a7586fde5bd9e61672fb9e73568ab6a3b56c Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Wed, 6 Mar 2024 15:11:23 +0000 Subject: [PATCH 1/4] Fixed mislabelled default arg value --- darwin/cli_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index ad8d75268..662280e75 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -881,7 +881,7 @@ def dataset_import( import_reviewers : bool, default: False If ``True`` it will import the reviewers from the files to the dataset, if . If ``False`` it will not import the reviewers. - use_multi_cpu : bool, default: True + use_multi_cpu : bool, default: False If ``True`` it will use all multiple CPUs to speed up the import process. cpu_limit : Optional[int], default: Core count - 2 The maximum number of CPUs to use for the import process. From 3b876a4dd5dc656c0f6fe33dfa176003df7ecd0f Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Wed, 6 Mar 2024 16:15:13 +0000 Subject: [PATCH 2/4] Added support for importing PASCAL VOC annotations to folders + unit test --- darwin/importer/formats/pascal_voc.py | 5 ++++- test2.py | 12 ++++++++++++ .../importer/formats/import_pascalvoc_test.py | 14 ++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 test2.py diff --git a/darwin/importer/formats/pascal_voc.py b/darwin/importer/formats/pascal_voc.py index 4470a6149..e6fadf328 100644 --- a/darwin/importer/formats/pascal_voc.py +++ b/darwin/importer/formats/pascal_voc.py @@ -3,6 +3,7 @@ from typing import List, Optional import darwin.datatypes as dt +from darwin.path_utils import deconstruct_full_path def parse_path(path: Path) -> Optional[dt.AnnotationFile]: @@ -57,8 +58,10 @@ def parse_path(path: Path) -> Optional[dt.AnnotationFile]: ) annotation_classes = {annotation.annotation_class for annotation in annotations} + remote_path, filename = deconstruct_full_path(filename) + return dt.AnnotationFile( - path, filename, annotation_classes, annotations, remote_path="/" + path, filename, annotation_classes, annotations, remote_path=remote_path ) diff --git a/test2.py b/test2.py new file mode 100644 index 000000000..97fd8c9f4 --- /dev/null +++ b/test2.py @@ -0,0 +1,12 @@ +import darwin.importer as importer +from darwin.client import Client +from darwin.importer import get_importer + +DATASET_IDENTIFIER = "v7-john/bbox" +FORMAT_NAME = "pascal_voc" +ANNOTATION_PATHS = ["/Users/john/Desktop/pascal"] + +client = Client.local() +dataset = client.get_remote_dataset(dataset_identifier=DATASET_IDENTIFIER) +parser = get_importer(FORMAT_NAME) +importer.import_annotations(dataset, parser, ANNOTATION_PATHS, append=True) diff --git a/tests/darwin/importer/formats/import_pascalvoc_test.py b/tests/darwin/importer/formats/import_pascalvoc_test.py index 2c50dc24f..84d09aa9f 100644 --- a/tests/darwin/importer/formats/import_pascalvoc_test.py +++ b/tests/darwin/importer/formats/import_pascalvoc_test.py @@ -171,3 +171,17 @@ def test_returns_annotation_file_with_correct_annotations_with_float_values( assert annotation.subs == [] assert annotation_file.remote_path == "/" + + def test_deconstructs_filepath_properly_if_folder_included_in_filename( + self, annotation_path: Path + ): + annotation_path.write_text( + "folder/image.jpgClass10101010" + ) + + annotation_file = parse_path(annotation_path) + + assert annotation_file is not None + assert annotation_file.path == annotation_path + assert annotation_file.filename == "image.jpg" + assert annotation_file.remote_path == "/folder" From 65da6f12f3a8990b24a7a3fe9ca178fc56ca6d57 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Wed, 6 Mar 2024 16:44:11 +0000 Subject: [PATCH 3/4] Fixed misconfigured querystring filter name in fetch_remote_files() --- darwin/importer/importer.py | 2 +- test2.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/darwin/importer/importer.py b/darwin/importer/importer.py index eac3e295c..1dd0880e4 100644 --- a/darwin/importer/importer.py +++ b/darwin/importer/importer.py @@ -221,7 +221,7 @@ def get_remote_files( for i in range(0, len(filenames), chunk_size): chunk = filenames[i : i + chunk_size] for remote_file in dataset.fetch_remote_files( - {"types": "image,playback_video,video_frame", "filenames": chunk} + {"types": "image,playback_video,video_frame", "item_names": chunk} ): slot_name = _get_slot_name(remote_file) remote_files[remote_file.full_path] = (remote_file.id, slot_name) diff --git a/test2.py b/test2.py index 97fd8c9f4..d17077097 100644 --- a/test2.py +++ b/test2.py @@ -2,11 +2,11 @@ from darwin.client import Client from darwin.importer import get_importer -DATASET_IDENTIFIER = "v7-john/bbox" +DATASET_IDENTIFIER = "product-camera-team/xero-goat" FORMAT_NAME = "pascal_voc" ANNOTATION_PATHS = ["/Users/john/Desktop/pascal"] -client = Client.local() +client = Client.from_api_key("zCOGdus.PdY-kT07sKASoHsw8FmlczMRDKw532Uz") dataset = client.get_remote_dataset(dataset_identifier=DATASET_IDENTIFIER) parser = get_importer(FORMAT_NAME) -importer.import_annotations(dataset, parser, ANNOTATION_PATHS, append=True) +importer.import_annotations(dataset, parser, ANNOTATION_PATHS, append=False) From e3e4cd2eb8e3210bf5b369954257f17b4653c8df Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Wed, 6 Mar 2024 17:12:24 +0000 Subject: [PATCH 4/4] removed test file --- test2.py | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 test2.py diff --git a/test2.py b/test2.py deleted file mode 100644 index d17077097..000000000 --- a/test2.py +++ /dev/null @@ -1,12 +0,0 @@ -import darwin.importer as importer -from darwin.client import Client -from darwin.importer import get_importer - -DATASET_IDENTIFIER = "product-camera-team/xero-goat" -FORMAT_NAME = "pascal_voc" -ANNOTATION_PATHS = ["/Users/john/Desktop/pascal"] - -client = Client.from_api_key("zCOGdus.PdY-kT07sKASoHsw8FmlczMRDKw532Uz") -dataset = client.get_remote_dataset(dataset_identifier=DATASET_IDENTIFIER) -parser = get_importer(FORMAT_NAME) -importer.import_annotations(dataset, parser, ANNOTATION_PATHS, append=False)