/
pascal_voc.py
150 lines (119 loc) · 4.22 KB
/
pascal_voc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import List, Optional
import darwin.datatypes as dt
from darwin.path_utils import deconstruct_full_path
def parse_path(path: Path) -> Optional[dt.AnnotationFile]:
    """
    Reads a Pascal VOC annotation file and turns it into an ``AnnotationFile``.

    Only the following parts of the document are looked at:

    .. code-block:: xml

        <filename>SOME_FILE_NAME</filename>
        <object>
            <name>CLASS_NAME</name>
            <bndbox>
                <xmax>NUMBER</xmax>
                <xmin>NUMBER</xmin>
                <ymax>NUMBER</ymax>
                <ymin>NUMBER</ymin>
            </bndbox>
        </object>
        <object>
            ...
        </object>

    Parameters
    ----------
    path : Path
        Location of the file to parse.

    Returns
    -------
    Optional[darwin.datatypes.AnnotationFile]
        The annotation file built from the document, or ``None`` when the suffix of ``path``
        is not exactly ``.xml`` (the comparison is case-sensitive).

    Raises
    ------
    ValueError
        If a mandatory child element is missing or is empty. Mandatory child elements are:
        filename, name, bndbox, xmin, xmax, ymin and ymax.
    """
    # Anything that is not an ``.xml`` file is not ours to handle; signal that with ``None``.
    if path.suffix != ".xml":
        return None

    root = ET.parse(str(path)).getroot()

    # Read the <filename> text first, so a missing file name is reported before any
    # <object> element is looked at.
    full_name = _find_text_value(root, "filename")

    # One annotation per direct <object> child of the root; falsy results are dropped.
    annotations: List[dt.Annotation] = []
    for object_element in root.findall("object"):
        parsed = _parse_annotation(object_element)
        if parsed:
            annotations.append(parsed)

    annotation_classes = {parsed.annotation_class for parsed in annotations}

    # ``deconstruct_full_path`` yields two values, used here as the remote path and the
    # file name handed to ``AnnotationFile``.
    remote_path, filename = deconstruct_full_path(full_name)

    return dt.AnnotationFile(
        path,
        filename,
        annotation_classes,
        annotations,
        remote_path=remote_path,
    )
def _parse_annotation(annotation_object: ET.Element) -> dt.Annotation:
    """
    Builds a bounding box annotation out of a single ``<object>`` XML element.

    Parameters
    ----------
    annotation_object : xml.etree.ElementTree.Element
        The element to convert. It must contain a ``name`` child and a ``bndbox`` child, the
        latter holding ``xmin``, ``xmax``, ``ymin`` and ``ymax`` children.

    Returns
    -------
    darwin.datatypes.Annotation
        The value returned by ``darwin.datatypes.make_bounding_box`` when called with the class
        name, ``xmin``, ``ymin``, ``xmax - xmin`` and ``ymax - ymin``.

    Raises
    ------
    ValueError
        If a mandatory child element is missing or is empty. Mandatory child elements are:
        name, bndbox, xmin, xmax, ymin and ymax. ``float`` also raises it when a coordinate
        text is not a number.
    """
    class_name = _find_text_value(annotation_object, "name")
    bndbox = _find_element(annotation_object, "bndbox")

    def _coordinate(tag: str) -> int:
        # Go through ``float`` first so texts such as "12.0" are accepted; ``int`` then
        # truncates towards zero.
        return int(float(_find_text_value(bndbox, tag)))

    # The generator is consumed left to right: xmin, xmax, ymin, ymax.
    xmin, xmax, ymin, ymax = (
        _coordinate(tag) for tag in ("xmin", "xmax", "ymin", "ymax")
    )

    # Last two arguments are the differences between the max and min corners
    # (presumably the box width and height -- confirm against make_bounding_box).
    return dt.make_bounding_box(class_name, xmin, ymin, xmax - xmin, ymax - ymin)
def _find_element(source: ET.Element, name: str) -> ET.Element:
    """
    Looks up the first child of ``source`` matching ``name`` and returns it.

    Parameters
    ----------
    source : xml.etree.ElementTree.Element
        Parent element whose children are searched.
    name : str
        Tag name (or ElementTree path) of the child element to look for.

    Returns
    -------
    xml.etree.ElementTree.Element
        The first matching child element.

    Raises
    ------
    ValueError
        If no child element matches ``name``.
    """
    child = source.find(name)

    # Compare against ``None`` explicitly: an Element without children of its own is
    # falsy, so a plain truthiness check would reject valid leaf elements.
    if child is not None:
        return child

    raise ValueError(f"Could not find {name} element in annotation file")
def _find_text_value(source: ET.Element, name: str) -> str:
    """
    Looks up the child of ``source`` matching ``name`` and returns the text it contains.

    Parameters
    ----------
    source : xml.etree.ElementTree.Element
        Parent element whose children are searched.
    name : str
        Name of the child element whose text is wanted.

    Returns
    -------
    str
        The text of the child element, exactly as stored (surrounding whitespace is kept).

    Raises
    ------
    ValueError
        If the child element cannot be found, has no text, or its text is empty or made of
        whitespace only.
    """
    text = _find_element(source, name).text

    # Whitespace-only content counts as "no value", but the text that is handed back is
    # left unstripped.
    if text is not None and text.strip():
        return text

    raise ValueError(f"{name} element does not have a text value")