// NOTE: The repository this file belongs to was archived by its owner on
// Sep 5, 2023 and is now read-only.
// File: dataset.proto
// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
syntax = "proto3";
package google.cloud.datalabeling.v1beta1;
import "google/api/annotations.proto";
import "google/cloud/datalabeling/v1beta1/annotation.proto";
import "google/cloud/datalabeling/v1beta1/data_payloads.proto";
import "google/cloud/datalabeling/v1beta1/human_annotation_config.proto";
import "google/protobuf/timestamp.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/datalabeling/v1beta1;datalabeling";
option java_multiple_files = true;
option java_package = "com.google.cloud.datalabeling.v1beta1";
// The type of data being handled. Referenced by InputConfig.data_type, where
// it must be specified when data is imported.
//
// Numbers 3 and 5 are not assigned in this file; check the API history
// before giving them to new values.
enum DataType {
  // Default value; no data type has been specified.
  DATA_TYPE_UNSPECIFIED = 0;

  // Image data.
  IMAGE = 1;

  // Video data.
  VIDEO = 2;

  // Text data.
  TEXT = 4;

  // General data.
  GENERAL_DATA = 6;
}
// A Dataset is the resource that holds your data. Multiple labeling tasks can
// be requested for one dataset, and each task generates an AnnotatedDataset.
message Dataset {
  // Output only. Resource name of the dataset, in the format:
  // projects/{project_id}/datasets/{dataset_id}
  string name = 1;

  // Required. Display name of the dataset. At most 64 characters.
  string display_name = 2;

  // Optional. User-provided description, up to 10000 characters long.
  // NOTE(review): the earlier wording said this describes "the annotation
  // specification set"; presumably it describes this dataset -- confirm.
  string description = 3;

  // Output only. Time at which the dataset was created.
  google.protobuf.Timestamp create_time = 4;

  // Output only. The original input configs that were supplied when
  // ImportData was called. Available only after clients have imported data
  // into this dataset.
  repeated InputConfig input_configs = 5;

  // Output only. Names of any related resources that are blocking changes to
  // the dataset.
  repeated string blocking_resources = 6;

  // Output only. Number of data items in the dataset.
  int64 data_item_count = 7;
}
// Configuration of input data: its data type, its location, and so on.
message InputConfig {
  // Optional. Metadata associated with the specific data type.
  oneof data_type_metadata {
    // Required for text import, because a language code must be specified.
    TextMetadata text_metadata = 6;
  }

  // Required. Where the data comes from.
  oneof source {
    // Source located in Cloud Storage.
    GcsSource gcs_source = 2;

    // Source located in BigQuery.
    BigQuerySource bigquery_source = 5;
  }

  // Required. The data type must be specified when a user imports data.
  DataType data_type = 1;

  // Optional. If the input contains annotations, the user needs to specify
  // the type and metadata of the annotation when creating it as an annotated
  // dataset.
  AnnotationType annotation_type = 3;

  // Optional. Metadata about annotations in the input. Each annotation type
  // may have different metadata; this field holds the metadata for
  // classification problems.
  ClassificationMetadata classification_metadata = 4;
}
// Metadata describing text data.
message TextMetadata {
  // Language of the text, given as a
  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language code.
  // The default value is en-US.
  string language_code = 1;
}
// Metadata describing classification annotations.
message ClassificationMetadata {
  // True if the classification task is multi-label, false otherwise.
  bool is_multi_label = 1;
}
// A Cloud Storage file to import data from.
message GcsSource {
  // Required. Input URI of the source file. Must be a Cloud Storage path
  // (`gs://...`).
  string input_uri = 1;

  // Required. Format of the source file. "text/csv" is the only supported
  // value.
  string mime_type = 2;
}
// The BigQuery location of the input content.
message BigQuerySource {
  // Required. BigQuery URI of a table, up to 2000 characters long.
  // Accepted form: a BigQuery path, e.g.
  // bq://projectId.bqDatasetId.bqTableId
  string input_uri = 1;
}
// Configuration of output data.
message OutputConfig {
  // Required. The location that data is written to.
  oneof destination {
    // Write to a file in Cloud Storage. Use this for any labeling output
    // other than image segmentation.
    GcsDestination gcs_destination = 1;

    // Write to a folder in Cloud Storage. Use this for image segmentation
    // labeling output.
    GcsFolderDestination gcs_folder_destination = 2;
  }
}
// Export destination of the data. Only a gcs path is allowed in output_uri.
message GcsDestination {
  // Required. Output URI of the destination file.
  string output_uri = 1;

  // Required. Format of the gcs destination. Only "text/csv" and
  // "application/json" are supported.
  string mime_type = 2;
}
// Export destination of the data when the target is a folder.
message GcsFolderDestination {
  // Required. The Cloud Storage directory that data is exported to.
  string output_folder_uri = 1;
}
// A DataItem is a single piece of data with no annotation attached, such as
// an image.
message DataItem {
  // Output only. The content of the data item.
  oneof payload {
    // Image payload: a container for the image bytes/uri.
    ImagePayload image_payload = 2;

    // Text payload: a container for the text content.
    TextPayload text_payload = 3;

    // Video payload: a container for the video uri.
    VideoPayload video_payload = 4;
  }

  // Output only. Name of the data item, in the format:
  // projects/{project_id}/datasets/{dataset_id}/dataItems/{data_item_id}
  string name = 1;
}
// An AnnotatedDataset holds the annotations for the data in a Dataset. Every
// labeling task generates one AnnotatedDataset under the Dataset that the
// task was requested for.
message AnnotatedDataset {
  // Output only. Resource name of the AnnotatedDataset, in the format:
  // projects/{project_id}/datasets/{dataset_id}/annotatedDatasets/
  // {annotated_dataset_id}
  string name = 1;

  // Output only. Display name of the AnnotatedDataset. It is specified in
  // HumanAnnotationConfig when the user starts a labeling task. At most 64
  // characters.
  string display_name = 2;

  // Output only. Description of the AnnotatedDataset. It is specified in
  // HumanAnnotationConfig when the user starts a labeling task. At most
  // 10000 characters.
  string description = 9;

  // Output only. Source of the annotation.
  AnnotationSource annotation_source = 3;

  // Output only. Type of the annotation. It is specified when the labeling
  // task is started.
  AnnotationType annotation_type = 8;

  // Output only. Number of examples in the annotated dataset.
  int64 example_count = 4;

  // Output only. Number of examples in the annotated dataset that have an
  // annotation.
  int64 completed_example_count = 5;

  // Output only. Statistics broken down per label.
  LabelStats label_stats = 6;

  // Output only. Time at which the AnnotatedDataset was created.
  google.protobuf.Timestamp create_time = 7;

  // Output only. Additional information about the AnnotatedDataset.
  AnnotatedDatasetMetadata metadata = 10;

  // Output only. Names of any related resources that are blocking changes to
  // the annotated dataset.
  repeated string blocking_resources = 11;
}
// Statistics about annotation specs.
message LabelStats {
  // Example count for each annotation spec. The key is the annotation spec
  // name and the value is the number of examples for that annotation spec.
  // If the annotated dataset has no annotation spec, the map holds a single
  // pair whose key is the empty string and whose value is the total number
  // of annotations.
  map<string, int64> example_count = 1;
}
// Metadata attached to an AnnotatedDataset.
message AnnotatedDatasetMetadata {
  // The specific request configuration that was used when the labeling task
  // was requested.
  oneof annotation_request_config {
    // Configuration for an image classification task.
    ImageClassificationConfig image_classification_config = 2;

    // Configuration for an image bounding box and bounding poly task.
    BoundingPolyConfig bounding_poly_config = 3;

    // Configuration for an image polyline task.
    PolylineConfig polyline_config = 4;

    // Configuration for an image segmentation task.
    SegmentationConfig segmentation_config = 5;

    // Configuration for a video classification task.
    VideoClassificationConfig video_classification_config = 6;

    // Configuration for a video object detection task.
    ObjectDetectionConfig object_detection_config = 7;

    // Configuration for a video object tracking task.
    ObjectTrackingConfig object_tracking_config = 8;

    // Configuration for a video event labeling task.
    EventConfig event_config = 9;

    // Configuration for a text classification task.
    TextClassificationConfig text_classification_config = 10;

    // Configuration for a text entity extraction task.
    TextEntityExtractionConfig text_entity_extraction_config = 11;
  }

  // The HumanAnnotationConfig that was used when the human labeling task for
  // this AnnotatedDataset was requested.
  HumanAnnotationConfig human_annotation_config = 1;
}
// An Example pairs a piece of data with its annotation, such as an image
// carrying the label "house".
message Example {
  // Output only. The data part of the Example.
  oneof payload {
    // Image payload: a container for the image bytes/uri.
    ImagePayload image_payload = 2;

    // Text payload: a container for the text content.
    TextPayload text_payload = 6;

    // Video payload: a container for the video uri.
    VideoPayload video_payload = 7;
  }

  // Output only. Name of the example, in the format:
  // projects/{project_id}/datasets/{dataset_id}/annotatedDatasets/
  // {annotated_dataset_id}/examples/{example_id}
  string name = 1;

  // Output only. Annotations for the piece of data in this Example. A single
  // piece of data can have multiple annotations.
  repeated Annotation annotations = 5;
}