This repository has been archived by the owner on Nov 9, 2023. It is now read-only.
/
document_processor_service.proto
402 lines (322 loc) · 13.1 KB
/
document_processor_service.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.documentai.v1beta3;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/documentai/v1beta3/document.proto";
import "google/cloud/documentai/v1beta3/document_io.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
// Per-language code generation settings.

option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta3;documentai";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiProcessorService";
option java_package = "com.google.cloud.documentai.v1beta3";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";

// (-- aip.dev/not-precedent: The resource types below are referenced from the
//     public API, but managing them is not part of the public API (UI access
//     only). They are defined here to avoid any "unable to find resources"
//     error. --)
option (google.api.resource_definition) = {
  type: "documentai.googleapis.com/Location"
  pattern: "projects/{project}/locations/{location}"
};
option (google.api.resource_definition) = {
  type: "documentai.googleapis.com/Processor"
  pattern: "projects/{project}/locations/{location}/processors/{processor}"
};
option (google.api.resource_definition) = {
  type: "documentai.googleapis.com/HumanReviewConfig"
  pattern: "projects/{project}/locations/{location}/processors/{processor}/humanReviewConfig"
};
// Cloud DocumentAI service that processes documents according to a
// processor's definition. Processors are built using state-of-the-art Google
// AI such as natural language, computer vision, and translation to extract
// structured information from unstructured or semi-structured documents.
service DocumentProcessorService {
  option (google.api.default_host) = "documentai.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Processes a single document.
  rpc ProcessDocument(ProcessRequest) returns (ProcessResponse) {
    option (google.api.http) = {
      post: "/v1beta3/{name=projects/*/locations/*/processors/*}:process"
      body: "*"
    };
    option (google.api.method_signature) = "name";
  }

  // Long-running operation (LRO) endpoint that batch processes many
  // documents. The output is written to Cloud Storage as JSON in the
  // [Document] format.
  rpc BatchProcessDocuments(BatchProcessRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta3/{name=projects/*/locations/*/processors/*}:batchProcess"
      body: "*"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "BatchProcessResponse"
      metadata_type: "BatchProcessMetadata"
    };
  }

  // Sends a document for Human Review. The input document should be
  // processed by the specified processor.
  rpc ReviewDocument(ReviewDocumentRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta3/{human_review_config=projects/*/locations/*/processors/*/humanReviewConfig}:reviewDocument"
      body: "*"
    };
    option (google.api.method_signature) = "human_review_config";
    option (google.longrunning.operation_info) = {
      response_type: "ReviewDocumentResponse"
      metadata_type: "ReviewDocumentOperationMetadata"
    };
  }
}
// Request message for the process document method.
message ProcessRequest {
  // The document payload. As a oneof, at most one of the members below can
  // be set.
  oneof source {
    // An inline document proto.
    Document inline_document = 4;

    // A raw document content (bytes).
    RawDocument raw_document = 5;
  }

  // Required. The processor resource name.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/Processor"
    }
  ];

  // Deprecated. The document payload; the [content] and [mime_type] fields
  // must be set. The `source` oneof in this message also carries the
  // document payload.
  Document document = 2 [deprecated = true];

  // Whether the Human Review feature should be skipped for this request.
  // Defaults to false.
  bool skip_human_review = 3;
}
// The status of human review on a processed document.
message HumanReviewStatus {
  // The final state of human review on a processed document.
  enum State {
    // Human review state is unspecified. Most likely due to an internal error.
    STATE_UNSPECIFIED = 0;

    // Human review is skipped for the document. This can happen because human
    // review is not enabled on the processor or the processing request has
    // been set to skip this document.
    SKIPPED = 1;

    // Human review validation is triggered and passed, so no review is needed.
    VALIDATION_PASSED = 2;

    // Human review validation is triggered and the document is under review.
    IN_PROGRESS = 3;

    // Some error happened while triggering human review; see the
    // [state_message] for details.
    ERROR = 4;
  }

  // The state of human review on the processing request.
  State state = 1;

  // A message providing more details about the human review state.
  string state_message = 2;

  // The name of the operation triggered by the processed document. This field
  // is populated only when the [state] is [IN_PROGRESS]. It has the same
  // response type and metadata as the long running operation returned by the
  // [ReviewDocument] method.
  string human_review_operation = 3;
}
// Response message for the process document method.
message ProcessResponse {
  // The document payload. Fields are populated based on the processor's
  // behavior.
  Document document = 1;

  // Deprecated. The name of the operation triggered by the processed
  // document. If the human review process is not triggered, this field will
  // be empty. It has the same response type and metadata as the long running
  // operation returned by the ReviewDocument method.
  // [human_review_status] also exposes a `human_review_operation` field.
  string human_review_operation = 2 [deprecated = true];

  // The status of human review on the processed document.
  HumanReviewStatus human_review_status = 3;
}
// Request message for batch process document method.
message BatchProcessRequest {
  // Deprecated. The message for input config in batch process.
  message BatchInputConfig {
    option deprecated = true;

    // The Cloud Storage location used as the source of the document.
    string gcs_source = 1;

    // Mimetype of the input. For a raw document, the supported mimetypes are
    // application/pdf, image/tiff, and image/gif. For a [Document] proto, the
    // type should be application/json.
    string mime_type = 2;
  }

  // Deprecated. The message for output config in batch process.
  message BatchOutputConfig {
    option deprecated = true;

    // The output Cloud Storage directory in which to put the processed
    // documents.
    string gcs_destination = 1;
  }

  // Required. The processor resource name.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/Processor"
    }
  ];

  // Deprecated. The input config for each single document in the batch
  // process.
  repeated BatchInputConfig input_configs = 2 [deprecated = true];

  // Deprecated. The overall output config for batch process.
  BatchOutputConfig output_config = 3 [deprecated = true];

  // The input documents for batch process.
  BatchDocumentsInputConfig input_documents = 5;

  // The overall output config for batch process.
  DocumentOutputConfig document_output_config = 6;

  // Whether the Human Review feature should be skipped for this request.
  // Defaults to false.
  bool skip_human_review = 4;
}
// Response message for batch process document method. It currently declares
// no fields.
message BatchProcessResponse {}
// The long running operation metadata for batch process method.
message BatchProcessMetadata {
  // The status of each individual document in the batch process.
  message IndividualProcessStatus {
    // The source of the document, matching the source given in the request
    // when the batch process started. The batch process starts by taking a
    // snapshot of that document, since a user can move or change that
    // document during the process.
    string input_gcs_source = 1;

    // The status of the processing of the document.
    google.rpc.Status status = 2;

    // The output destination of the processed document (corresponding to the
    // output destination given in the request) if processing was successful,
    // otherwise empty.
    string output_gcs_destination = 3;

    // Deprecated. The name of the operation triggered by the processed
    // document. If the human review process is not triggered, this field will
    // be empty. It has the same response type and metadata as the long
    // running operation returned by the ReviewDocument method.
    // [human_review_status] also exposes a `human_review_operation` field.
    string human_review_operation = 4 [deprecated = true];

    // The status of human review on the processed document.
    HumanReviewStatus human_review_status = 5;
  }

  // Possible states of the batch processing operation.
  enum State {
    // The default value. This value is used if the state is omitted.
    STATE_UNSPECIFIED = 0;

    // Request operation is waiting for scheduling.
    WAITING = 1;

    // Request is being processed.
    RUNNING = 2;

    // The batch processing completed successfully.
    SUCCEEDED = 3;

    // The batch processing is being cancelled.
    CANCELLING = 4;

    // The batch processing was cancelled.
    CANCELLED = 5;

    // The batch processing has failed.
    FAILED = 6;
  }

  // The state of the current batch processing.
  State state = 1;

  // A message providing more details about the current state of processing.
  // For example, the error message if the operation has failed.
  string state_message = 2;

  // The creation time of the operation.
  google.protobuf.Timestamp create_time = 3;

  // The last update time of the operation.
  google.protobuf.Timestamp update_time = 4;

  // The list of response details of each document.
  repeated IndividualProcessStatus individual_process_statuses = 5;
}
// Request message for review document method.
message ReviewDocumentRequest {
  // The document payload. This oneof currently declares a single member.
  oneof source {
    // An inline document proto.
    Document inline_document = 4;
  }

  // Required. The resource name of the HumanReviewConfig that the document
  // will be reviewed with.
  string human_review_config = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/HumanReviewConfig"
    }
  ];

  // Deprecated. The document that needs human review. The `source` oneof in
  // this message also carries the document payload.
  Document document = 2 [deprecated = true];
}
// Response message for review document method.
message ReviewDocumentResponse {
  // The Cloud Storage URI of the human reviewed document.
  string gcs_destination = 1;
}
// The long running operation metadata for review document method.
message ReviewDocumentOperationMetadata {
  // State of the long-running operation.
  enum State {
    // Unspecified state.
    STATE_UNSPECIFIED = 0;

    // Operation is still running.
    RUNNING = 1;

    // Operation is being cancelled.
    CANCELLING = 2;

    // Operation succeeded.
    SUCCEEDED = 3;

    // Operation failed.
    FAILED = 4;

    // Operation is cancelled.
    CANCELLED = 5;
  }

  // Used only when Operation.done is false.
  State state = 1;

  // A message providing more details about the current state of processing.
  // For example, the error message if the operation has failed.
  string state_message = 2;

  // The creation time of the operation.
  google.protobuf.Timestamp create_time = 3;

  // The last update time of the operation.
  google.protobuf.Timestamp update_time = 4;

  // The basic metadata of the long running operation. Its message type
  // declares `state`, `state_message`, `create_time` and `update_time`
  // fields of its own.
  CommonOperationMetadata common_metadata = 5;
}
// The common metadata for long running operations.
message CommonOperationMetadata {
  // State of the long-running operation.
  enum State {
    // Unspecified state.
    STATE_UNSPECIFIED = 0;

    // Operation is still running.
    RUNNING = 1;

    // Operation is being cancelled.
    CANCELLING = 2;

    // Operation succeeded.
    SUCCEEDED = 3;

    // Operation failed.
    FAILED = 4;

    // Operation is cancelled.
    CANCELLED = 5;
  }

  // The state of the operation.
  State state = 1;

  // A message providing more details about the current state of processing.
  string state_message = 2;

  // The creation time of the operation.
  google.protobuf.Timestamp create_time = 3;

  // The last update time of the operation.
  google.protobuf.Timestamp update_time = 4;
}