feat: introduces configuration, e.g., dimensions, for doc prediction

googleapis · Aug 26, 2019 · 9c8ce64 · 9c8ce64
1 parent 01aa9d6
commit 9c8ce64
Show file tree

Hide file tree

Showing 9 changed files with 589 additions and 93 deletions.
diff --git a/protos/google/cloud/automl/v1beta1/data_items.proto b/protos/google/cloud/automl/v1beta1/data_items.proto
@@ -17,11 +17,13 @@ syntax = "proto3";
 
 package google.cloud.automl.v1beta1;
 
+import "google/api/annotations.proto";
+import "google/cloud/automl/v1beta1/geometry.proto";
 import "google/cloud/automl/v1beta1/io.proto";
+import "google/cloud/automl/v1beta1/text_segment.proto";
 import "google/protobuf/any.proto";
 import "google/protobuf/duration.proto";
 import "google/protobuf/struct.proto";
-import "google/api/annotations.proto";
 
 option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";
 option java_multiple_files = true;
@@ -56,19 +58,135 @@ message TextSnippet {
   // characters long.
   string content = 1;
 
-  // Optional. The format of [content][google.cloud.automl.v1beta1.TextSnippet.content]. Currently the only two allowed
-  // values are "text/html" and "text/plain". If left blank, the format is
-  // automatically determined from the type of the uploaded [content][google.cloud.automl.v1beta1.TextSnippet.content].
+  // Optional. The format of
+  // [content][google.cloud.automl.v1beta1.TextSnippet.content]. Currently the
+  // only two allowed values are "text/html" and "text/plain". If left blank,
+  // the format is automatically determined from the type of the uploaded
+  // [content][google.cloud.automl.v1beta1.TextSnippet.content].
   string mime_type = 2;
 
   // Output only. HTTP URI where you can download the content.
   string content_uri = 4;
 }
 
+// Describes the width and height of a document, in a given unit.
+message DocumentDimensions {
+  // Unit in which a document's dimensions are measured.
+  enum DocumentDimensionUnit {
+    // Default value; should not be used.
+    DOCUMENT_DIMENSION_UNIT_UNSPECIFIED = 0;
+
+    // Document dimensions are measured in inches.
+    INCH = 1;
+
+    // Document dimensions are measured in centimeters.
+    CENTIMETER = 2;
+
+    // Document dimensions are measured in points. 72 points = 1 inch.
+    POINT = 3;
+  }
+
+  // Unit in which `width` and `height` are expressed.
+  DocumentDimensionUnit unit = 1;
+
+  // Width of the document, expressed in `unit`.
+  float width = 2;
+
+  // Height of the document, expressed in `unit`.
+  float height = 3;
+}
+
 // A structured text document e.g. a PDF.
 message Document {
+  // Describes the layout information of a
+  // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment] in
+  // the document.
+  message Layout {
+    // The type of TextSegment in the context of the original document.
+    enum TextSegmentType {
+      // Default value; should not be used.
+      TEXT_SEGMENT_TYPE_UNSPECIFIED = 0;
+
+      // The text segment is a token, e.g. a word.
+      TOKEN = 1;
+
+      // The text segment is a paragraph.
+      PARAGRAPH = 2;
+
+      // The text segment is a form field.
+      FORM_FIELD = 3;
+
+      // The text segment is the name part of a form field. It is treated as
+      // a child of a FORM_FIELD TextSegment if its span is a subspan of that
+      // TextSegment's span.
+      FORM_FIELD_NAME = 4;
+
+      // The text segment is the text content part of a form field. It is
+      // treated as a child of a FORM_FIELD TextSegment if its span is a
+      // subspan of that TextSegment's span.
+      FORM_FIELD_CONTENTS = 5;
+
+      // The text segment is a whole table, including headers and all rows.
+      TABLE = 6;
+
+      // The text segment is a table's headers. It is treated as a child of a
+      // TABLE TextSegment if its span is a subspan of that TextSegment's
+      // span.
+      TABLE_HEADER = 7;
+
+      // The text segment is a row in a table. It is treated as a child of a
+      // TABLE TextSegment if its span is a subspan of that TextSegment's
+      // span.
+      TABLE_ROW = 8;
+
+      // The text segment is a cell in a table. It is treated as a child of a
+      // TABLE_ROW TextSegment if its span is a subspan of that TextSegment's
+      // span.
+      TABLE_CELL = 9;
+    }
+
+    // Text Segment that represents a segment in
+    // [document_text][google.cloud.automl.v1beta1.Document.document_text].
+    TextSegment text_segment = 1;
+
+    // Page number of the
+    // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
+    // in the original document. Page numbers start from 1.
+    int32 page_number = 2;
+
+    // The position of the
+    // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
+    // in the page. Contains exactly 4
+    // [normalized_vertices][google.cloud.automl.v1beta1.BoundingPoly.normalized_vertices]
+    // which are connected by edges in the order provided and represent a
+    // rectangle parallel to the frame. The
+    // [NormalizedVertex-s][google.cloud.automl.v1beta1.NormalizedVertex] are
+    // relative to the page.
+    //
+    // Coordinates are based on top-left as point (0,0).
+    BoundingPoly bounding_poly = 3;
+
+    // The type of the
+    // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
+    // in document.
+    TextSegmentType text_segment_type = 4;
+  }
+
   // An input config specifying the content of the document.
   DocumentInputConfig input_config = 1;
+
+  // The plain text version of this document.
+  TextSnippet document_text = 2;
+
+  // Describes the layout of the document. Sorted by
+  // [page_number][google.cloud.automl.v1beta1.Document.Layout.page_number].
+  repeated Layout layout = 3;
+
+  // The dimensions of the page in the document.
+  DocumentDimensions document_dimensions = 4;
+
+  // Number of pages in the document.
+  int32 page_count = 5;
 }
 
 // A representation of a row in a relational table.

diff --git a/protos/google/cloud/automl/v1beta1/io.proto b/protos/google/cloud/automl/v1beta1/io.proto
@@ -140,20 +140,25 @@ option ruby_package = "Google::Cloud::AutoML::V1beta1";
 //         CSV file(s) with each line in format:
 //           ML_USE,GCS_FILE_PATH
 //           GCS_FILE_PATH leads to a .JSONL (that is, JSON Lines) file which
-//           either imports text in-line or as documents.
+//           either imports text in-line or as documents. Any given
+//           .JSONL file must be 100MB or smaller.
 //           The in-line .JSONL file contains, per line, a proto that wraps a
 //           TextSnippet proto (in json representation) followed by one or more
 //           AnnotationPayload protos (called annotations), which have
 //           display_name and text_extraction detail populated. The given text
 //           is expected to be annotated exhaustively, for example, if you look
 //           for animals and text contains "dolphin" that is not labeled, then
 //           "dolphin" is assumed to not be an animal. Any given text snippet
-//           content must have 30,000 characters or less,  and also be UTF-8 NFC
-//           encoded (ASCII already is).           The document .JSONL file contains, per line, a proto that wraps a
-//           Document proto with input_config set. Only PDF documents are
-//           supported now, and each document may be up to 2MB large. Currently
-//           annotations on documents cannot be specified at import. Any given
-//           .JSONL file must be 100MB or smaller.
+//           content must be 10KB or smaller, and also be UTF-8 NFC encoded
+//           (ASCII already is).
+//           The document .JSONL file contains, per line, a proto that wraps a
+//           Document proto. The Document proto must have either document_text
+//           or input_config set. In document_text case, the Document proto may
+//           also contain the spatial information of the document, including
+//           layout, document dimension and page number. In input_config case,
+//           only PDF documents are supported now, and each document may be up
+//           to 2MB in size. Currently, annotations on documents cannot be
+//           specified at import.
 //         Three sample CSV rows:
 //           TRAIN,gs://folder/file1.jsonl
 //           VALIDATE,gs://folder/file2.jsonl
@@ -162,27 +167,61 @@ option ruby_package = "Google::Cloud::AutoML::V1beta1";
 //         with artificial line breaks, but the only actual line break is
 //         denoted by \n).:
 //           {
-//             "text_snippet": {
-//               "content": "dog car cat"
-//             }             "annotations": [
-//               {
-//                 "display_name": "animal",
-//                 "text_extraction": {
-//                   "text_segment": {"start_offset": 0, "end_offset": 3}
+//             "document": {
+//               "document_text": {"content": "dog cat"},
+//               "layout": [
+//                 {
+//                   "text_segment": {
+//                     "start_offset": 0,
+//                     "end_offset": 3,
+//                   },
+//                   "page_number": 1,
+//                   "bounding_poly": {
+//                     "normalized_vertices": [
+//                       {"x": 0.1, "y": 0.1},
+//                       {"x": 0.1, "y": 0.3},
+//                       {"x": 0.3, "y": 0.3},
+//                       {"x": 0.3, "y": 0.1},
+//                     ],
+//                   },
+//                   "text_segment_type": "TOKEN",
+//                 },
+//                 {
+//                   "text_segment": {
+//                     "start_offset": 4,
+//                     "end_offset": 7,
+//                   },
+//                   "page_number": 1,
+//                   "bounding_poly": {
+//                     "normalized_vertices": [
+//                       {"x": 0.4, "y": 0.1},
+//                       {"x": 0.4, "y": 0.3},
+//                       {"x": 0.8, "y": 0.3},
+//                       {"x": 0.8, "y": 0.1},
+//                     ],
+//                   },
+//                   "text_segment_type": "TOKEN",
 //                 }
-//               },
+//
+//               ],
+//               "document_dimensions": {
+//                 "width": 8.27,
+//                 "height": 11.69,
+//                 "unit": "INCH",
+//               },
+//               "page_count": 1,
+//             },
+//             "annotations": [
 //               {
-//                 "display_name": "vehicle",
-//                 "text_extraction": {
-//                   "text_segment": {"start_offset": 4, "end_offset": 7}
-//                 }
+//                 "display_name": "animal",
+//                 "text_extraction": {"text_segment": {"start_offset": 0,
+//                 "end_offset": 3}}
 //               },
 //               {
 //                 "display_name": "animal",
-//                 "text_extraction": {
-//                   "text_segment": {"start_offset": 8, "end_offset": 11}
-//                 }
-//               },
+//                 "text_extraction": {"text_segment": {"start_offset": 4,
+//                 "end_offset": 7}}
+//               }
 //             ],
 //           }\n
 //           {

diff --git a/protos/google/cloud/automl/v1beta1/prediction_service.proto b/protos/google/cloud/automl/v1beta1/prediction_service.proto
@@ -18,12 +18,12 @@ syntax = "proto3";
 package google.cloud.automl.v1beta1;
 
 import "google/api/annotations.proto";
+import "google/api/client.proto";
 import "google/cloud/automl/v1beta1/annotation_payload.proto";
 import "google/cloud/automl/v1beta1/data_items.proto";
 import "google/cloud/automl/v1beta1/io.proto";
 import "google/cloud/automl/v1beta1/operations.proto";
 import "google/longrunning/operations.proto";
-import "google/api/client.proto";
 
 option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";
 option java_multiple_files = true;
@@ -38,7 +38,8 @@ option ruby_package = "Google::Cloud::AutoML::V1beta1";
 // snake_case or kebab-case, either of those cases is accepted.
 service PredictionService {
   option (google.api.default_host) = "automl.googleapis.com";
-  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
+  option (google.api.oauth_scopes) =
+      "https://www.googleapis.com/auth/cloud-platform";
 
   // Perform an online prediction. The prediction result will be directly
   // returned in the response.
@@ -66,12 +67,14 @@ service PredictionService {
     };
   }
 
-  // Perform a batch prediction. Unlike the online [Predict][google.cloud.automl.v1beta1.PredictionService.Predict], batch
+  // Perform a batch prediction. Unlike the online
+  // [Predict][google.cloud.automl.v1beta1.PredictionService.Predict], batch
   // prediction result won't be immediately available in the response. Instead,
   // a long running operation object is returned. User can poll the operation
   // result via [GetOperation][google.longrunning.Operations.GetOperation]
-  // method. Once the operation is done, [BatchPredictResult][google.cloud.automl.v1beta1.BatchPredictResult] is returned in
-  // the [response][google.longrunning.Operation.response] field.
+  // method. Once the operation is done,
+  // [BatchPredictResult][google.cloud.automl.v1beta1.BatchPredictResult] is
+  // returned in the [response][google.longrunning.Operation.response] field.
   // Available for following ML problems:
   // * Image Classification
   // * Image Object Detection
@@ -86,7 +89,8 @@ service PredictionService {
   }
 }
 
-// Request message for [PredictionService.Predict][google.cloud.automl.v1beta1.PredictionService.Predict].
+// Request message for
+// [PredictionService.Predict][google.cloud.automl.v1beta1.PredictionService.Predict].
 message PredictRequest {
   // Name of the model requested to serve the prediction.
   string name = 1;
@@ -122,12 +126,20 @@ message PredictRequest {
   map<string, string> params = 3;
 }
 
-// Response message for [PredictionService.Predict][google.cloud.automl.v1beta1.PredictionService.Predict].
+// Response message for
+// [PredictionService.Predict][google.cloud.automl.v1beta1.PredictionService.Predict].
 message PredictResponse {
   // Prediction result.
   // Translation and Text Sentiment will return precisely one payload.
   repeated AnnotationPayload payload = 1;
 
+  // The preprocessed example that AutoML actually makes the prediction on.
+  // Empty if AutoML does not preprocess the input example.
+  // * For Text Extraction:
+  //   If the input is a .pdf file, the OCR'ed text will be provided in
+  //   [document_text][google.cloud.automl.v1beta1.Document.document_text].
+  ExamplePayload preprocessed_input = 3;
+
   // Additional domain-specific prediction response metadata.
   //
   // * For Image Object Detection:
@@ -146,7 +158,8 @@ message PredictResponse {
   map<string, string> metadata = 2;
 }
 
-// Request message for [PredictionService.BatchPredict][google.cloud.automl.v1beta1.PredictionService.BatchPredict].
+// Request message for
+// [PredictionService.BatchPredict][google.cloud.automl.v1beta1.PredictionService.BatchPredict].
 message BatchPredictRequest {
   // Name of the model requested to serve the batch prediction.
   string name = 1;
@@ -226,7 +239,8 @@ message BatchPredictRequest {
 
 // Result of the Batch Predict. This message is returned in
 // [response][google.longrunning.Operation.response] of the operation returned
-// by the [PredictionService.BatchPredict][google.cloud.automl.v1beta1.PredictionService.BatchPredict].
+// by the
+// [PredictionService.BatchPredict][google.cloud.automl.v1beta1.PredictionService.BatchPredict].
 message BatchPredictResult {
   // Additional domain-specific prediction response metadata.
   //