Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: ProtoSchemaConver's problem when converting fields reference same… #428

Merged
merged 2 commits into from Jul 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -19,6 +19,7 @@
import com.google.api.gax.rpc.InvalidArgumentException;
import com.google.cloud.bigquery.storage.v1alpha2.ProtoBufProto.ProtoSchema;
import com.google.protobuf.DescriptorProtos.DescriptorProto;
import com.google.protobuf.DescriptorProtos.EnumDescriptorProto;
import com.google.protobuf.DescriptorProtos.FieldDescriptorProto;
import com.google.protobuf.Descriptors.Descriptor;
import com.google.protobuf.Descriptors.FieldDescriptor;
Expand All @@ -29,51 +30,95 @@
// protobuf::DescriptorProto
// that can be reconstructed by the backend.
public class ProtoSchemaConverter {
private static class StructName {
public String getName() {
count++;
return count == 1 ? "__ROOT__" : "__S" + count;
}

private int count = 0;
}

private static ProtoSchema convertInternal(
Descriptor input, List<String> visitedTypes, StructName structName) {
Descriptor input,
Set<String> visitedTypes,
Set<String> enumTypes,
Set<String> structTypes,
DescriptorProto.Builder rootProtoSchema) {
DescriptorProto.Builder resultProto = DescriptorProto.newBuilder();
resultProto.setName(structName.getName());
if (rootProtoSchema == null) {
rootProtoSchema = resultProto;
}
String protoName = input.getFullName();
protoName = protoName.replace('.', '_');
resultProto.setName(protoName);
Set<String> localEnumTypes = new HashSet<String>();
visitedTypes.add(input.getFullName());
yirutang marked this conversation as resolved.
Show resolved Hide resolved
for (int i = 0; i < input.getFields().size(); i++) {
FieldDescriptor inputField = input.getFields().get(i);
FieldDescriptorProto.Builder resultField = inputField.toProto().toBuilder();
if (inputField.getType() == FieldDescriptor.Type.GROUP
|| inputField.getType() == FieldDescriptor.Type.MESSAGE) {
if (visitedTypes.contains(inputField.getMessageType().getFullName())) {
throw new InvalidArgumentException(
"Recursive type is not supported:" + inputField.getMessageType().getFullName(),
null,
GrpcStatusCode.of(Status.Code.INVALID_ARGUMENT),
false);
String msgFullName = inputField.getMessageType().getFullName();
msgFullName = msgFullName.replace('.', '_');
if (structTypes.contains(msgFullName)) {
resultField.setTypeName(msgFullName);
} else {
if (visitedTypes.contains(msgFullName)) {
throw new InvalidArgumentException(
"Recursive type is not supported:" + inputField.getMessageType().getFullName(),
null,
GrpcStatusCode.of(Status.Code.INVALID_ARGUMENT),
false);
}
visitedTypes.add(msgFullName);
rootProtoSchema.addNestedType(
convertInternal(
inputField.getMessageType(),
visitedTypes,
enumTypes,
structTypes,
rootProtoSchema)
.getProtoDescriptor());
visitedTypes.remove(msgFullName);
resultField.setTypeName(
rootProtoSchema.getNestedType(rootProtoSchema.getNestedTypeCount() - 1).getName());
}
resultProto.addNestedType(
convertInternal(inputField.getMessageType(), visitedTypes, structName)
.getProtoDescriptor());
visitedTypes.remove(inputField.getMessageType().getFullName());
resultField.setTypeName(
resultProto.getNestedType(resultProto.getNestedTypeCount() - 1).getName());
}
if (inputField.getType() == FieldDescriptor.Type.ENUM) {
resultProto.addEnumType(inputField.getEnumType().toProto());
resultField.setTypeName(
resultProto.getEnumType(resultProto.getEnumTypeCount() - 1).getName());
String enumFullName = inputField.getEnumType().getFullName();
// If the enum is defined within the current message, we don't want to
// pull it out to the top since then the same enum values will not be
// allowed if the value collides with other enums.
if (enumFullName.startsWith(input.getFullName())) {
String enumName = inputField.getEnumType().getName();
if (localEnumTypes.contains(enumName)) {
resultField.setTypeName(enumName);
} else {
resultProto.addEnumType(inputField.getEnumType().toProto());
resultField.setTypeName(enumName);
localEnumTypes.add(enumName);
}
} else {
// If the enum is defined elsewhere, then redefine it at the top
// message scope. There is a problem that different enum values might
// be OK when living under its original scope, but when they all live
// in top scope, their values cannot collide. Say if thers A.Color with
// RED and B.Color with RED, if they are redefined here, the RED will
// collide with each other and thus not allowed.
enumFullName = enumFullName.replace('.', '_');
if (enumTypes.contains(enumFullName)) {
resultField.setTypeName(enumFullName);
} else {
EnumDescriptorProto enumType =
inputField.getEnumType().toProto().toBuilder().setName(enumFullName).build();
resultProto.addEnumType(enumType);
resultField.setTypeName(enumFullName);
enumTypes.add(enumFullName);
}
}
}
resultProto.addField(resultField);
}
structTypes.add(protoName);
return ProtoSchema.newBuilder().setProtoDescriptor(resultProto.build()).build();
}

public static ProtoSchema convert(Descriptor descriptor) {
ArrayList<String> visitedTypes = new ArrayList<String>();
return convertInternal(descriptor, visitedTypes, new StructName());
Set<String> visitedTypes = new HashSet<String>();
Set<String> enumTypes = new HashSet<String>();
Set<String> structTypes = new HashSet<String>();
return convertInternal(descriptor, visitedTypes, enumTypes, structTypes, null);
}
}
Expand Up @@ -17,6 +17,8 @@

import com.google.api.gax.rpc.InvalidArgumentException;
import com.google.cloud.bigquery.storage.test.Test.*;
import com.google.protobuf.DescriptorProtos.FileDescriptorProto;
import com.google.protobuf.Descriptors;
import org.junit.*;

public class ProtoSchemaConverterTest {
Expand All @@ -26,7 +28,7 @@ public void convertSimple() {
ProtoBufProto.ProtoSchema protoSchema =
ProtoSchemaConverter.convert(testProto.getDescriptorForType());
Assert.assertEquals(
"name: \"__ROOT__\"\n"
"name: \"com_google_cloud_bigquery_storage_test_AllSupportedTypes\"\n"
+ "field {\n"
+ " name: \"int32_value\"\n"
+ " number: 1\n"
Expand Down Expand Up @@ -74,7 +76,7 @@ public void convertSimple() {
+ " number: 8\n"
+ " label: LABEL_OPTIONAL\n"
+ " type: TYPE_ENUM\n"
+ " type_name: \"TestEnum\"\n"
+ " type_name: \"com_google_cloud_bigquery_storage_test_TestEnum\"\n"
+ "}\n"
+ "field {\n"
+ " name: \"string_value\"\n"
Expand All @@ -83,7 +85,7 @@ public void convertSimple() {
+ " type: TYPE_STRING\n"
+ "}\n"
+ "enum_type {\n"
+ " name: \"TestEnum\"\n"
+ " name: \"com_google_cloud_bigquery_storage_test_TestEnum\"\n"
+ " value {\n"
+ " name: \"TestEnum0\"\n"
+ " number: 0\n"
Expand All @@ -102,47 +104,38 @@ public void convertNested() {
ProtoBufProto.ProtoSchema protoSchema =
ProtoSchemaConverter.convert(testProto.getDescriptorForType());
Assert.assertEquals(
"name: \"__ROOT__\"\n"
"name: \"com_google_cloud_bigquery_storage_test_ComplicateType\"\n"
+ "field {\n"
+ " name: \"nested_repeated_type\"\n"
+ " number: 1\n"
+ " label: LABEL_REPEATED\n"
+ " type: TYPE_MESSAGE\n"
+ " type_name: \"__S2\"\n"
+ " type_name: \"com_google_cloud_bigquery_storage_test_NestedType\"\n"
+ "}\n"
+ "field {\n"
+ " name: \"inner_type\"\n"
+ " number: 2\n"
+ " label: LABEL_OPTIONAL\n"
+ " type: TYPE_MESSAGE\n"
+ " type_name: \"__S4\"\n"
+ " type_name: \"com_google_cloud_bigquery_storage_test_InnerType\"\n"
+ "}\n"
+ "nested_type {\n"
+ " name: \"__S2\"\n"
+ " name: \"com_google_cloud_bigquery_storage_test_InnerType\"\n"
+ " field {\n"
+ " name: \"inner_type\"\n"
+ " name: \"value\"\n"
+ " number: 1\n"
+ " label: LABEL_REPEATED\n"
+ " type: TYPE_MESSAGE\n"
+ " type_name: \"__S3\"\n"
+ " }\n"
+ " nested_type {\n"
+ " name: \"__S3\"\n"
+ " field {\n"
+ " name: \"value\"\n"
+ " number: 1\n"
+ " label: LABEL_REPEATED\n"
+ " type: TYPE_STRING\n"
+ " }\n"
+ " type: TYPE_STRING\n"
+ " }\n"
+ "}\n"
+ "nested_type {\n"
+ " name: \"__S4\"\n"
+ " name: \"com_google_cloud_bigquery_storage_test_NestedType\"\n"
+ " field {\n"
+ " name: \"value\"\n"
+ " name: \"inner_type\"\n"
+ " number: 1\n"
+ " label: LABEL_REPEATED\n"
+ " type: TYPE_STRING\n"
+ " type: TYPE_MESSAGE\n"
+ " type_name: \"com_google_cloud_bigquery_storage_test_InnerType\"\n"
+ " }\n"
+ "}\n",
protoSchema.getProtoDescriptor().toString());
Expand All @@ -156,7 +149,46 @@ public void convertRecursive() {
ProtoSchemaConverter.convert(testProto.getDescriptorForType());
Assert.fail("No exception raised");
} catch (InvalidArgumentException e) {
// Expected exception
Assert.assertEquals(
"Recursive type is not supported:com.google.cloud.bigquery.storage.test.ContainsRecursive",
e.getMessage());
}
}

@Test
public void convertRecursiveTopMessage() {
try {
RecursiveTypeTopMessage testProto = RecursiveTypeTopMessage.newBuilder().build();
ProtoBufProto.ProtoSchema protoSchema =
ProtoSchemaConverter.convert(testProto.getDescriptorForType());
Assert.fail("No exception raised");
} catch (InvalidArgumentException e) {
Assert.assertEquals(
"Recursive type is not supported:com.google.cloud.bigquery.storage.test.RecursiveTypeTopMessage",
e.getMessage());
}
}

@Test
public void convertDuplicateType() {
DuplicateType testProto = DuplicateType.newBuilder().build();
ProtoBufProto.ProtoSchema protoSchema =
ProtoSchemaConverter.convert(testProto.getDescriptorForType());

FileDescriptorProto fileDescriptorProto =
FileDescriptorProto.newBuilder()
.setName("foo.proto")
.addMessageType(protoSchema.getProtoDescriptor())
.build();
try {
Descriptors.FileDescriptor fs =
Descriptors.FileDescriptor.buildFrom(
fileDescriptorProto, new Descriptors.FileDescriptor[0]);
Descriptors.Descriptor type =
fs.findMessageTypeByName(protoSchema.getProtoDescriptor().getName());
Assert.assertEquals(4, type.getFields().size());
} catch (Descriptors.DescriptorValidationException ex) {
Assert.fail("Got unexpected exception: " + ex.getMessage());
}
}
}
11 changes: 11 additions & 0 deletions google-cloud-bigquerystorage/src/test/proto/test.proto
Expand Up @@ -55,6 +55,17 @@ message RecursiveType {
optional ContainsRecursive field = 2;
}

message RecursiveTypeTopMessage {
optional RecursiveTypeTopMessage field = 2;
}

message FooType {
optional string foo = 1;
}

message DuplicateType {
optional TestEnum f1 = 1;
optional TestEnum f2 = 2;
optional ComplicateType f3 = 3;
optional ComplicateType f4 = 4;
}