Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding translated.from annotation when using SchemaTranslator #970

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ project.ext.externalDependency = [
'parseq_restClient': 'com.linkedin.parseq:parseq-restli-client:5.1.2',
'parseq_testApi': 'com.linkedin.parseq:parseq-test-api:5.1.2',
'servletApi': 'javax.servlet:javax.servlet-api:3.1.0',
'skyScreamer': 'org.skyscreamer:jsonassert:1.5.1',
'slf4jApi': 'org.slf4j:slf4j-api:1.7.30',
'slf4jLog4j2': 'org.apache.logging.log4j:log4j-slf4j-impl:2.0.2',
'snappy': 'org.iq80.snappy:snappy:0.4',
Expand Down
1 change: 1 addition & 0 deletions data-avro-generator/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ dependencies {
compile project(':data')
compile project(':data-avro')
compile externalDependency.avro
compile externalDependency.skyScreamer
testCompile project(path: ':data', configuration: 'testArtifacts')
testCompile externalDependency.testng
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,17 @@
import java.util.Set;

import java.util.stream.Collectors;
import org.json.JSONException;
import org.json.JSONObject;
import org.skyscreamer.jsonassert.Customization;
import org.skyscreamer.jsonassert.JSONAssert;
import org.skyscreamer.jsonassert.JSONCompareMode;
import org.skyscreamer.jsonassert.JSONParser;
import org.skyscreamer.jsonassert.comparator.CustomComparator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static com.linkedin.data.avro.SchemaTranslator.AVRO_PREFIX;
import static com.linkedin.data.avro.SchemaTranslator.*;


/**
Expand Down Expand Up @@ -162,7 +169,11 @@ public static void run(String resolverPath,
{
targetDirectoryPath += "/" + AVRO_PREFIX;
}
generator.generate(targetDirectoryPath, sources);
try {
generator.generate(targetDirectoryPath, sources);
} catch (JSONException e) {
throw new RuntimeException(e);
}
}

/**
Expand All @@ -180,8 +191,7 @@ public DataToAvroSchemaTranslationOptions getDataToAvroSchemaTranslationOptions(
* @param targetDirectoryPath path to target root java source directory
* @throws IOException if there are problems opening or deleting files.
*/
private void generate(String targetDirectoryPath, String[] sources) throws IOException
{
private void generate(String targetDirectoryPath, String[] sources) throws IOException, JSONException {
initSchemaResolver();

_fileToAvroSchemaMap.clear();
Expand Down Expand Up @@ -242,8 +252,7 @@ protected void outputAvroSchemas(File targetDirectory) throws IOException
}
}

protected List<File> targetFiles(File targetDirectory)
{
protected List<File> targetFiles(File targetDirectory) throws JSONException {
ArrayList<File> generatedFiles = new ArrayList<>();

DataSchemaResolver resolver = getSchemaResolver();
Expand All @@ -267,7 +276,14 @@ protected List<File> targetFiles(File targetDirectory)
String avroSchemaText = SchemaTranslator.dataToAvroSchemaJson(recordDataSchema, _options);
_fileToAvroSchemaMap.put(generatedFile, avroSchemaText);
String postTranslateSchemaText = recordDataSchema.toString();
assert(preTranslateSchemaText.equals(postTranslateSchemaText));

// JSON compare except TRANSLATED_FROM_SOURCE_OPTION in root
JSONAssert.assertEquals(preTranslateSchemaText, postTranslateSchemaText,
new CustomComparator(JSONCompareMode.LENIENT,
new Customization(TRANSLATED_FROM_SOURCE_OPTION, (o1, o2) -> true)));

assert (((JSONObject) JSONParser.parseJSON(postTranslateSchemaText)).get(TRANSLATED_FROM_SOURCE_OPTION)
!= null);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,19 @@
import org.apache.avro.Schema;
import org.apache.avro.Schema.Parser;
import org.apache.commons.compress.utils.IOUtils;
import org.json.JSONException;
import org.skyscreamer.jsonassert.Customization;
import org.skyscreamer.jsonassert.JSONAssert;
import org.skyscreamer.jsonassert.JSONCompareMode;
import org.skyscreamer.jsonassert.comparator.CustomComparator;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import static com.linkedin.data.TestUtil.*;
import static com.linkedin.data.avro.SchemaTranslator.*;
import static com.linkedin.data.avro.generator.AvroSchemaGenerator.GENERATOR_AVRO_NAMESPACE_OVERRIDE;
import static com.linkedin.data.avro.SchemaTranslator.AVRO_PREFIX;
import static com.linkedin.data.schema.generator.AbstractGenerator.GENERATOR_RESOLVER_PATH;
import static com.linkedin.util.FileUtil.buildSystemIndependentPath;
import static org.testng.Assert.assertEquals;
Expand Down Expand Up @@ -175,8 +180,8 @@ public Object[][] toAvroSchemaData()
}

@Test(dataProvider = "toAvroSchemaData")
public void testFileNameAsArgs(Map<String, String> testSchemas, Map<String, String> expectedAvroSchemas, List<String> paths, boolean override) throws IOException
{
public void testFileNameAsArgs(Map<String, String> testSchemas, Map<String, String> expectedAvroSchemas, List<String> paths, boolean override)
throws IOException, JSONException {
Map<File, Map.Entry<String,String>> files = TestUtil.createSchemaFiles(_testDir, testSchemas, _debug);
// directory in path
Collection<String> testPaths = computePathFromRelativePaths(_testDir, paths);
Expand All @@ -193,8 +198,8 @@ public void testFileNameAsArgs(Map<String, String> testSchemas, Map<String, Stri
}

@Test(dataProvider = "toAvroSchemaData")
public void testFullNameAsArgsWithJarInPath(Map<String, String> testSchemas, Map<String, String> expectedAvroSchemas, List<String> paths, boolean override) throws IOException
{
public void testFullNameAsArgsWithJarInPath(Map<String, String> testSchemas, Map<String, String> expectedAvroSchemas, List<String> paths, boolean override)
throws IOException, JSONException {
Map<File, Map.Entry<String,String>> files = TestUtil.createSchemaFiles(_testDir, testSchemas, _debug);
// jar files in path, create jar files
Collection<String> testPaths = createJarsFromRelativePaths(_testDir, testSchemas, paths, _debug);
Expand Down Expand Up @@ -324,8 +329,8 @@ private File setup(Collection<String> paths, boolean override) throws IOExceptio
return targetDir;
}

private void run(String[] args, Map.Entry<File, Map.Entry<String, String>> entry, File targetDir, Map<String, String> expectedAvroSchemas) throws IOException
{
private void run(String[] args, Map.Entry<File, Map.Entry<String, String>> entry, File targetDir, Map<String, String> expectedAvroSchemas)
throws IOException, JSONException {
Exception exc = null;
try
{
Expand Down Expand Up @@ -359,7 +364,11 @@ private void run(String[] args, Map.Entry<File, Map.Entry<String, String>> entry
assertFalse(avroSchema.isError());
String avroSchemaText = avroSchema.toString();
if (_debug) out.println(avroSchemaText);
assertEquals(avroSchemaText, expectedAvroSchemas.get(pdscFileName));

// JSON compare except TRANSLATED_FROM_SOURCE_OPTION in root
JSONAssert.assertEquals(expectedAvroSchemas.get(pdscFileName), avroSchemaText,
new CustomComparator(JSONCompareMode.LENIENT,
new Customization(TRANSLATED_FROM_SOURCE_OPTION, (o1, o2) -> true)));
}
}
}
1 change: 1 addition & 0 deletions data-avro/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ dependencies {
compile externalDependency.avro
compile externalDependency.avroUtil
testCompile externalDependency.testng
testCompile externalDependency.skyScreamer
testCompile project(path: ':data', configuration: 'testArtifacts')
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,29 @@


import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper;
import com.linkedin.avroutil1.compatibility.ConfigurableSchemaComparator;
import com.linkedin.avroutil1.compatibility.SchemaComparisonConfiguration;
import com.linkedin.avroutil1.compatibility.SchemaParseConfiguration;
import com.linkedin.data.DataMap;
import com.linkedin.data.DataMapBuilder;
import com.linkedin.data.schema.DataSchema;
import com.linkedin.data.schema.DataSchemaResolver;
import com.linkedin.data.schema.DataSchemaTraverse;
import com.linkedin.data.schema.NamedDataSchema;
import com.linkedin.data.schema.RecordDataSchema;
import com.linkedin.data.schema.SchemaFormatType;
import com.linkedin.data.schema.SchemaParser;
import com.linkedin.data.schema.SchemaParserFactory;
import com.linkedin.data.schema.PegasusSchemaParser;
import com.linkedin.data.schema.SchemaToPdlEncoder;
import com.linkedin.data.schema.TyperefDataSchema;
import com.linkedin.data.schema.resolver.DefaultDataSchemaResolver;
import com.linkedin.data.schema.resolver.FileDataSchemaResolver;
import com.linkedin.data.schema.validation.ValidationOptions;
import com.linkedin.data.template.DataTemplateUtil;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;
Expand All @@ -53,6 +58,7 @@ public class SchemaTranslator
private static final Logger log = LoggerFactory.getLogger(SchemaTranslator.class);

public static final String DATA_PROPERTY = "com.linkedin.data";
public static final String TRANSLATED_FROM_SOURCE_OPTION = "translated.from";
public static final String SCHEMA_PROPERTY = "schema";
public static final String OPTIONAL_DEFAULT_MODE_PROPERTY = "optionalDefaultMode";
public static final String AVRO_FILE_EXTENSION = ".avsc";
Expand Down Expand Up @@ -166,9 +172,12 @@ public static DataSchema avroToDataSchema(String avroSchemaInJson, AvroToDataSch
{
avroSchemaFromEmbedded.addProp(DATA_PROPERTY, embededSchemaPropertyVal);
}
if (!avroSchemaFromEmbedded.equals(avroSchemaFromJson))
{
throw new IllegalArgumentException("Embedded schema does not translate to input Avro schema: " + avroSchemaInJson);
// Compare using configuration equivalent to STRICT, except ignore TRANSLATED_FROM_SOURCE_OPTION
if (!ConfigurableSchemaComparator.equals(avroSchemaFromEmbedded, avroSchemaFromJson,
SchemaComparisonConfiguration.STRICT.jsonPropNamesToIgnore(
Collections.singleton(SchemaTranslator.TRANSLATED_FROM_SOURCE_OPTION)))) {
throw new IllegalArgumentException(
"Embedded schema does not translate to input Avro schema: " + avroSchemaInJson);
}
}
}
Expand All @@ -186,6 +195,9 @@ public static DataSchema avroToDataSchema(String avroSchemaInJson, AvroToDataSch
String dataSchemaJson = dataSchema.toString();
resultDataSchema = DataTemplateUtil.parseSchema(dataSchemaJson);
}

// add translated from annotation if this is a named dataSchema
resultDataSchema = addTranslatedPropToNamedDataSchema(resultDataSchema);
return resultDataSchema;
}

Expand Down Expand Up @@ -317,6 +329,8 @@ public static String dataToAvroSchemaJson(DataSchema dataSchema)
*/
public static String dataToAvroSchemaJson(DataSchema dataSchema, DataToAvroSchemaTranslationOptions options) throws IllegalArgumentException
{
dataSchema = addTranslatedPropToNamedDataSchema(dataSchema);

// Create a copy of the schema before the actual translation, since the translation process ends up modifying the
// schema for unions with aliases, and we don't want to disturb the original schema. Use PDL to preserve annotations.
final DataSchema translatedDataSchema = DataTemplateUtil.parseSchema(
Expand All @@ -341,6 +355,36 @@ public static String dataToAvroSchemaJson(DataSchema dataSchema, DataToAvroSchem
return SchemaToAvroJsonEncoder.schemaToAvro(translatedDataSchema, dataSchema, defaultValueOverrides, options);
}

/**
* Adds TRANSLATED_FROM_SOURCE_OPTION property to named data schemas if not already present.
* @param dataSchema the data schema to add the property to
* @return the data schema with the property added if applicable.
*/
private static DataSchema addTranslatedPropToNamedDataSchema(DataSchema dataSchema) {
// Add translated from annotation if this is a named dataSchema
if (dataSchema instanceof NamedDataSchema) {
if (dataSchema instanceof TyperefDataSchema) {
((TyperefDataSchema) dataSchema).setReferencedType(
addAnnotationToDataSchema(((TyperefDataSchema) dataSchema).getRef()));
} else {
return addAnnotationToDataSchema(dataSchema);
}
}
return dataSchema;
}

private static DataSchema addAnnotationToDataSchema(DataSchema dataSchema) {
if (dataSchema instanceof NamedDataSchema) {
NamedDataSchema namedDataSchema = (NamedDataSchema) dataSchema;
if (!namedDataSchema.getProperties().containsKey(TRANSLATED_FROM_SOURCE_OPTION)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what should you do if existing translated_from prop is different from updated one?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't update the source in that case. So, if schema A translates to B, and B is translated to C,
the translatedFrom option in C will be A

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My original question is talking about an error case, I am guessing that should never happen and we should throw error?
But your response provides me something different from my understanding, I thought that annotation is just for direct source, seems not?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need to stop users from translating a translated schema again, as it is allowed right now in SchemaTranslator right now, and stopping it might fail users.

However, we can use this annotation to fail other flows like PDL -> Proto or Avro -> proto.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on this logic, translated.from annotation is used to indicate the root source, not directly translated source, is this clearly communicated?

Map<String, Object> properties = new HashMap<>(namedDataSchema.getProperties());
properties.put(TRANSLATED_FROM_SOURCE_OPTION, namedDataSchema.getFullName());
namedDataSchema.setProperties(properties);
}
}
return dataSchema;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will have side effect of changing passed DataSchema?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there should be none, except one additional json property in the result avro schema, which is the requirement of this change. This new property will identify the avro schemas as a translated schema.

}

/**
* Allows caller to specify a file path for schema resolution.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package com.linkedin.data.avro;


import com.linkedin.avroutil1.compatibility.ConfigurableSchemaComparator;
import com.linkedin.avroutil1.compatibility.SchemaComparisonConfiguration;
import com.linkedin.data.DataMap;
import com.linkedin.data.TestUtil;
import com.linkedin.data.avro.util.AvroUtil;
Expand All @@ -26,11 +28,21 @@
import java.io.IOException;

import com.linkedin.data.schema.PegasusSchemaParser;
import java.util.Collections;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.json.JSONException;
import org.json.JSONObject;
import org.skyscreamer.jsonassert.Customization;
import org.skyscreamer.jsonassert.JSONAssert;
import org.skyscreamer.jsonassert.JSONCompareMode;
import org.skyscreamer.jsonassert.JSONParser;
import org.skyscreamer.jsonassert.comparator.CustomComparator;
import org.testng.Assert;
import org.testng.annotations.Test;

import static com.linkedin.data.TestUtil.dataSchemaFromString;
import static com.linkedin.data.avro.SchemaTranslator.*;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNotNull;
Expand Down Expand Up @@ -175,8 +187,7 @@ public void testSchemaTranslation() throws IOException
}

@Test
public void testCustomSchemaAndDataTranslation() throws IOException
{
public void testCustomSchemaAndDataTranslation() throws IOException, JSONException {
Object[][] inputs =
{
{
Expand Down Expand Up @@ -289,8 +300,7 @@ public void testCustomSchemaAndDataTranslation() throws IOException
}

private void translate(String dataSchemaFieldsJson, String avroSchemaFieldsJson, String dataJson, String avroDataJson)
throws IOException
{
throws IOException, JSONException {
boolean debug = false;

String fullSchemaJson = DATA_SCHEMA_JSON_TEMPLATE.replace("##FIELDS", dataSchemaFieldsJson);
Expand All @@ -305,12 +315,22 @@ private void translate(String dataSchemaFieldsJson, String avroSchemaFieldsJson,
RecordDataSchema schema = (RecordDataSchema) parser.topLevelDataSchemas().get(2);

String avroJsonOutput = SchemaTranslator.dataToAvroSchemaJson(schema);
assertEquals(TestUtil.dataMapFromString(avroJsonOutput), TestUtil.dataMapFromString(fullAvroSchemaJson));

// JSON compare except TRANSLATED_FROM_SOURCE_OPTION in root
JSONAssert.assertEquals(fullAvroSchemaJson, avroJsonOutput,
new CustomComparator(JSONCompareMode.LENIENT,
new Customization(TRANSLATED_FROM_SOURCE_OPTION, (o1, o2) -> true)));
// output json should have the TRANSLATED_FROM_SOURCE_OPTION
assertNotNull(((JSONObject) JSONParser.parseJSON(avroJsonOutput)).get(TRANSLATED_FROM_SOURCE_OPTION));
// assertEquals(TestUtil.dataMapFromString(avroJsonOutput), TestUtil.dataMapFromString(fullAvroSchemaJson));
Schema avroSchema = Schema.parse(avroJsonOutput);
Schema avroSchema2 = SchemaTranslator.dataToAvroSchema(schema);
assertEquals(avroSchema, avroSchema2);
String avroSchemaToString = avroSchema.toString();
assertEquals(Schema.parse(avroSchemaToString), Schema.parse(fullAvroSchemaJson));
Assert.assertFalse(avroSchema.getProp(SchemaTranslator.TRANSLATED_FROM_SOURCE_OPTION).isEmpty());
Assert.assertTrue(
ConfigurableSchemaComparator.equals(avroSchema, Schema.parse(fullAvroSchemaJson),
SchemaComparisonConfiguration.STRICT.jsonPropNamesToIgnore(
Collections.singleton(SchemaTranslator.TRANSLATED_FROM_SOURCE_OPTION))));

if (debug)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package com.linkedin.data.avro;

import com.google.common.collect.ImmutableMap;
import com.linkedin.avroutil1.compatibility.ConfigurableSchemaComparator;
import com.linkedin.avroutil1.compatibility.SchemaComparisonConfiguration;
import com.linkedin.data.DataList;
import com.linkedin.data.DataMap;
import com.linkedin.data.TestUtil;
Expand Down Expand Up @@ -1879,7 +1881,13 @@ public void testPegasusDefaultToAvroOptionalTranslation(Object... testSchemaText

// AvroSchema translated needs to be as expected
Schema expectedAvroSchema = Schema.parse(expectedAvroSchemaString);
assertEquals(avroSchema, expectedAvroSchema);

Assert.assertNotNull(avroSchema.getProp(SchemaTranslator.TRANSLATED_FROM_SOURCE_OPTION));
Assert.assertFalse(avroSchema.getProp(SchemaTranslator.TRANSLATED_FROM_SOURCE_OPTION).isEmpty());

Assert.assertTrue(ConfigurableSchemaComparator.equals(avroSchema, expectedAvroSchema,
SchemaComparisonConfiguration.STRICT.jsonPropNamesToIgnore(
Collections.singleton(SchemaTranslator.TRANSLATED_FROM_SOURCE_OPTION))));

//Have a DataMap from pegasus schema
DataMap dataMap = TestUtil.dataMapFromString(dataMapString);
Expand Down