From 4f58f87f73d84dd93f0b8e97c16c38162eb7c2ce Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Tue, 17 Sep 2019 13:35:23 -0700 Subject: [PATCH 01/16] Add failing tests for tuple validation --- tests/resources/translate/tuple.json | 165 +++++++++++++++++++++++++++ tests/transpile_avro.rs | 145 +++++++++++++++++++++++ tests/transpile_bigquery.rs | 95 +++++++++++++++ 3 files changed, 405 insertions(+) create mode 100644 tests/resources/translate/tuple.json diff --git a/tests/resources/translate/tuple.json b/tests/resources/translate/tuple.json new file mode 100644 index 0000000..d195616 --- /dev/null +++ b/tests/resources/translate/tuple.json @@ -0,0 +1,165 @@ +{ + "name": "tuple", + "tests": [ + { + "name": "test_tuple_atomic", + "compatible": true, + "test": { + "avro": { + "fields": [ + { + "default": "null", + "name": "_f0", + "type": [ + { + "type": "null" + }, + { + "type": "boolean" + } + ] + }, + { + "default": "null", + "name": "_f1", + "type": [ + { + "type": "null" + }, + { + "type": "string" + } + ] + } + ], + "name": "root", + "type": "record" + }, + "bigquery": [ + { + "mode": "NULLABLE", + "name": "_f0", + "type": "BOOL" + }, + { + "mode": "NULLABLE", + "name": "_f1", + "type": "STRING" + } + ], + "json": { + "additionalItems": false, + "items": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ], + "type": "array" + } + } + }, + { + "description": [ + "Additional items in the tuple requires setting the max number of items" + ], + "name": "test_tuple_atomic_with_additional_items", + "compatible": true, + "test": { + "avro": { + "fields": [ + { + "default": "null", + "name": "_f0", + "type": [ + { + "type": "null" + }, + { + "type": "boolean" + } + ] + }, + { + "default": "null", + "name": "_f1", + "type": [ + { + "type": "null" + }, + { + "type": "string" + } + ] + }, + { + "default": "null", + "name": "_f2", + "type": [ + { + "type": "null" + }, + { + "type": "long" + } + ] + }, + { + "default": "null", + "name": "_f3", + "type": [ + { + "type": "null" + }, + { + "type": "long" + } + ] + } + ], + "name": "root", + "type": "record" + }, + "bigquery": [ + { + "mode": "NULLABLE", + "name": "_f0", + "type": "BOOLEAN" + }, + { + "mode": "NULLABLE", + "name": "_f1", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "_f2", + "type": "INT64" + }, + { + "mode": "NULLABLE", + "name": "_f3", + "type": "INT64" + } + ], + "json": { + "additionalItems": { + "type": "integer" + }, + "items": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ], + "maxItems": 4, + "type": "array" + } + } + } + ] +} diff --git a/tests/transpile_avro.rs b/tests/transpile_avro.rs index 7072de7..ba9ffad 100644 --- a/tests/transpile_avro.rs +++ b/tests/transpile_avro.rs @@ -1478,3 +1478,148 @@ fn avro_test_oneof_object_merge_nullability() { context.resolve_method = ResolveMethod::Panic; convert_avro(&input, context); } + +#[test] +fn avro_test_tuple_atomic() { + let input_data = r#" + { + "additionalItems": false, + "items": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ], + "type": "array" + } + "#; + let expected_data = r#" + { + "fields": [ + { + "default": "null", + "name": "_f0", + "type": [ + { + "type": "null" + }, + { + "type": "boolean" + } + ] + }, + { + "default": "null", + "name": "_f1", + "type": [ + { + "type": "null" + }, + { + "type": "string" + } + ] + } + ], + "name": "root", + "type": "record" + } + "#; + let mut context = Context { + ..Default::default() + }; + let input: Value = serde_json::from_str(input_data).unwrap(); + let expected: Value = serde_json::from_str(expected_data).unwrap(); + assert_eq!(expected, convert_avro(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_avro(&input, context); +} + +#[test] +fn avro_test_tuple_atomic_with_additional_items() { + let input_data = r#" + { + "additionalItems": { + "type": "integer" + }, + "items": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ], + "maxItems": 4, + "type": "array" + } + "#; + let expected_data = r#" + { + "fields": [ + { + "default": "null", + "name": "_f0", + "type": [ + { + "type": "null" + }, + { + "type": "boolean" + } + ] + }, + { + "default": "null", + "name": "_f1", + "type": [ + { + "type": "null" + }, + { + "type": "string" + } + ] + }, + { + "default": "null", + "name": "_f2", + "type": [ + { + "type": "null" + }, + { + "type": "long" + } + ] + }, + { + "default": "null", + "name": "_f3", + "type": [ + { + "type": "null" + }, + { + "type": "long" + } + ] + } + ], + "name": "root", + "type": "record" + } + "#; + let mut context = Context { + ..Default::default() + }; + let input: Value = serde_json::from_str(input_data).unwrap(); + let expected: Value = serde_json::from_str(expected_data).unwrap(); + assert_eq!(expected, convert_avro(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_avro(&input, context); +} diff --git a/tests/transpile_bigquery.rs b/tests/transpile_bigquery.rs index 2ba75eb..abef0de 100644 --- a/tests/transpile_bigquery.rs +++ b/tests/transpile_bigquery.rs @@ -1361,3 +1361,98 @@ fn bigquery_test_oneof_object_merge_nullability() { context.resolve_method = ResolveMethod::Panic; convert_bigquery(&input, context); } + +#[test] +fn bigquery_test_tuple_atomic() { + let input_data = r#" + { + "additionalItems": false, + "items": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ], + "type": "array" + } + "#; + let expected_data = r#" + [ + { + "mode": "NULLABLE", + "name": "_f0", + "type": "BOOL" + }, + { + "mode": "NULLABLE", + "name": "_f1", + "type": "STRING" + } + ] + "#; + let mut context = Context { + ..Default::default() + }; + let input: Value = serde_json::from_str(input_data).unwrap(); + let expected: Value = serde_json::from_str(expected_data).unwrap(); + assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); +} + +#[test] +fn bigquery_test_tuple_atomic_with_additional_items() { + let input_data = r#" + { + "additionalItems": { + "type": "integer" + }, + "items": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ], + "maxItems": 4, + "type": "array" + } + "#; + let expected_data = r#" + [ + { + "mode": "NULLABLE", + "name": "_f0", + "type": "BOOLEAN" + }, + { + "mode": "NULLABLE", + "name": "_f1", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "_f2", + "type": "INT64" + }, + { + "mode": "NULLABLE", + "name": "_f3", + "type": "INT64" + } + ] + "#; + let mut context = Context { + ..Default::default() + }; + let input: Value = serde_json::from_str(input_data).unwrap(); + let expected: Value = serde_json::from_str(expected_data).unwrap(); + assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); +} From 7e202266fa3a9bbdb8e106f188c4ee1f8c6c1660 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Tue, 17 Sep 2019 13:51:55 -0700 Subject: [PATCH 02/16] Add extra properties into jsonschema::Array type --- src/jsonschema.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/jsonschema.rs b/src/jsonschema.rs index 2db85f5..19b40e8 100644 --- a/src/jsonschema.rs +++ b/src/jsonschema.rs @@ -60,10 +60,20 @@ enum ArrayType { } #[derive(Serialize, Deserialize, Debug, Default)] +#[serde(rename_all = "camelCase")] struct Array { // Using Option would support tuple validation #[serde(skip_serializing_if = "Option::is_none")] items: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + additional_items: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + min_items: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + max_items: Option, } #[derive(Serialize, Deserialize, Debug, PartialEq)] @@ -407,7 +417,9 @@ mod tests { "items": [ {"type": "integer"}, {"type": "boolean"} - ] + ], + "additionalItems": {"type": "string"}, + "maxItems": 4, }); let schema: Tag = serde_json::from_value(data).unwrap(); if let ArrayType::TagTuple(items) = schema.array.items.unwrap() { @@ -416,6 +428,12 @@ mod tests { } else { panic!(); } + if let AdditionalProperties::Object(tag) = schema.array.additional_items.unwrap() { + assert_eq!(tag.data_type, json!("string")); + } else { + panic!(); + } + assert_eq!(schema.array.max_items.unwrap(), 4); } #[test] From c6cf977dd9025a1e098880fb5d6da568a62eb781 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Tue, 17 Sep 2019 15:30:04 -0700 Subject: [PATCH 03/16] Update type_into_ast and atom_into_ast to return Result --- src/ast.rs | 2 +- src/jsonschema.rs | 59 +++++++++++++++++++++++++++++------------------ 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index c339d93..386f277 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -567,7 +567,7 @@ impl TranslateFrom for Tag { type Error = &'static str; fn translate_from(tag: jsonschema::Tag, context: Context) -> Result { - let mut tag = tag.type_into_ast(); + let mut tag = tag.type_into_ast(context)?; tag.infer_name(context.normalize_case); tag.infer_nullability(context.force_nullable); tag.is_root = true; diff --git a/src/jsonschema.rs b/src/jsonschema.rs index 19b40e8..c226b92 100644 --- a/src/jsonschema.rs +++ b/src/jsonschema.rs @@ -4,6 +4,7 @@ use serde_json::{json, Value}; use std::collections::{HashMap, HashSet}; use super::ast; +use super::Context; /// The type enumeration does not contain any data and is used to determine /// available fields in the flattened tag. In JSONSchema parlance, these are @@ -130,9 +131,9 @@ impl Tag { } } - pub fn type_into_ast(&self) -> ast::Tag { + pub fn type_into_ast(&self, context: Context) -> Result { match self.get_type() { - Type::Atom(atom) => self.atom_into_ast(atom), + Type::Atom(atom) => self.atom_into_ast(atom, context), Type::List(list) => { let mut nullable: bool = false; let mut items: Vec = Vec::new(); @@ -140,15 +141,19 @@ impl Tag { if let Atom::Null = &atom { nullable = true; } - items.push(self.atom_into_ast(atom)); + items.push(self.atom_into_ast(atom, context)?); } - ast::Tag::new(ast::Type::Union(ast::Union::new(items)), None, nullable) + Ok(ast::Tag::new( + ast::Type::Union(ast::Union::new(items)), + None, + nullable, + )) } } } - fn atom_into_ast(&self, data_type: Atom) -> ast::Tag { - match data_type { + fn atom_into_ast(&self, data_type: Atom, context: Context) -> Result { + let result = match data_type { Atom::Null => ast::Tag::new(ast::Type::Null, None, true), Atom::Boolean => ast::Tag::new(ast::Type::Atom(ast::Atom::Boolean), None, false), Atom::Number => ast::Tag::new(ast::Type::Atom(ast::Atom::Number), None, false), @@ -160,7 +165,7 @@ impl Tag { Some(properties) => { let mut fields: HashMap = HashMap::new(); for (key, value) in properties { - fields.insert(key.to_string(), value.type_into_ast()); + fields.insert(key.to_string(), value.type_into_ast(context)?); } ast::Tag::new( ast::Type::Object(ast::Object::new(fields, self.object.required.clone())), @@ -176,23 +181,25 @@ impl Tag { ) { (Some(AdditionalProperties::Object(add)), Some(pat)) => { let mut vec: Vec = Vec::new(); - vec.push(add.type_into_ast()); - vec.extend(pat.values().map(|v| v.type_into_ast())); + vec.push(add.type_into_ast(context)?); + let pat_vec: Result, _> = + pat.values().map(|v| v.type_into_ast(context)).collect(); + vec.extend(pat_vec?); let value = ast::Tag::new(ast::Type::Union(ast::Union::new(vec)), None, false); ast::Tag::new(ast::Type::Map(ast::Map::new(None, value)), None, false) } (Some(AdditionalProperties::Object(tag)), None) => ast::Tag::new( - ast::Type::Map(ast::Map::new(None, tag.type_into_ast())), + ast::Type::Map(ast::Map::new(None, tag.type_into_ast(context)?)), None, false, ), (_, Some(tag)) => { + let items: Result, _> = + tag.values().map(|v| v.type_into_ast(context)).collect(); let union = ast::Tag::new( - ast::Type::Union(ast::Union::new( - tag.values().map(|v| v.type_into_ast()).collect(), - )), + ast::Type::Union(ast::Union::new(items?)), None, false, ); @@ -202,11 +209,14 @@ impl Tag { // handle oneOf match &self.one_of { Some(vec) => { - let items: Vec = - vec.iter().map(|item| item.type_into_ast()).collect(); - let nullable: bool = items.iter().any(ast::Tag::is_null); + let items: Result, _> = vec + .iter() + .map(|item| item.type_into_ast(context)) + .collect(); + let unwrapped = items?; + let nullable: bool = unwrapped.iter().any(ast::Tag::is_null); ast::Tag::new( - ast::Type::Union(ast::Union::new(items)), + ast::Type::Union(ast::Union::new(unwrapped)), None, nullable, ) @@ -223,20 +233,23 @@ impl Tag { if let Some(items) = &self.array.items { let data_type = match items { ArrayType::Tag(items) => { - ast::Type::Array(ast::Array::new(items.type_into_ast())) + ast::Type::Array(ast::Array::new(items.type_into_ast(context)?)) } ArrayType::TagTuple(items) => { - let items: Vec = - items.iter().map(|item| item.type_into_ast()).collect(); - ast::Type::Tuple(ast::Tuple::new(items)) + let items: Result, _> = items + .iter() + .map(|item| item.type_into_ast(context)) + .collect(); + ast::Type::Tuple(ast::Tuple::new(items?)) } }; ast::Tag::new(data_type, None, false) } else { - panic!(format!("array missing item: {:#?}", self)) + return Err("array missing item"); } } - } + }; + Ok(result) } } From c8cbf58a97ef0307976e83a8878a4b466c7d8ea5 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Tue, 17 Sep 2019 16:30:57 -0700 Subject: [PATCH 04/16] Add --tuple-struct option for enabling treatment of tuples as structs --- src/jsonschema.rs | 114 ++++++++++++++++++++++++++++++++++++++++++++-- src/lib.rs | 1 + src/main.rs | 7 +++ 3 files changed, 117 insertions(+), 5 deletions(-) diff --git a/src/jsonschema.rs b/src/jsonschema.rs index c226b92..212b126 100644 --- a/src/jsonschema.rs +++ b/src/jsonschema.rs @@ -236,11 +236,40 @@ impl Tag { ast::Type::Array(ast::Array::new(items.type_into_ast(context)?)) } ArrayType::TagTuple(items) => { - let items: Result, _> = items - .iter() - .map(|item| item.type_into_ast(context)) - .collect(); - ast::Type::Tuple(ast::Tuple::new(items?)) + // Instead of expanding the definition of the AST + // tuple type, only a subset of tuple validation is + // accepted as valid. The type must be set to + // "array", the items a list of sub-schemas, + // additionalItems set to a valid type, and maxItems + // set to a value that is longer than the items + // list. Anything else will be directly translated + // into a JSON atom. + if context.tuple_struct { + let items: Result, _> = items + .iter() + .map(|item| item.type_into_ast(context)) + .collect(); + let mut unwrapped = items?; + + match &self.array.additional_items { + Some(AdditionalProperties::Object(tag)) => { + let max_items: usize = self.array.max_items.unwrap_or(0); + if max_items < unwrapped.len() { + return Err("maxItems is less than tuple length"); + } + for _ in unwrapped.len()..max_items { + unwrapped.push(tag.type_into_ast(context)?) + } + ast::Type::Tuple(ast::Tuple::new(unwrapped)) + } + Some(AdditionalProperties::Bool(false)) => { + ast::Type::Tuple(ast::Tuple::new(unwrapped)) + } + _ => return Err("additionalItems set incorrectly"), + } + } else { + ast::Type::Atom(ast::Atom::JSON) + } } }; ast::Tag::new(data_type, None, false) @@ -714,4 +743,79 @@ mod tests { }); assert_eq!(expect, translate(data)) } + + fn translate_tuple(data: Value) -> Value { + let context = Context { + tuple_struct: true, + ..Default::default() + }; + let schema: Tag = serde_json::from_value(data).unwrap(); + let ast: ast::Tag = schema.translate_into(context).unwrap(); + json!(ast) + } + + #[test] + fn test_into_ast_tuple_default_behavior() { + let data = json!({ + "type": "array", + "items": [ + {"type": "boolean"}, + {"type": "integer"} + ] + }); + let expect = json!({"type": {"atom": "json"}, "nullable": false}); + assert_eq!(expect, translate(data)) + } + + #[test] + #[should_panic(expected = "additionalItems")] + fn test_into_ast_tuple_additional_items() { + let data = json!({ + "type": "array", + "items": [ + {"type": "boolean"}, + {"type": "integer"} + ] + }); + let expect = json!({"type": {"atom": "json"}, "nullable": false}); + assert_eq!(expect, translate_tuple(data)) + } + + #[test] + #[should_panic(expected = "maxItems")] + fn test_into_ast_tuple_missing_max_items() { + let data = json!({ + "type": "array", + "items": [ + {"type": "boolean"}, + {"type": "integer"} + ], + "additionalItems": {"type": "string"} + }); + let expect = json!({"type": {"atom": "json"}, "nullable": false}); + assert_eq!(expect, translate_tuple(data)) + } + + #[test] + fn test_into_ast_tuple_valid() { + let data = json!({ + "type": "array", + "items": [ + {"type": "boolean"}, + {"type": "integer"} + ], + "additionalItems": {"type": "string"}, + "maxItems": 4 + }); + let expect = json!({ + "type": {"tuple": {"items": [ + {"type": {"atom": "boolean"}, "nullable": false}, + {"type": {"atom": "integer"}, "nullable": false}, + {"type": {"atom": "string"}, "nullable": false}, + {"type": {"atom": "string"}, "nullable": false}, + ]}}, + "nullable": false, + }); + assert_eq!(expect, translate_tuple(data)) + } } diff --git a/src/lib.rs b/src/lib.rs index 6dc95bc..1d18606 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -63,6 +63,7 @@ pub struct Context { pub resolve_method: ResolveMethod, pub normalize_case: bool, pub force_nullable: bool, + pub tuple_struct: bool, } fn into_ast(input: &Value, context: Context) -> ast::Tag { diff --git a/src/main.rs b/src/main.rs index 32843d0..d9d1b8e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -49,6 +49,12 @@ fn main() { .short("n") .long("force-nullable"), ) + .arg( + Arg::with_name("tuple-struct") + .help("Treats tuple validation as an anonymous struct") + .short("t") + .long("tuple-struct"), + ) .get_matches(); let reader: Box = match matches.value_of("file") { @@ -69,6 +75,7 @@ fn main() { }, normalize_case: matches.is_present("normalize-case"), force_nullable: matches.is_present("force-nullable"), + tuple_struct: matches.is_present("tuple-struct"), }; let output = match matches.value_of("type").unwrap() { From 2189311a9ec32ec2d065fec8eaf31067e12b0e93 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Tue, 17 Sep 2019 16:57:37 -0700 Subject: [PATCH 05/16] Add implementation for avro tuples This may be a less than ideal implementation, because it assumes that the AST representation is already in the correct format. This doesn't take into account the context passed from the command-line -- however, this interface is only used from the main translate functions in the library. --- src/avro.rs | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/src/avro.rs b/src/avro.rs index 5e713bd..df1c08e 100644 --- a/src/avro.rs +++ b/src/avro.rs @@ -192,6 +192,40 @@ impl TranslateFrom for Type { Type::Complex(Complex::Record(record)) } } + ast::Type::Tuple(tuple) => { + let fields = tuple + .items + .iter() + .enumerate() + .map(|(i, v)| { + let default = if v.nullable { Some(json!(null)) } else { None }; + ( + format!("_f{}", i), + Type::translate_from(v.clone(), context), + default, + ) + }) + .filter(|(_, v, _)| v.is_ok()) + .map(|(name, data_type, default)| Field { + name, + data_type: data_type.unwrap(), + default, + ..Default::default() + }) + .collect(); + let record = Record { + common: CommonAttributes { + name: tag.name.clone().unwrap_or_else(|| "__UNNAMED__".into()), + namespace: tag.namespace.clone(), + ..Default::default() + }, + fields, + }; + if record.common.name == "__UNNAMED__" { + warn!("{} - Unnamed field", tag.fully_qualified_name()); + } + Type::Complex(Complex::Record(record)) + } ast::Type::Array(array) => match Type::translate_from(*array.items.clone(), context) { Ok(data_type) => Type::Complex(Complex::Array(Array { items: Box::new(data_type), @@ -596,7 +630,14 @@ mod tests { } } }); - let avro = json!({"type": "string"}); + let avro = json!({ + "type": "record", + "name": "__UNNAMED__", + "fields": [ + {"name": "_f0", "type": {"type": "boolean"}}, + {"name": "_f1", "type": {"type": "long"}} + ] + }); assert_from_ast_eq(ast, avro); } From a25163d079dc4a3a8486b677cb8805b1e80f0db5 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Wed, 18 Sep 2019 10:40:34 -0700 Subject: [PATCH 06/16] Move tests into a rust file for context --- tests/resources/translate/tuple.json | 165 -------------------------- tests/transpile_avro.rs | 145 ----------------------- tests/transpile_bigquery.rs | 95 --------------- tests/tuple_struct.rs | 171 +++++++++++++++++++++++++++ 4 files changed, 171 insertions(+), 405 deletions(-) delete mode 100644 tests/resources/translate/tuple.json create mode 100644 tests/tuple_struct.rs diff --git a/tests/resources/translate/tuple.json b/tests/resources/translate/tuple.json deleted file mode 100644 index d195616..0000000 --- a/tests/resources/translate/tuple.json +++ /dev/null @@ -1,165 +0,0 @@ -{ - "name": "tuple", - "tests": [ - { - "name": "test_tuple_atomic", - "compatible": true, - "test": { - "avro": { - "fields": [ - { - "default": "null", - "name": "_f0", - "type": [ - { - "type": "null" - }, - { - "type": "boolean" - } - ] - }, - { - "default": "null", - "name": "_f1", - "type": [ - { - "type": "null" - }, - { - "type": "string" - } - ] - } - ], - "name": "root", - "type": "record" - }, - "bigquery": [ - { - "mode": "NULLABLE", - "name": "_f0", - "type": "BOOL" - }, - { - "mode": "NULLABLE", - "name": "_f1", - "type": "STRING" - } - ], - "json": { - "additionalItems": false, - "items": [ - { - "type": "boolean" - }, - { - "type": "string" - } - ], - "type": "array" - } - } - }, - { - "description": [ - "Additional items in the tuple requires setting the max number of items" - ], - "name": "test_tuple_atomic_with_additional_items", - "compatible": true, - "test": { - "avro": { - "fields": [ - { - "default": "null", - "name": "_f0", - "type": [ - { - "type": "null" - }, - { - "type": "boolean" - } - ] - }, - { - "default": "null", - "name": "_f1", - "type": [ - { - "type": "null" - }, - { - "type": "string" - } - ] - }, - { - "default": "null", - "name": "_f2", - "type": [ - { - "type": "null" - }, - { - "type": "long" - } - ] - }, - { - "default": "null", - "name": "_f3", - "type": [ - { - "type": "null" - }, - { - "type": "long" - } - ] - } - ], - "name": "root", - "type": "record" - }, - "bigquery": [ - { - "mode": "NULLABLE", - "name": "_f0", - "type": "BOOLEAN" - }, - { - "mode": "NULLABLE", - "name": "_f1", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "_f2", - "type": "INT64" - }, - { - "mode": "NULLABLE", - "name": "_f3", - "type": "INT64" - } - ], - "json": { - "additionalItems": { - "type": "integer" - }, - "items": [ - { - "type": "boolean" - }, - { - "type": "string" - } - ], - "maxItems": 4, - "type": "array" - } - } - } - ] -} diff --git a/tests/transpile_avro.rs b/tests/transpile_avro.rs index ba9ffad..7072de7 100644 --- a/tests/transpile_avro.rs +++ b/tests/transpile_avro.rs @@ -1478,148 +1478,3 @@ fn avro_test_oneof_object_merge_nullability() { context.resolve_method = ResolveMethod::Panic; convert_avro(&input, context); } - -#[test] -fn avro_test_tuple_atomic() { - let input_data = r#" - { - "additionalItems": false, - "items": [ - { - "type": "boolean" - }, - { - "type": "string" - } - ], - "type": "array" - } - "#; - let expected_data = r#" - { - "fields": [ - { - "default": "null", - "name": "_f0", - "type": [ - { - "type": "null" - }, - { - "type": "boolean" - } - ] - }, - { - "default": "null", - "name": "_f1", - "type": [ - { - "type": "null" - }, - { - "type": "string" - } - ] - } - ], - "name": "root", - "type": "record" - } - "#; - let mut context = Context { - ..Default::default() - }; - let input: Value = serde_json::from_str(input_data).unwrap(); - let expected: Value = serde_json::from_str(expected_data).unwrap(); - assert_eq!(expected, convert_avro(&input, context)); - - context.resolve_method = ResolveMethod::Panic; - convert_avro(&input, context); -} - -#[test] -fn avro_test_tuple_atomic_with_additional_items() { - let input_data = r#" - { - "additionalItems": { - "type": "integer" - }, - "items": [ - { - "type": "boolean" - }, - { - "type": "string" - } - ], - "maxItems": 4, - "type": "array" - } - "#; - let expected_data = r#" - { - "fields": [ - { - "default": "null", - "name": "_f0", - "type": [ - { - "type": "null" - }, - { - "type": "boolean" - } - ] - }, - { - "default": "null", - "name": "_f1", - "type": [ - { - "type": "null" - }, - { - "type": "string" - } - ] - }, - { - "default": "null", - "name": "_f2", - "type": [ - { - "type": "null" - }, - { - "type": "long" - } - ] - }, - { - "default": "null", - "name": "_f3", - "type": [ - { - "type": "null" - }, - { - "type": "long" - } - ] - } - ], - "name": "root", - "type": "record" - } - "#; - let mut context = Context { - ..Default::default() - }; - let input: Value = serde_json::from_str(input_data).unwrap(); - let expected: Value = serde_json::from_str(expected_data).unwrap(); - assert_eq!(expected, convert_avro(&input, context)); - - context.resolve_method = ResolveMethod::Panic; - convert_avro(&input, context); -} diff --git a/tests/transpile_bigquery.rs b/tests/transpile_bigquery.rs index abef0de..2ba75eb 100644 --- a/tests/transpile_bigquery.rs +++ b/tests/transpile_bigquery.rs @@ -1361,98 +1361,3 @@ fn bigquery_test_oneof_object_merge_nullability() { context.resolve_method = ResolveMethod::Panic; convert_bigquery(&input, context); } - -#[test] -fn bigquery_test_tuple_atomic() { - let input_data = r#" - { - "additionalItems": false, - "items": [ - { - "type": "boolean" - }, - { - "type": "string" - } - ], - "type": "array" - } - "#; - let expected_data = r#" - [ - { - "mode": "NULLABLE", - "name": "_f0", - "type": "BOOL" - }, - { - "mode": "NULLABLE", - "name": "_f1", - "type": "STRING" - } - ] - "#; - let mut context = Context { - ..Default::default() - }; - let input: Value = serde_json::from_str(input_data).unwrap(); - let expected: Value = serde_json::from_str(expected_data).unwrap(); - assert_eq!(expected, convert_bigquery(&input, context)); - - context.resolve_method = ResolveMethod::Panic; - convert_bigquery(&input, context); -} - -#[test] -fn bigquery_test_tuple_atomic_with_additional_items() { - let input_data = r#" - { - "additionalItems": { - "type": "integer" - }, - "items": [ - { - "type": "boolean" - }, - { - "type": "string" - } - ], - "maxItems": 4, - "type": "array" - } - "#; - let expected_data = r#" - [ - { - "mode": "NULLABLE", - "name": "_f0", - "type": "BOOLEAN" - }, - { - "mode": "NULLABLE", - "name": "_f1", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "_f2", - "type": "INT64" - }, - { - "mode": "NULLABLE", - "name": "_f3", - "type": "INT64" - } - ] - "#; - let mut context = Context { - ..Default::default() - }; - let input: Value = serde_json::from_str(input_data).unwrap(); - let expected: Value = serde_json::from_str(expected_data).unwrap(); - assert_eq!(expected, convert_bigquery(&input, context)); - - context.resolve_method = ResolveMethod::Panic; - convert_bigquery(&input, context); -} diff --git a/tests/tuple_struct.rs b/tests/tuple_struct.rs new file mode 100644 index 0000000..cc30ad7 --- /dev/null +++ b/tests/tuple_struct.rs @@ -0,0 +1,171 @@ +use jst::Context; +use jst::{convert_avro, convert_bigquery}; +use pretty_assertions::assert_eq; +use serde_json::Value; + +fn data_atomic() -> Value { + serde_json::from_str( + r#" + { + "additionalItems": false, + "items": [ + {"type": "boolean"}, + {"type": "string"} + ], + "type": "array" + } + "#, + ) + .unwrap() +} + +fn data_atomic_with_additional_properties() -> Value { + serde_json::from_str( + r#" + { + "additionalItems": { + "type": "integer" + }, + "items": [ + {"type": "boolean"}, + {"type": "string"} + ], + "maxItems": 4, + "type": "array" + } + "#, + ) + .unwrap() +} + +#[test] +fn test_avro_tuple_atomic() { + let context = Context { + tuple_struct: true, + ..Default::default() + }; + let expected: Value = serde_json::from_str( + r#" + { + "fields": [ + { + "name": "_f0", + "type": {"type": "boolean"} + }, + { + "name": "_f1", + "type": {"type": "string"} + } + ], + "name": "root", + "type": "record" + } + "#, + ) + .unwrap(); + assert_eq!(expected, convert_avro(&data_atomic(), context)); +} + +#[test] +fn test_bigquery_tuple_atomic() { + let context = Context { + tuple_struct: true, + ..Default::default() + }; + let expected: Value = serde_json::from_str( + r#" + [ + { + "mode": "NULLABLE", + "name": "_f0", + "type": "BOOL" + }, + { + "mode": "NULLABLE", + "name": "_f1", + "type": "STRING" + } + ] + "#, + ) + .unwrap(); + assert_eq!(expected, convert_bigquery(&data_atomic(), context)); +} + +#[test] +fn test_avro_tuple_atomic_with_additional_items() { + let context = Context { + tuple_struct: true, + ..Default::default() + }; + let expected: Value = serde_json::from_str( + r#" + { + "fields": [ + { + "name": "_f0", + "type": {"type": "boolean"} + }, + { + "name": "_f1", + "type": {"type": "string"} + }, + { + "name": "_f2", + "type": {"type": "long"} + }, + { + "name": "_f3", + "type": {"type": "long"} + } + ], + "name": "root", + "type": "record" + } + "#, + ) + .unwrap(); + assert_eq!( + expected, + convert_avro(&data_atomic_with_additional_properties(), context) + ); +} + +#[test] +fn test_bigquery_tuple_atomic_with_additional_items() { + let context = Context { + tuple_struct: true, + ..Default::default() + }; + let expected: Value = serde_json::from_str( + r#" + [ + { + "mode": "NULLABLE", + "name": "_f0", + "type": "BOOLEAN" + }, + { + "mode": "NULLABLE", + "name": "_f1", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "_f2", + "type": "INT64" + }, + { + "mode": "NULLABLE", + "name": "_f3", + "type": "INT64" + } + ] + "#, + ) + .unwrap(); + assert_eq!( + expected, + convert_bigquery(&data_atomic_with_additional_properties(), context) + ); +} From 6769effc5217909f47d66c0aecc04f0d8c99bae4 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Wed, 18 Sep 2019 11:13:56 -0700 Subject: [PATCH 07/16] Add tuple support for BigQuery --- src/bigquery.rs | 29 ++++++++++++++++++++++++++++- tests/tuple_struct.rs | 14 +++++++------- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/src/bigquery.rs b/src/bigquery.rs index 09376b2..5567e8a 100644 --- a/src/bigquery.rs +++ b/src/bigquery.rs @@ -110,6 +110,29 @@ impl TranslateFrom for Tag { Type::Record(Record { fields }) } } + ast::Type::Tuple(tuple) => { + let fields: Result, _> = tuple + .items + .iter() + .map(|v| Tag::translate_from(v.clone(), context)) + .collect(); + + let named_fields: HashMap> = fields? + .into_iter() + .enumerate() + .map(|(i, mut v)| { + // The name is actually derived from the value, not from + // the associated key. Modify the name in place. + let name = format!("_f{}", i); + v.name = Some(name.clone()); + (name, Box::new(v)) + }) + .collect(); + + Type::Record(Record { + fields: named_fields, + }) + } ast::Type::Array(array) => match Tag::translate_from(*array.items.clone(), context) { Ok(tag) => *tag.data_type, Err(_) => return Err(fmt_reason("untyped array")), @@ -531,8 +554,12 @@ mod tests { } }); let expect = json!({ - "type": "STRING", + "type": "RECORD", "mode": "REQUIRED", + "fields": [ + {"name": "_f0", "type": "BOOL", "mode": "REQUIRED"}, + {"name": "_f1", "type": "INT64", "mode": "REQUIRED"} + ] }); assert_eq!(expect, transform_tag(data)); } diff --git a/tests/tuple_struct.rs b/tests/tuple_struct.rs index cc30ad7..3d7cef8 100644 --- a/tests/tuple_struct.rs +++ b/tests/tuple_struct.rs @@ -76,12 +76,12 @@ fn test_bigquery_tuple_atomic() { r#" [ { - "mode": "NULLABLE", + "mode": "REQUIRED", "name": "_f0", "type": "BOOL" }, { - "mode": "NULLABLE", + "mode": "REQUIRED", "name": "_f1", "type": "STRING" } @@ -141,22 +141,22 @@ fn test_bigquery_tuple_atomic_with_additional_items() { r#" [ { - "mode": "NULLABLE", + "mode": "REQUIRED", "name": "_f0", - "type": "BOOLEAN" + "type": "BOOL" }, { - "mode": "NULLABLE", + "mode": "REQUIRED", "name": "_f1", "type": "STRING" }, { - "mode": "NULLABLE", + "mode": "REQUIRED", "name": "_f2", "type": "INT64" }, { - "mode": "NULLABLE", + "mode": "REQUIRED", "name": "_f3", "type": "INT64" } From ffe9506f7b94b961c89235142bea31ed205248df Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Wed, 18 Sep 2019 12:39:03 -0700 Subject: [PATCH 08/16] Update mps-download-schemas to point to master --- scripts/mps-download-schemas.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/mps-download-schemas.sh b/scripts/mps-download-schemas.sh index 72092b5..5c3fde1 100755 --- a/scripts/mps-download-schemas.sh +++ b/scripts/mps-download-schemas.sh @@ -2,6 +2,6 @@ # Download production mozilla-pipeline-schemas into a schema folder cd "$(dirname "$0")/.." || exit -curl -o schemas.tar.gz -L https://github.com/mozilla-services/mozilla-pipeline-schemas/archive/dev.tar.gz -tar --strip-components=1 -xvf schemas.tar.gz mozilla-pipeline-schemas-dev/schemas +curl -o schemas.tar.gz -L https://github.com/mozilla-services/mozilla-pipeline-schemas/archive/master.tar.gz +tar --strip-components=1 -xvf schemas.tar.gz mozilla-pipeline-schemas-master/schemas rm schemas.tar.gz From 6a254ed9fb350394656981c38bd3d69c7693e603 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Wed, 18 Sep 2019 12:39:19 -0700 Subject: [PATCH 09/16] Remove short version of --tuple-struct due to conflicts --- src/main.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index d9d1b8e..a91b970 100644 --- a/src/main.rs +++ b/src/main.rs @@ -52,7 +52,6 @@ fn main() { .arg( Arg::with_name("tuple-struct") .help("Treats tuple validation as an anonymous struct") - .short("t") .long("tuple-struct"), ) .get_matches(); From 38382926064b4c293e50bd3473f50f23448a5ea3 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Wed, 18 Sep 2019 12:56:42 -0700 Subject: [PATCH 10/16] Add script for testing --tuple-struct option --- .gitignore | 1 + scripts/mps-verify-tuple-struct.sh | 43 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100755 scripts/mps-verify-tuple-struct.sh diff --git a/.gitignore b/.gitignore index af65c3c..4c6ae88 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ __pycache__/ **/*.rs.bk /schemas +/test_tuple_results \ No newline at end of file diff --git a/scripts/mps-verify-tuple-struct.sh b/scripts/mps-verify-tuple-struct.sh new file mode 100755 index 0000000..b1692c4 --- /dev/null +++ b/scripts/mps-verify-tuple-struct.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +cd "$(dirname "$0").." + +datadir=$(mktemp -d -t tmp.XXXXXXXXXX) +function cleanup { + echo "Running cleanup!" + rm -rf "$datadir" +} +trap cleanup EXIT + +scripts/mps-download-schemas.sh + +avro_control=$datadir/avro-control +avro_no_tuple=$datadir/avro-no-tuple +avro_tuple=$datadir/avro-tuple + +bq_control=$datadir/bq-control +bq_no_tuple=$datadir/bq-no-tuple +bq_tuple=$datadir/bq-tuple + + +# get control values +git checkout v1.4.1 + +scripts/mps-generate-schemas.sh $avro_control --type avro --resolve drop +scripts/mps-generate-schemas.sh $bq_control --type bigquery --resolve drop + +git checkout - + +# get values for tuple/no-tuple +scripts/mps-generate-schemas.sh $avro_no_tuple --type avro --resolve drop +scripts/mps-generate-schemas.sh $avro_tuple --type avro --resolve drop --tuple-struct +scripts/mps-generate-schemas.sh $bq_no_tuple --type bigquery --resolve drop +scripts/mps-generate-schemas.sh $bq_tuple --type bigquery --resolve drop --tuple-struct + +outdir="test_tuple_results" +mkdir -p $outdir + +diff -r $avro_control $avro_no_tuple > $outdir/avro-no-tuple.diff +diff -r $bq_control $bq_no_tuple > $outdir/bq-no-tuple.diff +diff -r $avro_no_tuple $avro_tuple > $outdir/avro-tuple.diff +diff -r $bq_no_tuple $bq_tuple > $outdir/bq-tuple.diff From c267d1b08174b6a61b8ba4221e26966ac649ac3c Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Wed, 18 Sep 2019 13:11:51 -0700 Subject: [PATCH 11/16] Allow static length tuples --- src/jsonschema.rs | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/src/jsonschema.rs b/src/jsonschema.rs index 212b126..ca7f6ff 100644 --- a/src/jsonschema.rs +++ b/src/jsonschema.rs @@ -241,9 +241,9 @@ impl Tag { // accepted as valid. The type must be set to // "array", the items a list of sub-schemas, // additionalItems set to a valid type, and maxItems - // set to a value that is longer than the items - // list. Anything else will be directly translated - // into a JSON atom. + // set to a value that is equal to or longer than + // the items list. Anything else will be directly + // translated into a JSON atom. if context.tuple_struct { let items: Result, _> = items .iter() @@ -265,6 +265,17 @@ impl Tag { Some(AdditionalProperties::Bool(false)) => { ast::Type::Tuple(ast::Tuple::new(unwrapped)) } + None => { + // additionalItems may be unspecified if the max_length + // matches the number of items in the tuple. This corresponds + // to optional tuple values (variable length tuple). + let max_items: usize = self.array.max_items.unwrap_or(0); + if max_items == unwrapped.len() { + ast::Type::Tuple(ast::Tuple::new(unwrapped)) + } else { + return Err("maxItems must be set if additionalItems are allowed"); + } + } _ => return Err("additionalItems set incorrectly"), } } else { @@ -768,8 +779,8 @@ mod tests { } #[test] - #[should_panic(expected = "additionalItems")] - fn test_into_ast_tuple_additional_items() { + #[should_panic(expected = "maxItems must be set")] + fn test_into_ast_tuple_invalid() { let data = json!({ "type": "array", "items": [ @@ -795,6 +806,25 @@ mod tests { let expect = json!({"type": {"atom": "json"}, "nullable": false}); assert_eq!(expect, translate_tuple(data)) } + #[test] + fn test_into_ast_tuple_static() { + let data = json!({ + "type": "array", + "items": [ + {"type": "boolean"}, + {"type": "integer"} + ], + "maxItems": 2 + }); + let expect = json!({ + "type": {"tuple": {"items": [ + {"type": {"atom": "boolean"}, "nullable": false}, + {"type": {"atom": "integer"}, "nullable": false} + ]}}, + "nullable": false + }); + assert_eq!(expect, translate_tuple(data)) + } #[test] fn test_into_ast_tuple_valid() { From 59120dfeee7684214affd8ce4d1d1a040504450f Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Wed, 18 Sep 2019 13:40:31 -0700 Subject: [PATCH 12/16] Add support for optional tuple fields when `--force-nullable` not set --- src/jsonschema.rs | 37 +++++++++++++++++++++++++++++---- tests/force_nullable.rs | 45 ++++++++++++++++++++++++++++++++++++++++- tests/tuple_struct.rs | 10 +++++---- 3 files changed, 83 insertions(+), 9 deletions(-) diff --git a/src/jsonschema.rs b/src/jsonschema.rs index ca7f6ff..5887a84 100644 --- a/src/jsonschema.rs +++ b/src/jsonschema.rs @@ -250,7 +250,12 @@ impl Tag { .map(|item| item.type_into_ast(context)) .collect(); let mut unwrapped = items?; - + let min_items: usize = + self.array.min_items.unwrap_or(unwrapped.len()); + // set items to optional + for i in min_items..unwrapped.len() { + unwrapped[i].nullable = true; + } match &self.array.additional_items { Some(AdditionalProperties::Object(tag)) => { let max_items: usize = self.array.max_items.unwrap_or(0); @@ -258,7 +263,9 @@ impl Tag { return Err("maxItems is less than tuple length"); } for _ in unwrapped.len()..max_items { - unwrapped.push(tag.type_into_ast(context)?) + let mut ast_tag = tag.type_into_ast(context)?; + ast_tag.nullable = true; + unwrapped.push(ast_tag); } ast::Type::Tuple(ast::Tuple::new(unwrapped)) } @@ -806,6 +813,7 @@ mod tests { let expect = json!({"type": {"atom": "json"}, "nullable": false}); assert_eq!(expect, translate_tuple(data)) } + #[test] fn test_into_ast_tuple_static() { let data = json!({ @@ -826,6 +834,27 @@ mod tests { assert_eq!(expect, translate_tuple(data)) } + #[test] + fn test_into_ast_tuple_static_nullable() { + let data = json!({ + "type": "array", + "items": [ + {"type": "boolean"}, + {"type": "integer"} + ], + "minItems": 1, + "maxItems": 2 + }); + let expect = json!({ + "type": {"tuple": {"items": [ + {"type": {"atom": "boolean"}, "nullable": false}, + {"type": {"atom": "integer"}, "nullable": true} + ]}}, + "nullable": false + }); + assert_eq!(expect, translate_tuple(data)) + } + #[test] fn test_into_ast_tuple_valid() { let data = json!({ @@ -841,8 +870,8 @@ mod tests { "type": {"tuple": {"items": [ {"type": {"atom": "boolean"}, "nullable": false}, {"type": {"atom": "integer"}, "nullable": false}, - {"type": {"atom": "string"}, "nullable": false}, - {"type": {"atom": "string"}, "nullable": false}, + {"type": {"atom": "string"}, "nullable": true}, + {"type": {"atom": "string"}, "nullable": true}, ]}}, "nullable": false, }); diff --git a/tests/force_nullable.rs b/tests/force_nullable.rs index e83040e..7034d86 100644 --- a/tests/force_nullable.rs +++ b/tests/force_nullable.rs @@ -55,9 +55,16 @@ fn test_data() -> Value { "required": ["f"] } ] + }, + "tuple": { + "type": "array", + "items": [ + {"type": "boolean"} + ], + "maxItems": 1 } }, - "required": ["atom", "object", "map", "array", "union"] + "required": ["atom", "object", "map", "array", "union", "tuple"] } "#, ) @@ -68,6 +75,7 @@ fn test_data() -> Value { fn test_bigquery_force_nullable() { let context = Context { force_nullable: true, + tuple_struct: true, ..Default::default() }; @@ -132,6 +140,18 @@ fn test_bigquery_force_nullable() { "name": "object", "type": "RECORD" }, + { + "fields": [ + { + "mode": "NULLABLE", + "name": "_f0", + "type": "BOOL" + } + ], + "mode": "NULLABLE", + "name": "tuple", + "type": "RECORD" + }, { "fields": [ { @@ -161,6 +181,7 @@ fn test_bigquery_force_nullable() { fn test_avro_force_nullable() { let context = Context { force_nullable: true, + tuple_struct: true, ..Default::default() }; let expected: Value = serde_json::from_str( @@ -263,6 +284,28 @@ fn test_avro_force_nullable() { } ] }, + { + "default": null, + "name": "tuple", + "type": [ + {"type": "null"}, + { + "name": "tuple", + "namespace": "root", + "type": "record", + "fields": [ + { + "default": null, + "name": "_f0", + "type": [ + {"type": "null"}, + {"type": "boolean"} + ] + } + ] + } + ] + }, { "default": null, "name": "union", diff --git a/tests/tuple_struct.rs b/tests/tuple_struct.rs index 3d7cef8..b81482c 100644 --- a/tests/tuple_struct.rs +++ b/tests/tuple_struct.rs @@ -112,11 +112,13 @@ fn test_avro_tuple_atomic_with_additional_items() { }, { "name": "_f2", - "type": {"type": "long"} + "default": null, + "type": [{"type": "null"}, {"type": "long"}] }, { "name": "_f3", - "type": {"type": "long"} + "default": null, + "type": [{"type": "null"}, {"type": "long"}] } ], "name": "root", @@ -151,12 +153,12 @@ fn test_bigquery_tuple_atomic_with_additional_items() { "type": "STRING" }, { - "mode": "REQUIRED", + "mode": "NULLABLE", "name": "_f2", "type": "INT64" }, { - "mode": "REQUIRED", + "mode": "NULLABLE", "name": "_f3", "type": "INT64" } From 4ed2dddca20dd23cac54f207282099491d50f717 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Tue, 24 Sep 2019 16:52:55 -0700 Subject: [PATCH 13/16] Use `f{index}_` convention --- src/avro.rs | 6 +++--- src/bigquery.rs | 6 +++--- tests/force_nullable.rs | 4 ++-- tests/tuple_struct.rs | 24 ++++++++++++------------ 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/avro.rs b/src/avro.rs index df1c08e..38dbdb3 100644 --- a/src/avro.rs +++ b/src/avro.rs @@ -200,7 +200,7 @@ impl TranslateFrom for Type { .map(|(i, v)| { let default = if v.nullable { Some(json!(null)) } else { None }; ( - format!("_f{}", i), + format!("f{}_", i), Type::translate_from(v.clone(), context), default, ) @@ -634,8 +634,8 @@ mod tests { "type": "record", "name": "__UNNAMED__", "fields": [ - {"name": "_f0", "type": {"type": "boolean"}}, - {"name": "_f1", "type": {"type": "long"}} + {"name": "f0_", "type": {"type": "boolean"}}, + {"name": "f1_", "type": {"type": "long"}} ] }); assert_from_ast_eq(ast, avro); diff --git a/src/bigquery.rs b/src/bigquery.rs index 5567e8a..85c7ebe 100644 --- a/src/bigquery.rs +++ b/src/bigquery.rs @@ -123,7 +123,7 @@ impl TranslateFrom for Tag { .map(|(i, mut v)| { // The name is actually derived from the value, not from // the associated key. Modify the name in place. - let name = format!("_f{}", i); + let name = format!("f{}_", i); v.name = Some(name.clone()); (name, Box::new(v)) }) @@ -557,8 +557,8 @@ mod tests { "type": "RECORD", "mode": "REQUIRED", "fields": [ - {"name": "_f0", "type": "BOOL", "mode": "REQUIRED"}, - {"name": "_f1", "type": "INT64", "mode": "REQUIRED"} + {"name": "f0_", "type": "BOOL", "mode": "REQUIRED"}, + {"name": "f1_", "type": "INT64", "mode": "REQUIRED"} ] }); assert_eq!(expect, transform_tag(data)); diff --git a/tests/force_nullable.rs b/tests/force_nullable.rs index 7034d86..2e483e0 100644 --- a/tests/force_nullable.rs +++ b/tests/force_nullable.rs @@ -144,7 +144,7 @@ fn test_bigquery_force_nullable() { "fields": [ { "mode": "NULLABLE", - "name": "_f0", + "name": "f0_", "type": "BOOL" } ], @@ -296,7 +296,7 @@ fn test_avro_force_nullable() { "fields": [ { "default": null, - "name": "_f0", + "name": "f0_", "type": [ {"type": "null"}, {"type": "boolean"} diff --git a/tests/tuple_struct.rs b/tests/tuple_struct.rs index b81482c..bef8dd7 100644 --- a/tests/tuple_struct.rs +++ b/tests/tuple_struct.rs @@ -49,11 +49,11 @@ fn test_avro_tuple_atomic() { { "fields": [ { - "name": "_f0", + "name": "f0_", "type": {"type": "boolean"} }, { - "name": "_f1", + "name": "f1_", "type": {"type": "string"} } ], @@ -77,12 +77,12 @@ fn test_bigquery_tuple_atomic() { [ { "mode": "REQUIRED", - "name": "_f0", + "name": "f0_", "type": "BOOL" }, { "mode": "REQUIRED", - "name": "_f1", + "name": "f1_", "type": "STRING" } ] @@ -103,20 +103,20 @@ fn test_avro_tuple_atomic_with_additional_items() { { "fields": [ { - "name": "_f0", + "name": "f0_", "type": {"type": "boolean"} }, { - "name": "_f1", + "name": "f1_", "type": {"type": "string"} }, { - "name": "_f2", + "name": "f2_", "default": null, "type": [{"type": "null"}, {"type": "long"}] }, { - "name": "_f3", + "name": "f3_", "default": null, "type": [{"type": "null"}, {"type": "long"}] } @@ -144,22 +144,22 @@ fn test_bigquery_tuple_atomic_with_additional_items() { [ { "mode": "REQUIRED", - "name": "_f0", + "name": "f0_", "type": "BOOL" }, { "mode": "REQUIRED", - "name": "_f1", + "name": "f1_", "type": "STRING" }, { "mode": "NULLABLE", - "name": "_f2", + "name": "f2_", "type": "INT64" }, { "mode": "NULLABLE", - "name": "_f3", + "name": "f3_", "type": "INT64" } ] From 9fcb42a12593c26b26ba0b03735b305ee4f93922 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Tue, 24 Sep 2019 16:42:40 -0700 Subject: [PATCH 14/16] Update scripts --- scripts/mps-generate-avro-data-helper.py | 8 ++++++-- scripts/mps-generate-avro-data.sh | 2 +- scripts/mps-load-avro-bq.sh | 18 +++++++----------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/scripts/mps-generate-avro-data-helper.py b/scripts/mps-generate-avro-data-helper.py index bb2d316..c8a9af4 100755 --- a/scripts/mps-generate-avro-data-helper.py +++ b/scripts/mps-generate-avro-data-helper.py @@ -15,12 +15,13 @@ else: sys.exit("Error: missing argument for document") +assert os.path.isdir("data") assert any( [document in name for name in os.listdir("data")] -), "document not found in data" +), f"{document} not found in data" assert any( [document in name for name in os.listdir("avro")] -), "document not found in avro schemas" +), f"{document} not found in avro schemas" def format_key(key): @@ -43,6 +44,9 @@ def convert(data, schema): out = {} if not data: return out + # cast tuple into an object before continuing + if isinstance(data, list): + data = {f"f{i}_": v for i, v in enumerate(data)} for key, value in data.items(): # apply the appropriate transformations on the key key = format_key(key) diff --git a/scripts/mps-generate-avro-data.sh b/scripts/mps-generate-avro-data.sh index 3aef5da..cce741a 100755 --- a/scripts/mps-generate-avro-data.sh +++ b/scripts/mps-generate-avro-data.sh @@ -7,7 +7,7 @@ documents=$(ls data | sed 's/.ndjson//') total=0 failed=0 for document in $documents; do - if ! python3 scripts/mps-generate-avro-data-helper.py $document 2> /dev/null; then + if ! python3 scripts/mps-generate-avro-data-helper.py $document; then echo "failed to write $document" rm "avro-data/$document.avro" ((failed++)) diff --git a/scripts/mps-load-avro-bq.sh b/scripts/mps-load-avro-bq.sh index d581d40..a172c61 100755 --- a/scripts/mps-load-avro-bq.sh +++ b/scripts/mps-load-avro-bq.sh @@ -2,30 +2,26 @@ cd "$(dirname "$0")/.." || exit -project_id="test-avro-ingest" +project_id=$(gcloud config get-value project) +dataset_id="test_avro" gsutil -m cp avro-data/* gs://${project_id}/data - -documents=$(ls avro-data | sed 's/.avro//') +bq rm -rf $dataset_id +bq mk $dataset_id total=0 error=0 skip=0 trap "exit" INT -for document in ${documents}; do +for document in $(ls avro-data | sed 's/.avro//'); do # downcase hyphens to underscores before generating names bq_document=$(echo $document | sed 's/-/_/g') namespace=$(echo $bq_document | cut -d. -f1) doctype=$(echo $bq_document | cut -d. -f2) docver=$(echo $bq_document | cut -d. -f3) - if ! bq ls | grep ${namespace} >/dev/null ; then - echo "creating dataset: ${namespace}" - bq mk ${namespace} - fi - - table_exists=$(bq ls ${namespace} | grep ${doctype}_v${docver}) + table_exists=$(bq ls ${dataset_id} | grep ${namespace}__${doctype}_v${docver}) if [[ ! -z ${SKIP_EXISTING+x} ]] && [[ ! -z ${table_exists} ]]; then echo "skipping bq load for ${document}" @@ -36,7 +32,7 @@ for document in ${documents}; do echo "running bq load for ${document}" bq load --source_format=AVRO \ --replace \ - ${namespace}.${doctype}_v${docver} \ + ${dataset_id}.${namespace}__${doctype}_v${docver} \ gs://${project_id}/data/${document}.avro if [[ $? -ne 0 ]]; then From d145f58d82fd8df67329b357e7ee8e4f0a47cfc5 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Wed, 25 Sep 2019 16:35:27 -0700 Subject: [PATCH 15/16] Handle tests for objects inside of tuples --- src/ast.rs | 6 ++ src/jsonschema.rs | 16 ++--- tests/tuple_struct.rs | 132 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 145 insertions(+), 9 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 386f277..cccb895 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -478,6 +478,12 @@ impl Tag { set_and_recurse(item, "__union__"); } } + Type::Tuple(tuple) => { + for (i, item) in tuple.items.iter_mut().enumerate() { + let name = format!("f{}_", i); + set_and_recurse(item, &name); + } + } _ => (), } } diff --git a/src/jsonschema.rs b/src/jsonschema.rs index 5887a84..3a6b334 100644 --- a/src/jsonschema.rs +++ b/src/jsonschema.rs @@ -826,8 +826,8 @@ mod tests { }); let expect = json!({ "type": {"tuple": {"items": [ - {"type": {"atom": "boolean"}, "nullable": false}, - {"type": {"atom": "integer"}, "nullable": false} + {"name": "f0_", "type": {"atom": "boolean"}, "nullable": false}, + {"name": "f1_", "type": {"atom": "integer"}, "nullable": false} ]}}, "nullable": false }); @@ -847,8 +847,8 @@ mod tests { }); let expect = json!({ "type": {"tuple": {"items": [ - {"type": {"atom": "boolean"}, "nullable": false}, - {"type": {"atom": "integer"}, "nullable": true} + {"name": "f0_", "type": {"atom": "boolean"}, "nullable": false}, + {"name": "f1_", "type": {"atom": "integer"}, "nullable": true} ]}}, "nullable": false }); @@ -868,10 +868,10 @@ mod tests { }); let expect = json!({ "type": {"tuple": {"items": [ - {"type": {"atom": "boolean"}, "nullable": false}, - {"type": {"atom": "integer"}, "nullable": false}, - {"type": {"atom": "string"}, "nullable": true}, - {"type": {"atom": "string"}, "nullable": true}, + {"name": "f0_", "type": {"atom": "boolean"}, "nullable": false}, + {"name": "f1_", "type": {"atom": "integer"}, "nullable": false}, + {"name": "f2_", "type": {"atom": "string"}, "nullable": true}, + {"name": "f3_", "type": {"atom": "string"}, "nullable": true}, ]}}, "nullable": false, }); diff --git a/tests/tuple_struct.rs b/tests/tuple_struct.rs index bef8dd7..9ced104 100644 --- a/tests/tuple_struct.rs +++ b/tests/tuple_struct.rs @@ -1,5 +1,5 @@ -use jst::Context; use jst::{convert_avro, convert_bigquery}; +use jst::{Context, ResolveMethod}; use pretty_assertions::assert_eq; use serde_json::Value; @@ -38,6 +38,44 @@ fn data_atomic_with_additional_properties() -> Value { .unwrap() } +fn data_object_missing() -> Value { + // The second item has an incompatible field, but will be dropped. + serde_json::from_str( + r#" + { + "additionalItems": false, + "items": [ + {"type": "integer"}, + { + "type": "object", + "properties": { + "first": {"type": "string"}, + "second": {"type": ["string", "object"]} + }, + "required": ["first"] + } + ], + "type": "array" + } + "#, + ) + .unwrap() +} + +fn data_incompatible() -> Value { + serde_json::from_str( + r#" + { + "additionalItems": false, + "items": [ + {"type": ["string", "integer"]} + ] + } + "#, + ) + .unwrap() +} + #[test] fn test_avro_tuple_atomic() { let context = Context { @@ -171,3 +209,95 @@ fn test_bigquery_tuple_atomic_with_additional_items() { convert_bigquery(&data_atomic_with_additional_properties(), context) ); } + +/// Objects within tuples are allowed to have extra fields. The decoding tool +/// should preserve the ordering of the items in the tuples. +#[test] +fn test_avro_tuple_object_drop() { + let context = Context { + tuple_struct: true, + resolve_method: ResolveMethod::Drop, + ..Default::default() + }; + + let expected: Value = serde_json::from_str( + r#" + { + "fields": [ + { + "name": "f0_", + "type": {"type": "long"} + }, + { + "name": "f1_", + "type": { + "name": "f1_", + "namespace": "root", + "type": "record", + "fields": [ + {"name": "first", "type": {"type": "string"}} + ] + } + } + ], + "name": "root", + "type": "record" + } + "#, + ) + .unwrap(); + assert_eq!(expected, convert_avro(&data_object_missing(), context)); +} + +#[test] +fn test_bigquery_tuple_object_drop() { + let context = Context { + tuple_struct: true, + resolve_method: ResolveMethod::Drop, + ..Default::default() + }; + + let expected: Value = serde_json::from_str( + r#" + [ + { + "mode": "REQUIRED", + "name": "f0_", + "type": "INT64" + }, + { + "mode": "REQUIRED", + "name": "f1_", + "type": "RECORD", + "fields": [ + {"name": "first", "type": "STRING", "mode": "REQUIRED"} + ] + } + ] + "#, + ) + .unwrap(); + assert_eq!(expected, convert_bigquery(&data_object_missing(), context)); +} + +#[test] +#[should_panic] +fn test_avro_tuple_object_incompatible() { + let context = Context { + tuple_struct: true, + resolve_method: ResolveMethod::Drop, + ..Default::default() + }; + convert_avro(&data_incompatible(), context); +} + +#[test] +#[should_panic] +fn test_bigquery_tuple_object_incompatible() { + let context = Context { + tuple_struct: true, + resolve_method: ResolveMethod::Drop, + ..Default::default() + }; + convert_bigquery(&data_incompatible(), context); +} From 204bb7d22ce18e29948dcd4beb0b7521b51c3216 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Wed, 18 Sep 2019 13:16:39 -0700 Subject: [PATCH 16/16] Bump version to 1.5.0 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0c0fd48..6d7d779 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -96,7 +96,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "jsonschema-transpiler" -version = "1.4.1" +version = "1.5.0" dependencies = [ "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index bae0159..278b9ca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "jsonschema-transpiler" -version = "1.4.1" +version = "1.5.0" authors = ["Anthony Miyaguchi "] description = "A tool to transpile JSON Schema into schemas for data processing" license = "MPL-2.0"