Skip to content

Commit

Permalink
Merge pull request #557 from zinggAI/cleanupmethod
Browse files: browse the repository at this point in the history
ddl schema definition instead of json for csv schema #473
  • Loading branch information
sonalgoyal committed Mar 31, 2023
2 parents 79add07 + 21185e2 commit dd0ab0e
Show file tree
Hide file tree
Showing 171 changed files with 37 additions and 354 deletions.
Expand Up @@ -78,22 +78,7 @@
"delimiter": ",",
"header":false
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"fname\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"lname\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"stNo\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add1\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add2\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"city\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"areacode\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"state\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"dob\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"ssn\",\"type\":\"string\",\"nullable\":true}
]
}"
"schema": "id string, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string"
}],
"labelDataSampleSize" : 0.5,
"numPartitions":4,
Expand Down
Expand Up @@ -78,22 +78,7 @@
"delimiter": ",",
"header":false
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"fname\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"lname\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"stNo\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add1\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add2\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"city\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"areacode\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"state\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"dob\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"ssn\",\"type\":\"string\",\"nullable\":true}
]
}"
"schema": "id string, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string"
}],
"labelDataSampleSize" : 0.5,
"numPartitions":4,
Expand Down
16 changes: 1 addition & 15 deletions common/core/src/test/resources/testDocumenter/config.json
Expand Up @@ -72,21 +72,7 @@
"delimiter": ",",
"header":false
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"fname\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"lname\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"stNo\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add1\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add2\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"city\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"state\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"dob\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"ssn\",\"type\":\"string\",\"nullable\":true}
]
}"
"schema": "id string, fname string, lname string, stNo string, add1 string, add2 string, city string, state string, dob string, ssn string"
}],
"labelDataSampleSize" : 0.5,
"numPartitions":4,
Expand Down
17 changes: 1 addition & 16 deletions common/core/src/test/resources/testPeekModel/config.json
Expand Up @@ -84,22 +84,7 @@
"delimiter": ",",
"header":false
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"fname\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"lname\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"stNo\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add1\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add2\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"city\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"areacode\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"state\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"dob\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"ssn\",\"type\":\"string\",\"nullable\":true}
]
}"
"schema": "id string, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string"
}],
"labelDataSampleSize" : 0.5,
"numPartitions":4,
Expand Down
22 changes: 2 additions & 20 deletions examples/beerAdvo-rateBeer/config.json
Expand Up @@ -47,16 +47,7 @@
"delimiter": ",",
"header":true
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"Beer_Name\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"Brew_Factory_Name\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"Style\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"ABV\", \"type\":\"double\", \"nullable\":true}
]
}"
"schema": "id string, Beer_Name string, Brew_Factory_Name string, Style string, ABV double"
},
{
"name":"rateBeer",
Expand All @@ -66,16 +57,7 @@
"delimiter": ",",
"header":true
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"Beer_Name\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"Brew_Factory_Name\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"Style\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"ABV\", \"type\":\"double\", \"nullable\":true}
]
}"
"schema": "id string, Beer_Name string, Brew_Factory_Name string, Style string, ABV double"
}
],
"labelDataSampleSize" : 0.4,
Expand Down
16 changes: 1 addition & 15 deletions examples/databricks/config120.json
Expand Up @@ -72,21 +72,7 @@
"delimiter": ",",
"header":false
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"fname\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"lname\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"stNo\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add1\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add2\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"city\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"state\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"dob\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"ssn\",\"type\":\"string\",\"nullable\":true}
]
}"
"schema": "id string, fname string, lname string, stNo string, add1 string, add2 string, city string, state string, dob string, ssn string"
}],
"labelDataSampleSize" : 0.1,
"numPartitions":5000,
Expand Down
34 changes: 2 additions & 32 deletions examples/febrl/configLink.json
Expand Up @@ -78,22 +78,7 @@
"delimiter": ",",
"header":false
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"fname\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"lname\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"stNo\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add1\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add2\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"city\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"areacode\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"state\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"dob\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"ssn\",\"type\":\"string\",\"nullable\":true}
]
}"
"schema": "id string, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string"
},
{
"name":"test2",
Expand All @@ -103,22 +88,7 @@
"delimiter": ",",
"header":false
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"fname\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"lname\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"stNo\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add1\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add2\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"city\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"areacode\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"state\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"dob\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"ssn\",\"type\":\"string\",\"nullable\":true}
]
}"
"schema": "id string, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string"
}
],
"labelDataSampleSize" : 0.5,
Expand Down
38 changes: 2 additions & 36 deletions examples/febrl/configWithTrainingSamples.json
Expand Up @@ -8,26 +8,7 @@
"header":false,
"badRecordsPath":"/tmp/bad"
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [

{\"name\" : \"z_cluster\",\"type\" : \"string\",\"nullable\" : false, \"metadata\" : {}},
{\"name\":\"z_ismatch\",\"type\":\"integer\",\"nullable\":true,\"metadata\" : {}},
{\"name\":\"fname\", \"type\":\"string\", \"nullable\":true,\"metadata\" : {}},
{\"name\":\"lname\",\"type\":\"string\",\"nullable\":true,\"metadata\" : {}},
{\"name\":\"stNo\", \"type\":\"string\", \"nullable\":true,\"metadata\" : {}},
{\"name\":\"add1\", \"type\":\"string\", \"nullable\":true,\"metadata\" : {}},
{\"name\":\"add2\",\"type\":\"string\",\"nullable\":true,\"metadata\" : {}},
{\"name\":\"city\", \"type\":\"string\", \"nullable\":true,\"metadata\" : {}},
{\"name\":\"areacode\", \"type\":\"string\", \"nullable\":true,\"metadata\" : {}},
{\"name\":\"state\", \"type\":\"string\", \"nullable\":true,\"metadata\" : {}},
{\"name\":\"dob\",\"type\":\"string\",\"nullable\":true,\"metadata\" : {}},
{\"name\":\"ssn\",\"type\":\"string\",\"nullable\":true,\"metadata\" : {}}

]
}"

"schema": "z_cluster string, z_ismatch integer, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string"
}],
"fieldDefinition":[
{
Expand Down Expand Up @@ -108,22 +89,7 @@
"delimiter": ",",
"header":false
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"fname\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"lname\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"stNo\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add1\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add2\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"city\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"areacode\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"state\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"dob\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"ssn\",\"type\":\"string\",\"nullable\":true}
]
}"
"schema": "id string, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string"
}],
"labelDataSampleSize" : 0.5,
"numPartitions":4,
Expand Down
17 changes: 1 addition & 16 deletions examples/febrl/findTrainingData.json
Expand Up @@ -78,22 +78,7 @@
"delimiter": ",",
"header":false
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"fname\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"lname\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"stNo\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add1\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add2\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"city\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"areacode\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"state\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"dob\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"ssn\",\"type\":\"string\",\"nullable\":true}
]
}"
"schema": "id string, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string"
}],
"labelDataSampleSize" : 0.5,
"numPartitions":4
Expand Down
17 changes: 1 addition & 16 deletions examples/febrl120k/config.json
Expand Up @@ -84,22 +84,7 @@
"delimiter": ",",
"header":false
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"fname\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"lname\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"stNo\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add1\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add2\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"city\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"areacode\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"state\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"dob\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"ssn\",\"type\":\"string\",\"nullable\":true}
]
}"
"schema": "id string, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string"
}],
"labelDataSampleSize" : 0.1,
"numPartitions":5000,
Expand Down
17 changes: 1 addition & 16 deletions examples/febrl120k/config120k.json
Expand Up @@ -78,22 +78,7 @@
"delimiter": ",",
"header":false
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"fname\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"lname\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"stNo\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add1\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add2\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"city\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"areacode\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"state\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"dob\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"ssn\",\"type\":\"string\",\"nullable\":true}
]
}"
"schema": "id string, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string"
}],
"labelDataSampleSize" : 0.1,
"numPartitions":2000,
Expand Down
17 changes: 1 addition & 16 deletions examples/febrl120k/config500k.json
Expand Up @@ -78,22 +78,7 @@
"delimiter": ",",
"header":false
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"fname\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"lname\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"stNo\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add1\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add2\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"city\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"areacode\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"state\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"dob\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"ssn\",\"type\":\"string\",\"nullable\":true}
]
}"
"schema": "id string, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string"
}],
"labelDataSampleSize" : 0.1,
"numPartitions":2000,
Expand Down
30 changes: 2 additions & 28 deletions examples/iTunes-amazon/config.json
Expand Up @@ -71,20 +71,7 @@
"delimiter": ",",
"header":true
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"Song_Name\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"Artist_Name\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"Album_Name\",\"type\":\"string\",\"nullable\":true},
{\"name\":\"Genre\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"Price\", \"type\":\"double\", \"nullable\":true},
{\"name\":\"CopyRight\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"Time\",\"type\":\"string\",\"nullable\":true},
{\"name\":\"Released\", \"type\":\"string\", \"nullable\":true}
]
}"
"schema": "id string, Song_Name string, Artist_Name string, Album_Name string, Genre string, Price double, CopyRight string, Time string, Released string"
},
{
"name":"amazon",
Expand All @@ -94,20 +81,7 @@
"delimiter": ",",
"header":true
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"Song_Name\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"Artist_Name\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"Album_Name\",\"type\":\"string\",\"nullable\":true},
{\"name\":\"Genre\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"Price\", \"type\":\"double\", \"nullable\":true},
{\"name\":\"CopyRight\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"Time\",\"type\":\"string\",\"nullable\":true},
{\"name\":\"Released\", \"type\":\"string\", \"nullable\":true}
]
}"
"schema": "id string, Song_Name string, Artist_Name string, Album_Name string, Genre string, Price double, CopyRight string, Time string, Released string"
}
],
"labelDataSampleSize" : 0.4,
Expand Down

0 comments on commit dd0ab0e

Please sign in to comment.