Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

validating schema in discovery mode #56

Open
wants to merge 33 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
23b316c
Adding time_extracted and bookmark_properties
Nov 22, 2017
1d0416c
Adding commas to fix error
Nov 22, 2017
3524f40
Fixing space errors
Nov 22, 2017
90cc986
Fixing pylint warnings
Nov 22, 2017
9cbdfa2
validating schema in discovery mode
Nov 22, 2017
25b19d2
Check for time_extracted being an aware datetime and ensure timezone …
Nov 27, 2017
d297a0e
Fixing pylint errors
Nov 27, 2017
247d4d3
Wrapping up
Nov 28, 2017
e56e488
Pylint
Nov 28, 2017
c53912f
import pytz
ccapurso Nov 28, 2017
7db81df
handle string or array bookmark_properties
ccapurso Nov 28, 2017
42fe69a
allow bookmark_properties to be None
ccapurso Nov 28, 2017
ea4b5ba
Merge pull request #55 from singer-io/time_extracted_and_bookmark_pro…
ccapurso Nov 28, 2017
1b1f32c
bump version to 5.0.0
ccapurso Nov 28, 2017
5a89d5d
move bookmark property handling from write_schema into SchemaMessage …
Nov 29, 2017
555c6ca
Merge pull request #57 from singer-io/fix/initialize-bookmark-propert…
nick-mccoy Nov 29, 2017
d3dd5ba
bump version to 5.0.1
Nov 29, 2017
231dd45
Forcing SchemaMessage so that bookmark_properties is always an array
Nov 30, 2017
23c389d
Pylint fix
Nov 30, 2017
69f56bf
Merge pull request #58 from singer-io/make_bookmark_properties_list
nick-mccoy Nov 30, 2017
bdbd776
Bumping singer-python version to 5.0.2 and deploying
Nov 30, 2017
65bda5b
Add metadata assignment to Catalog.from_dict. Update test_catalog wit…
flash716 Dec 10, 2017
219b8eb
Add stream_alias to Catalog.to_dict. Add to unit test.
flash716 Dec 10, 2017
c5182e9
Merge pull request #61 from flash716/stream-alias
nick-mccoy Dec 13, 2017
13275d8
Merge pull request #60 from flash716/catalog-metadata
nick-mccoy Dec 14, 2017
be8b565
changing strftime format string
Dec 14, 2017
215171b
fixing strftime format string
Dec 14, 2017
16b53f3
Merge pull request #63 from singer-io/fix_strftime_format
nick-mccoy Dec 15, 2017
cdbf7ed
bumping version
Dec 15, 2017
89c6466
validating schema in discovery mode
Nov 22, 2017
4411df5
Rebasing validating_schema_in_discovery_mode with master
Dec 15, 2017
c8c507b
pylint
Dec 15, 2017
b82f0f1
pylint fixes
Dec 18, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
27 changes: 27 additions & 0 deletions singer/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import sys

from singer.schema import Schema
from jsonschema import ValidationError, Draft4Validator, FormatChecker

# pylint: disable=too-many-instance-attributes
class CatalogEntry(object):
Expand Down Expand Up @@ -109,3 +110,29 @@ def get_stream(self, tap_stream_id):
if stream.tap_stream_id == tap_stream_id:
return stream
return None


CATALOG_SCHEMA = {'type': 'object',
'required': ['streams'],
'properties': {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you might want to add "additionalProperties": false here and in the other nested object in this schema. If you don't, the validator will allow objects with other properties. I guess it depends on how strict we want to be in this validation. @cmerrick, what are your thoughts on that?

'streams' : {
'type': 'array',
'items': {
'type': 'object',
'required': ['stream', 'tap_stream_id', 'schema'],
'properties': {
'stream': {'type': 'string'},
'tap_stream_id': {'type': 'string'},
'schema': {'type': 'object'}
}
}
}
}
}

# Shared validator for catalog dicts: checks them against CATALOG_SCHEMA
# using JSON Schema Draft 4, with a FormatChecker attached.
CATALOG_VALIDATOR = Draft4Validator(
    CATALOG_SCHEMA,
    format_checker=FormatChecker(),
)

def write_catalog(streams):
    """Validate a catalog and write it to stdout as indented JSON.

    Args:
        streams: The catalog to emit. It is validated against
            CATALOG_SCHEMA, which requires an object with a ``streams``
            array, so this is the whole catalog dict rather than a bare
            list of streams.

    Raises:
        jsonschema.ValidationError: If ``streams`` does not conform to
            CATALOG_SCHEMA. Nothing is written in that case, because
            validation runs before serialization.
    """
    # Validate first so an invalid catalog never reaches stdout.
    CATALOG_VALIDATOR.validate(streams)
    json.dump(streams, fp=sys.stdout, indent=2)
19 changes: 13 additions & 6 deletions tests/test_catalog.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import unittest
import singer.catalog

from singer.schema import Schema
from singer.catalog import Catalog, CatalogEntry

class TestToDictAndFromDict(unittest.TestCase):

dict_form = {
dict_form = {
'streams': [
{
'stream': 'users',
Expand Down Expand Up @@ -38,7 +37,7 @@ class TestToDictAndFromDict(unittest.TestCase):
]
}

obj_form = Catalog(streams=[
obj_form = Catalog(streams=[
CatalogEntry(
stream='users',
tap_stream_id='prod_users',
Expand All @@ -62,11 +61,15 @@ class TestToDictAndFromDict(unittest.TestCase):
'id': Schema(type='integer', selected=True),
'amount': Schema(type='number', selected=True)}))])




class TestToDictAndFromDict(unittest.TestCase):
    """Round-trip checks between the dict form and object form of a catalog."""

    def test_from_dict(self):
        # dict_form and obj_form are referenced as bare names (not via self):
        # this class defines no such attributes, so self.dict_form /
        # self.obj_form would raise AttributeError.
        self.assertEqual(obj_form, Catalog.from_dict(dict_form))

    def test_to_dict(self):
        # Converting the object form back must reproduce the dict form.
        self.assertEqual(dict_form, obj_form.to_dict())


class TestGetStream(unittest.TestCase):
Expand All @@ -77,3 +80,7 @@ def test(self):
CatalogEntry(tap_stream_id='c')])
entry = catalog.get_stream('b')
self.assertEquals('b', entry.tap_stream_id)

class TestWriteCatalog(unittest.TestCase):
    """Smoke test for singer.catalog.write_catalog."""

    def test(self):
        # No explicit assertion: the test passes as long as write_catalog
        # does not raise when given the sample catalog dict.
        write_catalog = singer.catalog.write_catalog
        write_catalog(dict_form)