-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #13 from black-cape/feature/postgres-workflow
Add Postgres for Workflow tracking
- Loading branch information
Showing
13 changed files
with
1,120 additions
and
398 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
"""Contains the Minio implementation of the object store backend interface""" | ||
import json | ||
from datetime import datetime | ||
from sqlite3 import DatabaseError | ||
from typing import Any, Dict, Optional | ||
from asyncpg import InvalidPasswordError | ||
|
||
from asyncpg_utils.databases import PoolDatabase | ||
from asyncpg_utils.managers import TableManager | ||
|
||
from etl.config import settings | ||
from etl.database.interfaces import DatabaseStore, FileObject | ||
from etl.util import get_logger | ||
|
||
LOGGER = get_logger(__name__) | ||
|
||
|
||
class PGDatabase(DatabaseStore):
    """Implements the DatabaseStore interface using Postgres as the backend service.

    File records are kept in the ``files`` table (created on demand by
    :meth:`create_table`) and are addressed by their ``id`` primary key.
    """

    def __init__(self):
        # Built in two pieces purely for line length; the resulting DSN is unchanged.
        dsn = (
            f'postgres://{settings.database_user}:{settings.database_password}'
            f'@{settings.database_host}/{settings.database_db}'
        )
        self._database = PoolDatabase(dsn)
        self._table_manager = TableManager(self._database, 'files', pk_field='id', hooks=None)

    async def create_table(self) -> bool:
        """Initialise the connection pool and create the ``files`` table if it is missing.

        :return: True when the table exists or was created, False when the
            database could not be reached or rejected the request
        """
        # Local import: asyncpg is already a dependency of this module, and this
        # keeps the fix self-contained without touching the module import block.
        from asyncpg import PostgresError

        LOGGER.info('Creating DB table...')
        try:
            await self._database.init_pool()
            conn = await self._database.get_connection()
            try:
                await conn.execute(
                    """
                    CREATE TABLE IF NOT EXISTS files (
                        id uuid PRIMARY KEY,
                        bucket_name text,
                        file_name text,
                        status text,
                        processing_status text,
                        original_filename text,
                        event_name text,
                        source_ip text,
                        size int,
                        etag text,
                        content_type text,
                        create_datetime timestamp with time zone,
                        update_datetime timestamp with time zone,
                        classification jsonb,
                        metadata jsonb
                    );
                    """
                )
            finally:
                # Always hand the connection back, even when the DDL fails.
                await conn.close()
            return True
        except (DatabaseError, InvalidPasswordError, PostgresError, OSError) as db_error:
            # PostgresError covers server-side failures; OSError covers an
            # unreachable host (connection refused, DNS failure, ...). The
            # originally caught exception types are kept for compatibility.
            LOGGER.info('Database not active. Exception: %s', db_error)
            return False

    async def insert_file(self, filedata: FileObject):
        """Track a new file from Minio.

        :param filedata: The record to insert, either a dataclass instance or
            a mapping of column name to value
        """
        from dataclasses import asdict, is_dataclass

        LOGGER.info("Inserting file into DB...")
        # dict() cannot consume a dataclass instance, so convert it explicitly;
        # mappings (such as the dict built by parse_notification) pass through.
        if is_dataclass(filedata) and not isinstance(filedata, type):
            row = asdict(filedata)
        else:
            row = dict(filedata)
        await self._database.insert('files', row)

    async def move_file(self, rowid: str, new_name: str):
        """Track the moving of a file in Minio.

        :param rowid: The id of the row to update
        :param new_name: The new file name to record
        """
        rec_data = {
            # The table has no ``path`` column; the name is stored in ``file_name``.
            'file_name': new_name,
            # Pass a datetime (as update_status does) rather than a formatted string.
            'update_datetime': datetime.now(),
        }
        await self._table_manager.update(rowid, rec_data)

    async def delete_file(self, rowid: str):
        """Track the deleting of a file in Minio.

        :param rowid: The id of the row to delete
        """
        await self._table_manager.delete(rowid)

    async def list_files(self, metadata: Optional[Dict]):
        """List all tracked files matching the provided filter.

        :param metadata: Filters forwarded to the table manager, or None
        """
        return await self._table_manager.list(filters=metadata)

    async def retrieve_file_metadata(self, rowid: str):
        """Retrieve a row based on ID.

        :param rowid: The id of the row to fetch
        """
        return await self._table_manager.detail(rowid)

    async def update_status(self, rowid: str, new_status: str, new_filename: str):
        """Update the file status/state together with its current file name.

        :param rowid: The id of the row to update
        :param new_status: New value for the ``status`` column
        :param new_filename: New value for the ``file_name`` column
        """
        rec_data = {
            'status': new_status,
            'file_name': new_filename,
            'update_datetime': datetime.now(),
        }
        await self._table_manager.update(rowid, rec_data)

    def parse_notification(self, evt_data: Any):
        """Parse a Minio notification to create a DB row.

        :param evt_data: The notification payload; ``Key``, ``EventName`` and
            the first entry of ``Records`` are read
        :return: Dict keyed by ``files`` column name
        """
        LOGGER.info(evt_data)
        bucket_name, file_name = evt_data['Key'].split('/', 1)
        record = evt_data['Records'][0]
        s3_object = record['s3']['object']
        # Objects uploaded without user metadata have no 'userMetadata' entry;
        # fall back to an empty dict so the lookups below yield None instead
        # of raising AttributeError.
        metadata = s3_object.get('userMetadata') or {}
        db_evt = {
            'id': metadata.get('X-Amz-Meta-Id', None),
            'bucket_name': bucket_name,
            'file_name': file_name,
            'status': 'Queued',
            'processing_status': None,
            'original_filename': metadata.get('X-Amz-Meta-Originalfilename', None),
            'event_name': evt_data['EventName'],
            'source_ip': record['requestParameters']['sourceIPAddress'],
            'size': s3_object['size'],
            'etag': s3_object['eTag'],
            'content_type': s3_object['contentType'],
            'create_datetime': datetime.now(),
            'classification': metadata.get('X-Amz-Meta-Classification', None),
            'metadata': json.dumps(metadata),
        }
        return db_evt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
"""Describes interface for sending messages to a message broker""" | ||
import abc | ||
from dataclasses import dataclass | ||
from datetime import datetime | ||
from typing import Any, Dict, List, Optional | ||
|
||
|
||
@dataclass
class FileObject:
    """Represents an implementation-neutral file event.

    The trailing fields default to ``None`` so a record can be built for a
    file that has not yet been updated or classified.
    """
    id: str
    bucket_name: str
    file_name: str
    status: str
    # May be None: a newly parsed notification carries no processing status.
    processing_status: Optional[str]
    original_filename: str
    event_name: str
    source_ip: str
    size: int
    etag: str
    content_type: str
    create_datetime: datetime
    # Not part of a freshly parsed notification, hence optional.
    update_datetime: Optional[datetime] = None
    classification: Optional[str] = None
    metadata: Optional[str] = None
|
||
|
||
class DatabaseStore(abc.ABC):
    """Interface for a backend that tracks file records in a database"""

    async def insert_file(self, filedata: FileObject) -> None:
        """Insert a file record
        :param filedata: The file record to store
        """
        raise NotImplementedError()

    async def move_file(self, rowid: str, new_name: str) -> None:
        """Rename a file record
        :param rowid: The id of the record
        :param new_name: New name/path value
        """
        raise NotImplementedError()

    async def update_status(self, rowid: str, new_status: str, new_filename: str) -> None:
        """Update the status and file name of a file record
        :param rowid: The id of the record
        :param new_status: New status value
        :param new_filename: New file name value
        """
        raise NotImplementedError()

    async def delete_file(self, rowid: str) -> None:
        """Delete a record
        :param rowid: The id of the record
        """
        raise NotImplementedError()

    async def list_files(self, metadata: Optional[Dict]) -> List[Dict]:
        """Retrieve records based on metadata criteria
        :param metadata: Dict containing query restrictions
        """
        raise NotImplementedError()

    async def retrieve_file_metadata(self, rowid: str) -> Dict:
        """Retrieve a row based on ID
        :param rowid: The id of the record
        """
        raise NotImplementedError()

    def parse_notification(self, evt_data: Any) -> Dict:
        """Parse the event into a DB row/dict
        :param evt_data: The event data from S3/Minio
        """
        raise NotImplementedError()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.