
MRG: Merge pull request #123 from octue/release/0.1.11
Release/0.1.11
cortadocodes committed Mar 15, 2021
2 parents 6b9e4b2 + 858a7d4 commit e4da82e
Showing 11 changed files with 112 additions and 66 deletions.
1 change: 0 additions & 1 deletion .github/workflows/python-ci.yml
@@ -38,7 +38,6 @@ jobs:
env:
GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GCP_SERVICE_ACCOUNT }}
TEST_PROJECT_NAME: ${{ secrets.TEST_PROJECT_NAME }}
TEST_BUCKET_NAME: ${{ secrets.TEST_BUCKET_NAME }}
run: |
coverage run --source octue -m unittest discover
coverage report --show-missing
28 changes: 27 additions & 1 deletion README.md
@@ -9,9 +9,35 @@

Utilities for running Python-based data services, digital twins and applications with the Octue toolkit and [twined](https://twined.readthedocs.io/en/latest/?badge=latest): an SDK for Python-based apps running within Octue.

## Installation and usage
To use the SDK as a scientist or engineer, run the following command in your environment:
```shell
pip install octue
```

The command line interface (CLI) can then be accessed via:
```shell
octue-app --help
```

## Developer notes

### Installation
For development, run the following from the repository root to install the package in editable mode along with the development requirements:
```bash
pip install -r requirements-dev.txt
```

### Testing
These environment variables need to be set to run the tests:
* `GOOGLE_APPLICATION_CREDENTIALS=/absolute/path/to/service/account/file.json`
* `TEST_PROJECT_NAME=<name-of-google-cloud-project-to-run-pub-sub-tests-on>`

Then, from the repository root, run:
```bash
python3 -m unittest
```
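
A minimal sketch (not part of the repository) of doing the same thing programmatically; the credentials path and project name below are placeholders to replace with your own values:
```python
import os
import unittest

# Placeholder values - substitute your own service account file and GCP project name.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/absolute/path/to/service/account/file.json"
os.environ["TEST_PROJECT_NAME"] = "my-test-project"

# Discover and run the test suite from the repository root
# (roughly equivalent to running `python3 -m unittest`).
suite = unittest.defaultTestLoader.discover("tests")
unittest.TextTestRunner(verbosity=2).run(suite)
```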

**Documentation for use of the library is [here](https://octue-python-sdk.readthedocs.io). You don't need to pay attention to the following unless you plan to develop `octue-sdk-python` itself.**

### Pre-Commit
@@ -78,7 +104,7 @@ roadmap, into which you can make your PR. We'll help review the changes and impr
The process for creating a new release is as follows:

1. Check out a branch for the next version, called `vX.Y.Z`
2. Create a Pull Request into the `master` branch.
2. Create a Pull Request into the `main` branch.
3. Undertake your changes, committing and pushing to branch `vX.Y.Z`
4. Ensure that documentation is updated to match changes, and increment the changelog. **Pull requests which do not update documentation will be refused.**
5. Ensure that test coverage is sufficient. **Pull requests that decrease test coverage will be refused.**
4 changes: 2 additions & 2 deletions octue/utils/cloud/emulators.py
@@ -13,7 +13,7 @@ class GoogleCloudStorageEmulator:
:return None:
"""

def __init__(self, host="localhost", port=9090, in_memory=True, default_bucket=os.environ["TEST_BUCKET_NAME"]):
def __init__(self, host="localhost", port=9090, in_memory=True, default_bucket=None):
self._server = create_server(host, port, in_memory=in_memory, default_bucket=default_bucket)

def __enter__(self):
@@ -64,7 +64,7 @@ class GoogleCloudStorageEmulatorTestResultModifier:

STORAGE_EMULATOR_HOST_ENVIRONMENT_VARIABLE_NAME = "STORAGE_EMULATOR_HOST"

def __init__(self, host="localhost", in_memory=True, default_bucket_name=os.environ["TEST_BUCKET_NAME"]):
def __init__(self, host="localhost", in_memory=True, default_bucket_name=None):
port = get_free_tcp_port()
self.storage_emulator_host = f"http://{host}:{port}"

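The change above swaps the environment-variable default (`os.environ["TEST_BUCKET_NAME"]`) for an explicit `default_bucket`/`default_bucket_name` argument, so importing the module no longer requires that variable to be set. A minimal usage sketch under that assumption, with an illustrative bucket name:

```python
from octue.utils.cloud.emulators import (
    GoogleCloudStorageEmulator,
    GoogleCloudStorageEmulatorTestResultModifier,
)

TEST_BUCKET_NAME = "octue-test-bucket"  # illustrative value, mirroring tests/__init__.py below

# Both constructors now receive the bucket explicitly instead of reading it from the
# environment at import time.
emulator = GoogleCloudStorageEmulator(default_bucket=TEST_BUCKET_NAME)
modifier = GoogleCloudStorageEmulatorTestResultModifier(default_bucket_name=TEST_BUCKET_NAME)
```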
2 changes: 1 addition & 1 deletion setup.py
@@ -17,7 +17,7 @@

setup(
name="octue",
version="0.1.10",
version="0.1.11",
py_modules=["cli"],
install_requires=[
"blake3>=0.1.8",
5 changes: 3 additions & 2 deletions tests/__init__.py
@@ -5,8 +5,9 @@


TESTS_DIR = os.path.dirname(__file__)
TEST_PROJECT_NAME = os.environ["TEST_PROJECT_NAME"]
TEST_BUCKET_NAME = "octue-test-bucket"


test_result_modifier = GoogleCloudStorageEmulatorTestResultModifier()
test_result_modifier = GoogleCloudStorageEmulatorTestResultModifier(default_bucket_name=TEST_BUCKET_NAME)
setattr(unittest.TestResult, "startTestRun", test_result_modifier.startTestRun)
setattr(unittest.TestResult, "stopTestRun", test_result_modifier.stopTestRun)
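The two `setattr` calls above patch the modifier's hooks onto `unittest.TestResult`, so that the storage emulator can be started once before the whole test run and stopped afterwards. A sketch of the same pattern with a hypothetical stand-in class (the real modifier lives in `octue/utils/cloud/emulators.py`):

```python
import unittest


class FakeTestResultModifier:
    """Hypothetical stand-in that only logs where the real modifier starts/stops the emulator."""

    def startTestRun(self, *args, **kwargs):
        print("would start the storage emulator before the whole test run")

    def stopTestRun(self, *args, **kwargs):
        print("would stop the storage emulator after the whole test run")


modifier = FakeTestResultModifier()

# Patching the class means every TestResult created by the runner uses these hooks.
setattr(unittest.TestResult, "startTestRun", modifier.startTestRun)
setattr(unittest.TestResult, "stopTestRun", modifier.stopTestRun)
```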
4 changes: 2 additions & 2 deletions tests/resources/communication/google_pub_sub/test_service.py
@@ -1,5 +1,4 @@
import concurrent.futures
import os
import time
import uuid
import google.api_core.exceptions
@@ -8,6 +7,7 @@
from octue.resources.communication.google_pub_sub.service import OCTUE_NAMESPACE, Service
from octue.resources.communication.service_backends import GCPPubSubBackend
from octue.resources.manifest import Manifest
from tests import TEST_PROJECT_NAME
from tests.base import BaseTestCase


@@ -34,7 +34,7 @@ class TestService(BaseTestCase):
(GCP), or a local emulator."""

BACKEND = GCPPubSubBackend(
project_name=os.environ["TEST_PROJECT_NAME"], credentials_environment_variable="GOOGLE_APPLICATION_CREDENTIALS"
project_name=TEST_PROJECT_NAME, credentials_environment_variable="GOOGLE_APPLICATION_CREDENTIALS"
)

@staticmethod
23 changes: 11 additions & 12 deletions tests/resources/test_datafile.py
@@ -8,7 +8,8 @@
from octue.mixins import MixinBase, Pathable
from octue.resources import Datafile
from octue.resources.tag import TagSet
from octue.utils.cloud.storage.client import OCTUE_MANAGED_CREDENTIALS, GoogleCloudStorageClient
from octue.utils.cloud.storage.client import GoogleCloudStorageClient
from tests import TEST_BUCKET_NAME, TEST_PROJECT_NAME
from ..base import BaseTestCase


@@ -135,21 +136,19 @@ def test_hashes_for_the_same_datafile_are_the_same(self):

def test_from_cloud_with_bare_file(self):
"""Test that a Datafile can be constructed from a file on Google Cloud storage with no custom metadata."""
project_name = os.environ["TEST_PROJECT_NAME"]
bucket_name = os.environ["TEST_BUCKET_NAME"]
path_in_bucket = "file_to_upload.txt"

GoogleCloudStorageClient(project_name=project_name, credentials=OCTUE_MANAGED_CREDENTIALS).upload_from_string(
GoogleCloudStorageClient(project_name=TEST_PROJECT_NAME).upload_from_string(
string=json.dumps({"height": 32}),
bucket_name=bucket_name,
bucket_name=TEST_BUCKET_NAME,
path_in_bucket=path_in_bucket,
)

datafile = Datafile.from_cloud(
project_name=project_name, bucket_name=bucket_name, datafile_path=path_in_bucket, timestamp=None
project_name=TEST_PROJECT_NAME, bucket_name=TEST_BUCKET_NAME, datafile_path=path_in_bucket, timestamp=None
)

self.assertEqual(datafile.path, f"gs://{bucket_name}/{path_in_bucket}")
self.assertEqual(datafile.path, f"gs://{TEST_BUCKET_NAME}/{path_in_bucket}")
self.assertEqual(datafile.cluster, 0)
self.assertEqual(datafile.sequence, None)
self.assertEqual(datafile.tags, TagSet())
@@ -159,8 +158,6 @@ def test_from_cloud_with_bare_file(self):

def test_from_cloud_with_datafile(self):
"""Test that a Datafile can be constructed from a file on Google Cloud storage with custom metadata."""
project_name = os.environ["TEST_PROJECT_NAME"]
bucket_name = os.environ["TEST_BUCKET_NAME"]
path_in_bucket = "file_to_upload.txt"

with tempfile.TemporaryDirectory() as temporary_directory:
@@ -172,13 +169,15 @@ def test_from_cloud_with_datafile(self):
datafile = Datafile(
timestamp=None, path=file_0_path, cluster=0, sequence=1, tags={"blah:shah:nah", "blib", "glib"}
)
datafile.to_cloud(project_name=project_name, bucket_name=bucket_name, path_in_bucket=path_in_bucket)
datafile.to_cloud(
project_name=TEST_PROJECT_NAME, bucket_name=TEST_BUCKET_NAME, path_in_bucket=path_in_bucket
)

persisted_datafile = Datafile.from_cloud(
project_name=project_name, bucket_name=bucket_name, datafile_path=path_in_bucket
project_name=TEST_PROJECT_NAME, bucket_name=TEST_BUCKET_NAME, datafile_path=path_in_bucket
)

self.assertEqual(persisted_datafile.path, f"gs://{bucket_name}/{path_in_bucket}")
self.assertEqual(persisted_datafile.path, f"gs://{TEST_BUCKET_NAME}/{path_in_bucket}")
self.assertEqual(persisted_datafile.id, datafile.id)
self.assertEqual(persisted_datafile.hash_value, datafile.hash_value)
self.assertEqual(persisted_datafile.cluster, datafile.cluster)
21 changes: 11 additions & 10 deletions tests/resources/test_dataset.py
@@ -9,6 +9,7 @@
from octue.resources.filter_containers import FilterSet
from octue.utils.cloud import storage
from octue.utils.cloud.storage.client import GoogleCloudStorageClient
from tests import TEST_BUCKET_NAME
from tests.base import BaseTestCase


@@ -289,7 +290,6 @@ def test_serialise(self):
def test_from_cloud(self):
"""Test that a Dataset in cloud storage can be accessed."""
project_name = "test-project"
bucket_name = os.environ["TEST_BUCKET_NAME"]

with tempfile.TemporaryDirectory() as output_directory:
file_0_path = os.path.join(output_directory, "file_0.txt")
@@ -309,30 +309,29 @@ def test_from_cloud(self):
},
)

dataset.to_cloud(project_name=project_name, bucket_name=bucket_name, output_directory=output_directory)
dataset.to_cloud(project_name=project_name, bucket_name=TEST_BUCKET_NAME, output_directory=output_directory)

persisted_dataset = Dataset.from_cloud(
project_name=project_name,
bucket_name=bucket_name,
bucket_name=TEST_BUCKET_NAME,
path_to_dataset_directory=storage.path.join(output_directory, dataset.name),
)

self.assertEqual(persisted_dataset.path, f"gs://{bucket_name}{output_directory}/{dataset.name}")
self.assertEqual(persisted_dataset.path, f"gs://{TEST_BUCKET_NAME}{output_directory}/{dataset.name}")
self.assertEqual(persisted_dataset.id, dataset.id)
self.assertEqual(persisted_dataset.name, dataset.name)
self.assertEqual(persisted_dataset.hash_value, dataset.hash_value)
self.assertEqual(persisted_dataset.tags, dataset.tags)
self.assertEqual({file.name for file in persisted_dataset.files}, {file.name for file in dataset.files})

for file in persisted_dataset:
self.assertEqual(file.path, f"gs://{bucket_name}{output_directory}/{dataset.name}/{file.name}")
self.assertEqual(file.path, f"gs://{TEST_BUCKET_NAME}{output_directory}/{dataset.name}/{file.name}")

def test_to_cloud(self):
"""Test that a dataset can be uploaded to the cloud, including all its files and a serialised JSON file of the
Datafile instance.
"""
project_name = "test-project"
bucket_name = os.environ["TEST_BUCKET_NAME"]
output_directory = "my_datasets"

with tempfile.TemporaryDirectory() as temporary_directory:
@@ -352,24 +351,26 @@ def test_to_cloud(self):
}
)

dataset.to_cloud(project_name, bucket_name, output_directory)
dataset.to_cloud(project_name, TEST_BUCKET_NAME, output_directory)

storage_client = GoogleCloudStorageClient(project_name)

persisted_file_0 = storage_client.download_as_string(
bucket_name=bucket_name, path_in_bucket=storage.path.join(output_directory, dataset.name, "file_0.txt")
bucket_name=TEST_BUCKET_NAME,
path_in_bucket=storage.path.join(output_directory, dataset.name, "file_0.txt"),
)

self.assertEqual(persisted_file_0, "[1, 2, 3]")

persisted_file_1 = storage_client.download_as_string(
bucket_name=bucket_name, path_in_bucket=storage.path.join(output_directory, dataset.name, "file_1.txt")
bucket_name=TEST_BUCKET_NAME,
path_in_bucket=storage.path.join(output_directory, dataset.name, "file_1.txt"),
)
self.assertEqual(persisted_file_1, "[4, 5, 6]")

persisted_dataset = json.loads(
storage_client.download_as_string(
bucket_name=bucket_name,
bucket_name=TEST_BUCKET_NAME,
path_in_bucket=storage.path.join(output_directory, dataset.name, "dataset.json"),
)
)
24 changes: 13 additions & 11 deletions tests/resources/test_manifest.py
@@ -6,6 +6,7 @@
from octue.resources import Datafile, Dataset, Manifest
from octue.utils.cloud import storage
from octue.utils.cloud.storage.client import GoogleCloudStorageClient
from tests import TEST_BUCKET_NAME
from tests.base import BaseTestCase


@@ -26,7 +27,6 @@ def test_hashes_for_the_same_manifest_are_the_same(self):
def test_to_cloud(self):
"""Test that a manifest can be uploaded to the cloud as a serialised JSON file of the Manifest instance. """
project_name = "test-project"
bucket_name = os.environ["TEST_BUCKET_NAME"]

with tempfile.TemporaryDirectory() as output_directory:
file_0_path = os.path.join(output_directory, "file_0.txt")
@@ -49,12 +49,14 @@ def test_to_cloud(self):
manifest = Manifest(datasets=[dataset], keys={"my-dataset": 0})

manifest.to_cloud(
project_name, bucket_name, path_to_manifest_file=storage.path.join(output_directory, "manifest.json")
project_name,
TEST_BUCKET_NAME,
path_to_manifest_file=storage.path.join(output_directory, "manifest.json"),
)

persisted_manifest = json.loads(
GoogleCloudStorageClient(project_name).download_as_string(
bucket_name=bucket_name,
bucket_name=TEST_BUCKET_NAME,
path_in_bucket=storage.path.join(output_directory, "manifest.json"),
)
)
@@ -65,7 +67,6 @@ def test_to_cloud_without_storing_datasets(self):
def test_to_cloud_without_storing_datasets(self):
"""Test that a manifest can be uploaded to the cloud as a serialised JSON file of the Manifest instance. """
project_name = "test-project"
bucket_name = os.environ["TEST_BUCKET_NAME"]

with tempfile.TemporaryDirectory() as output_directory:
file_0_path = os.path.join(output_directory, "file_0.txt")
@@ -90,14 +91,14 @@

manifest.to_cloud(
project_name,
bucket_name,
TEST_BUCKET_NAME,
path_to_manifest_file=storage.path.join(output_directory, "manifest.json"),
store_datasets=False,
)

persisted_manifest = json.loads(
GoogleCloudStorageClient(project_name).download_as_string(
bucket_name=bucket_name,
bucket_name=TEST_BUCKET_NAME,
path_in_bucket=storage.path.join(output_directory, "manifest.json"),
)
)
@@ -108,7 +109,6 @@ def test_from_cloud(self):
def test_from_cloud(self):
"""Test that a Manifest can be instantiated from the cloud."""
project_name = "test-project"
bucket_name = os.environ["TEST_BUCKET_NAME"]

with tempfile.TemporaryDirectory() as output_directory:
file_0_path = os.path.join(output_directory, "file_0.txt")
@@ -130,16 +130,18 @@

manifest = Manifest(datasets=[dataset], keys={"my-dataset": 0})
manifest.to_cloud(
project_name, bucket_name, path_to_manifest_file=storage.path.join(output_directory, "manifest.json")
project_name,
TEST_BUCKET_NAME,
path_to_manifest_file=storage.path.join(output_directory, "manifest.json"),
)

persisted_manifest = Manifest.from_cloud(
project_name=project_name,
bucket_name=bucket_name,
bucket_name=TEST_BUCKET_NAME,
path_to_manifest_file=storage.path.join(output_directory, "manifest.json"),
)

self.assertEqual(persisted_manifest.path, f"gs://{bucket_name}{output_directory}/manifest.json")
self.assertEqual(persisted_manifest.path, f"gs://{TEST_BUCKET_NAME}{output_directory}/manifest.json")
self.assertEqual(persisted_manifest.id, manifest.id)
self.assertEqual(persisted_manifest.hash_value, manifest.hash_value)
self.assertEqual(persisted_manifest.keys, manifest.keys)
@@ -148,6 +150,6 @@
)

for dataset in persisted_manifest.datasets:
self.assertEqual(dataset.path, f"gs://{bucket_name}{output_directory}/{dataset.name}")
self.assertEqual(dataset.path, f"gs://{TEST_BUCKET_NAME}{output_directory}/{dataset.name}")
self.assertTrue(len(dataset.files), 2)
self.assertTrue(all(isinstance(file, Datafile) for file in dataset.files))
