Skip to content

Commit

Permalink
Add migrate endpoint to move artifacts to another storage backend
Browse files Browse the repository at this point in the history
fixes: pulp#3358
  • Loading branch information
gerrod3 committed Aug 30, 2023
1 parent 019266a commit 2553416
Show file tree
Hide file tree
Showing 13 changed files with 392 additions and 45 deletions.
1 change: 1 addition & 0 deletions CHANGES/3358.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added new `/migrate/` endpoint to Domains that allows for migrating artifacts from one storage backend to another.
8 changes: 8 additions & 0 deletions docs/configuration/settings.rst
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ REDIRECT_TO_OBJECT_STORAGE

Defaults to ``True``; ignored for local file storage.

.. note::

After changing this value, ``pulpcore-manager migrate`` must be ran for it be properly updated.


MEDIA_ROOT
^^^^^^^^^^
Expand Down Expand Up @@ -257,6 +261,10 @@ HIDE_GUARDED_DISTRIBUTIONS
If activated, the distributions that are protected by a content guard will not be shown on the
directory listing in the content app. Defaults to ``False``.

.. note::

After changing this value, ``pulpcore-manager migrate`` must be ran for it be properly updated.

.. _content-path-prefix:

CONTENT_PATH_PREFIX
Expand Down
4 changes: 4 additions & 0 deletions docs/installation/storage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ Storage
to use another storage backend such as Amazon Simple Storage Service (S3), you'll need to
configure Pulp.

.. note::

After changing any value related to storage settings you must run ``pulpcore-manager migrate``
for the new configuration to be properly registered.

Local Filesystem
^^^^^^^^^^^^^^^^
Expand Down
3 changes: 3 additions & 0 deletions pulpcore/app/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,11 +322,14 @@ def _ensure_default_domain(sender, **kwargs):
settings.HIDE_GUARDED_DISTRIBUTIONS != default.hide_guarded_distributions
or settings.REDIRECT_TO_OBJECT_STORAGE != default.redirect_to_object_storage
or settings.DEFAULT_FILE_STORAGE != default.storage_class
or default.storage_settings
):
default.hide_guarded_distributions = settings.HIDE_GUARDED_DISTRIBUTIONS
default.redirect_to_object_storage = settings.REDIRECT_TO_OBJECT_STORAGE
default.storage_class = settings.DEFAULT_FILE_STORAGE
default.storage_settings = {}
default.save(skip_hooks=True)
print(_("Updated default domain to match current Pulp settings"))


def _populate_roles(sender, apps, verbosity, **kwargs):
Expand Down
4 changes: 1 addition & 3 deletions pulpcore/app/models/domain.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from django.core.files.storage import get_storage_class, default_storage
from django.core.files.storage import get_storage_class
from django.db import models
from django_lifecycle import hook, BEFORE_DELETE, BEFORE_UPDATE

Expand Down Expand Up @@ -39,8 +39,6 @@ class Domain(BaseModel, AutoAddObjPermsMixin):

def get_storage(self):
"""Returns this domain's instantiated storage class."""
if self.name == "default":
return default_storage
storage_class = get_storage_class(self.storage_class)
return storage_class(**self.storage_settings)

Expand Down
2 changes: 1 addition & 1 deletion pulpcore/app/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
SigningServiceSerializer,
SingleArtifactContentSerializer,
)
from .domain import DomainSerializer
from .domain import DomainSerializer, DomainBackendMigratorSerializer
from .exporter import (
ExporterSerializer,
ExportSerializer,
Expand Down
100 changes: 75 additions & 25 deletions pulpcore/app/serializers/domain.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
from gettext import gettext as _
import json

from django.conf import settings
from django.core.files.storage import import_string
from django.utils.encoding import force_bytes, force_str
from django.core.exceptions import ImproperlyConfigured
from drf_spectacular.types import OpenApiTypes
from drf_spectacular.utils import extend_schema_field

from rest_framework import serializers
from rest_framework.validators import UniqueValidator

from pulpcore.app import models
from pulpcore.app.models import Domain, fields
from pulpcore.app.serializers import IdentityField, ModelSerializer, HiddenFieldsMixin


Expand Down Expand Up @@ -41,7 +43,7 @@ def to_representation(self, instance):
if getattr(self.context.get("domain", None), "name", None) == "default":
for setting_name, field in self.SETTING_MAPPING.items():
if value := getattr(settings, setting_name.upper(), None):
instance[field] = value
instance.setdefault(field, value)
return super().to_representation(instance)

def to_internal_value(self, data):
Expand Down Expand Up @@ -278,9 +280,22 @@ def to_internal_value(self, data):
"""Appropriately convert the incoming data based on the Domain's storage class."""
# Handle Creating & Updating
storage_settings = self.root.initial_data.get("storage_settings", {})
if not isinstance(storage_settings, dict):
if isinstance(storage_settings, str):
try:
storage_settings = json.loads(storage_settings)
except json.JSONDecodeError:
raise serializers.ValidationError("Improper JSON string passed in")
else:
raise serializers.ValidationError("Storage settings should be a JSON object.")

if self.root.instance:
storage_class = self.root.instance.storage_class
storage_settings = {**self.root.instance.storage_settings, **storage_settings}
# Use passed in values, if not present fallback onto current values of instance
storage_class = self.root.initial_data.get(
"storage_class", self.root.instance.storage_class
)
if storage_class == self.root.instance.storage_class:
storage_settings = {**self.root.instance.storage_settings, **storage_settings}
else:
storage_class = self.root.initial_data["storage_class"]

Expand All @@ -293,13 +308,36 @@ def to_internal_value(self, data):
return {"storage_settings": ret}


class DomainSerializer(ModelSerializer):
class BackendSettingsValidator:
"""Mixin to handle validating `storage_class` and `storage_settings`."""

@staticmethod
def _validate_storage_backend(storage_class, storage_settings):
"""Ensure that the backend can be used."""
try:
backend = import_string(storage_class)
except (ImportError, ImproperlyConfigured):
raise serializers.ValidationError(
detail={"storage_class": _("Backend is not installed on Pulp.")}
)

try:
backend(**storage_settings)
except ImproperlyConfigured as e:
raise serializers.ValidationError(
detail={
"storage_settings": _("Backend settings contain incorrect values: {}".format(e))
}
)


class DomainSerializer(BackendSettingsValidator, ModelSerializer):
"""Serializer for Domain."""

pulp_href = IdentityField(view_name="domains-detail")
name = serializers.SlugField(
help_text=_("A name for this domain."),
validators=[UniqueValidator(queryset=models.Domain.objects.all())],
validators=[UniqueValidator(queryset=Domain.objects.all())],
)
description = serializers.CharField(
help_text=_("An optional description."), required=False, allow_null=True
Expand All @@ -326,24 +364,6 @@ def validate_name(self, value):
raise serializers.ValidationError(_("Name can not be 'api' or 'content'."))
return value

def _validate_storage_backend(self, storage_class, storage_settings):
"""Ensure that the backend can be used."""
try:
backend = import_string(storage_class)
except (ImportError, ImproperlyConfigured):
raise serializers.ValidationError(
detail={"storage_class": _("Backend is not installed on Pulp.")}
)

try:
backend(**storage_settings)
except ImproperlyConfigured as e:
raise serializers.ValidationError(
detail={
"storage_settings": _("Backend settings contain incorrect values: {}".format(e))
}
)

def validate(self, data):
"""Ensure that Domain settings are valid."""
# Validate for update gets called before ViewSet default check
Expand All @@ -369,7 +389,7 @@ def validate(self, data):
return data

class Meta:
model = models.Domain
model = Domain
fields = ModelSerializer.Meta.fields + (
"name",
"description",
Expand All @@ -378,3 +398,33 @@ class Meta:
"redirect_to_object_storage",
"hide_guarded_distributions",
)


class DomainBackendMigratorSerializer(BackendSettingsValidator, serializers.Serializer):
"""Special serializer for performing a storage backend migration on a Domain."""

storage_class = serializers.ChoiceField(
help_text=_("The new backend storage class to migrate to."),
choices=BACKEND_CHOICES,
)
storage_settings = StorageSettingsSerializer(
source="*", help_text=_("The settings for the new storage class to migrate to.")
)

def encrypt_data(self):
"""Returns the data in the serializer as an encrypted string."""
value = json.dumps(self.validated_data)
return force_str(fields._fernet().encrypt(force_bytes(value)))

@classmethod
def decrypt_data(cls, encrypted_data):
"""Returns a JSON object from the decrypted string."""
value = force_str(fields._fernet().decrypt(force_bytes(encrypted_data)))
return json.loads(value)

def validate(self, data):
"""Validate new backend settings."""
storage_class = data["storage_class"]
storage_settings = data["storage_settings"]
self._validate_storage_backend(storage_class, storage_settings)
return data
26 changes: 26 additions & 0 deletions pulpcore/app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,32 @@
pass
finally:
connection.close()
# Check if the configuration of the default domain is mismatched with settings
try:
with connection.cursor() as cursor:
cursor.execute(
"SELECT storage_class, redirect_to_object_storage, hide_guarded_distributions,"
" storage_settings FROM core_domain WHERE name = 'default'"
)
row = cursor.fetchone()
mismatched = []
for i, setting in enumerate(
("DEFAULT_FILE_STORAGE", "REDIRECT_TO_OBJECT_STORAGE", "HIDE_GUARDED_DISTRIBUTIONS")
):
if row[i] != getattr(settings, setting, None):
mismatched.append(setting)
if row[3] != "{}":
mismatched.append("STORAGE_SETTINGS")
if mismatched:
_logger.warning(
f"The default domain's fields ({mismatched}) mismatch what is set in settings. "
f"Please check/update the settings file and then run 'pulpcore-manager migrate'"
)
except Exception:
# our check could fail if the table hasn't been created yet or we can't get a db connection
pass
finally:
connection.close()

settings.set("V3_API_ROOT", settings.API_ROOT + "api/v3/") # Not user configurable
settings.set("V3_DOMAIN_API_ROOT", settings.API_ROOT + "<slug:pulp_domain>/api/v3/")
Expand Down
2 changes: 2 additions & 0 deletions pulpcore/app/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

from .importer import pulp_import

from .migrate import migrate_backend

from .orphan import orphan_cleanup

from .purge import purge
Expand Down
70 changes: 70 additions & 0 deletions pulpcore/app/tasks/migrate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import logging
from gettext import gettext as _

from django.utils.timezone import now
from rest_framework.serializers import ValidationError
from pulpcore.app.models import Artifact, Domain, storage, ProgressReport
from pulpcore.app.serializers import DomainBackendMigratorSerializer
from pulpcore.app.util import get_domain
from pulpcore.constants import TASK_STATES

_logger = logging.getLogger(__name__)


def migrate_backend(data):
"""
Copy the artifacts from the current storage backend to a new one. Then update backend settings.
Args:
data (str): encrypted json string of the new storage backend settings
"""
data = DomainBackendMigratorSerializer.decrypt_data(data)
domain = get_domain()
old_storage = domain.get_storage()
new_storage = Domain(**data).get_storage()

artifacts = Artifact.objects.filter(pulp_domain=domain)
date = now()

with ProgressReport(
message=_("Migrating Artifacts"), code="migrate", total=artifacts.count()
) as pb:
while True:
for digest in pb.iter(artifacts.values_list("sha256", flat=True)):
filename = storage.get_artifact_path(digest)
if not new_storage.exists(filename):
try:
file = old_storage.open(filename)
except FileNotFoundError:
raise ValidationError(
_(
"Found missing file for artifact(sha256={}). Please run the repair "
"task or delete the offending artifact."
).format(digest)
)
new_storage.save(filename, file)
file.close()
# Handle new artifacts saved by the content app
artifacts = Artifact.objects.filter(pulp_domain=domain, pulp_created__gte=date)
if count := artifacts.count():
pb.total += count
pb.save()
date = now()
continue
break

# Update the current domain to the new storage backend settings
msg = _("Update Domain({domain})'s Backend Settings").format(domain=domain.name)
with ProgressReport(message=msg, code="update", total=1) as pb:
# Special handling for default domain
if domain.name == "default":
msg = _(
"PLEASE UPDATE CONFIG FILE WITH THE NEW STORAGE SETTINGS BEFORE NEXT DB MIGRATION!"
)
_logger.warning(msg)
ProgressReport(message=msg, code="URGENT", state=TASK_STATES.SKIPPED).save()

domain.storage_class = data["storage_class"]
domain.storage_settings = data["storage_settings"]
domain.save(update_fields=["storage_class", "storage_settings"])
pb.increment()

0 comments on commit 2553416

Please sign in to comment.