Skip to content

Commit

Permalink
Add migrate endpoint to move artifacts to another storage backend
Browse files Browse the repository at this point in the history
fixes: pulp#3358
  • Loading branch information
gerrod3 committed May 14, 2024
1 parent 11eacf4 commit d96ec2e
Show file tree
Hide file tree
Showing 15 changed files with 437 additions and 59 deletions.
1 change: 1 addition & 0 deletions CHANGES/3358.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added new `/migrate/` endpoint to Domains that allows for migrating artifacts from one storage backend to another.
8 changes: 8 additions & 0 deletions docs/configuration/settings.rst
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ REDIRECT_TO_OBJECT_STORAGE

Defaults to ``True``; ignored for local file storage.

.. note::

After changing this value, ``pulpcore-manager migrate`` must be run for it to be properly updated.


MEDIA_ROOT
^^^^^^^^^^
Expand Down Expand Up @@ -257,6 +261,10 @@ HIDE_GUARDED_DISTRIBUTIONS
If activated, the distributions that are protected by a content guard will not be shown on the
directory listing in the content app. Defaults to ``False``.

.. note::

After changing this value, ``pulpcore-manager migrate`` must be run for it to be properly updated.

.. _content-path-prefix:

CONTENT_PATH_PREFIX
Expand Down
4 changes: 4 additions & 0 deletions docs/installation/storage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ Storage
to use another storage backend such as Amazon Simple Storage Service (S3), you'll need to
configure Pulp.

.. note::

After changing any value related to storage settings you must run ``pulpcore-manager migrate``
for the new configuration to be properly registered.

Local Filesystem
^^^^^^^^^^^^^^^^
Expand Down
5 changes: 5 additions & 0 deletions pulpcore/app/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,18 +315,23 @@ def _ensure_default_domain(sender, **kwargs):
table_names = connection.introspection.table_names()
if "core_domain" in table_names:
from pulpcore.app.util import get_default_domain
from pulpcore.app.serializers.domain import StorageSettingsSerializer

default = get_default_domain() # Cache the default domain
config_settings = StorageSettingsSerializer.get_default_domain_settings(settings)
# Match the Pulp settings
if (
settings.HIDE_GUARDED_DISTRIBUTIONS != default.hide_guarded_distributions
or settings.REDIRECT_TO_OBJECT_STORAGE != default.redirect_to_object_storage
or settings.DEFAULT_FILE_STORAGE != default.storage_class
or default.storage_settings != config_settings
):
default.hide_guarded_distributions = settings.HIDE_GUARDED_DISTRIBUTIONS
default.redirect_to_object_storage = settings.REDIRECT_TO_OBJECT_STORAGE
default.storage_class = settings.DEFAULT_FILE_STORAGE
default.storage_settings = config_settings
default.save(skip_hooks=True)
print(_("Updated default domain to match current Pulp settings"))


def _populate_roles(sender, apps, verbosity, **kwargs):
Expand Down
34 changes: 34 additions & 0 deletions pulpcore/app/migrations/0118_store_default_domain_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Generated by Django 4.2.10 on 2024-04-24 11:38

from django.db import migrations
from pulpcore.app.serializers.domain import StorageSettingsSerializer
from django.conf import settings


def default_domain_settings(apps, schema_editor):
    """Store the currently configured storage settings on the ``default`` domain.

    Looks up the domain named ``default`` through the historical ``core.Domain``
    model, copies the storage class, the serialized backend settings and the two
    boolean flags from ``settings`` onto it, then saves it with ``skip_hooks=True``.
    """
    domain = apps.get_model("core", "Domain").objects.get(name="default")
    backend_settings = StorageSettingsSerializer.get_default_domain_settings(settings)
    new_values = {
        "storage_class": settings.DEFAULT_FILE_STORAGE,
        "storage_settings": backend_settings,
        "hide_guarded_distributions": settings.HIDE_GUARDED_DISTRIBUTIONS,
        "redirect_to_object_storage": settings.REDIRECT_TO_OBJECT_STORAGE,
    }
    for field_name, field_value in new_values.items():
        setattr(domain, field_name, field_value)
    domain.save(skip_hooks=True)


def reverse_default_domain_settings(apps, schema_editor):
    """Undo the forward step by resetting the ``default`` domain's storage settings.

    Only ``storage_settings`` is reset (to an empty dict); the domain is then
    saved with ``skip_hooks=True``.
    """
    domain_model = apps.get_model("core", "Domain")
    domain = domain_model.objects.get(name="default")
    setattr(domain, "storage_settings", {})
    domain.save(skip_hooks=True)


class Migration(migrations.Migration):
    """Data migration that records the configured storage settings on the default domain."""

    dependencies = [("core", "0117_task_unblocked_at")]

    operations = [
        migrations.RunPython(
            code=default_domain_settings,
            reverse_code=reverse_default_domain_settings,
        ),
    ]
4 changes: 0 additions & 4 deletions pulpcore/app/models/domain.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from opentelemetry.metrics import Observation

from django.core.files.storage import default_storage
from django.db import models
from django_lifecycle import hook, BEFORE_DELETE, BEFORE_UPDATE, AFTER_CREATE

Expand Down Expand Up @@ -45,9 +44,6 @@ class Domain(BaseModel, AutoAddObjPermsMixin):

def get_storage(self):
"""Returns this domain's instantiated storage class."""
if self.name == "default":
return default_storage

if date_storage_tuple := storages.get(self.pulp_id):
last_updated, storage = date_storage_tuple
if self.pulp_last_updated == last_updated:
Expand Down
2 changes: 1 addition & 1 deletion pulpcore/app/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
SigningServiceSerializer,
SingleArtifactContentSerializer,
)
from .domain import DomainSerializer
from .domain import DomainSerializer, DomainBackendMigratorSerializer
from .exporter import (
ExporterSerializer,
ExportSerializer,
Expand Down
130 changes: 90 additions & 40 deletions pulpcore/app/serializers/domain.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from gettext import gettext as _
import json


from django.conf import settings
from django.core.files.storage import import_string
from django.core.exceptions import ImproperlyConfigured
from drf_spectacular.types import OpenApiTypes
Expand All @@ -9,7 +10,7 @@
from rest_framework import serializers
from rest_framework.validators import UniqueValidator

from pulpcore.app import models
from pulpcore.app.models import Domain
from pulpcore.app.serializers import IdentityField, ModelSerializer, HiddenFieldsMixin


Expand All @@ -36,15 +37,6 @@ class BaseSettingsClass(HiddenFieldsMixin, serializers.Serializer):
STORAGE_CLASS = None
SETTING_MAPPING = None

def to_representation(self, instance):
"""Handle getting settings values for default domain case."""
# Should I convert back the saved settings to their Setting names for to_representation?
if getattr(self.context.get("domain", None), "name", None) == "default":
for setting_name, field in self.SETTING_MAPPING.items():
if value := getattr(settings, setting_name.upper(), None):
instance[field] = value
return super().to_representation(instance)

def to_internal_value(self, data):
"""Translate incoming data from storage setting name to storage init arg."""
init_keys = set(self.SETTING_MAPPING.values())
Expand Down Expand Up @@ -345,9 +337,22 @@ def to_internal_value(self, data):
"""Appropriately convert the incoming data based on the Domain's storage class."""
# Handle Creating & Updating
storage_settings = self.root.initial_data.get("storage_settings", {})
if not isinstance(storage_settings, dict):
if isinstance(storage_settings, str):
try:
storage_settings = json.loads(storage_settings)
except json.JSONDecodeError:
raise serializers.ValidationError("Improper JSON string passed in")
else:
raise serializers.ValidationError("Storage settings should be a JSON object.")

if self.root.instance:
storage_class = self.root.instance.storage_class
storage_settings = {**self.root.instance.storage_settings, **storage_settings}
# Use passed in values, if not present fallback onto current values of instance
storage_class = self.root.initial_data.get(
"storage_class", self.root.instance.storage_class
)
if storage_class == self.root.instance.storage_class:
storage_settings = {**self.root.instance.storage_settings, **storage_settings}
else:
storage_class = self.root.initial_data["storage_class"]

Expand All @@ -361,21 +366,68 @@ def to_internal_value(self, data):

def create_storage(self):
"""Instantiate a storage class based on the Domain's storage class."""
instance = self.root.instance
serializer_class = self.STORAGE_MAPPING[instance.storage_class]
serializer = serializer_class(data=instance.storage_settings)
if self.root.instance:
storage_class = self.root.instance.storage_class
storage_settings = self.root.instance.storage_settings
else:
storage_class = self.root.initial_data["storage_class"]
storage_settings = self.root.initial_data["storage_settings"]
serializer_class = self.STORAGE_MAPPING[storage_class]
serializer = serializer_class(data=storage_settings)
serializer.is_valid(raise_exception=True)
return serializer.create(serializer.validated_data)

@classmethod
def get_default_domain_settings(cls, settings):
    """Special helper method to get the backend settings of the default domain.

    Picks the serializer registered in ``STORAGE_MAPPING`` for
    ``settings.DEFAULT_FILE_STORAGE``, gathers every setting named in that
    serializer's ``SETTING_MAPPING`` (looked up upper-cased on ``settings``) and
    validates the collected values with it.

    Args:
        settings: Object exposing ``DEFAULT_FILE_STORAGE`` and the storage
            settings as attributes.

    Returns:
        The ``validated_data`` of the backend-specific serializer.

    Raises:
        Whatever ``is_valid(raise_exception=True)`` raises when the collected
        values do not validate.
    """
    serializer_class = cls.STORAGE_MAPPING[settings.DEFAULT_FILE_STORAGE]
    data = {}
    for setting_name, field in serializer_class.SETTING_MAPPING.items():
        value = getattr(settings, setting_name.upper(), None)
        # Unset or empty values are skipped, but an explicit ``False`` or ``0`` is a
        # deliberate configuration choice and must not be replaced by the
        # serializer's default.
        if value is None:
            continue
        if not isinstance(value, (bool, int, float)) and not value:
            continue
        data[field] = value
    serializer = serializer_class(data=data)
    serializer.is_valid(raise_exception=True)
    return serializer.validated_data


class BackendSettingsValidator:
    """Mixin to handle validating `storage_class` and `storage_settings`.

    The delegating helpers below expect the serializer using this mixin to
    declare a ``storage_settings`` field.
    """

    @staticmethod
    def _validate_storage_backend(storage_class, storage_settings):
        """Ensure that the backend can be imported and instantiated.

        Args:
            storage_class: Dotted import path of the storage backend class.
            storage_settings: Mapping passed as keyword arguments to the backend.

        Raises:
            serializers.ValidationError: keyed by ``storage_class`` when the backend
                cannot be imported, or by ``storage_settings`` when instantiating it
                raises ``ImproperlyConfigured``.
        """
        try:
            backend = import_string(storage_class)
        except (ImportError, ImproperlyConfigured) as e:
            raise serializers.ValidationError(
                detail={"storage_class": _("Backend is not installed on Pulp.")}
            ) from e

        try:
            backend(**storage_settings)
        except ImproperlyConfigured as e:
            # Look up the translation of the template first and interpolate afterwards;
            # formatting inside ``_()`` would make the catalog lookup miss every time.
            message = _("Backend settings contain incorrect values: {}").format(e)
            raise serializers.ValidationError(detail={"storage_settings": message}) from e

    def create_storage(self):
        """Instantiate the storage via the ``storage_settings`` field."""
        return self.fields["storage_settings"].create_storage()

    @classmethod
    def get_default_domain_settings(cls, settings):
        """Return the default domain's backend settings via the declared field."""
        return cls._declared_fields["storage_settings"].get_default_domain_settings(settings)


class DomainSerializer(ModelSerializer):
class DomainSerializer(BackendSettingsValidator, ModelSerializer):
"""Serializer for Domain."""

pulp_href = IdentityField(view_name="domains-detail")
name = serializers.SlugField(
max_length=50,
help_text=_("A name for this domain."),
validators=[UniqueValidator(queryset=models.Domain.objects.all())],
validators=[UniqueValidator(queryset=Domain.objects.all())],
)
description = serializers.CharField(
help_text=_("An optional description."), required=False, allow_null=True
Expand All @@ -402,24 +454,6 @@ def validate_name(self, value):
raise serializers.ValidationError(_("Name can not be 'api' or 'content'."))
return value

def _validate_storage_backend(self, storage_class, storage_settings):
"""Ensure that the backend can be used."""
try:
backend = import_string(storage_class)
except (ImportError, ImproperlyConfigured):
raise serializers.ValidationError(
detail={"storage_class": _("Backend is not installed on Pulp.")}
)

try:
backend(**storage_settings)
except ImproperlyConfigured as e:
raise serializers.ValidationError(
detail={
"storage_settings": _("Backend settings contain incorrect values: {}".format(e))
}
)

def validate(self, data):
"""Ensure that Domain settings are valid."""
# Validate for update gets called before ViewSet default check
Expand All @@ -444,11 +478,8 @@ def validate(self, data):
)
return data

def create_storage(self):
return self.fields["storage_settings"].create_storage()

class Meta:
model = models.Domain
model = Domain
fields = ModelSerializer.Meta.fields + (
"name",
"description",
Expand All @@ -457,3 +488,22 @@ class Meta:
"redirect_to_object_storage",
"hide_guarded_distributions",
)


class DomainBackendMigratorSerializer(BackendSettingsValidator, serializers.Serializer):
    """Serializer describing the target backend of a Domain storage migration."""

    storage_class = serializers.ChoiceField(
        choices=BACKEND_CHOICES,
        help_text=_("The new backend storage class to migrate to."),
    )
    storage_settings = StorageSettingsSerializer(
        help_text=_("The settings for the new storage class to migrate to."),
        source="*",
    )

    def validate(self, data):
        """Check that the requested backend can be used with the given settings."""
        self._validate_storage_backend(data["storage_class"], data["storage_settings"])
        return data
27 changes: 27 additions & 0 deletions pulpcore/app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,33 @@
pass
finally:
connection.close()
# Check if the configuration of the default domain is mismatched with settings
try:
with connection.cursor() as cursor:
cursor.execute(
"SELECT storage_class, redirect_to_object_storage, hide_guarded_distributions, "
"pulp_id FROM core_domain WHERE name = 'default'"
)
row = cursor.fetchone()
mismatched = []
for i, setting in enumerate(
("DEFAULT_FILE_STORAGE", "REDIRECT_TO_OBJECT_STORAGE", "HIDE_GUARDED_DISTRIBUTIONS")
):
if row[i] != getattr(settings, setting, None):
mismatched.append(setting)

if mismatched:
_logger.warning(
f"The default domain's fields ({mismatched}) mismatch what is set in settings. "
f"Please check/update the settings file and then run 'pulpcore-manager migrate'"
f" or the migrate task at {settings.API_ROOT}/api/v3/domains/{row[3]}/migrate/"
f" if you wish to move your artifacts to a new backend."
)
except Exception:
# our check could fail if the table hasn't been created yet or we can't get a db connection
pass
finally:
connection.close()

settings.set("V3_API_ROOT", settings.API_ROOT + "api/v3/") # Not user configurable
settings.set("V3_DOMAIN_API_ROOT", settings.API_ROOT + "<slug:pulp_domain>/api/v3/")
Expand Down
2 changes: 2 additions & 0 deletions pulpcore/app/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

from .importer import pulp_import

from .migrate import migrate_backend

from .orphan import orphan_cleanup

from .purge import purge
Expand Down

0 comments on commit d96ec2e

Please sign in to comment.