Skip to content

Commit

Permalink
Add migrate endpoint to move artifacts to another storage backend
Browse files Browse the repository at this point in the history
fixes: pulp#3358
  • Loading branch information
gerrod3 committed May 16, 2024
1 parent 865e5c9 commit aa9464b
Show file tree
Hide file tree
Showing 15 changed files with 459 additions and 64 deletions.
1 change: 1 addition & 0 deletions CHANGES/3358.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added new `/migrate/` endpoint to Domains that allows for migrating artifacts from one storage backend to another.
8 changes: 8 additions & 0 deletions docs/configuration/settings.rst
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ REDIRECT_TO_OBJECT_STORAGE

Defaults to ``True``; ignored for local file storage.

.. note::

After changing this value, ``pulpcore-manager migrate`` must be run for it to be properly updated.


MEDIA_ROOT
^^^^^^^^^^
Expand Down Expand Up @@ -257,6 +261,10 @@ HIDE_GUARDED_DISTRIBUTIONS
If activated, the distributions that are protected by a content guard will not be shown on the
directory listing in the content app. Defaults to ``False``.

.. note::

After changing this value, ``pulpcore-manager migrate`` must be run for it to be properly updated.

.. _content-path-prefix:

CONTENT_PATH_PREFIX
Expand Down
4 changes: 4 additions & 0 deletions docs/installation/storage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ Storage
to use another storage backend such as Amazon Simple Storage Service (S3), you'll need to
configure Pulp.

.. note::

After changing any value related to storage settings you must run ``pulpcore-manager migrate``
for the new configuration to be properly registered.

Local Filesystem
^^^^^^^^^^^^^^^^
Expand Down
5 changes: 5 additions & 0 deletions pulpcore/app/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,18 +315,23 @@ def _ensure_default_domain(sender, **kwargs):
table_names = connection.introspection.table_names()
if "core_domain" in table_names:
from pulpcore.app.util import get_default_domain
from pulpcore.app.serializers.domain import StorageSettingsSerializer

default = get_default_domain() # Cache the default domain
config_settings = StorageSettingsSerializer.get_default_domain_settings(settings)
# Match the Pulp settings
if (
settings.HIDE_GUARDED_DISTRIBUTIONS != default.hide_guarded_distributions
or settings.REDIRECT_TO_OBJECT_STORAGE != default.redirect_to_object_storage
or settings.DEFAULT_FILE_STORAGE != default.storage_class
or default.storage_settings != config_settings
):
default.hide_guarded_distributions = settings.HIDE_GUARDED_DISTRIBUTIONS
default.redirect_to_object_storage = settings.REDIRECT_TO_OBJECT_STORAGE
default.storage_class = settings.DEFAULT_FILE_STORAGE
default.storage_settings = config_settings
default.save(skip_hooks=True)
print(_("Updated default domain to match current Pulp settings"))


def _populate_roles(sender, apps, verbosity, **kwargs):
Expand Down
34 changes: 34 additions & 0 deletions pulpcore/app/migrations/0119_store_default_domain_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Generated by Django 4.2.10 on 2024-04-24 11:38

from django.db import migrations
from pulpcore.app.serializers.domain import StorageSettingsSerializer
from django.conf import settings


def default_domain_settings(apps, schema_editor):
    """Persist the active Pulp storage configuration on the default domain.

    Fetches the row named "default" from the historical ``core.Domain`` model
    and overwrites its storage class, storage settings and the two behaviour
    flags with the values currently present in the Django settings.
    """
    domain_model = apps.get_model("core", "Domain")
    domain = domain_model.objects.get(name="default")
    backend_settings = StorageSettingsSerializer.get_default_domain_settings(settings)
    new_values = {
        "storage_class": settings.DEFAULT_FILE_STORAGE,
        "storage_settings": backend_settings,
        "hide_guarded_distributions": settings.HIDE_GUARDED_DISTRIBUTIONS,
        "redirect_to_object_storage": settings.REDIRECT_TO_OBJECT_STORAGE,
    }
    for field_name, field_value in new_values.items():
        setattr(domain, field_name, field_value)
    domain.save(skip_hooks=True)


def reverse_default_domain_settings(apps, schema_editor):
    """Undo the forward step by emptying the default domain's storage settings."""
    default_row = apps.get_model("core", "Domain").objects.get(name="default")
    default_row.storage_settings = {}
    default_row.save(skip_hooks=True)


class Migration(migrations.Migration):
    """Data migration that stores the configured storage settings on the default domain."""

    dependencies = [("core", "0118_task_core_task_unblock_2276a4_idx_and_more")]

    operations = [
        migrations.RunPython(
            code=default_domain_settings,
            reverse_code=reverse_default_domain_settings,
        ),
    ]
4 changes: 0 additions & 4 deletions pulpcore/app/models/domain.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from opentelemetry.metrics import Observation

from django.core.files.storage import default_storage
from django.db import models
from django_lifecycle import hook, BEFORE_DELETE, BEFORE_UPDATE, AFTER_CREATE

Expand Down Expand Up @@ -45,9 +44,6 @@ class Domain(BaseModel, AutoAddObjPermsMixin):

def get_storage(self):
"""Returns this domain's instantiated storage class."""
if self.name == "default":
return default_storage

if date_storage_tuple := storages.get(self.pulp_id):
last_updated, storage = date_storage_tuple
if self.pulp_last_updated == last_updated:
Expand Down
2 changes: 1 addition & 1 deletion pulpcore/app/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
SigningServiceSerializer,
SingleArtifactContentSerializer,
)
from .domain import DomainSerializer
from .domain import DomainSerializer, DomainBackendMigratorSerializer
from .exporter import (
ExporterSerializer,
ExportSerializer,
Expand Down
159 changes: 114 additions & 45 deletions pulpcore/app/serializers/domain.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from gettext import gettext as _
import json


from django.conf import settings
from django.core.files.storage import import_string
from django.core.exceptions import ImproperlyConfigured
from drf_spectacular.types import OpenApiTypes
Expand All @@ -9,7 +10,7 @@
from rest_framework import serializers
from rest_framework.validators import UniqueValidator

from pulpcore.app import models
from pulpcore.app.models import Domain
from pulpcore.app.serializers import IdentityField, ModelSerializer, HiddenFieldsMixin


Expand All @@ -36,15 +37,6 @@ class BaseSettingsClass(HiddenFieldsMixin, serializers.Serializer):
STORAGE_CLASS = None
SETTING_MAPPING = None

def to_representation(self, instance):
"""Handle getting settings values for default domain case."""
# Should I convert back the saved settings to their Setting names for to_representation?
if getattr(self.context.get("domain", None), "name", None) == "default":
for setting_name, field in self.SETTING_MAPPING.items():
if value := getattr(settings, setting_name.upper(), None):
instance[field] = value
return super().to_representation(instance)

def to_internal_value(self, data):
"""Translate incoming data from storage setting name to storage init arg."""
init_keys = set(self.SETTING_MAPPING.values())
Expand Down Expand Up @@ -104,12 +96,12 @@ class SFTPSettingsSerializer(BaseSettingsClass):
SETTING_MAPPING = {
"sftp_storage_host": "host",
"sftp_storage_params": "params",
# 'sftp_storage_interactive': 'interactive', # Can not allow users to set to True
'sftp_storage_interactive': 'interactive',
"sftp_storage_file_mode": "file_mode",
"sftp_storage_dir_mode": "dir_mode",
"sftp_storage_uid": "uid",
"sftp_storage_gid": "gid",
# 'sftp_known_host_file': 'known_host_file', # This is dangerous to allow to be set
'sftp_known_host_file': 'known_host_file',
"sftp_storage_root": "root_path",
"media_url": "base_url",
"sftp_base_url": "base_url",
Expand All @@ -123,6 +115,17 @@ class SFTPSettingsSerializer(BaseSettingsClass):
uid = serializers.CharField(allow_null=True, default=None)
gid = serializers.CharField(allow_null=True, default=None)
base_url = serializers.CharField(allow_null=True, default=None)
interactive = serializers.HiddenField(default=False)
known_host_file = serializers.CharField(allow_null=True, default=None)

def validate_known_host_file(self, value):
    """Reject a known-hosts file on any domain other than the default one.

    ``value`` is returned unchanged when it is ``None`` or when the serializer
    context carries a "domain" whose ``name`` is "default". In every other
    case a ``serializers.ValidationError`` is raised.
    """
    if value is None:
        return value
    current_domain = self.context.get("domain", None)
    if getattr(current_domain, "name", None) == "default":
        return value
    raise serializers.ValidationError(
        _("Known Host File can only be set in the default domain.")
    )


class TransferConfigSerializer(serializers.Serializer):
Expand Down Expand Up @@ -187,10 +190,9 @@ class AmazonS3SettingsSerializer(BaseSettingsClass):
access_key = serializers.CharField(required=True, write_only=True)
secret_key = serializers.CharField(allow_null=True, default=None, write_only=True)
security_token = serializers.CharField(allow_null=True, default=None, write_only=True)
# Too dangerous to use shared cred file, ensure is always False
session_profile = serializers.HiddenField(default=False)
session_profile = serializers.CharField(allow_null=True, default=None)
file_overwrite = serializers.BooleanField(default=True)
object_parameters = serializers.DictField(default={})
object_parameters = serializers.DictField(default=dict())
bucket_name = serializers.CharField(required=True)
querystring_auth = serializers.BooleanField(default=True)
querystring_expire = serializers.IntegerField(default=3600)
Expand Down Expand Up @@ -224,6 +226,15 @@ def validate_verify(self, value):
value = None
return value

def validate_session_profile(self, value):
    """Reject a session profile on any domain other than the default one.

    ``value`` is returned unchanged when it is ``None`` or when the serializer
    context carries a "domain" whose ``name`` is "default". In every other
    case a ``serializers.ValidationError`` is raised.
    """
    if value is None:
        return value
    current_domain = self.context.get("domain", None)
    if getattr(current_domain, "name", None) == "default":
        return value
    raise serializers.ValidationError(
        _("Session Profile can only be set in the default domain.")
    )

def validate(self, data):
"""Verify that secret_key or security_token is set."""
data = super().validate(data)
Expand Down Expand Up @@ -349,9 +360,22 @@ def to_internal_value(self, data):
"""Appropriately convert the incoming data based on the Domain's storage class."""
# Handle Creating & Updating
storage_settings = self.root.initial_data.get("storage_settings", {})
if not isinstance(storage_settings, dict):
if isinstance(storage_settings, str):
try:
storage_settings = json.loads(storage_settings)
except json.JSONDecodeError:
raise serializers.ValidationError("Improper JSON string passed in")
else:
raise serializers.ValidationError("Storage settings should be a JSON object.")

if self.root.instance:
storage_class = self.root.instance.storage_class
storage_settings = {**self.root.instance.storage_settings, **storage_settings}
# Use passed in values, if not present fallback onto current values of instance
storage_class = self.root.initial_data.get(
"storage_class", self.root.instance.storage_class
)
if storage_class == self.root.instance.storage_class:
storage_settings = {**self.root.instance.storage_settings, **storage_settings}
else:
storage_class = self.root.initial_data["storage_class"]

Expand All @@ -365,21 +389,68 @@ def to_internal_value(self, data):

def create_storage(self):
    """Instantiate a storage class based on the Domain's storage class.

    When the root serializer is bound to an existing instance, that
    instance's ``storage_class`` and ``storage_settings`` are used. When there
    is no instance yet, both values are read from the root serializer's
    ``initial_data`` instead.

    Returns:
        The object produced by the backend-specific settings serializer's
        ``create`` for the validated settings.

    Raises:
        KeyError: if ``storage_class`` is not a key of ``STORAGE_MAPPING`` or
            ``initial_data`` lacks one of the two keys.
        Any exception raised by ``is_valid(raise_exception=True)`` when the
        settings do not validate.
    """
    root = self.root
    # The instance must be checked before it is dereferenced: during creation
    # ``root.instance`` is None and only ``initial_data`` is available.
    if root.instance:
        storage_class = root.instance.storage_class
        storage_settings = root.instance.storage_settings
    else:
        storage_class = root.initial_data["storage_class"]
        storage_settings = root.initial_data["storage_settings"]
    serializer_class = self.STORAGE_MAPPING[storage_class]
    serializer = serializer_class(data=storage_settings)
    serializer.is_valid(raise_exception=True)
    return serializer.create(serializer.validated_data)

@classmethod
def get_default_domain_settings(cls, settings):
    """Special helper method to get the backend settings of the default domain.

    Picks the settings serializer registered for ``settings.DEFAULT_FILE_STORAGE``,
    collects every setting named in its ``SETTING_MAPPING`` from ``settings``
    (looked up by the upper-cased setting name), validates the collected data
    and returns the validated result.

    Args:
        settings: object exposing the Pulp/Django settings as attributes.

    Returns:
        The ``validated_data`` of the backend-specific settings serializer.

    Raises:
        KeyError: if the configured storage class has no entry in
            ``STORAGE_MAPPING``.
        Any exception raised by ``is_valid(raise_exception=True)`` when the
        configured values do not validate.
    """
    from types import SimpleNamespace

    serializer_class = cls.STORAGE_MAPPING[settings.DEFAULT_FILE_STORAGE]
    data = {}
    for setting_name, field in serializer_class.SETTING_MAPPING.items():
        value = getattr(settings, setting_name.upper(), None)
        # Skip only values that mean "not configured". A plain truthiness test
        # would also drop an explicit False or 0 and let the serializer default
        # (e.g. True) silently replace what the administrator configured.
        if value is None or value == "":
            continue
        data[field] = value
    # Field validators that restrict a setting to the default domain identify
    # it through ``context["domain"].name``; without this context they would
    # reject values that are legitimately configured for the default domain.
    # NOTE(review): assumes the settings serializers only read ``.name`` from
    # the context domain - confirm before relying on other attributes.
    context = {"domain": SimpleNamespace(name="default")}
    serializer = serializer_class(data=data, context=context)
    serializer.is_valid(raise_exception=True)
    return serializer.validated_data


class BackendSettingsValidator:
    """Mixin to handle validating `storage_class` and `storage_settings`.

    Intended to be combined with a serializer that declares a
    ``storage_settings`` field; the two delegating helpers below forward to
    that field.
    """

    @staticmethod
    def _validate_storage_backend(storage_class, storage_settings):
        """Ensure that the backend can be used.

        Args:
            storage_class: dotted import path of the storage backend class.
            storage_settings: keyword arguments used to instantiate the backend.

        Raises:
            serializers.ValidationError: keyed by ``storage_class`` when the
                backend cannot be imported, or by ``storage_settings`` when
                instantiating it raises ``ImproperlyConfigured``.
        """
        try:
            backend = import_string(storage_class)
        except (ImportError, ImproperlyConfigured) as e:
            raise serializers.ValidationError(
                detail={"storage_class": _("Backend is not installed on Pulp.")}
            ) from e

        try:
            backend(**storage_settings)
        except ImproperlyConfigured as e:
            # Translate the template first and interpolate afterwards, so the
            # catalog lookup uses the constant msgid rather than the formatted text.
            message = _("Backend settings contain incorrect values: {}").format(e)
            raise serializers.ValidationError(detail={"storage_settings": message}) from e

    def create_storage(self):
        """Delegate storage instantiation to the ``storage_settings`` field."""
        return self.fields["storage_settings"].create_storage()

    @classmethod
    def get_default_domain_settings(cls, settings):
        """Delegate to the declared ``storage_settings`` field's helper of the same name."""
        return cls._declared_fields["storage_settings"].get_default_domain_settings(settings)


class DomainSerializer(BackendSettingsValidator, ModelSerializer):
"""Serializer for Domain."""

pulp_href = IdentityField(view_name="domains-detail")
name = serializers.SlugField(
max_length=50,
help_text=_("A name for this domain."),
validators=[UniqueValidator(queryset=models.Domain.objects.all())],
validators=[UniqueValidator(queryset=Domain.objects.all())],
)
description = serializers.CharField(
help_text=_("An optional description."), required=False, allow_null=True
Expand All @@ -406,24 +477,6 @@ def validate_name(self, value):
raise serializers.ValidationError(_("Name can not be 'api' or 'content'."))
return value

def _validate_storage_backend(self, storage_class, storage_settings):
"""Ensure that the backend can be used."""
try:
backend = import_string(storage_class)
except (ImportError, ImproperlyConfigured):
raise serializers.ValidationError(
detail={"storage_class": _("Backend is not installed on Pulp.")}
)

try:
backend(**storage_settings)
except ImproperlyConfigured as e:
raise serializers.ValidationError(
detail={
"storage_settings": _("Backend settings contain incorrect values: {}".format(e))
}
)

def validate(self, data):
"""Ensure that Domain settings are valid."""
# Validate for update gets called before ViewSet default check
Expand All @@ -448,11 +501,8 @@ def validate(self, data):
)
return data

def create_storage(self):
return self.fields["storage_settings"].create_storage()

class Meta:
model = models.Domain
model = Domain
fields = ModelSerializer.Meta.fields + (
"name",
"description",
Expand All @@ -461,3 +511,22 @@ class Meta:
"redirect_to_object_storage",
"hide_guarded_distributions",
)


class DomainBackendMigratorSerializer(BackendSettingsValidator, serializers.Serializer):
    """Serializer describing the target backend of a Domain storage migration."""

    storage_class = serializers.ChoiceField(
        choices=BACKEND_CHOICES,
        help_text=_("The new backend storage class to migrate to."),
    )
    storage_settings = StorageSettingsSerializer(
        help_text=_("The settings for the new storage class to migrate to."),
        source="*",
    )

    def validate(self, data):
        """Check that the requested backend can be built from the given settings."""
        self._validate_storage_backend(data["storage_class"], data["storage_settings"])
        return data

0 comments on commit aa9464b

Please sign in to comment.