From cbc799f4ea592dbb68c8ad859adec2888fdc5423 Mon Sep 17 00:00:00 2001
From: Grant Gainey
Date: Wed, 12 Jul 2023 14:29:48 -0400
Subject: [PATCH] Fixed content-handler response-headers/object-storage collision.

fixes #4028.
---
Review note: GCS_RESPONSE_HEADER_MAP maps "Content-Disposition" to
"response_disposition", the parameter name the handler passed to the Google
Cloud Storage backend before this change (see the removed line in the last
hunk of pulpcore/content/handler.py). Mapping it to "content_disposition"
would silently rename that parameter.
NOTE(review): the remaining GCS keys (cache_control, content_language,
content_encoding) are presumably forwarded to the same signed-url call;
confirm the backend accepts them.

 CHANGES/4028.bugfix         |  1 +
 pulpcore/constants.py       | 37 +++++++++++++++++++++++++++++++++++++
 pulpcore/content/handler.py | 26 +++++++++++++++++---------
 3 files changed, 55 insertions(+), 9 deletions(-)
 create mode 100644 CHANGES/4028.bugfix

diff --git a/CHANGES/4028.bugfix b/CHANGES/4028.bugfix
new file mode 100644
index 00000000000..b9a2716fec7
--- /dev/null
+++ b/CHANGES/4028.bugfix
@@ -0,0 +1 @@
+Taught downloader to correctly handle plugin-specified headers for object-storage backends.
diff --git a/pulpcore/constants.py b/pulpcore/constants.py
index 48e490eeed8..80b7e46b6b4 100644
--- a/pulpcore/constants.py
+++ b/pulpcore/constants.py
@@ -67,3 +67,40 @@
 )
 
 EXPORT_BATCH_SIZE = 2000
+
+# Mapping of http-response-headers to what various block-storage-apis call them
+# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/get_object.html
+# response-headers S3 respects, and what they map to in an S3 object
+S3_RESPONSE_HEADER_MAP = {
+    "Content-Disposition": "ResponseContentDisposition",
+    "Content-Type": "ResponseContentType",
+    "Cache-Control": "ResponseCacheControl",
+    "Content-Language": "ResponseContentLanguage",
+    "Expires": "ResponseExpires",
+    "Content-Encoding": "ResponseContentEncoding",
+}
+# https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.contentsettings?view=azure-python
+# response-headers azure respects, and what they map to in an azure object
+AZURE_RESPONSE_HEADER_MAP = {
+    "Content-Disposition": "content_disposition",
+    "Content-Type": "content_type",
+    "Cache-Control": "cache_control",
+    "Content-Language": "content_language",
+    "Content-Encoding": "content_encoding",
+}
+# https://gcloud.readthedocs.io/en/latest/storage-blobs.html
+# response-headers Google Cloud Storage respects, and what they map to in a GCS object
+GCS_RESPONSE_HEADER_MAP = {
+    "Content-Disposition": "response_disposition",
+    "Content-Type": "content_type",
+    "Cache-Control": "cache_control",
+    "Content-Language": "content_language",
+    "Content-Encoding": "content_encoding",
+}
+
+# Storage-type mapped to storage-response-map
+STORAGE_RESPONSE_MAP = {
+    "storages.backends.s3boto3.S3Boto3Storage": S3_RESPONSE_HEADER_MAP,
+    "storages.backends.azure_storage.AzureStorage": AZURE_RESPONSE_HEADER_MAP,
+    "storages.backends.gcloud.GoogleCloudStorage": GCS_RESPONSE_HEADER_MAP,
+}
diff --git a/pulpcore/content/handler.py b/pulpcore/content/handler.py
index 7a12301375e..3dd5743082e 100644
--- a/pulpcore/content/handler.py
+++ b/pulpcore/content/handler.py
@@ -20,6 +20,7 @@
 
 import django
 
+from pulpcore.constants import STORAGE_RESPONSE_MAP
 from pulpcore.responses import ArtifactResponse
 
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pulpcore.app.settings")
@@ -894,6 +895,16 @@ async def _serve_content_artifact(self, content_artifact, headers, request):
         Returns:
             The :class:`aiohttp.web.FileResponse` for the file.
         """
+
+        def _set_params_from_headers(hdrs, storage_domain):
+            # Map standard-response-headers to storage-object-specific keys
+            params = {}
+            if storage_domain in STORAGE_RESPONSE_MAP:
+                for a_key in STORAGE_RESPONSE_MAP[storage_domain]:
+                    if hdrs.get(a_key, None):
+                        params[STORAGE_RESPONSE_MAP[storage_domain][a_key]] = hdrs[a_key]
+            return params
+
         artifact_file = content_artifact.artifact.file
         artifact_name = artifact_file.name
         filename = os.path.basename(content_artifact.relative_path)
@@ -909,9 +920,8 @@ async def _serve_content_artifact(self, content_artifact, headers, request):
         elif not domain.redirect_to_object_storage:
             return ArtifactResponse(content_artifact.artifact, headers=headers)
         elif domain.storage_class == "storages.backends.s3boto3.S3Boto3Storage":
-            parameters = {"ResponseContentDisposition": content_disposition}
-            if headers.get("Content-Type"):
-                parameters["ResponseContentType"] = headers.get("Content-Type")
+            headers["Content-Disposition"] = content_disposition
+            parameters = _set_params_from_headers(headers, domain.storage_class)
             url = URL(
                 artifact_file.storage.url(
                     artifact_name, parameters=parameters, http_method=request.method
@@ -920,15 +930,13 @@ async def _serve_content_artifact(self, content_artifact, headers, request):
             )
             raise HTTPFound(url)
         elif domain.storage_class == "storages.backends.azure_storage.AzureStorage":
-            parameters = {"content_disposition": content_disposition}
-            if headers.get("Content-Type"):
-                parameters["content_type"] = headers.get("Content-Type")
+            headers["Content-Disposition"] = content_disposition
+            parameters = _set_params_from_headers(headers, domain.storage_class)
             url = URL(artifact_file.storage.url(artifact_name, parameters=parameters), encoded=True)
             raise HTTPFound(url)
         elif domain.storage_class == "storages.backends.gcloud.GoogleCloudStorage":
-            parameters = {"response_disposition": content_disposition}
-            if headers.get("Content-Type"):
-                parameters["content_type"] = headers.get("Content-Type")
+            headers["Content-Disposition"] = content_disposition
+            parameters = _set_params_from_headers(headers, domain.storage_class)
             url = URL(artifact_file.storage.url(artifact_name, parameters=parameters), encoded=True)
             raise HTTPFound(url)
         else: