Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Added
- [747](https://github.com/thoth-pub/thoth/pull/747) - Add `checksum` and `checksum_algorithm` fields to `Location`

## [[1.1.1]](https://github.com/thoth-pub/thoth/releases/tag/v1.1.1) - 2026-04-24
### Security
Expand Down
6 changes: 6 additions & 0 deletions thoth-api/migrations/20260429_v1.2.0/down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Revert the v1.2.0 migration: remove checksum metadata from `location`.
-- Every drop uses IF EXISTS so the script tolerates an already-reverted schema.
ALTER TABLE public.location
-- Remove the paired-nullability check explicitly, then the two columns.
DROP CONSTRAINT IF EXISTS location_checksum_and_algorithm_all_or_none,
DROP COLUMN IF EXISTS checksum,
DROP COLUMN IF EXISTS checksum_algorithm;

-- Dropped last: the enum type is still referenced until the column above is gone.
DROP TYPE IF EXISTS public.checksum_algorithm;
10 changes: 10 additions & 0 deletions thoth-api/migrations/20260429_v1.2.0/up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Hash algorithms that a location checksum may be recorded with.
CREATE TYPE public.checksum_algorithm AS ENUM (
'MD5',
'SHA256',
'SHA1'
);

-- Both new columns are nullable, so existing location rows need no backfill.
ALTER TABLE public.location
ADD COLUMN checksum TEXT,
ADD COLUMN checksum_algorithm public.checksum_algorithm,
-- A checksum and its algorithm must be supplied together or not at all.
ADD CONSTRAINT location_checksum_and_algorithm_all_or_none CHECK ((checksum IS NULL AND checksum_algorithm IS NULL) OR (checksum IS NOT NULL AND checksum_algorithm IS NOT NULL));
12 changes: 11 additions & 1 deletion thoth-api/src/graphql/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::model::{
contribution::{Contribution, ContributionType},
contributor::Contributor,
endorsement::{Endorsement, EndorsementOrderBy},
file::{File, FileType},
file::{ChecksumAlgorithm, File, FileType},
funding::Funding,
imprint::{Imprint, ImprintField, ImprintOrderBy},
institution::Institution,
Expand Down Expand Up @@ -1916,6 +1916,16 @@ impl Location {
self.canonical
}

// Borrowing accessor for the optional checksum; yields `None` when the
// location has no checksum stored.
#[graphql(description = "Checksum of the full text file as returned by the platform")]
pub fn checksum(&self) -> Option<&String> {
    Option::as_ref(&self.checksum)
}

// Borrowing accessor for the optional checksum algorithm; yields `None` when
// the location has no algorithm stored.
#[graphql(description = "Algorithm used to generate the checksum (MD5, SHA-256 or SHA-1)")]
pub fn checksum_algorithm(&self) -> Option<&ChecksumAlgorithm> {
    Option::as_ref(&self.checksum_algorithm)
}

#[graphql(description = "Date and time at which the location record was created")]
pub fn created_at(&self) -> Timestamp {
self.created_at
Expand Down
8 changes: 7 additions & 1 deletion thoth-api/src/graphql/mutation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1488,7 +1488,13 @@ impl MutationRoot {
&mime_type,
bytes,
)?;
file_upload.sync_related_metadata(context, &work, &cdn_url, featured_video_dimensions)?;
file_upload.sync_related_metadata(
context,
&work,
&cdn_url,
&file.sha256,
featured_video_dimensions,
)?;

reconcile_replaced_object(
s3_client,
Expand Down
4 changes: 4 additions & 0 deletions thoth-api/src/graphql/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,8 @@ fn make_new_location(publication_id: Uuid, canonical: bool) -> NewLocation {
full_text_url: Some("https://example.com/full".to_string()),
location_platform: LocationPlatform::Other,
canonical,
checksum: None,
checksum_algorithm: None,
}
}

Expand Down Expand Up @@ -1104,6 +1106,8 @@ fn patch_location(location: &Location) -> PatchLocation {
full_text_url: location.full_text_url.clone(),
location_platform: location.location_platform,
canonical: location.canonical,
checksum: location.checksum.clone(),
checksum_algorithm: location.checksum_algorithm,
}
}

Expand Down
14 changes: 11 additions & 3 deletions thoth-api/src/model/file/crud.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use super::FileType;
use super::{
upload_request_headers, File, FileCleanupCandidate, FilePolicy, FileUpload, FileUploadResponse,
NewFile, NewFileUpload,
upload_request_headers, ChecksumAlgorithm, File, FileCleanupCandidate, FilePolicy, FileType,
FileUpload, FileUploadResponse, NewFile, NewFileUpload,
};
use crate::db::PgPool;
use crate::model::{
Expand Down Expand Up @@ -724,6 +723,7 @@ impl FileUpload {
ctx: &C,
work: &Work,
cdn_url: &str,
cdn_sha256: &str,
featured_video_dimensions: Option<(i32, i32)>,
) -> ThothResult<()> {
match self.file_type {
Expand All @@ -741,6 +741,7 @@ impl FileUpload {
publication_id,
work.landing_page.clone(),
cdn_url,
Some(cdn_sha256.to_string()),
)?;
Comment on lines 741 to 745
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Route checksum writes through LocationPolicy checks

This upload path now forwards `cdn_sha256` into `upsert_thoth_location`, which persists `location.checksum` via `Location::create`/`update` without calling `LocationPolicy::can_create` or `can_update`. In practice, `complete_file_upload` is authorized by file/CDN permissions (not superuser-only), so a non-superuser who can complete a publication upload can still add or change a checksum, bypassing the superuser restriction introduced in `LocationPolicy`.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Behaviour is desired: checksums should not be manually entered or changed by non-superusers, but automated file uploads should be able to set them for Thoth locations.

}
FileType::AdditionalResource => {
Expand Down Expand Up @@ -792,6 +793,7 @@ impl FileUpload {
publication_id: Uuid,
landing_page: Option<String>,
full_text_url: &str,
sha256: Option<String>,
) -> ThothResult<()> {
use crate::schema::location::dsl;

Expand All @@ -809,6 +811,8 @@ impl FileUpload {
patch.full_text_url = Some(full_text_url.to_string());
patch.landing_page = landing_page;
patch.canonical = true;
patch.checksum = sha256;
patch.checksum_algorithm = Some(ChecksumAlgorithm::Sha256);
if patch.canonical {
patch.canonical_record_complete(ctx.db())?;
}
Expand All @@ -830,6 +834,8 @@ impl FileUpload {
full_text_url: Some(full_text_url.to_string()),
location_platform: LocationPlatform::Thoth,
canonical: false,
checksum: sha256,
checksum_algorithm: Some(ChecksumAlgorithm::Sha256),
};
let created_location = Location::create(ctx.db(), &new_location)?;
let mut patch = PatchLocation::from(created_location.clone());
Expand All @@ -845,6 +851,8 @@ impl FileUpload {
full_text_url: Some(full_text_url.to_string()),
location_platform: LocationPlatform::Thoth,
canonical: true,
checksum: sha256,
checksum_algorithm: Some(ChecksumAlgorithm::Sha256),
};
new_location.canonical_record_complete(ctx.db())?;
Location::create(ctx.db(), &new_location)?;
Expand Down
18 changes: 18 additions & 0 deletions thoth-api/src/model/file/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,24 @@ pub enum FileType {
WorkFeaturedVideo,
}

// Hash algorithm that a stored checksum was produced with.
// With the `backend` feature enabled, the enum is additionally derived as a
// database enum (bound to the type named by `ExistingTypePath`) and as a
// GraphQL enum.
#[cfg_attr(
feature = "backend",
derive(diesel_derive_enum::DbEnum, juniper::GraphQLEnum),
graphql(description = "Algorithm used to create file checksum"),
ExistingTypePath = "crate::schema::sql_types::ChecksumAlgorithm"
)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, EnumString, Display)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
#[strum(serialize_all = "SCREAMING_SNAKE_CASE")]
pub enum ChecksumAlgorithm {
// Each `db_rename` pins the database label explicitly; the labels match the
// values of the `checksum_algorithm` enum created by the v1.2.0 migration.
#[cfg_attr(feature = "backend", db_rename = "MD5")]
Md5,
#[cfg_attr(feature = "backend", db_rename = "SHA256")]
Sha256,
#[cfg_attr(feature = "backend", db_rename = "SHA1")]
Sha1,
}

#[cfg_attr(feature = "backend", derive(diesel::Queryable))]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
Expand Down
4 changes: 2 additions & 2 deletions thoth-api/src/model/file/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1243,7 +1243,7 @@ mod crud {

let cover_url = "https://cdn.example.org/10.1234/abc/def_frontcover.jpg";
upload
.sync_related_metadata(&ctx, &work, cover_url, None)
.sync_related_metadata(&ctx, &work, cover_url, "checksum", None)
.expect("Failed to sync frontcover metadata");

let refreshed_work = Work::from_id(pool.as_ref(), &work.work_id)
Expand Down Expand Up @@ -1284,7 +1284,7 @@ mod crud {

let video_url = "https://cdn.example.org/10.1234/abc/def/resources/video.mp4";
upload
.sync_related_metadata(&ctx, &work, video_url, Some((1280, 720)))
.sync_related_metadata(&ctx, &work, video_url, "checksum", Some((1280, 720)))
.expect("Failed to sync featured-video metadata");

let refreshed = crate::model::work_featured_video::WorkFeaturedVideo::from_id(
Expand Down
6 changes: 6 additions & 0 deletions thoth-api/src/model/location/crud.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ impl Crud for Location {
LocationField::Canonical => {
apply_directional_order!(query, order.direction, order, canonical)
}
LocationField::Checksum => {
apply_directional_order!(query, order.direction, order, checksum)
}
LocationField::ChecksumAlgorithm => {
apply_directional_order!(query, order.direction, order, checksum_algorithm)
}
LocationField::CreatedAt => {
apply_directional_order!(query, order.direction, order, created_at)
}
Expand Down
12 changes: 11 additions & 1 deletion thoth-api/src/model/location/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use strum::EnumString;
use uuid::Uuid;

use crate::graphql::types::inputs::Direction;
use crate::model::Timestamp;
use crate::model::{file::ChecksumAlgorithm, Timestamp};
#[cfg(feature = "backend")]
use crate::schema::location;
#[cfg(feature = "backend")]
Expand Down Expand Up @@ -165,6 +165,8 @@ pub enum LocationField {
FullTextUrl,
LocationPlatform,
Canonical,
Checksum,
ChecksumAlgorithm,
CreatedAt,
UpdatedAt,
}
Expand All @@ -179,6 +181,8 @@ pub struct Location {
pub full_text_url: Option<String>,
pub location_platform: LocationPlatform,
pub canonical: bool,
pub checksum: Option<String>,
pub checksum_algorithm: Option<ChecksumAlgorithm>,
pub created_at: Timestamp,
pub updated_at: Timestamp,
}
Expand All @@ -195,6 +199,8 @@ pub struct NewLocation {
pub full_text_url: Option<String>,
pub location_platform: LocationPlatform,
pub canonical: bool,
pub checksum: Option<String>,
pub checksum_algorithm: Option<ChecksumAlgorithm>,
}

#[cfg_attr(
Expand All @@ -210,6 +216,8 @@ pub struct PatchLocation {
pub full_text_url: Option<String>,
pub location_platform: LocationPlatform,
pub canonical: bool,
pub checksum: Option<String>,
pub checksum_algorithm: Option<ChecksumAlgorithm>,
}

#[cfg_attr(feature = "backend", derive(diesel::Queryable))]
Expand Down Expand Up @@ -260,6 +268,8 @@ impl From<Location> for PatchLocation {
full_text_url: location.full_text_url,
location_platform: location.location_platform,
canonical: location.canonical,
checksum: location.checksum,
checksum_algorithm: location.checksum_algorithm,
}
}
}
Expand Down
19 changes: 19 additions & 0 deletions thoth-api/src/model/location/policy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ impl CreatePolicy<NewLocation> for LocationPolicy {
return Err(ThothError::ThothLocationError);
}

// Only superusers can add a checksum.
if !user.is_superuser() && data.checksum.is_some() {
return Err(ThothError::CreateLocationChecksumError);
}

// Canonical locations must be complete; non-canonical locations must satisfy rules.
if data.canonical {
data.canonical_record_complete(ctx.db())?;
Expand Down Expand Up @@ -74,6 +79,20 @@ impl UpdatePolicy<Location, PatchLocation> for LocationPolicy {
return Err(ThothError::ThothUpdateCanonicalError);
}

// Only superusers can add a checksum.
if current.checksum.is_none() && patch.checksum.is_some() && !user.is_superuser() {
return Err(ThothError::UpdateLocationChecksumError);
}

// Only superusers can update or delete an existing checksum.
if ((current.checksum.is_some() && current.checksum != patch.checksum)
|| (current.checksum_algorithm.is_some()
&& current.checksum_algorithm != patch.checksum_algorithm))
Comment on lines +88 to +90
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Treat absent checksum fields as no-op in update checks

PatchLocation uses nullable fields with treat_none_as_null, so clients that do not send the newly added checksum/checksum_algorithm fields produce None in patch even when they are not trying to edit checksum metadata. This condition then interprets omitted fields as a checksum change and rejects non-superuser updates (or clears checksum for superusers), which breaks ordinary update_location calls that only edit unrelated fields on records that already have a checksum.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Behaviour is desired: the patch will overwrite whatever is in the existing record, so we don't want to let any non-superuser client submit None for these fields if they have already been set.

&& !user.is_superuser()
{
return Err(ThothError::UpdateLocationChecksumError);
}

// If setting canonical to true, require record completeness.
if patch.canonical {
patch.canonical_record_complete(ctx.db())?;
Expand Down
Loading
Loading