From 75cb16851a350fdd34a432cbc6393203e208ada6 Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Fri, 14 Feb 2025 15:51:07 -0500 Subject: [PATCH 1/4] Move to S3 bucket for blob storage --- Gemfile | 2 ++ Gemfile.lock | 19 ++++++++++++++ config/environments/staging.rb | 4 +-- config/storage.yml | 5 ++++ .../20250214204624_move_azure_blobs_to_s3.rb | 25 +++++++++++++++++++ 5 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 db/migrate/20250214204624_move_azure_blobs_to_s3.rb diff --git a/Gemfile b/Gemfile index 2ea2453321..161de67f10 100644 --- a/Gemfile +++ b/Gemfile @@ -20,6 +20,8 @@ gem "rails", "7.2.2" # These two gems are used to hook into ActiveStorage to store blobs in Azure Storage Service. # gem 'azure-storage', '~> 0.15.0.preview', require: false +# For ActiveStorage on AWS +gem 'aws-sdk-s3', require: false gem 'azure-storage-blob' # Adds soft delete functionality for models. gem 'discard', '~> 1.3' diff --git a/Gemfile.lock b/Gemfile.lock index 48e7c36440..9383a1a6b3 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -83,6 +83,23 @@ GEM autoprefixer-rails (10.4.13.0) execjs (~> 2) awesome_print (1.9.2) + aws-eventstream (1.3.1) + aws-partitions (1.1050.0) + aws-sdk-core (3.218.1) + aws-eventstream (~> 1, >= 1.3.0) + aws-partitions (~> 1, >= 1.992.0) + aws-sigv4 (~> 1.9) + base64 + jmespath (~> 1, >= 1.6.1) + aws-sdk-kms (1.98.0) + aws-sdk-core (~> 3, >= 3.216.0) + aws-sigv4 (~> 1.5) + aws-sdk-s3 (1.181.0) + aws-sdk-core (~> 3, >= 3.216.0) + aws-sdk-kms (~> 1) + aws-sigv4 (~> 1.5) + aws-sigv4 (1.11.0) + aws-eventstream (~> 1, >= 1.0.2) azure-storage-blob (2.0.3) azure-storage-common (~> 2.0) nokogiri (~> 1, >= 1.10.8) @@ -321,6 +338,7 @@ GEM jbuilder (2.13.0) actionview (>= 5.0.0) activesupport (>= 5.0.0) + jmespath (1.6.2) json (2.9.1) jwt (2.9.1) base64 @@ -734,6 +752,7 @@ PLATFORMS DEPENDENCIES annotate awesome_print + aws-sdk-s3 azure-storage-blob better_errors binding_of_caller diff --git a/config/environments/staging.rb 
b/config/environments/staging.rb index 89510dec12..0e6a4d6ef7 100644 --- a/config/environments/staging.rb +++ b/config/environments/staging.rb @@ -94,8 +94,8 @@ } end - # Store files locally. - config.active_storage.service = :azure + # Store files on Amazon S3. + config.active_storage.service = :amazon # Use a different logger for distributed setups. # require 'syslog/logger' diff --git a/config/storage.yml b/config/storage.yml index 2d662892a3..313d7d31f9 100644 --- a/config/storage.yml +++ b/config/storage.yml @@ -12,3 +12,8 @@ azure: storage_account_name: <%= ENV['AZURE_STORAGE_ACCOUNT_NAME'] %> storage_access_key: <%= ENV['AZURE_STORAGE_ACCESS_KEY'] %> container: <%= ENV['AZURE_STORAGE_CONTAINER'] %> + +amazon: + service: S3 + bucket: human-essentials-<%= Rails.env %> + region: "us-east-2" diff --git a/db/migrate/20250214204624_move_azure_blobs_to_s3.rb b/db/migrate/20250214204624_move_azure_blobs_to_s3.rb new file mode 100644 index 0000000000..ca823434e3 --- /dev/null +++ b/db/migrate/20250214204624_move_azure_blobs_to_s3.rb @@ -0,0 +1,25 @@ +class MoveAzureBlobsToS3 < ActiveRecord::Migration[7.2] + # https://stackoverflow.com/questions/71699789/activestorage-transfer-all-assets-from-one-bucket-to-another-bucket + def up + source_service = ActiveStorage::Blob.services.fetch(:azure) + destination_service = ActiveStorage::Blob.services.fetch(:amazon) + + ActiveStorage::Blob.where(service_name: source_service.name).find_each do |blob| + key = blob.key + + raise "I can't find blob #{blob.id} (#{key})" unless source_service.exist?(key) + + unless destination_service.exist?(key) + source_service.open(blob.key, checksum: blob.checksum) do |file| + destination_service.upload(blob.key, file, checksum: blob.checksum) + end + end + blob.update_columns(service_name: destination_service.name) + end + + end + + def down + raise ActiveRecord::IrreversibleMigration + end +end From 95a9c2178a848d052592082dae6e01068e1a0042 Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Sun, 16 Feb 2025 
10:18:31 -0500 Subject: [PATCH 2/4] backup/restore script --- lib/tasks/backup_db_rds.rake | 14 +++++--------- lib/tasks/fetch_latest_db.rake | 27 ++++++++------------------- 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/lib/tasks/backup_db_rds.rake b/lib/tasks/backup_db_rds.rake index 3abc59f6f5..d442e6e6d3 100644 --- a/lib/tasks/backup_db_rds.rake +++ b/lib/tasks/backup_db_rds.rake @@ -1,3 +1,5 @@ +require 'aws-sdk-s3' + desc "Update the development db to what is being used in prod" task :backup_db_rds => :environment do logger = Logger.new(STDOUT) @@ -6,18 +8,12 @@ task :backup_db_rds => :environment do current_time = Time.current.strftime("%Y%m%d%H%M%S") logger.info("Copying the database...") - backup_filename = "#{current_time}.rds.dump" + backup_filename = "#{Rails.env}-#{current_time}.rds.dump" system("PGPASSWORD='#{ENV["DIAPER_DB_PASSWORD"]}' pg_dump -Fc -v --host=#{ENV["DIAPER_DB_HOST"]} --username=#{ENV["DIAPER_DB_USERNAME"]} --dbname=#{ENV["DIAPER_DB_DATABASE"]} -f #{backup_filename}") - account_name = ENV["AZURE_STORAGE_ACCOUNT_NAME"] - account_key = ENV["AZURE_STORAGE_ACCESS_KEY"] - - blob_client = Azure::Storage::Blob::BlobService.create( - storage_account_name: account_name, - storage_access_key: account_key - ) + client = Aws::S3::Client.new logger.info("Uploading #{backup_filename}") - blob_client.create_block_blob("backups", backup_filename, File.read(backup_filename)) + client.put_object(bucket: "human-essentials-backups", key: "backups/#{backup_filename}", body: File.read(backup_filename)) File.delete(backup_filename) end diff --git a/lib/tasks/fetch_latest_db.rake b/lib/tasks/fetch_latest_db.rake index 394087a141..fdba6cee39 100644 --- a/lib/tasks/fetch_latest_db.rake +++ b/lib/tasks/fetch_latest_db.rake @@ -1,5 +1,7 @@ +require 'aws-sdk-s3' + desc "Update the development db to what is being used in prod" -BACKUP_CONTAINER_NAME = 'backups' +BUCKET_NAME = "human-essentials-backups" PASSWORD_REPLACEMENT = 'password' task 
:fetch_latest_db do @@ -51,13 +53,13 @@ end private def fetch_latest_backups - backups = blob_client.list_blobs(BACKUP_CONTAINER_NAME) + backups = blob_client.list_objects_v2(bucket: BUCKET_NAME) # # Retrieve the most up to date version of the DB dump # backup = backups.select { |b| b.name.match?(".rds.dump") }.sort do |a,b| - Time.parse(a.properties[:last_modified]) <=> Time.parse(b.properties[:last_modified]) + Time.parse(a.last_modified) <=> Time.parse(b.last_modified) end.reverse.first # @@ -65,30 +67,17 @@ def fetch_latest_backups # filepath = fetch_file_path(backup) puts "\nDownloading blob #{backup.name} to #{filepath}" - blob, content = blob_client.get_blob(BACKUP_CONTAINER_NAME, backup.name) - File.open(filepath, "wb") { |f| f.write(content) } + blob_client.get_object(bucket: BUCKET_NAME, key: backup.name, response_target: filepath) # # At this point, the dumps should be stored on the local # machine of the user under tmp. # - return backup + backup end def blob_client - return @blob_client if @blob_client - - account_name = ENV["AZURE_STORAGE_ACCOUNT_NAME"] - account_key = ENV["AZURE_STORAGE_ACCESS_KEY"] - - if account_name.blank? || account_key.blank? 
- raise "You must have the correct azure credentials in your ENV" - end - - @blob_client = Azure::Storage::Blob::BlobService.create( - storage_account_name: account_name, - storage_access_key: account_key - ) + Aws::S3::Client.new end def fetch_file_path(backup) From 70363fa047d448f1bb9b718cc5153c995677a2ea Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Sun, 16 Feb 2025 17:32:01 -0500 Subject: [PATCH 3/4] Fix rake tasks --- lib/tasks/backup_db_rds.rake | 4 ++-- lib/tasks/fetch_latest_db.rake | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/tasks/backup_db_rds.rake b/lib/tasks/backup_db_rds.rake index d442e6e6d3..4ea75f1fcc 100644 --- a/lib/tasks/backup_db_rds.rake +++ b/lib/tasks/backup_db_rds.rake @@ -11,7 +11,7 @@ task :backup_db_rds => :environment do backup_filename = "#{Rails.env}-#{current_time}.rds.dump" system("PGPASSWORD='#{ENV["DIAPER_DB_PASSWORD"]}' pg_dump -Fc -v --host=#{ENV["DIAPER_DB_HOST"]} --username=#{ENV["DIAPER_DB_USERNAME"]} --dbname=#{ENV["DIAPER_DB_DATABASE"]} -f #{backup_filename}") - client = Aws::S3::Client.new + client = Aws::S3::Client.new(region: 'us-east-2') logger.info("Uploading #{backup_filename}") - client.put_object(bucket: "human-essentials-backups", key: "backups/#{backup_filename}", body: File.read(backup_filename)) + File.open(backup_filename, "rb") { |dump| client.put_object(bucket: "human-essentials-backups", key: "backups/#{backup_filename}", body: dump) } diff --git a/lib/tasks/fetch_latest_db.rake b/lib/tasks/fetch_latest_db.rake index fdba6cee39..aee2aae4da 100644 --- a/lib/tasks/fetch_latest_db.rake +++ b/lib/tasks/fetch_latest_db.rake @@ -15,7 +15,7 @@ task :fetch_latest_db do system("bin/rails db:environment:set RAILS_ENV=development") system("bin/rails db:drop db:create") - puts "Restoring the database with #{backup.name}" + puts "Restoring the database with #{backup.key}" backup_filepath = fetch_file_path(backup) db_username = ENV["PG_USERNAME"].presence || ENV["USER"].presence || "postgres" db_host = ENV["PG_HOST"].presence || "localhost" @@ -58,7 +58,7 @@ def fetch_latest_backups # # Retrieve the most up to date version of 
the DB dump # - backup = backups.select { |b| b.name.match?(".rds.dump") }.sort do |a,b| + backup = backups.flat_map(&:contents).select { |b| b.key.end_with?(".rds.dump") }.sort do |a,b| - Time.parse(a.last_modified) <=> Time.parse(b.last_modified) + a.last_modified <=> b.last_modified end.reverse.first @@ -66,8 +66,8 @@ def fetch_latest_backups # Download each of the backups onto the local disk in tmp # filepath = fetch_file_path(backup) - puts "\nDownloading blob #{backup.name} to #{filepath}" - blob_client.get_object(bucket: BUCKET_NAME, key: backup.name, response_target: filepath) + puts "\nDownloading blob #{backup.key} to #{filepath}" + blob_client.get_object(bucket: BUCKET_NAME, key: backup.key, response_target: filepath) # # At this point, the dumps should be stored on the local @@ -77,11 +77,11 @@ def fetch_latest_backups end def blob_client - Aws::S3::Client.new + Aws::S3::Client.new(region: 'us-east-2') end def fetch_file_path(backup) - File.join(Rails.root, 'tmp', backup.name) + File.join(Rails.root, 'tmp', File.basename(backup.key)) end def replace_user_passwords From b207112e24101980921f4bebb36908cd5bdf5033 Mon Sep 17 00:00:00 2001 From: Daniel Orner Date: Sun, 16 Feb 2025 19:13:04 -0500 Subject: [PATCH 4/4] Only run on staging --- db/migrate/20250214204624_move_azure_blobs_to_s3.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/db/migrate/20250214204624_move_azure_blobs_to_s3.rb b/db/migrate/20250214204624_move_azure_blobs_to_s3.rb index ca823434e3..b752ca309c 100644 --- a/db/migrate/20250214204624_move_azure_blobs_to_s3.rb +++ b/db/migrate/20250214204624_move_azure_blobs_to_s3.rb @@ -1,6 +1,8 @@ class MoveAzureBlobsToS3 < ActiveRecord::Migration[7.2] # https://stackoverflow.com/questions/71699789/activestorage-transfer-all-assets-from-one-bucket-to-another-bucket def up + return unless Rails.env.staging? + source_service = ActiveStorage::Blob.services.fetch(:azure) destination_service = ActiveStorage::Blob.services.fetch(:amazon)