Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ FROM python:3.9.20

RUN apt-get update && apt-get install -y gcc build-essential && rm -rf /var/lib/apt/lists/*

ENV PYTHONUNBUFFERED 1
ENV PYTHONUNBUFFERED=1

RUN curl -sSL https://install.python-poetry.org | python3 - --version 1.8.3
# Poetry location so future commands (below) work

ENV PATH $PATH:/root/.local/bin
ENV PATH=$PATH:/root/.local/bin
# Want poetry to use system python of docker container
RUN poetry config virtualenvs.create false
RUN poetry config virtualenvs.in-project false
Expand Down
475 changes: 253 additions & 222 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ channels = "2.4"
channels-redis = "3.2.0"
django-extra-fields = "0.9"
pillow = "10.3.0"
celery = "4.2.1"
celery = "4.4.7"
gunicorn = "22.0.0"
urllib3 = ">=1.21.1,<1.25"
uvicorn = {version = "0.13.3", extras = ["standard"]}
Expand Down
74 changes: 74 additions & 0 deletions src/apps/analytics/tasks.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import time
import logging
from celery_config import app
Expand Down Expand Up @@ -570,3 +571,76 @@ def reset_computed_storage_analytics():
elapsed_time
)
)


@app.task(queue="site-worker")
def delete_orphan_files():
logger.info("Task delete_orphan_files started")

# Find most recent file
most_recent_log_file = get_most_recent_storage_inconsistency_log_file(logger)
if not most_recent_log_file:
logger.warning("No storage inconsistency log file found. Nothing will be removed")
raise Exception("No storage inconsistency log file found")

# Get the list of orphan files from the content of the most recent log file
log_folder = "/app/logs/"
orphan_files_path = get_files_path_from_orphan_log_file(os.path.join(log_folder, most_recent_log_file), logger)

# Delete those files in batch (max 1000 element at once)
batch_size = 1000
for i in range(0, len(orphan_files_path), batch_size):
batch = orphan_files_path[i:i + batch_size]
objects_formatted = [{'Key': path} for path in batch]
BundleStorage.bucket.delete_objects(Delete={'Objects': objects_formatted})

logger.info("Delete oprhan files finished")


def get_most_recent_storage_inconsistency_log_file(logger):
log_folder = "/app/logs/"
try:
log_files = [f for f in os.listdir(log_folder) if os.path.isfile(os.path.join(log_folder, f))]
except FileNotFoundError:
logger.info(f"Folder '{log_folder}' does not exist.")
return None

most_recent_log_file = None
most_recent_datetime = None
datetime_format = "%Y%m%d-%H%M%S"
for file in log_files:
try:
basename = os.path.basename(file)
datetime_str = basename[len("db_storage_inconsistency_"):-len(".log")]
file_datetime = datetime.strptime(datetime_str, datetime_format)
if most_recent_datetime is None or file_datetime > most_recent_datetime:
most_recent_datetime = file_datetime
most_recent_log_file = file
except ValueError:
logger.warning(f"Filename '{file}' does not match the expected format and will be ignored.")

return most_recent_log_file


def get_files_path_from_orphan_log_file(log_file_path, logger):
files_path = []

try:
with open(log_file_path) as log_file:
lines = log_file.readlines()
orphan_files_lines = []
for i, line in enumerate(lines):
if "Orphaned files" in line:
orphan_files_lines = lines[i + 1:]
break

for orphan_files_line in orphan_files_lines:
files_path.append(orphan_files_line.split(maxsplit=1)[0])
except FileNotFoundError:
logger.error(f"File '{log_file_path}' does not exist.")
except PermissionError:
logger.error(f"Permission denied for reading the file '{log_file_path}'.")
except IOError as e:
logger.error(f"An I/O error occurred while accessing the file at {log_file_path}: {e}")

return files_path
1 change: 1 addition & 0 deletions src/apps/api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
path('analytics/users_usage/', analytics.users_usage, name='users_usage'),
path('analytics/delete_orphan_files/', analytics.delete_orphan_files, name="delete_orphan_files"),
path('analytics/get_orphan_files/', analytics.get_orphan_files, name="get_orphan_files"),
path('analytics/check_orphans_deletion_status/', analytics.check_orphans_deletion_status, name="check_orphans_deletion_status"),

# API Docs
re_path(r'docs(?P<format>\.json|\.yaml)$', schema_view.without_ui(cache_timeout=0), name='schema-json'),
Expand Down
49 changes: 24 additions & 25 deletions src/apps/api/views/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from competitions.models import Competition, Submission
from analytics.models import StorageUsageHistory, CompetitionStorageDataPoint, UserStorageDataPoint
from api.serializers.analytics import AnalyticsSerializer
from utils.storage import BundleStorage
from apps.analytics.tasks import delete_orphan_files as delete_orphan_files_async_task

import os
import datetime
Expand All @@ -22,6 +22,7 @@


User = get_user_model()
delete_orphan_files_task = None


class SimpleFilterBackend(BaseFilterBackend):
Expand Down Expand Up @@ -322,36 +323,16 @@ def get_orphan_files(request):
@api_view(["DELETE"])
def delete_orphan_files(request):
"""
Delete all orphan files from the storage based on the last storage analytics
Start the deletion of orphan files task
"""

if not request.user.is_superuser:
raise PermissionDenied(detail="Admin only")

logger = logging.getLogger(__name__)
logger.info("Delete orphan files started")

# The analytics task generates a db_storage_inconsistency_<date>-<time>.log file that lists, among other things, the orphan files. Let's use it

# Find most recent file
most_recent_log_file = get_most_recent_storage_inconsistency_log_file()
if not most_recent_log_file:
logger.warning("No storage inconsistency log file found. Nothing will be removed")
return Response({"message": "No storage inconsistency log file found. Nothing will be removed"}, status=status.HTTP_404_NOT_FOUND)

# Get the list of orphan files from the content of the most recent log file
log_folder = "/app/logs/"
orphan_files_path = get_files_path_from_orphan_log_file(os.path.join(log_folder, most_recent_log_file))

# Delete those files in batch (max 1000 element at once)
batch_size = 1000
for i in range(0, len(orphan_files_path), batch_size):
batch = orphan_files_path[i:i + batch_size]
objects_formatted = [{'Key': path} for path in batch]
BundleStorage.bucket.delete_objects(Delete={'Objects': objects_formatted})
global delete_orphan_files_task
delete_orphan_files_task = delete_orphan_files_async_task.delay()

logger.info("Delete oprhan files finished")
return Response({"message": "done"}, status=status.HTTP_200_OK)
return Response({"success": True, "message": "orphan files deletion started"}, status=status.HTTP_200_OK)


def get_most_recent_storage_inconsistency_log_file():
Expand Down Expand Up @@ -405,3 +386,21 @@ def get_files_path_from_orphan_log_file(log_file_path):
logger.error(f"An I/O error occurred while accessing the file at {log_file_path}: {e}")

return files_path


@api_view(["GET"])
def check_orphans_deletion_status(request):
"""
Get the orphan files deletion task status.
Return one of ["PENDING", "STARTED", "SUCCESS", "FAILURE", "RETRY", "REVOKED"]
"""

if not request.user.is_superuser:
raise PermissionDenied(detail="Admin only")

global delete_orphan_files_task
state = None
if delete_orphan_files_task:
state = delete_orphan_files_task.state

return Response({"status": state}, status=status.HTTP_200_OK)
2 changes: 2 additions & 0 deletions src/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@
CELERY_BROKER_URL = os.environ.get('BROKER_URL')
if not CELERY_BROKER_URL:
CELERY_BROKER_URL = f'pyamqp://{RABBITMQ_DEFAULT_USER}:{RABBITMQ_DEFAULT_PASS}@{RABBITMQ_HOST}:{RABBITMQ_PORT}//'
CELERY_RESULT_BACKEND = os.environ.get("REDIS_URL", "redis://redis:6379")
CELERY_IGNORE_RESULT = False # Ensure that Celery tracks the state
CELERY_TASK_SERIALIZER = 'json'
CELERY_ACCEPT_CONTENT = ('json',)
CELERY_BEAT_SCHEDULE = {
Expand Down
3 changes: 3 additions & 0 deletions src/static/js/ours/client.js
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,9 @@ CODALAB.api = {
get_orphan_files: () => {
return CODALAB.api.request('GET', `${URLS.API}analytics/get_orphan_files/`)
},
check_orphans_deletion_status: () => {
return CODALAB.api.request('GET', `${URLS.API}analytics/check_orphans_deletion_status/`)
},
/*---------------------------------------------------------------------
User Quota and Cleanup
---------------------------------------------------------------------*/
Expand Down
113 changes: 99 additions & 14 deletions src/static/riot/analytics/analytics.tag
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,9 @@
<a class="item" data-tab="competitions-usage">Competitions usage</a>
<a class="item" data-tab="users-usage">Users usage</a>
<div class="delete-oprhans-container">
<button class="ui red button" onclick="{showConfirmationModal}">
<i class="icon warning"></i>Delete orphan files
<button class="ui red button { disabled: delete_orphans_button_modal_disabled }" onclick="{showConfirmationModal}">
<i class="icon { warning: !delete_orphans_button_modal_loading}"></i>
{delete_orphans_button_modal_text}
</button>
</div>
</div>
Expand Down Expand Up @@ -163,7 +164,7 @@
<h3>Do you want to proceed ?</h3>
</div>
<div class="actions">
<button class="ui icon button {delete_button_color} { loading: delete_button_loading } { disabled: delete_button_disabled }" onclick="{deleteOrphanFiles}">
<button class="ui icon button {delete_button_color} { disabled: delete_button_disabled }" onclick="{deleteOrphanFiles}">
<i if={delete_button_color=="green"} class="check icon"></i>
{delete_button_text}
</button>
Expand Down Expand Up @@ -217,6 +218,11 @@
self.delete_button_disabled = false
self.delete_button_text = "Yes, delete all orphan files"

self.delete_orphans_button_modal_text = "Delete orphan files"
self.delete_orphans_button_modal_loading = false
self.delete_orphans_button_modal_disabled = false
self.pollingInterval;

self.one("mount", function () {
// Semantic UI
$('.tabular.menu .item', self.root).tab();
Expand Down Expand Up @@ -327,6 +333,7 @@
self.time_range_shortcut("month");
self.update_chart_resolution("day");
self.getOrphanFiles();
self.startCheckOrphansDeletionStatus();
})

/*---------------------------------------------------------------------
Expand Down Expand Up @@ -525,23 +532,103 @@
self.update();
}

self.checkOrphansDeletionStatus = function() {
CODALAB.api.check_orphans_deletion_status()
.done(function(data) {
if (data.status) {
if (data.status == "SUCCESS") {
toastr.success("Orphan files deletion successful")
self.delete_button_color = "green";
self.delete_button_text = "Deletion Successful";
}
if (data.status == "FAILURE") {
toastr.error("Orphan files deletion failed")
self.delete_button_color = "red";
self.delete_button_text = "Deletion Failed";
}
if (data.status == "REVOKED") {
toastr.error("Orphan files deletion has been canceled")
self.delete_button_color = "red";
self.delete_button_text = "Deletion canceled";
}
if (data.status == "SUCCESS" || data.status == "FAILURE" || data.status == "REVOKED") {
// Task is over
self.stopCheckOrphansDeletionStatus();
self.delete_orphans_button_modal_text = "Delete orphan files";
self.delete_orphans_button_modal_loading = false;
self.delete_orphans_button_modal_disabled = false;
self.delete_button_loading = false;
self.delete_button_disabled = true;
} else {
// Task is running
self.delete_orphans_button_modal_text = "Orphan files deletion in progress...";
self.delete_orphans_button_modal_disabled = true;
self.delete_orphans_button_modal_loading = true;

self.delete_button_text = "Orphan files deletion in progress...";
self.delete_button_disabled = true;
self.delete_button_loading = true;
}
} else {
// No task running
self.stopCheckOrphansDeletionStatus();

self.delete_orphans_button_modal_text = "Delete orphan files";
self.delete_orphans_button_modal_disabled = false;
self.delete_orphans_button_modal_loading = false;

self.delete_button_color = "red";
self.delete_button_loading = false;
self.delete_button_disabled = false;
self.delete_button_text = "Yes, delete all orphan files";
}

})
.fail(function(response) {
toastr.error("Orphan files deletion's task status check failed")
self.delete_orphans_button_modal_text = "Delete orphan files";
self.delete_orphans_button_modal_loading = false;
self.delete_orphans_button_modal_disabled = false;

self.delete_button_text = "Yes, delete all orphan files";
self.delete_button_color = "red";
self.delete_button_loading = false;
self.delete_button_disabled = false;

self.stopCheckOrphansDeletionStatus();
})
.always(function() {
self.update();
})
}

self.startCheckOrphansDeletionStatus = function () {
self.pollingInterval = setInterval(self.checkOrphansDeletionStatus, 2000);
}

self.stopCheckOrphansDeletionStatus = function() {
if (self.pollingInterval) {
clearInterval(self.pollingInterval);
self.pollingInterval = null;
}
}

self.deleteOrphanFiles = function() {
self.delete_button_loading = true
self.delete_button_disabled = true
self.delete_orphans_button_modal_loading = true;
self.delete_orphans_button_modal_disabled = true;
self.delete_button_text = "Orphan files deletion in progress...";
self.delete_orphans_button_modal_text = "Orphan files deletion in progress...";
self.update()
CODALAB.api.delete_orphan_files()
.done(function (data) {
console.log("done", data);
self.delete_button_color = "green";
self.delete_button_disabled = true;
self.delete_button_text = "Deletion Successful";
if (data && data.success && !self.pollingInterval) {
self.startCheckOrphansDeletionStatus();
}
})
.fail(function (response) {
console.log("fail response=", response);
toastr.error("Deletion failed, error occurred")
self.delete_button_color = "red";
self.delete_button_disabled = false;
self.delete_button_text = "Deletion Failed";
toastr.error("Orphan files deletion failed to start")
})
.always(function () {
self.delete_button_loading = false
Expand All @@ -552,12 +639,10 @@
self.getOrphanFiles = function() {
CODALAB.api.get_orphan_files()
.done(function (data) {
console.log("get_orphan_files success. Response", data);
self.nb_orphan_files = data.data.length
self.update({nb_orphan_files: self.nb_orphan_files});
})
.fail(function (response) {
console.log("get_orphan_files failed. Response=", response);
toastr.error("Get oprhan files failed, error occurred")
});
}
Expand Down