diff --git a/.circleci/config.yml b/.circleci/config.yml index 14ce6ddd0..250ff65ec 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,7 +3,7 @@ version: 2 jobs: test: machine: - image: ubuntu-2004:2022.07.1 + image: ubuntu-2204:2024.01.2 steps: - checkout @@ -34,18 +34,18 @@ jobs: - run: name: Build containers and collect static command: | - docker-compose -f docker-compose.yml -f docker-compose.selenium.yml up -d - docker-compose -f docker-compose.yml -f docker-compose.selenium.yml exec django python manage.py collectstatic --noinput + docker compose -f docker-compose.yml -f docker-compose.selenium.yml up -d + docker compose -f docker-compose.yml -f docker-compose.selenium.yml exec django python manage.py collectstatic --noinput - run: docker-compose exec django flake8 src/ - run: name: pytest - command: docker-compose -f docker-compose.yml -f docker-compose.selenium.yml exec django py.test src/ -m "not e2e" + command: docker compose -f docker-compose.yml -f docker-compose.selenium.yml exec django py.test src/ -m "not e2e" - run: name: e2e tests - command: docker-compose -f docker-compose.yml -f docker-compose.selenium.yml exec django py.test src/tests/functional/ -m e2e + command: docker compose -f docker-compose.yml -f docker-compose.selenium.yml exec django py.test src/tests/functional/ -m e2e no_output_timeout: 60m - store_artifacts: diff --git a/.env_circleci b/.env_circleci index 886df1b62..3037e07af 100644 --- a/.env_circleci +++ b/.env_circleci @@ -10,6 +10,7 @@ RABBITMQ_DEFAULT_USER=rabbit-username RABBITMQ_DEFAULT_PASS=rabbit-password-you-should-change RABBITMQ_PORT=5672 RABBITMQ_HOST=rabbit +WORKER_CONNECTION_TIMEOUT=100000000 # milliseconds FLOWER_BASIC_AUTH=root:password-you-should-change diff --git a/.env_sample b/.env_sample index ce6916ef1..a60000460 100644 --- a/.env_sample +++ b/.env_sample @@ -23,6 +23,10 @@ RABBITMQ_DEFAULT_USER=rabbit-username RABBITMQ_DEFAULT_PASS=rabbit-password-you-should-change 
RABBITMQ_MANAGEMENT_PORT=15672 RABBITMQ_PORT=5672 +WORKER_CONNECTION_TIMEOUT=100000000 # milliseconds +#RABBITMQ_HTTP_PROXY=http://proxy-example:3128 +#RABBITMQ_HTTPS_PROXY=http://proxy-example:3128 +#RABBITMQ_NO_PROXY=localhost,172.0.0.0/8 FLOWER_PUBLIC_PORT=5555 @@ -37,6 +41,9 @@ SELENIUM_HOSTNAME=selenium #EMAIL_HOST_PASSWORD=pass #EMAIL_PORT=587 #EMAIL_USE_TLS=True +#DEFAULT_FROM_EMAIL="Codabench <noreply@example.com>" +#SERVER_EMAIL=noreply@example.com + # ----------------------------------------------------------------------------- # Storage @@ -59,6 +66,22 @@ AWS_STORAGE_PRIVATE_BUCKET_NAME=private AWS_S3_ENDPOINT_URL=http://minio:9000/ AWS_QUERYSTRING_AUTH=False + +# ----------------------------------------------------------------------------- +# Limit for re-running submission +# This is used to limit users to rerun submissions +# on default queue when number of submissions is < RERUN_SUBMISSION_LIMIT +# ----------------------------------------------------------------------------- +RERUN_SUBMISSION_LIMIT=30 + + +# ----------------------------------------------------------------------------- +# Enable or disable regular email sign-in and sign-up +# ----------------------------------------------------------------------------- +ENABLE_SIGN_UP=True +ENABLE_SIGN_IN=True + + # # S3 storage example # STORAGE_TYPE=s3 # AWS_ACCESS_KEY_ID=12312312312312312331223 diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 000000000..31aa14768 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,23 @@ +# HOW YOU CAN CONTRIBUTE TO THE CODABENCH PROJECT + +## 1. Being a Codabench user. + +- Create a user account on https://codalab.lisn.fr and on https://codabench.org. +- Register on https://codabench.org for this existing competition (IRIS-tuto) https://www.codabench.org/competitions/1115/ and make a submission (from https://github.com/codalab/competition-examples/tree/master/codabench/iris): sample_result_submission and sample_code_submission.
See https://github.com/codalab/codabench/wiki/User_Participating-in-a-Competition +- Create your own private competition (from https://github.com/codalab/competition-examples/tree/master/codabench/ ). See https://github.com/codalab/codabench/wiki/Getting-started-with-Codabench + +## 2. Setting up a local instance of Codabench. + +- Follow the tutorial in the Codabench wiki: https://github.com/codalab/codabench/wiki/Codabench-Installation. Depending on your host OS, you might have to tune your environment file a bit. Try without enabling the SSL protocol (that way, you don't need a domain name for the server). Try using the embedded Minio storage solution instead of a private cloud storage. +- If needed, you can also look into https://github.com/codalab/codabench/wiki/How-to-deploy-Codabench-on-your-server + +## 3. Using one's local instance + +- Create your own competition and play with it. You can look at the output logs of each docker container. +- Set yourself as an admin of your platform (https://github.com/codalab/codabench/wiki/Administrator-procedures#give-superuser-privileges-to-an-user) and visit the Django Admin menu: https://github.com/codalab/codabench/wiki/Administrator-procedures#give-superuser-privileges-to-an-user + +## 4. Setting up an autonomous compute-worker on your PC + +- Configure and launch the docker container: https://github.com/codalab/codabench/wiki/Compute-Worker-Management---Setup +- Create a private queue for your own new competition on the production server codabench.org: https://github.com/codalab/codabench/wiki/Queue-Management#create-queue +- Assign your own compute-worker to this private queue instead of the default queue.
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 000000000..7c7bda519 --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,19 @@ +***This is a template, please remove any details not relevant to your issue*** + +If you are a competition participant: +----------------------------------- + +- This GitHub repository is about the **platform** itself. +- If your problem is specific to a competition, **please contact its organizers directly**. + +If you are an organizer and have a problem hosting your competition: +------------------------------------------------------------------ + +- Please post a link to your competition. + + +If you are having trouble using the site: +----------------------------------------- + +- What browser and version are you using? +- What is the URL of the problem? Codalab is an open source project; we may not be supporting the instance you are using! diff --git a/.gitignore b/.gitignore index b5b34f56a..4ac22b2f4 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ var/ var_*/ certs/ backups/ +logs/ src/static/output.css src/static/output.js diff --git a/Dockerfile.rabbitmq b/Dockerfile.rabbitmq new file mode 100644 index 000000000..fc0b411e8 --- /dev/null +++ b/Dockerfile.rabbitmq @@ -0,0 +1,3 @@ +FROM rabbitmq:management +ARG WORKER_CONNECTION_TIMEOUT +RUN echo "consumer_timeout = $WORKER_CONNECTION_TIMEOUT" >> /etc/rabbitmq/conf.d/10-defaults.conf diff --git a/compute_worker/compute_worker.py b/compute_worker/compute_worker.py index 875051090..24e7d25bc 100644 --- a/compute_worker/compute_worker.py +++ b/compute_worker/compute_worker.py @@ -26,9 +26,12 @@ from kombu import Queue, Exchange from urllib3 import Retry - logger = logging.getLogger() + +# ----------------------------------------------- +# Celery + Rabbit MQ +# ----------------------------------------------- # Init celery + rabbit queue definitions app = Celery() app.config_from_object('celery_config') # grabs celery_config.py @@
-38,6 +41,9 @@ ] +# ----------------------------------------------- +# Directories +# ----------------------------------------------- # Setup base directories used by all submissions # note: we need to pass this directory to docker-compose so it knows where to store things! HOST_DIRECTORY = os.environ.get("HOST_DIRECTORY", "/tmp/codabench/") @@ -45,6 +51,10 @@ CACHE_DIR = os.path.join(BASE_DIR, "cache") MAX_CACHE_DIR_SIZE_GB = float(os.environ.get('MAX_CACHE_DIR_SIZE_GB', 10)) + +# ----------------------------------------------- +# Submission status +# ----------------------------------------------- # Status options for submissions STATUS_NONE = "None" STATUS_SUBMITTING = "Submitting" @@ -65,6 +75,10 @@ STATUS_FAILED, ) + +# ----------------------------------------------- +# Container Engine +# ----------------------------------------------- # Setup the container engine that we are using if os.environ.get("CONTAINER_ENGINE_EXECUTABLE"): CONTAINER_ENGINE_EXECUTABLE = os.environ.get("CONTAINER_ENGINE_EXECUTABLE") @@ -75,9 +89,18 @@ CONTAINER_ENGINE_EXECUTABLE = "docker" +# ----------------------------------------------- +# Exceptions +# ----------------------------------------------- class SubmissionException(Exception): pass +class DockerImagePullException(Exception): + pass + +class ExecutionTimeLimitExceeded(Exception): + pass + # ----------------------------------------------------------------------------- # The main compute worker entrypoint, this is how a job is ran at the highest @@ -94,6 +117,8 @@ def run_wrapper(run_args): if run.is_scoring: run.push_scores() run.push_output() + except DockerImagePullException as e: + run._update_status(STATUS_FAILED, str(e)) except SubmissionException as e: run._update_status(STATUS_FAILED, str(e)) except SoftTimeLimitExceeded: @@ -160,14 +185,14 @@ def is_valid_zip(zip_path): return False -class ExecutionTimeLimitExceeded(Exception): - pass - - def alarm_handler(signum, frame): raise ExecutionTimeLimitExceeded +# 
----------------------------------------------- +# Class Run +# Responsible for running a submission inside a docker container +# ----------------------------------------------- class Run: """A "Run" in Codalab is composed of some program, some data to work with, and some signed URLs to upload results to. There is also a secret key to do special commands for just this submission. @@ -309,7 +334,7 @@ def _update_submission(self, data): logger.info(f"Updating submission @ {url} with data = {data}") - resp = self.requests_session.patch(url, data, timeout=15) + resp = self.requests_session.patch(url, data, timeout=150) if resp.status_code == 200: logger.info("Submission updated successfully!") else: @@ -335,13 +360,69 @@ def _update_status(self, status, extra_information=None): def _get_container_image(self, image_name): logger.info("Running pull for image: {}".format(image_name)) + retries, max_retries = (0, 3) + while retries < max_retries: + try: + cmd = [CONTAINER_ENGINE_EXECUTABLE, 'pull', image_name] + container_engine_pull = check_output(cmd) + logger.info("Pull complete for image: {0} with output of {1}".format(image_name, container_engine_pull)) + break # Leave the loop once the pull succeeds + except CalledProcessError as pull_error: + retries += 1 + if retries >= max_retries: + error_message = f"Pull for image: {image_name} returned a non-zero exit code! Check if the docker image exists on docker hub. {pull_error}" + logger.info(error_message) + # Prepare data to be sent to submissions api + docker_pull_fail_data = { + "type": "Docker_Image_Pull_Fail", + "error_message": error_message, + "is_scoring": self.is_scoring + } + # Send data to be written to ingestion logs + self._update_submission(docker_pull_fail_data) + # Send error through web socket to the frontend + asyncio.run(self._send_data_through_socket(error_message)) + raise DockerImagePullException(f"Pull for {image_name} failed!") + else: + logger.info("Failed.
Retrying in 5 seconds...") + time.sleep(5) # Wait 5 seconds before retrying + + async def _send_data_through_socket(self, error_message): + """ + This function gets an error message and sends it through a web socket. This function is used for sending + - Docker image pull failure logs + - Execution time limit exceeded logs + """ + logger.info(f"Connecting to {self.websocket_url} to send docker image pull error") + + # connect to web socket + websocket = await websockets.connect(self.websocket_url) + + # define websocket errors + websocket_errors = (socket.gaierror, websockets.WebSocketException, websockets.ConnectionClosedError, ConnectionRefusedError) + try: - cmd = [CONTAINER_ENGINE_EXECUTABLE, 'pull', image_name] - container_engine_pull = check_output(cmd) - logger.info("Pull complete for image: {0} with output of {1}".format(image_name, container_engine_pull)) - except CalledProcessError: - logger.info("Pull for image: {} returned a non-zero exit code!") - raise SubmissionException(f"Pull for {image_name} failed!") + # send message + await websocket.send(json.dumps({ + "kind": "stderr", + "message": error_message + })) + + except websocket_errors: + # handle websocket errors + logger.info(f"Error sending failed through websocket") + try: + await websocket.close() + except Exception as e: + logger.error(e) + else: + # no error in websocket message sending + logger.info(f"Error sent successfully through websocket") + + logger.info(f"Disconnecting from websocket {self.websocket_url}") + + # close websocket + await websocket.close() def _get_bundle(self, url, destination, cache=True): """Downloads zip from url and unzips into destination. If cache=True then url is hashed and checked @@ -384,7 +465,7 @@ def _get_bundle(self, url, destination, cache=True): raise # Re-raise the last caught BadZipFile exception else: logger.info("Failed.
Retrying in 60 seconds...") - time.sleep(60) # Wait 60 seconds before retrying + time.sleep(60) # Wait 60 seconds before retrying # Return the zip file path for other uses, e.g. for creating a MD5 hash to identify it return bundle_file @@ -426,12 +507,25 @@ async def _run_container_engine_cmd(self, engine_cmd, kind): websocket = await websockets.connect(self.websocket_url) websocket_errors = (socket.gaierror, websockets.WebSocketException, websockets.ConnectionClosedError, ConnectionRefusedError) + # Function to read a line, if the line is larger than the buffer size we will + # return the buffer so we can continue reading until we get a newline, rather + # than getting a LimitOverrunError + async def _readline_or_chunk(stream): + try: + return await stream.readuntil(b"\n") + except asyncio.exceptions.IncompleteReadError as e: + # Just return what has been read so far + return e.partial + except asyncio.exceptions.LimitOverrunError as e: + # If we get a LimitOverrunError, we will return the buffer so we can continue reading + return await stream.read(e.consumed) + while any(v["continue"] for k, v in self.logs[kind].items() if k in ['stdout', 'stderr']): try: logs = [self.logs[kind][key] for key in ('stdout', 'stderr')] for value in logs: try: - out = await asyncio.wait_for(value["stream"].readline(), timeout=.1) + out = await asyncio.wait_for(_readline_or_chunk(value["stream"]), timeout=.1) if out: value["data"] += out print("WS: " + str(out)) @@ -610,7 +704,7 @@ def _put_dir(self, url, directory): start_time = time.time() zip_path = make_archive(os.path.join(self.root_dir, str(uuid.uuid4())), 'zip', directory) duration = time.time() - start_time - logger.info("Time needed to zip archive: {duration} seconds.") + logger.info(f"Time needed to zip archive: {duration} seconds.") if is_valid_zip(zip_path): # Check zip integrity self._put_file(url, file=zip_path) # Send the file break # Leave the loop in case of success @@ -627,7 +721,7 @@ def _put_file(self, url, 
file=None, raw_data=None, content_type='application/zip """ if file and raw_data: raise Exception("Cannot put both a file and raw_data") - + headers = { # For Azure only, other systems ignore these headers 'x-ms-blob-type': 'BlockBlob', @@ -731,7 +825,19 @@ def start(self): try: loop.run_until_complete(gathered_tasks) except ExecutionTimeLimitExceeded: - raise SubmissionException(f"Execution Time Limit exceeded. Limit was {self.execution_time_limit} seconds") + error_message = f"Execution Time Limit exceeded. Limit was {self.execution_time_limit} seconds" + logger.info(error_message) + # Prepare data to be sent to submissions api + execution_time_limit_exceeded_data = { + "type": "Execution_Time_Limit_Exceeded", + "error_message": error_message, + "is_scoring": self.is_scoring + } + # Send data to be written to ingestion/scoring std_err + self._update_submission(execution_time_limit_exceeded_data) + # Send error through web socket to the frontend + asyncio.run(self._send_data_through_socket(error_message)) + raise SubmissionException(error_message) finally: self.watch = False for kind, logs in self.logs.items(): diff --git a/compute_worker/compute_worker_requirements.txt b/compute_worker/compute_worker_requirements.txt index 89600fa51..c0f4a0c6b 100644 --- a/compute_worker/compute_worker_requirements.txt +++ b/compute_worker/compute_worker_requirements.txt @@ -1,5 +1,7 @@ celery==4.4.0 requests==2.20.0 -watchdog==0.8.3 +watchdog==2.1.1 +argh==0.26.2 websockets==8.1 aiofiles==0.4.0 +pyyaml==5.3.1 \ No newline at end of file diff --git a/docker-compose.selenium.yml b/docker-compose.selenium.yml index b918d72f7..450f2c3e4 100644 --- a/docker-compose.selenium.yml +++ b/docker-compose.selenium.yml @@ -8,7 +8,7 @@ services: - 36475:36475 selenium: - image: selenium/standalone-firefox-debug:3.141.59 + image: selenium/standalone-firefox:124.0 volumes: - ./src/tests/functional/test_files:/test_files/ - ./artifacts:/artifacts/:z diff --git a/docker-compose.yml 
b/docker-compose.yml index e4e9e8180..08e7a5c84 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -30,6 +30,7 @@ services: - .:/app:delegated - /tmp/codalab-v2/django:/codalab_tmp - ./backups:/app/backups + - ./var/logs:/app/logs restart: unless-stopped ports: - 8000:8000 @@ -126,11 +127,19 @@ services: # Rabbitmq & Flower monitoring tool #----------------------------------------------- rabbit: - image: rabbitmq:3.6-management + build: + context: . + dockerfile: Dockerfile.rabbitmq + args: + - WORKER_CONNECTION_TIMEOUT=${WORKER_CONNECTION_TIMEOUT} # setting hostname here makes data persist properly between # containers being destroyed..! hostname: rabbit env_file: .env + environment: + - http_proxy=${RABBITMQ_HTTP_PROXY} + - https_proxy=${RABBITMQ_HTTPS_PROXY} + - no_proxy=${RABBITMQ_NO_PROXY} ports: - ${RABBITMQ_MANAGEMENT_PORT:-15672}:15672 - ${RABBITMQ_PORT}:5672 @@ -222,4 +231,4 @@ services: logging: options: max-size: "20k" - max-file: "10" + max-file: "10" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 5a14473b8..c3b35809d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,8 @@ urllib3<1.25,>=1.21.1 uvicorn[standard]==0.13.3 #daphne==2.2.2 pyyaml==5.3.1 -watchdog==0.8.3 +watchdog==2.1.1 +argh==0.26.2 python-dateutil==2.7.3 bpython==0.17.1 websockets==8.1 @@ -26,6 +27,7 @@ bleach==3.1.4 # Heroku staging debug tools django-debug-toolbar==3.2 django-querycount==0.7.0 +blessings==1.7 # User impersonation django-su==0.9.0 diff --git a/src/apps/analytics/migrations/0001_initial.py b/src/apps/analytics/migrations/0001_initial.py new file mode 100644 index 000000000..b5aefc6c0 --- /dev/null +++ b/src/apps/analytics/migrations/0001_initial.py @@ -0,0 +1,62 @@ +# Generated by Django 2.2.17 on 2023-09-14 13:19 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + 
('competitions', '0035_auto_20230914_1319'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name='AdminStorageDataPoint', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('backups_total', models.DecimalField(blank=True, decimal_places=2, max_digits=14, null=True)), + ('at_date', models.DateTimeField()), + ('created_at', models.DateTimeField(auto_now_add=True)), + ], + ), + migrations.CreateModel( + name='StorageUsageHistory', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('bucket_name', models.CharField(max_length=255)), + ('total_usage', models.DecimalField(blank=True, decimal_places=2, max_digits=14, null=True)), + ('competitions_usage', models.DecimalField(blank=True, decimal_places=2, max_digits=14, null=True)), + ('users_usage', models.DecimalField(blank=True, decimal_places=2, max_digits=14, null=True)), + ('admin_usage', models.DecimalField(blank=True, decimal_places=2, max_digits=14, null=True)), + ('orphaned_file_usage', models.DecimalField(blank=True, decimal_places=2, max_digits=14, null=True)), + ('at_date', models.DateTimeField()), + ('created_at', models.DateTimeField(auto_now_add=True)), + ], + ), + migrations.CreateModel( + name='UserStorageDataPoint', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('datasets_total', models.DecimalField(blank=True, decimal_places=2, max_digits=14, null=True)), + ('submissions_total', models.DecimalField(blank=True, decimal_places=2, max_digits=14, null=True)), + ('at_date', models.DateTimeField()), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL)), + ], + ), + migrations.CreateModel( + name='CompetitionStorageDataPoint', + fields=[ 
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('datasets_total', models.DecimalField(blank=True, decimal_places=2, max_digits=14, null=True)), + ('at_date', models.DateTimeField()), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('competition', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='competitions.Competition')), + ], + ), + ] diff --git a/src/apps/analytics/migrations/__init__.py b/src/apps/analytics/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/apps/analytics/models.py b/src/apps/analytics/models.py new file mode 100644 index 000000000..f0afe09f3 --- /dev/null +++ b/src/apps/analytics/models.py @@ -0,0 +1,54 @@ +from django.db import models +from django.conf import settings + + +class StorageUsageHistory(models.Model): + bucket_name = models.CharField(max_length=255) + total_usage = models.DecimalField( + max_digits=14, decimal_places=2, null=True, blank=True + ) # in KiB up to ~ 930 TiB + competitions_usage = models.DecimalField( + max_digits=14, decimal_places=2, null=True, blank=True + ) + users_usage = models.DecimalField( + max_digits=14, decimal_places=2, null=True, blank=True + ) + admin_usage = models.DecimalField( + max_digits=14, decimal_places=2, null=True, blank=True + ) + orphaned_file_usage = models.DecimalField( + max_digits=14, decimal_places=2, null=True, blank=True + ) + at_date = models.DateTimeField() + created_at = models.DateTimeField(auto_now_add=True) + + +class CompetitionStorageDataPoint(models.Model): + competition = models.ForeignKey( + "competitions.competition", null=True, on_delete=models.SET_NULL + ) + datasets_total = models.DecimalField( + max_digits=14, decimal_places=2, null=True, blank=True + ) + at_date = models.DateTimeField() + created_at = models.DateTimeField(auto_now_add=True) + + +class UserStorageDataPoint(models.Model): + user = 
models.ForeignKey(settings.AUTH_USER_MODEL, null=True, on_delete=models.SET_NULL) + datasets_total = models.DecimalField( + max_digits=14, decimal_places=2, null=True, blank=True + ) + submissions_total = models.DecimalField( + max_digits=14, decimal_places=2, null=True, blank=True + ) + at_date = models.DateTimeField() + created_at = models.DateTimeField(auto_now_add=True) + + +class AdminStorageDataPoint(models.Model): + backups_total = models.DecimalField( + max_digits=14, decimal_places=2, null=True, blank=True + ) + at_date = models.DateTimeField() + created_at = models.DateTimeField(auto_now_add=True) diff --git a/src/apps/analytics/tasks.py b/src/apps/analytics/tasks.py new file mode 100644 index 000000000..776baf05f --- /dev/null +++ b/src/apps/analytics/tasks.py @@ -0,0 +1,572 @@ +import time +import logging +from celery_config import app +from datetime import datetime, timezone, timedelta +from django.db.models import ( + Sum, + Q, + F, + Case, + Value, + When, + DecimalField, +) +from django.db.models.functions import TruncDay +from decimal import Decimal + +from competitions.models import Submission, SubmissionDetails +from datasets.models import Data +from utils.storage import BundleStorage +from analytics.models import ( + StorageUsageHistory, + CompetitionStorageDataPoint, + UserStorageDataPoint, + AdminStorageDataPoint, +) +from competitions.models import Competition +from profiles.models import User + +from utils.data import pretty_bytes + +logger = logging.getLogger() + + +@app.task(queue="site-worker", soft_time_limit=60 * 60 * 12) # 12 hours +def create_storage_analytics_snapshot(): + # Timer started ! 
+ logger.info("Task create_storage_analytics_snapshot started") + starting_time = time.process_time() + + # Measure all files with unset size + for dataset in Data.objects.filter(Q(file_size__isnull=True) | Q(file_size__lt=0)): + try: + dataset.file_size = Decimal( + dataset.data_file.size / 1024 + ) # file_size is in KiB + except Exception: + dataset.file_size = Decimal(-1) + finally: + dataset.save() + + for submission in Submission.objects.filter( + Q(prediction_result_file_size__isnull=True) | + Q(prediction_result_file_size__lt=0) + ): + try: + submission.prediction_result_file_size = Decimal( + submission.prediction_result.size / 1024 + ) # prediction_result_file_size is in KiB + except Exception: + submission.prediction_result_file_size = Decimal(-1) + finally: + submission.save() + + for submission in Submission.objects.filter( + Q(scoring_result_file_size__isnull=True) | Q(scoring_result_file_size__lt=0) + ): + try: + submission.scoring_result_file_size = Decimal( + submission.scoring_result.size / 1024 + ) # scoring_result_file_size is in KiB + except Exception: + submission.scoring_result_file_size = Decimal(-1) + finally: + submission.save() + + for submission in Submission.objects.filter( + Q(detailed_result_file_size__isnull=True) | Q(detailed_result_file_size__lt=0) + ): + try: + submission.detailed_result_file_size = Decimal( + submission.detailed_result.size / 1024 + ) # detailed_result_file_size is in KiB + except Exception: + submission.detailed_result_file_size = Decimal(-1) + finally: + submission.save() + + for submissiondetails in SubmissionDetails.objects.filter( + Q(file_size__isnull=True) | Q(file_size__lt=0) + ): + try: + submissiondetails.file_size = Decimal( + submissiondetails.data_file.size / 1024 + ) # file_size is in KiB + except Exception: + submissiondetails.file_size = Decimal(-1) + finally: + submissiondetails.save() + + # Evaluate the storage usage per category (competition, user or admin) and per day + current_datetime = 
datetime.now(timezone.utc) + max_history_days = 365 # days + + # Competitions + competitions_datasets = ( + Data.objects.filter(competition_id__isnull=False) + .annotate(day=TruncDay("created_when")) + .values("day", "competition_id") + .annotate( + size=Sum( + Case( + When(file_size__gt=0, then=F("file_size")), + default=Value(0), + output_field=DecimalField(), + ) + ) + ) + ) + + last_competition_storage_datapoint = CompetitionStorageDataPoint.objects.order_by( + "-at_date" + ).first() + last_competition_storage_datapoint_date = ( + last_competition_storage_datapoint.at_date + if last_competition_storage_datapoint + else current_datetime - timedelta(days=max_history_days) + ).replace(hour=0, minute=0, second=0, microsecond=0) + competition_storage_days_count = int( + (current_datetime - last_competition_storage_datapoint_date).days + ) + competition_storage_day_range = [ + last_competition_storage_datapoint_date + timedelta(day) + for day in range(1, competition_storage_days_count + 1) + ] + + for date in competition_storage_day_range: + for competition in Competition.objects.order_by("id"): + datasets_usage = competitions_datasets.filter( + Q(competition_id=competition.id) & Q(day__lt=date) + ).aggregate(total=Sum("size"))["total"] + defaults = { + "datasets_total": datasets_usage or 0, + } + lookup_params = {"competition_id": competition.id, "at_date": date} + CompetitionStorageDataPoint.objects.update_or_create( + defaults=defaults, **lookup_params + ) + + # Users + users_datasets = ( + Data.objects.filter(created_by_id__isnull=False) + .annotate(day=TruncDay("created_when")) + .values("day", "created_by_id") + .annotate( + size=Sum( + Case( + When(file_size__gt=0, then=F("file_size")), + default=Value(0), + output_field=DecimalField(), + ) + ) + ) + ) + + users_submissions = ( + Submission.objects.filter(owner_id__isnull=False) + .annotate(day=TruncDay("created_when")) + .values("day", "owner_id") + .annotate( + size=Sum( + Case( + When( + 
prediction_result_file_size__gt=0, + then=F("prediction_result_file_size"), + ), + default=Value(0), + output_field=DecimalField(), + ) + Case( + When( + scoring_result_file_size__gt=0, + then=F("scoring_result_file_size"), + ), + default=Value(0), + output_field=DecimalField(), + ) + Case( + When( + detailed_result_file_size__gt=0, + then=F("detailed_result_file_size"), + ), + default=Value(0), + output_field=DecimalField(), + ) + ) + ) + ) + + users_submissions_details = ( + SubmissionDetails.objects.filter(submission__owner_id__isnull=False) + .annotate(day=TruncDay("submission__created_when")) + .values("day", "submission__owner_id") + .annotate( + size=Sum( + Case( + When(file_size__gt=0, then=F("file_size")), + default=Value(0), + output_field=DecimalField(), + ) + ) + ) + ) + + last_user_storage_datapoint = UserStorageDataPoint.objects.order_by( + "-at_date" + ).first() + last_user_storage_datapoint_date = ( + last_user_storage_datapoint.at_date + if last_user_storage_datapoint + else current_datetime - timedelta(days=max_history_days) + ).replace(hour=0, minute=0, second=0, microsecond=0) + user_storage_days_count = int( + (current_datetime - last_user_storage_datapoint_date).days + ) + user_storage_day_range = [ + last_user_storage_datapoint_date + timedelta(day) + for day in range(1, user_storage_days_count + 1) + ] + + for date in user_storage_day_range: + for user in User.objects.order_by("id"): + datasets_usage = users_datasets.filter( + Q(created_by_id=user.id) & Q(day__lt=date) + ).aggregate(total=Sum("size"))["total"] + submissions_usage = users_submissions.filter( + Q(owner_id=user.id) & Q(day__lt=date) + ).aggregate(total=Sum("size"))["total"] + submissiondetails_usage = users_submissions_details.filter( + Q(submission__owner_id=user.id) & Q(day__lt=date) + ).aggregate(total=Sum("size"))["total"] + defaults = { + "datasets_total": datasets_usage or 0, + "submissions_total": (submissions_usage or 0) + (submissiondetails_usage or 0), + } + 
lookup_params = {"user_id": user.id, "at_date": date} + UserStorageDataPoint.objects.update_or_create( + defaults=defaults, **lookup_params + ) + + # Admin + last_admin_storage_datapoint = AdminStorageDataPoint.objects.order_by( + "-at_date" + ).first() + last_admin_storage_datapoint_date = ( + last_admin_storage_datapoint.at_date + if last_admin_storage_datapoint + else current_datetime - timedelta(days=max_history_days) + ).replace(hour=0, minute=0, second=0, microsecond=0) + admin_storage_days_count = int( + (current_datetime - last_admin_storage_datapoint_date).days + ) + admin_storage_day_range = [ + last_admin_storage_datapoint_date + timedelta(day) + for day in range(1, admin_storage_days_count + 1) + ] + admin_storage_at_date = { + last_admin_storage_datapoint_date + timedelta(day): 0 + for day in range(1, admin_storage_days_count + 1) + } + + objects = BundleStorage.bucket.objects.filter(Prefix="backups") + for object in objects: + size = object.size + last_modified = object.last_modified + for date in admin_storage_day_range: + if last_modified < date: + admin_storage_at_date[date] += size + + for date in admin_storage_day_range: + defaults = {"backups_total": admin_storage_at_date[date] / 1024.0} + lookup_params = {"at_date": date} + AdminStorageDataPoint.objects.update_or_create( + defaults=defaults, **lookup_params + ) + + # Check for database <-> storage inconsistency + inconsistencies = {"database": [], "storage": []} + + # Prepare some data + last_storage_usage_history_point = ( + StorageUsageHistory.objects.filter(bucket_name=BundleStorage.bucket.name) + .order_by("-at_date") + .first() + ) + last_storage_usage_history_date = ( + last_storage_usage_history_point.at_date + if last_storage_usage_history_point + else current_datetime - timedelta(days=max_history_days) + ).replace(hour=0, minute=0, second=0, microsecond=0) + storage_usage_history_days_count = int( + (current_datetime - last_storage_usage_history_date).days + ) + 
storage_usage_history_days = range(1, storage_usage_history_days_count + 1) + storage_usage_history_day_range = [ + last_storage_usage_history_date + timedelta(day) + for day in range(1, storage_usage_history_days_count + 1) + ] + + # Database + nb_missing_files = 0 + + # Datasets + for dataset in Data.objects.all().order_by("id"): + if ( + not dataset.data_file or not dataset.data_file.name or not BundleStorage.exists(dataset.data_file.name) + ): + inconsistencies["database"].append( + {"model": "dataset", "field": "data_file", "id": dataset.id} + ) + nb_missing_files += 1 + + # Submissions + for submission in Submission.objects.all().order_by("id"): + if ( + not submission.prediction_result or not submission.prediction_result.name or not BundleStorage.exists(submission.prediction_result.name) + ): + inconsistencies["database"].append( + { + "model": "submission", + "field": "prediction_result", + "id": submission.id, + } + ) + nb_missing_files += 1 + if ( + not submission.scoring_result or not submission.scoring_result.name or not BundleStorage.exists(submission.scoring_result.name) + ): + inconsistencies["database"].append( + {"model": "submission", "field": "scoring_result", "id": submission.id} + ) + nb_missing_files += 1 + if ( + submission.detailed_result and submission.detailed_result.name and not BundleStorage.exists(submission.detailed_result.name) + ): + inconsistencies["database"].append( + {"model": "submission", "field": "detailed_result", "id": submission.id} + ) + nb_missing_files += 1 + + # Submission details + for submissiondetails in SubmissionDetails.objects.all().order_by("id"): + if ( + not submissiondetails.data_file or not submissiondetails.data_file.name or not BundleStorage.exists(submissiondetails.data_file.name) + ): + inconsistencies["database"].append( + { + "model": "submissiondetails", + "field": "data_file", + "id": submissiondetails.id, + } + ) + nb_missing_files += 1 + + # Storage + nb_orphaned_files = 0 + 
orphaned_files_total_size = 0 # In bytes + orphaned_files_size_per_date = { + last_storage_usage_history_date + timedelta(day): 0 + for day in range(1, storage_usage_history_days_count + 1) + } + + # Dataset + db_dataset_paths = Data.objects.values_list("data_file", flat=True).distinct() + storage_dataset_paths = [ + obj.key for obj in BundleStorage.bucket.objects.filter(Prefix="dataset") + ] + orphaned_dataset_files = [ + x for x in storage_dataset_paths if x not in set(db_dataset_paths) + ] + nb_orphaned_files += len(orphaned_dataset_files) + for file in orphaned_dataset_files: + size = BundleStorage.size(file) + last_modified = BundleStorage.get_modified_time(file) + inconsistencies["storage"].append({"path": file, "size": size}) + orphaned_files_total_size += size + for date in storage_usage_history_day_range: + if last_modified < date: + orphaned_files_size_per_date[date] += size + + # Detailed result + db_detailed_result_paths = Submission.objects.values_list( + "detailed_result", flat=True + ).distinct() + storage_detailed_result_paths = [ + obj.key for obj in BundleStorage.bucket.objects.filter(Prefix="detailed_result") + ] + orphaned_detailed_result_files = [ + x + for x in storage_detailed_result_paths + if x not in set(db_detailed_result_paths) + ] + nb_orphaned_files += len(orphaned_detailed_result_files) + for file in orphaned_detailed_result_files: + size = BundleStorage.size(file) + last_modified = BundleStorage.get_modified_time(file) + inconsistencies["storage"].append({"path": file, "size": size}) + orphaned_files_total_size += size + for date in storage_usage_history_day_range: + if last_modified < date: + orphaned_files_size_per_date[date] += size + + # Prediction result + db_prediction_result_paths = Submission.objects.values_list( + "prediction_result", flat=True + ).distinct() + storage_prediction_result_paths = [ + obj.key + for obj in BundleStorage.bucket.objects.filter(Prefix="prediction_result") + ] + orphaned_prediction_result_files = [ 
+ x + for x in storage_prediction_result_paths + if x not in set(db_prediction_result_paths) + ] + nb_orphaned_files += len(orphaned_prediction_result_files) + for file in orphaned_prediction_result_files: + size = BundleStorage.size(file) + last_modified = BundleStorage.get_modified_time(file) + inconsistencies["storage"].append({"path": file, "size": size}) + orphaned_files_total_size += size + for date in storage_usage_history_day_range: + if last_modified < date: + orphaned_files_size_per_date[date] += size + + # Scoring result + db_scoring_result_paths = Submission.objects.values_list( + "scoring_result", flat=True + ).distinct() + storage_scoring_result_paths = [ + obj.key for obj in BundleStorage.bucket.objects.filter(Prefix="scoring_result") + ] + orphaned_scoring_result_files = [ + x for x in storage_scoring_result_paths if x not in set(db_scoring_result_paths) + ] + nb_orphaned_files += len(orphaned_scoring_result_files) + for file in orphaned_scoring_result_files: + size = BundleStorage.size(file) + last_modified = BundleStorage.get_modified_time(file) + inconsistencies["storage"].append({"path": file, "size": size}) + orphaned_files_total_size += size + for date in storage_usage_history_day_range: + if last_modified < date: + orphaned_files_size_per_date[date] += size + + # Submission details + db_submission_details_paths = SubmissionDetails.objects.values_list( + "data_file", flat=True + ).distinct() + storage_submission_details_paths = [ + obj.key + for obj in BundleStorage.bucket.objects.filter(Prefix="submission_details") + ] + orphaned_submission_details_files = [ + x + for x in storage_submission_details_paths + if x not in set(db_submission_details_paths) + ] + nb_orphaned_files += len(orphaned_submission_details_files) + for file in orphaned_submission_details_files: + size = BundleStorage.size(file) + last_modified = BundleStorage.get_modified_time(file) + inconsistencies["storage"].append({"path": file, "size": size}) + 
orphaned_files_total_size += size + for date in storage_usage_history_day_range: + if last_modified < date: + orphaned_files_size_per_date[date] += size + + # Log the results + log_file = ( + "/app/logs/" + + "db_storage_inconsistency_" + + current_datetime.strftime("%Y%m%d-%H%M%S") + + ".log" + ) + with open(log_file, "w") as file: + file.write("Database <---> Storage Inconsistency\n\n") + file.write(f"Bucket: {BundleStorage.bucket.name}\n") + file.write(f"Datetime: {current_datetime.isoformat()}\n\n") + file.write(f"Missing files: {nb_missing_files} files\n") + for missing_file in inconsistencies["database"]: + file.write( + f'{missing_file["model"]} of id={missing_file["id"]} is missing its {missing_file["field"]}\n' + ) + file.write( + f"\nOrphaned files: {nb_orphaned_files} files for a total of {pretty_bytes(orphaned_files_total_size)} ({orphaned_files_total_size}B)\n" + ) + for orphaned_file in inconsistencies["storage"]: + file.write( + f'{orphaned_file["path"]} {pretty_bytes(orphaned_file["size"])} ({orphaned_file["size"]}B)\n' + ) + + # Save the storage usage history points + for date in [ + last_storage_usage_history_date + timedelta(day) + for day in storage_usage_history_days + ]: + competitions_usage = ( + competitions_datasets.filter(day__lt=date).aggregate(total=Sum("size"))[ + "total" + ] or 0 + ) + users_usage = ( + ( + users_datasets.filter(day__lt=date).aggregate(total=Sum("size"))[ + "total" + ] or 0 + ) + + ( + users_submissions.filter(day__lt=date).aggregate(total=Sum("size"))[ + "total" + ] or 0 + ) + + ( + users_submissions_details.filter(day__lt=date).aggregate( + total=Sum("size") + )["total"] or 0 + ) + ) + admin_data_point = AdminStorageDataPoint.objects.filter(at_date=date).first() + admin_usage = (admin_data_point.backups_total or 0) if admin_data_point else 0 + orphaned_file_usage = Decimal(orphaned_files_size_per_date[date] / 1024) + total_usage = ( + users_usage + admin_usage + orphaned_file_usage + ) # competitions_usage is 
included inside users_usage + storage_usage_history_point = { + "bucket_name": BundleStorage.bucket.name, + "total_usage": total_usage, + "competitions_usage": competitions_usage, + "users_usage": users_usage, + "admin_usage": admin_usage, + "orphaned_file_usage": orphaned_file_usage, + "at_date": date, + } + StorageUsageHistory.objects.create(**storage_usage_history_point) + + # Stop the count! + elapsed_time = time.process_time() - starting_time + logger.info( + "Task create_storage_analytics_snapshot stoped. Duration = {:.3f} seconds".format( + elapsed_time + ) + ) + + +@app.task(queue="site-worker") # 12 hours +def reset_computed_storage_analytics(): + logger.info("Task reset_computed_storage_analytics started") + starting_time = time.process_time() + + # Reset the value of all computed file sizes so they will be re-computed again without any shifting on the next run of the storage analytics task + Submission.objects.all().update( + prediction_result_file_size=None, + scoring_result_file_size=None, + detailed_result_file_size=None, + ) + SubmissionDetails.objects.all().update(file_size=None) + Data.objects.all().update(file_size=None) + + elapsed_time = time.process_time() - starting_time + logger.info( + "Task reset_computed_storage_analytics stoped. 
Duration = {:.3f} seconds".format( + elapsed_time + ) + ) diff --git a/src/apps/api/serializers/competitions.py b/src/apps/api/serializers/competitions.py index 2f580903b..41cd51341 100644 --- a/src/apps/api/serializers/competitions.py +++ b/src/apps/api/serializers/competitions.py @@ -9,7 +9,7 @@ from api.serializers.profiles import CollaboratorSerializer from api.serializers.submissions import SubmissionScoreSerializer from api.serializers.tasks import PhaseTaskInstanceSerializer -from competitions.models import Competition, Phase, Page, CompetitionCreationTaskStatus, CompetitionParticipant +from competitions.models import Competition, Phase, Page, CompetitionCreationTaskStatus, CompetitionParticipant, CompetitionWhiteListEmail from forums.models import Forum from leaderboards.models import Leaderboard from profiles.models import User @@ -24,6 +24,7 @@ class PhaseSerializer(WritableNestedModelSerializer): tasks = serializers.SlugRelatedField(queryset=Task.objects.all(), required=True, allow_null=False, slug_field='key', many=True) status = serializers.SerializerMethodField() + is_final_phase = serializers.SerializerMethodField() class Meta: model = Phase @@ -48,6 +49,14 @@ class Meta: 'is_final_phase', ) + def get_is_final_phase(self, obj): + if len(obj.competition.phases.all()) > 1: + return obj.is_final_phase + elif len(obj.competition.phases.all()) == 1: + obj.is_final_phase = True + obj.save() + return obj.is_final_phase + def get_status(self, obj): now = datetime.now().replace(tzinfo=None) @@ -208,6 +217,12 @@ class Meta: ) +class CompetitionWhitelistSerializer(serializers.ModelSerializer): + class Meta: + model = CompetitionWhiteListEmail + fields = ['email'] + + class CompetitionSerializer(DefaultUserCreateMixin, WritableNestedModelSerializer): created_by = serializers.CharField(source='created_by.username', read_only=True) pages = PageSerializer(many=True) @@ -217,6 +232,7 @@ class CompetitionSerializer(DefaultUserCreateMixin, WritableNestedModelSerialize 
# We're using a Base64 image field here so we can send JSON for create/update of this object, if we wanted # include the logo as a _file_ then we would need to use FormData _not_ JSON. logo = NamedBase64ImageField(required=True, allow_null=True) + whitelist_emails = CompetitionWhitelistSerializer(many=True, required=False) class Meta: model = Competition @@ -238,6 +254,10 @@ class Meta: 'registration_auto_approve', 'queue', 'enable_detailed_results', + 'show_detailed_results_in_submission_panel', + 'show_detailed_results_in_leaderboard', + 'auto_run_submissions', + 'can_participants_make_submissions_public', 'make_programs_available', 'make_input_data_available', 'docker_image', @@ -247,6 +267,7 @@ class Meta: 'reward', 'contact_email', 'report', + 'whitelist_emails' ) def validate_phases(self, phases): @@ -287,6 +308,26 @@ def create(self, validated_data): return instance + def update(self, instance, validated_data): + + # Get the updated whitelist emails from the validated data + updated_whitelist_emails = validated_data.get('whitelist_emails', []) + + # Delete all existing emails + instance.whitelist_emails.all().delete() + + # Save the updated whitelist emails to the instance + for whitelist_email in updated_whitelist_emails: + CompetitionWhiteListEmail.objects.create(competition=instance, email=whitelist_email["email"]) + + # Remove the 'whitelist_emails' key from validated_data to prevent it from being processed again + validated_data.pop('whitelist_emails', None) + + # Continue with the regular update process + super(CompetitionSerializer, self).update(instance, validated_data) + + return instance + class CompetitionUpdateSerializer(CompetitionSerializer): phases = PhaseUpdateSerializer(many=True) @@ -299,6 +340,8 @@ class CompetitionCreateSerializer(CompetitionSerializer): class CompetitionDetailSerializer(serializers.ModelSerializer): created_by = serializers.CharField(source='created_by.username', read_only=True) + owner_display_name = 
serializers.SerializerMethodField() + logo_icon = NamedBase64ImageField(allow_null=True) pages = PageSerializer(many=True) phases = PhaseDetailSerializer(many=True) leaderboards = serializers.SerializerMethodField() @@ -307,6 +350,7 @@ class CompetitionDetailSerializer(serializers.ModelSerializer): participant_count = serializers.IntegerField(read_only=True) submission_count = serializers.IntegerField(read_only=True) queue = QueueSerializer(read_only=True) + whitelist_emails = serializers.SerializerMethodField() class Meta: model = Competition @@ -316,8 +360,10 @@ class Meta: 'published', 'secret_key', 'created_by', + 'owner_display_name', 'created_when', 'logo', + 'logo_icon', 'terms', 'pages', 'phases', @@ -330,6 +376,10 @@ class Meta: 'submission_count', 'queue', 'enable_detailed_results', + 'show_detailed_results_in_submission_panel', + 'show_detailed_results_in_leaderboard', + 'auto_run_submissions', + 'can_participants_make_submissions_public', 'make_programs_available', 'make_input_data_available', 'docker_image', @@ -340,6 +390,7 @@ class Meta: 'reward', 'contact_email', 'report', + 'whitelist_emails', ) def get_leaderboards(self, instance): @@ -352,9 +403,19 @@ def get_leaderboards(self, instance): raise Exception(f'KeyError on context. 
Context: {self.context}') return LeaderboardSerializer(qs, many=True).data + def get_whitelist_emails(self, instance): + whitelist_emails_query = instance.whitelist_emails.all() + whitelist_emails_list = [entry.email for entry in whitelist_emails_query] + return whitelist_emails_list + + def get_owner_display_name(self, obj): + # Get the user's display name if not None, otherwise return username + return obj.created_by.display_name if obj.created_by.display_name else obj.created_by.username + class CompetitionSerializerSimple(serializers.ModelSerializer): - created_by = serializers.CharField(source='created_by.username') + created_by = serializers.CharField(source='created_by.username', read_only=True) + owner_display_name = serializers.SerializerMethodField() participant_count = serializers.IntegerField(read_only=True) class Meta: @@ -363,10 +424,12 @@ class Meta: 'id', 'title', 'created_by', + 'owner_display_name', 'created_when', 'published', 'participant_count', 'logo', + 'logo_icon', 'description', 'competition_type', 'reward', @@ -374,6 +437,14 @@ class Meta: 'report', ) + def get_created_by(self, obj): + # Get the user's display name if not None, otherwise return username + return obj.created_by.display_name if obj.created_by.display_name else obj.created_by.username + + def get_owner_display_name(self, obj): + # Get the user's display name if not None, otherwise return username + return obj.created_by.display_name if obj.created_by.display_name else obj.created_by.username + PageSerializer.competition = CompetitionSerializer(many=True, source='competition') diff --git a/src/apps/api/serializers/datasets.py b/src/apps/api/serializers/datasets.py index 25e069afc..7543afe54 100644 --- a/src/apps/api/serializers/datasets.py +++ b/src/apps/api/serializers/datasets.py @@ -74,7 +74,8 @@ class Meta: class DataDetailSerializer(serializers.ModelSerializer): - created_by = serializers.CharField(source='created_by.username') + created_by = 
serializers.CharField(source='created_by.username', read_only=True) + owner_display_name = serializers.SerializerMethodField() competition = serializers.SerializerMethodField() value = serializers.CharField(source='key', required=False) @@ -83,6 +84,7 @@ class Meta: fields = ( 'id', 'created_by', + 'owner_display_name', 'created_when', 'name', 'type', @@ -108,6 +110,9 @@ def get_competition(self, obj): } return None + def get_owner_display_name(self, instance): + return instance.created_by.display_name if instance.created_by.display_name else instance.created_by.username + class DataGroupSerializer(serializers.ModelSerializer): class Meta: diff --git a/src/apps/api/serializers/queues.py b/src/apps/api/serializers/queues.py index deea441d0..b3d84aee6 100644 --- a/src/apps/api/serializers/queues.py +++ b/src/apps/api/serializers/queues.py @@ -3,8 +3,8 @@ from api.mixins import DefaultUserCreateMixin from queues.models import Queue - from profiles.models import User +from django.db.models import Q class OrganizerSerializer(serializers.ModelSerializer): @@ -86,3 +86,30 @@ class Meta: 'created_when', 'is_owner', ) + + +class QueueListSerializer(QueueSerializer): + competitions = serializers.SerializerMethodField() + + class Meta(QueueSerializer.Meta): + fields = QueueSerializer.Meta.fields + ('competitions',) + + def get_competitions(self, obj): + # get user from the context request + user = self.context['request'].user + + # for super user return all competiitons using this queue + # for admin return competitions where this user is organizer using this queue + # for non-admin return public competitions using this queue + if user.is_superuser: + # Fetch all competitions + competitions = obj.competitions.all().values('id', 'title') + else: + # Fetch all competitions where user is organizer or competition is published + competitions = obj.competitions.filter( + Q(published=True) | + Q(created_by=user) | + Q(collaborators=user) + ).values('id', 'title') + + return 
competitions diff --git a/src/apps/api/serializers/submissions.py b/src/apps/api/serializers/submissions.py index ef3cdf39b..05191e8e3 100644 --- a/src/apps/api/serializers/submissions.py +++ b/src/apps/api/serializers/submissions.py @@ -41,6 +41,8 @@ class SubmissionSerializer(serializers.ModelSerializer): on_leaderboard = serializers.BooleanField(read_only=True) task = TaskSerializer() created_when = serializers.DateTimeField(format="%Y-%m-%d %H:%M") + auto_run = serializers.SerializerMethodField(read_only=True) + can_make_submissions_public = serializers.SerializerMethodField(read_only=True) class Meta: model = Submission @@ -66,6 +68,8 @@ class Meta: 'leaderboard', 'on_leaderboard', 'task', + 'auto_run', + 'can_make_submissions_public', ) read_only_fields = ( 'pk', @@ -79,6 +83,14 @@ class Meta: def get_filename(self, instance): return basename(instance.data.data_file.name) + def get_auto_run(self, instance): + # returns this submission's competition auto_run_submissions Flag + return instance.phase.competition.auto_run_submissions + + def get_can_make_submissions_public(self, instance): + # returns this submission's competition can_participants_make_submissions_public Flag + return instance.phase.competition.can_participants_make_submissions_public + class SubmissionLeaderBoardSerializer(serializers.ModelSerializer): scores = SubmissionScoreSerializer(many=True) @@ -86,6 +98,7 @@ class SubmissionLeaderBoardSerializer(serializers.ModelSerializer): display_name = serializers.CharField(source='owner.display_name') slug_url = serializers.CharField(source='owner.slug_url') organization = SimpleOrganizationSerializer(allow_null=True) + created_when = serializers.DateTimeField(format="%Y-%m-%d %H:%M") class Meta: model = Submission @@ -100,7 +113,8 @@ class Meta: 'display_name', 'slug_url', 'organization', - 'detailed_result' + 'detailed_result', + 'created_when' ) extra_kwargs = { "scores": {"read_only": True}, @@ -149,9 +163,12 @@ def get_filename(self, instance): 
def create(self, validated_data): tasks = validated_data.pop('tasks', None) - sub = super().create(validated_data) - sub.start(tasks=tasks) + + # Check if auto_run_submissions is enabled then run the submission + # Otherwise organizer will run manually + if sub.phase.competition.auto_run_submissions: + sub.start(tasks=tasks) return sub diff --git a/src/apps/api/serializers/tasks.py b/src/apps/api/serializers/tasks.py index b98ce36ea..62080890d 100644 --- a/src/apps/api/serializers/tasks.py +++ b/src/apps/api/serializers/tasks.py @@ -90,7 +90,8 @@ def get_validated(self, instance): class TaskDetailSerializer(WritableNestedModelSerializer): - created_by = serializers.CharField(source='created_by.username', read_only=True, required=False) + created_by = serializers.CharField(source='created_by.username', read_only=True) + owner_display_name = serializers.SerializerMethodField() input_data = DataSimpleSerializer(read_only=True) ingestion_program = DataSimpleSerializer(read_only=True) reference_data = DataSimpleSerializer(read_only=True) @@ -107,6 +108,7 @@ class Meta: 'description', 'key', 'created_by', + 'owner_display_name', 'created_when', 'is_public', 'validated', @@ -126,18 +128,26 @@ def get_validated(self, task): def get_shared_with(self, instance): return self.context['shared_with'][instance.pk] + def get_owner_display_name(self, instance): + # Get the user's display name if not None, otherwise return username + return instance.created_by.display_name if instance.created_by.display_name else instance.created_by.username + class TaskListSerializer(serializers.ModelSerializer): solutions = SolutionListSerializer(many=True, required=False, read_only=True) value = serializers.CharField(source='key', required=False) competitions = serializers.SerializerMethodField() shared_with = serializers.SerializerMethodField() + created_by = serializers.CharField(source='created_by.username', read_only=True) + owner_display_name = serializers.SerializerMethodField() class Meta: 
model = Task fields = ( 'id', 'created_when', + 'created_by', + 'owner_display_name', 'key', 'name', 'solutions', @@ -159,6 +169,10 @@ def get_competitions(self, instance): def get_shared_with(self, instance): return self.context['shared_with'][instance.pk] + def get_owner_display_name(self, instance): + # Get the user's display name if not None, otherwise return username + return instance.created_by.display_name if instance.created_by.display_name else instance.created_by.username + class PhaseTaskInstanceSerializer(serializers.HyperlinkedModelSerializer): task = serializers.SlugRelatedField(queryset=Task.objects.all(), required=True, allow_null=False, slug_field='key', diff --git a/src/apps/api/tests/test_datasets.py b/src/apps/api/tests/test_datasets.py index 664178116..944981757 100644 --- a/src/apps/api/tests/test_datasets.py +++ b/src/apps/api/tests/test_datasets.py @@ -3,6 +3,7 @@ from rest_framework.test import APITestCase from datasets.models import Data from factories import UserFactory, DataFactory +from utils.data import pretty_bytes faker = Factory.create() @@ -11,7 +12,7 @@ class DatasetAPITests(APITestCase): def setUp(self): self.creator = UserFactory(username='creator', password='creator') - self.existing_dataset = DataFactory(created_by=self.creator, name="Test!") + self.existing_dataset = DataFactory(created_by=self.creator, name="Test!", file_size=1024) def test_dataset_api_checks_duplicate_names_for_same_user(self): self.client.login(username='creator', password='creator') @@ -22,6 +23,7 @@ def test_dataset_api_checks_duplicate_names_for_same_user(self): 'type': Data.COMPETITION_BUNDLE, 'request_sassy_file_name': faker.file_name(), 'file_name': faker.file_name(), + 'file_size': 1024, }) assert resp.status_code == 400 @@ -32,6 +34,7 @@ def test_dataset_api_checks_duplicate_names_for_same_user(self): 'name': 'Test!', 'type': Data.COMPETITION_BUNDLE, 'request_sassy_file_name': faker.file_name(), + 'file_size': 1024, }) assert resp.status_code == 
200 @@ -43,3 +46,34 @@ def test_dataset_api_checks_for_authentication(self): 'request_sassy_file_name': faker.file_name(extension='.zip'), }) assert resp.status_code == 403 + + def test_dataset_api_check_quota(self): + self.client.login(username='creator', password='creator') + + quota = float(self.creator.quota) + storage_used = float(self.creator.get_used_storage_space()) + available_space = quota - storage_used + file_size = 1024 * 1024 * 1024 * 1024 + + # Fake upload a very big dataset + resp = self.client.post(reverse("data-list"), { + 'name': 'new-file-test', + 'type': Data.COMPETITION_BUNDLE, + 'request_sassy_file_name': faker.file_name(), + 'file_name': faker.file_name(), + 'file_size': file_size, + }) + + assert resp.status_code == 400 + assert resp.data["data_file"][0] == f'Insufficient space. Your available space is {pretty_bytes(available_space)}. The file size is {pretty_bytes(file_size)}. Please free up some space and try again. You can manage your files in the Resources page.' 
+ + # Fake upload a small file + file_size = available_space - 1024 + resp = self.client.post(reverse("data-list"), { + 'name': 'new-file-test', + 'type': Data.COMPETITION_BUNDLE, + 'request_sassy_file_name': faker.file_name(), + 'file_name': faker.file_name(), + 'file_size': file_size, + }) + assert resp.status_code == 201 diff --git a/src/apps/api/urls.py b/src/apps/api/urls.py index 0bb521b3e..f6ae6bce3 100644 --- a/src/apps/api/urls.py +++ b/src/apps/api/urls.py @@ -57,11 +57,17 @@ # User quota and cleanup path('user_quota_cleanup/', quota.user_quota_cleanup, name="user_quota_cleanup"), + path('user_quota/', quota.user_quota, name="user_quota"), path('delete_unused_tasks/', quota.delete_unused_tasks, name="delete_unused_tasks"), path('delete_unused_datasets/', quota.delete_unused_datasets, name="delete_unused_datasets"), path('delete_unused_submissions/', quota.delete_unused_submissions, name="delete_unused_submissions"), path('delete_failed_submissions/', quota.delete_failed_submissions, name="delete_failed_submissions"), + # Analytics + path('analytics/storage_usage_history/', analytics.storage_usage_history, name='storage_usage_history'), + path('analytics/competitions_usage/', analytics.competitions_usage, name='competitions_usage'), + path('analytics/users_usage/', analytics.users_usage, name='users_usage'), + # API Docs re_path(r'docs(?P\.json|\.yaml)$', schema_view.without_ui(cache_timeout=0), name='schema-json'), path('docs/', schema_view.with_ui('swagger', cache_timeout=0), name='schema-swagger-ui'), diff --git a/src/apps/api/views/analytics.py b/src/apps/api/views/analytics.py index 084b042e7..26cb8f8ef 100644 --- a/src/apps/api/views/analytics.py +++ b/src/apps/api/views/analytics.py @@ -1,12 +1,16 @@ from django.db.models import Count, F from django.contrib.auth import get_user_model from django.http import Http404 +from rest_framework import status +from rest_framework.exceptions import PermissionDenied from rest_framework.views import APIView from 
rest_framework.response import Response from rest_framework.renderers import JSONRenderer from rest_framework.filters import BaseFilterBackend +from rest_framework.decorators import api_view from rest_framework_csv import renderers as r from competitions.models import Competition, Submission +from analytics.models import StorageUsageHistory, CompetitionStorageDataPoint, UserStorageDataPoint from api.serializers.analytics import AnalyticsSerializer import datetime @@ -158,3 +162,127 @@ def get(self, request): 'end_date': end_date, 'time_unit': time_unit, }) + + +@api_view(["GET"]) +def storage_usage_history(request): + """ + Gets the storage usage timeline between the 2 provided dates at the given resolution + """ + if not request.user.is_superuser: + raise PermissionDenied(detail="Admin only") + + storage_usage_history = {} + last_storage_usage_history_snapshot = StorageUsageHistory.objects.order_by("at_date").last() + if last_storage_usage_history_snapshot: + start_date = request.query_params.get("start_date", (datetime.datetime.today() - datetime.timedelta(weeks=4)).strftime("%Y-%m-%d")) + end_date = request.query_params.get("end_date", datetime.datetime.today().strftime("%Y-%m-%d")) + resolution = request.query_params.get("resolution", "day") + + query = StorageUsageHistory.objects.filter( + bucket_name=last_storage_usage_history_snapshot.bucket_name, + at_date__range=(start_date, end_date), + ).dates("at_date", resolution).values() + for su in query.order_by("-at_date"): + storage_usage_history[su['datefield'].isoformat()] = { + 'total_usage': su['total_usage'], + 'competitions_usage': su['competitions_usage'], + 'users_usage': su['users_usage'], + 'admin_usage': su['admin_usage'], + 'orphaned_file_usage': su['orphaned_file_usage'] + } + + response = { + "last_storage_calculation_date": last_storage_usage_history_snapshot.created_at.isoformat() if last_storage_usage_history_snapshot else None, + "storage_usage_history": storage_usage_history + } + + return 
Response(response, status=status.HTTP_200_OK) + + +@api_view(["GET"]) +def competitions_usage(request): + """ + Gets the competitions usage between the 2 provided dates at the given resolution + """ + if not request.user.is_superuser: + raise PermissionDenied(detail="Admin only") + + competitions_usage = {} + last_competition_storage_snapshot = CompetitionStorageDataPoint.objects.order_by("at_date").last() + if last_competition_storage_snapshot: + start_date = request.query_params.get("start_date", (datetime.datetime.today() - datetime.timedelta(weeks=4)).strftime("%Y-%m-%d")) + end_date = request.query_params.get("end_date", datetime.datetime.today().strftime("%Y-%m-%d")) + resolution = request.query_params.get("resolution", "day") + + query = CompetitionStorageDataPoint.objects.filter( + at_date__range=(start_date, end_date), + ).dates("at_date", resolution).values( + 'id', + 'competition__id', + 'competition__title', + 'competition__created_by__username', + 'competition__created_by__email', + 'competition__created_when', + 'datasets_total', + 'datefield' + ) + for su in query.order_by("-datefield", "competition__id"): + competitions_usage.setdefault(su['datefield'].isoformat(), {})[su['competition__id']] = { + 'snapshot_id': su['id'], + 'title': su['competition__title'], + 'organizer': su['competition__created_by__username'] + " (" + su['competition__created_by__email'] + ")", + 'created_when': su['competition__created_when'], + 'datasets': su['datasets_total'], + } + + response = { + "last_storage_calculation_date": last_competition_storage_snapshot.at_date.isoformat() if last_competition_storage_snapshot else None, + "competitions_usage": competitions_usage + } + + return Response(response, status=status.HTTP_200_OK) + + +@api_view(["GET"]) +def users_usage(request): + """ + Gets the users usage between the 2 provided dates at the given resolution + """ + if not request.user.is_superuser: + raise PermissionDenied(detail="Admin only") + + users_usage = {} + 
last_user_storage_snapshot = UserStorageDataPoint.objects.order_by("at_date").last() + if last_user_storage_snapshot: + start_date = request.query_params.get("start_date", (datetime.datetime.today() - datetime.timedelta(weeks=4)).strftime("%Y-%m-%d")) + end_date = request.query_params.get("end_date", datetime.datetime.today().strftime("%Y-%m-%d")) + resolution = request.query_params.get("resolution", "day") + + query = UserStorageDataPoint.objects.filter( + at_date__range=(start_date, end_date), + ).dates("at_date", resolution).values( + 'id', + 'user__id', + 'user__username', + 'user__email', + 'user__date_joined', + 'datasets_total', + 'submissions_total', + 'datefield' + ) + for su in query.order_by("-datefield", "user__id"): + users_usage.setdefault(su['datefield'].isoformat(), {})[su['user__id']] = { + 'snapshot_id': su['id'], + 'name': su['user__username'] + " (" + su['user__email'] + ")", + 'date_joined': su['user__date_joined'], + 'datasets': su['datasets_total'], + 'submissions': su['submissions_total'], + } + + response = { + "last_storage_calculation_date": last_user_storage_snapshot.at_date.isoformat() if last_user_storage_snapshot else None, + "users_usage": users_usage + } + + return Response(response, status=status.HTTP_200_OK) diff --git a/src/apps/api/views/competitions.py b/src/apps/api/views/competitions.py index d58c057e3..11f5f4aad 100644 --- a/src/apps/api/views/competitions.py +++ b/src/apps/api/views/competitions.py @@ -18,8 +18,6 @@ from rest_framework.response import Response from rest_framework.renderers import JSONRenderer from rest_framework_csv.renderers import CSVRenderer -from rest_framework_extensions.cache.decorators import cache_response -from rest_framework_extensions.key_constructor.constructors import DefaultListKeyConstructor from api.pagination import LargePagination from api.renderers import ZipRenderer from rest_framework.viewsets import ModelViewSet @@ -38,6 +36,7 @@ from api.permissions import IsOrganizerOrCollaborator 
from datetime import datetime from django.db import transaction +from django.conf import settings class CompetitionViewSet(ModelViewSet): @@ -111,32 +110,34 @@ def get_queryset(self): # not called from search bar # not called with a valid secret key if (not mine) and (not participating_in) and (not secret_key) and (not search_query): - - # Return the following --- - # All competitions which belongs to you (private or public) - # And competitions where you are admin - # And public competitions - # And competitions where you are approved participant - # this filters out all private compettions from other users - base_qs = qs.filter( - (Q(created_by=self.request.user)) | - (Q(collaborators__in=[self.request.user])) | - (Q(published=True) & ~Q(created_by=self.request.user)) | - (Q(participants__user=self.request.user) & Q(participants__status="approved")) - ) - - # Additional condition of action - # allow private competition when action is register and has valid secret key - if self.request.method == 'POST' and self.action == 'register': - # get secret_key from request data - register_secret_key = self.request.data.get('secret_key', None) - # use secret key if available - if register_secret_key: - qs = base_qs | qs.filter(Q(secret_key=register_secret_key)) + # If authenticated user is not super user + if not self.request.user.is_superuser: + # Return the following --- + # All competitions which belongs to you (private or public) + # And competitions where you are admin + # And public competitions + # And competitions where you are approved participant + # this filters out all private compettions from other users + base_qs = qs.filter( + (Q(created_by=self.request.user)) | + (Q(collaborators__in=[self.request.user])) | + (Q(published=True) & ~Q(created_by=self.request.user)) | + (Q(participants__user=self.request.user) & Q(participants__status="approved")) + ) + + # Additional condition of action + # allow private competition when action is register and has valid secret 
key + if self.request.method == 'POST' and self.action == 'register': + # get secret_key from request data + register_secret_key = self.request.data.get('secret_key', None) + # use secret key if available + if register_secret_key: + qs = base_qs | qs.filter(Q(secret_key=register_secret_key)) + else: + qs = base_qs else: qs = base_qs - else: - qs = base_qs + # select distinct competitions qs = qs.distinct() @@ -194,6 +195,8 @@ def get_permissions(self): def get_serializer_class(self): if self.action == 'list': return CompetitionSerializerSimple + if self.action == 'public': + return CompetitionSerializerSimple elif self.action in ['get_phases', 'results', 'get_leaderboard_frontend_object']: return LeaderboardPhaseSerializer elif self.request.method == 'GET': @@ -250,7 +253,7 @@ def update(self, request, *args, **kwargs): # save leaderboard individually, then pass pk to each phase if 'leaderboards' in data: leaderboard_data = data['leaderboards'][0] - if(leaderboard_data['id']): + if leaderboard_data['id']: leaderboard_instance = Leaderboard.objects.get(id=leaderboard_data['id']) leaderboard = LeaderboardSerializer(leaderboard_instance, data=data['leaderboards'][0]) else: @@ -295,8 +298,16 @@ def update(self, request, *args, **kwargs): phase['starting_kit'] = Data.objects.filter(key=phase['starting_kit']['value'])[0].id except TypeError: phase['starting_kit'] = None + + # Get whitelist emails from data + whitelist_emails = data['whitelist_emails'] + # Delete white_list emails from data because it is not in a list of dict format, it is just list of emails + data.pop('whitelist_emails', None) + # Loop over whitelist emails and add them back to whitelist emails in dict format + for email in whitelist_emails: + data.setdefault('whitelist_emails', []).append({'email': email}) + serializer = self.get_serializer(instance, data=data, partial=partial) - type(serializer) serializer.is_valid(raise_exception=True) self.perform_update(serializer) @@ -339,8 +350,13 @@ def 
register(self, request, pk): participant.status = 'approved' send_participation_accepted_emails(participant) else: - participant.status = 'pending' - send_participation_requested_emails(participant) + # check if user is in whitelist emails then approve directly + if user.email in list(competition.whitelist_emails.values_list('email', flat=True)): + participant.status = 'approved' + send_participation_accepted_emails(participant) + else: + participant.status = 'pending' + send_participation_requested_emails(participant) participant.save() return Response({'participant_status': participant.status}, status=status.HTTP_201_CREATED) @@ -517,11 +533,9 @@ def create_dump(self, request, pk=None): serializer = CompetitionCreationTaskStatusSerializer({"status": "Success. Competition dump is being created."}) return Response(serializer.data, status=201) - @cache_response(key_func=DefaultListKeyConstructor()) @action(detail=False, methods=('GET',), pagination_class=LargePagination) def public(self, request): - qs = self.get_queryset() - qs = qs.filter(published=True) + qs = Competition.objects.filter(published=True) qs = qs.order_by('-id') queryset = self.filter_queryset(qs) @@ -614,15 +628,56 @@ def manually_migrate(self, request, pk): @action(detail=True, url_name='rerun_submissions') def rerun_submissions(self, request, pk): + phase = self.get_object() comp = phase.competition - if request.user not in comp.all_organizers and not request.user.is_superuser: - raise PermissionDenied('You do not have permission to re-run submissions') - submissions = phase.submissions.all() - for submission in submissions: - submission.re_run() - rerun_count = len(submissions) - return Response({"count": rerun_count}) + + # Get submissions with no parent + submissions = phase.submissions.filter(parent__isnull=True) + + can_re_run_submissions = False + error_message = "" + + # Super admin can rerun without any restrictions + if request.user.is_superuser: + can_re_run_submissions = True + + # 
competition admin can run only if + elif request.user in comp.all_organizers: + + # submissions are in limit + if len(submissions) <= int(settings.RERUN_SUBMISSION_LIMIT): + can_re_run_submissions = True + + # submissions are not in limit + else: + # Codabemch public queue + if comp.queue is None: + can_re_run_submissions = False + error_message = f"You cannot rerun more than {settings.RERUN_SUBMISSION_LIMIT} submissions on Codabench public queue! Contact us on `info@codalab.org` to request a rerun." + + # Other queue where user is not owner and not organizer + elif request.user != comp.queue.owner and request.user not in comp.queue.organizers.all(): + can_re_run_submissions = False + error_message = f"You cannot rerun more than {settings.RERUN_SUBMISSION_LIMIT} submissions on a queue which is not yours! Contact us on `info@codalab.org` to request a rerun." + + # User can rerun submissions where he is owner or organizer + else: + can_re_run_submissions = True + + else: + can_re_run_submissions = False + error_message = 'You do not have permission to re-run submissions' + + # error when user is not super user or admin of the competition + if can_re_run_submissions: + # rerun all submissions + for submission in submissions: + submission.re_run() + rerun_count = len(submissions) + return Response({"count": rerun_count}) + else: + raise PermissionDenied(error_message) @swagger_auto_schema(responses={200: PhaseResultsSerializer}) @action(detail=True, methods=['GET']) @@ -647,20 +702,20 @@ def get_leaderboard(self, request, pk): submission_detailed_results = {} for submission in query['submissions']: # count number of entries/number of submissions for the owner of this submission for this phase - # count all submissions with no parent and count all parents without counting the children + # count all submissions except: + # - child submissions (submissions who has a parent i.e. 
parent field is not null) + # - Failed submissions + # - Cancelled submissions num_entries = Submission.objects.filter( - Q(owner__username=submission['owner']) | Q(parent__owner__username=submission['owner']), + Q(owner__username=submission['owner']) | + Q(parent__owner__username=submission['owner']), phase=phase, ).exclude( - parent__isnull=False + Q(status=Submission.FAILED) | + Q(status=Submission.CANCELLED) | + Q(parent__isnull=False) ).count() - # get date of last submission by the owner of this submission for this phase - last_entry_date = Submission.objects.filter(owner__username=submission['owner'], phase=phase)\ - .values('created_when')\ - .order_by('-created_when')[0]['created_when']\ - .strftime('%Y-%m-%d') - submission_key = f"{submission['owner']}{submission['parent'] or submission['id']}" # gather detailed result from submissions for each task @@ -684,7 +739,7 @@ def get_leaderboard(self, request, pk): 'slug_url': submission['slug_url'], 'organization': submission['organization'], 'num_entries': num_entries, - 'last_entry_date': last_entry_date + 'created_when': submission['created_when'] }) for score in submission['scores']: diff --git a/src/apps/api/views/datasets.py b/src/apps/api/views/datasets.py index fd2ae17cb..2175b9dc0 100644 --- a/src/apps/api/views/datasets.py +++ b/src/apps/api/views/datasets.py @@ -14,7 +14,7 @@ from api.serializers import datasets as serializers from datasets.models import Data, DataGroup from competitions.models import CompetitionCreationTaskStatus -from utils.data import make_url_sassy +from utils.data import make_url_sassy, pretty_bytes class DataViewSet(ModelViewSet): @@ -79,6 +79,17 @@ def get_serializer_class(self): return serializers.DataSerializer def create(self, request, *args, **kwargs): + # Check User quota + storage_used = float(request.user.get_used_storage_space()) + quota = float(request.user.quota) + file_size = float(request.data['file_size']) + if storage_used + file_size > quota: + available_space = 
pretty_bytes(quota - storage_used) + file_size = pretty_bytes(file_size) + message = f'Insufficient space. Your available space is {available_space}. The file size is {file_size}. Please free up some space and try again. You can manage your files in the Resources page.' + return Response({'data_file': [message]}, status=status.HTTP_400_BAD_REQUEST) + + # All good, let's proceed serializer = self.get_serializer(data=request.data) serializer.is_valid(raise_exception=True) new_dataset = serializer.save() # request_sassy_file_name is temporarily set via this serializer diff --git a/src/apps/api/views/queues.py b/src/apps/api/views/queues.py index db96188c6..23e486e85 100644 --- a/src/apps/api/views/queues.py +++ b/src/apps/api/views/queues.py @@ -13,7 +13,7 @@ class QueueViewSet(ModelViewSet): queryset = Queue.objects.all() - serializer_class = serializers.QueueSerializer + serializer_class = serializers.QueueListSerializer filter_fields = ('owner', 'is_public', 'name') filter_backends = (DjangoFilterBackend, SearchFilter) search_fields = ('name',) @@ -29,7 +29,7 @@ def get_queryset(self): def get_serializer_class(self): if self.request.method == 'GET': - return serializers.QueueSerializer + return serializers.QueueListSerializer else: return serializers.QueueCreationSerializer diff --git a/src/apps/api/views/quota.py b/src/apps/api/views/quota.py index 869861576..4d9368e82 100644 --- a/src/apps/api/views/quota.py +++ b/src/apps/api/views/quota.py @@ -50,6 +50,13 @@ def user_quota_cleanup(request): }) +@api_view(["GET"]) +def user_quota(request): + quota = request.user.quota + storage_used = request.user.get_used_storage_space() + return Response({"quota": quota, "storage_used": storage_used}) + + @api_view(['DELETE']) def delete_unused_tasks(request): try: diff --git a/src/apps/api/views/submissions.py b/src/apps/api/views/submissions.py index d4d82fba2..fccaeefd4 100644 --- a/src/apps/api/views/submissions.py +++ b/src/apps/api/views/submissions.py @@ -1,11 +1,11 @@ 
import json import uuid +import logging from django.db.models import Q from django_filters.rest_framework import DjangoFilterBackend from rest_framework import status from rest_framework.decorators import api_view, permission_classes, action -from django.http import Http404 from rest_framework.exceptions import PermissionDenied, ValidationError from rest_framework.filters import SearchFilter from rest_framework.generics import get_object_or_404 @@ -14,14 +14,17 @@ from rest_framework.settings import api_settings from rest_framework.viewsets import ModelViewSet from rest_framework_csv import renderers +from django.core.files.base import ContentFile from profiles.models import Organization, Membership from tasks.models import Task from api.serializers.submissions import SubmissionCreationSerializer, SubmissionSerializer, SubmissionFilesSerializer -from competitions.models import Submission, Phase, CompetitionParticipant +from competitions.models import Submission, SubmissionDetails, Phase, CompetitionParticipant from leaderboards.strategies import put_on_leaderboard_by_submission_rule from leaderboards.models import SubmissionScore, Column, Leaderboard +logger = logging.getLogger() + class SubmissionViewSet(ModelViewSet): queryset = Submission.objects.all().order_by('-pk') @@ -50,9 +53,46 @@ def check_object_permissions(self, request, obj): hostname = request.data['status_details'].replace('scoring_hostname-', '') obj.scoring_worker_hostname = hostname obj.save() + + # check if type is in request data. 
type can have the following values + # - Docker_Image_Pull_Fail + # - Execution_Time_Limit_Exceeded + if "type" in self.request.data.keys(): + + if request.data["type"] in ["Docker_Image_Pull_Fail", "Execution_Time_Limit_Exceeded"]: + + # Get the error message + error_message = request.data['error_message'] + + # Set file name to ingestion std error as default + error_file_name = "prediction_ingestion_stderr" + + # Change error file name to scoring_stderr when error occurs during scoring + if request.data.get("is_scoring", "False") == "True": + error_file_name = "scoring_stderr" + + try: + # Get submission detail for this submission + submission_detail = SubmissionDetails.objects.get( + name=error_file_name, + submission=obj, + ) + + # Read the existing content from the file + existing_content = submission_detail.data_file.read().decode("utf-8") + + # Append the new error message to the existing content + modified_content = existing_content + "\n" + error_message + + # write error message to the file + submission_detail.data_file.save(submission_detail.data_file.name, ContentFile(modified_content.encode("utf-8"))) + + except SubmissionDetails.DoesNotExist: + logger.warning("SubmissionDetails object not found.") + not_bot_user = self.request.user.is_authenticated and not self.request.user.is_bot - if self.action in ['update_fact_sheet', 're_run_submission']: + if self.action in ['update_fact_sheet', 'run_submission', 're_run_submission']: # get_queryset will stop us from re-running something we're not supposed to pass elif not self.request.user.is_authenticated or not_bot_user: @@ -165,14 +205,24 @@ def has_admin_permission(self, user, submission): @action(detail=True, methods=('POST', 'DELETE')) def submission_leaderboard_connection(self, request, pk): + + # get submission submission = self.get_object() + + # get submission phase phase = submission.phase - if not (request.user.is_superuser or request.user == submission.owner): - if not 
phase.competition.collaborators.filter(pk=request.user.pk).exists(): - raise Http404 + # only super user, owner of submission and competition organizer can proceed + if not ( + request.user.is_superuser or + request.user == submission.owner or + request.user in phase.competition.all_organizers + ): + raise PermissionDenied("You cannot perform this action, contact the competition organizer!") + + # only super user and with these leaderboard rules (FORCE_LAST, FORCE_BEST, FORCE_LATEST_MULTIPLE) can proceed if submission.phase.leaderboard.submission_rule in Leaderboard.AUTO_SUBMISSION_RULES and not request.user.is_superuser: - raise ValidationError("Users are not allowed to edit the leaderboard on this Competition") + raise PermissionDenied("Users are not allowed to edit the leaderboard on this Competition") if request.method == 'POST': # Removing any existing submissions on leaderboard unless multiples are allowed @@ -187,7 +237,7 @@ def submission_leaderboard_connection(self, request, pk): if request.method == 'DELETE': if submission.phase.leaderboard.submission_rule not in [Leaderboard.ADD_DELETE, Leaderboard.ADD_DELETE_MULTIPLE]: - raise ValidationError("You are not allowed to remove a submission on this phase") + raise PermissionDenied("You are not allowed to remove a submission on this phase") submission.leaderboard = None submission.save() Submission.objects.filter(parent=submission).update(leaderboard=None) @@ -205,6 +255,21 @@ def cancel_submission(self, request, pk): canceled = submission.cancel() return Response({'canceled': canceled}) + @action(detail=True, methods=('POST',)) + def run_submission(self, request, pk): + submission = self.get_object() + + # Only organizer of the competition can run the submission + if not self.has_admin_permission(request.user, submission): + raise PermissionDenied('You do not have permission to run this submission') + + # Allow only to run a submission with status `Submitting` + if submission.status != Submission.SUBMITTING: 
+ raise PermissionDenied('Cannot run a submission which is not in submitting status') + + new_sub = submission.run() + return Response({'id': new_sub.id}) + @action(detail=True, methods=('POST',)) def re_run_submission(self, request, pk): submission = self.get_object() @@ -229,7 +294,14 @@ def re_run_submission(self, request, pk): rerun_kwargs = {} new_sub = submission.re_run(**rerun_kwargs) - return Response({'id': new_sub.id}) + if new_sub is None: + # return error + return Response({ + "error_msg": "You cannot rerun this submission because one or more tasks this submission was running are deleted, resubmit the submission or contact the competition organizer!"}, + status=status.HTTP_404_NOT_FOUND + ) + else: + return Response({'id': new_sub.id}) @action(detail=False, methods=('POST',)) def re_run_many_submissions(self, request): @@ -280,15 +352,15 @@ def get_detail_result(self, request, pk): ) else: return Response({ - "error_msg": "Visualizations are disabled"}, + "error_msg": "Detailed results are disable for this competition!"}, status=status.HTTP_404_NOT_FOUND ) @action(detail=True, methods=('GET',)) def toggle_public(self, request, pk): submission = super().get_object() - if not self.has_admin_permission(request.user, submission): - raise PermissionDenied(f'You do not have permission to publish this submissions') + if not submission.phase.competition.can_participants_make_submissions_public: + raise PermissionDenied("You do not have permission to make this submissions public/private") is_public = not submission.is_public submission.data.is_public = is_public submission.data.save(send=False) diff --git a/src/apps/competitions/migrations/0035_auto_20230914_1319.py b/src/apps/competitions/migrations/0035_auto_20230914_1319.py new file mode 100644 index 000000000..60e6cd96a --- /dev/null +++ b/src/apps/competitions/migrations/0035_auto_20230914_1319.py @@ -0,0 +1,33 @@ +# Generated by Django 2.2.17 on 2023-09-14 13:19 + +from django.db import migrations, models 
+ + +class Migration(migrations.Migration): + + dependencies = [ + ('competitions', '0034_auto_20230727_1147'), + ] + + operations = [ + migrations.AddField( + model_name='submission', + name='detailed_result_file_size', + field=models.DecimalField(blank=True, decimal_places=2, max_digits=10, null=True), + ), + migrations.AddField( + model_name='submission', + name='prediction_result_file_size', + field=models.DecimalField(blank=True, decimal_places=2, max_digits=10, null=True), + ), + migrations.AddField( + model_name='submission', + name='scoring_result_file_size', + field=models.DecimalField(blank=True, decimal_places=2, max_digits=10, null=True), + ), + migrations.AddField( + model_name='submissiondetails', + name='file_size', + field=models.DecimalField(blank=True, decimal_places=2, max_digits=10, null=True), + ), + ] diff --git a/src/apps/competitions/migrations/0040_auto_20231113_1103.py b/src/apps/competitions/migrations/0040_auto_20231113_1103.py new file mode 100644 index 000000000..2dc06eb14 --- /dev/null +++ b/src/apps/competitions/migrations/0040_auto_20231113_1103.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.17 on 2023-11-13 11:03 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('competitions', '0039_merge_20230906_1305'), + ] + + operations = [ + migrations.AlterField( + model_name='phase', + name='has_max_submissions', + field=models.BooleanField(default=True), + ), + ] diff --git a/src/apps/competitions/migrations/0040_competitionwhitelistemail.py b/src/apps/competitions/migrations/0040_competitionwhitelistemail.py new file mode 100644 index 000000000..5be18d55a --- /dev/null +++ b/src/apps/competitions/migrations/0040_competitionwhitelistemail.py @@ -0,0 +1,22 @@ +# Generated by Django 2.2.17 on 2023-11-12 14:44 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('competitions', 
'0039_merge_20230906_1305'), + ] + + operations = [ + migrations.CreateModel( + name='CompetitionWhiteListEmail', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('email', models.EmailField(max_length=254)), + ('competition', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='competitions.Competition')), + ], + ), + ] diff --git a/src/apps/competitions/migrations/0040_merge_20231123_1456.py b/src/apps/competitions/migrations/0040_merge_20231123_1456.py new file mode 100644 index 000000000..3172cbb95 --- /dev/null +++ b/src/apps/competitions/migrations/0040_merge_20231123_1456.py @@ -0,0 +1,14 @@ +# Generated by Django 2.2.17 on 2023-11-23 14:56 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('competitions', '0039_merge_20230906_1305'), + ('competitions', '0035_auto_20230914_1319'), + ] + + operations = [ + ] diff --git a/src/apps/competitions/migrations/0041_auto_20231112_1446.py b/src/apps/competitions/migrations/0041_auto_20231112_1446.py new file mode 100644 index 000000000..d65214d5f --- /dev/null +++ b/src/apps/competitions/migrations/0041_auto_20231112_1446.py @@ -0,0 +1,19 @@ +# Generated by Django 2.2.17 on 2023-11-12 14:46 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('competitions', '0040_competitionwhitelistemail'), + ] + + operations = [ + migrations.AlterField( + model_name='competitionwhitelistemail', + name='competition', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='whitelist_emails', to='competitions.Competition'), + ), + ] diff --git a/src/apps/competitions/migrations/0042_merge_20231120_1551.py b/src/apps/competitions/migrations/0042_merge_20231120_1551.py new file mode 100644 index 000000000..de243fa02 --- /dev/null +++ 
b/src/apps/competitions/migrations/0042_merge_20231120_1551.py @@ -0,0 +1,14 @@ +# Generated by Django 2.2.17 on 2023-11-20 15:51 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('competitions', '0041_auto_20231112_1446'), + ('competitions', '0040_auto_20231113_1103'), + ] + + operations = [ + ] diff --git a/src/apps/competitions/migrations/0043_merge_20231213_0948.py b/src/apps/competitions/migrations/0043_merge_20231213_0948.py new file mode 100644 index 000000000..c74629a95 --- /dev/null +++ b/src/apps/competitions/migrations/0043_merge_20231213_0948.py @@ -0,0 +1,14 @@ +# Generated by Django 2.2.17 on 2023-12-13 09:48 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('competitions', '0042_merge_20231120_1551'), + ('competitions', '0035_auto_20230914_1319'), + ] + + operations = [ + ] diff --git a/src/apps/competitions/migrations/0044_merge_20231221_1416.py b/src/apps/competitions/migrations/0044_merge_20231221_1416.py new file mode 100644 index 000000000..547c72b86 --- /dev/null +++ b/src/apps/competitions/migrations/0044_merge_20231221_1416.py @@ -0,0 +1,14 @@ +# Generated by Django 2.2.17 on 2023-12-21 14:16 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('competitions', '0040_merge_20231123_1456'), + ('competitions', '0043_merge_20231213_0948'), + ] + + operations = [ + ] diff --git a/src/apps/competitions/migrations/0045_auto_20240129_2314.py b/src/apps/competitions/migrations/0045_auto_20240129_2314.py new file mode 100644 index 000000000..2cb752724 --- /dev/null +++ b/src/apps/competitions/migrations/0045_auto_20240129_2314.py @@ -0,0 +1,19 @@ +# Generated by Django 2.2.17 on 2024-01-29 23:14 + +from django.db import migrations, models +import utils.data + + +class Migration(migrations.Migration): + + dependencies = [ + ('competitions', '0044_merge_20231221_1416'), + ] + + operations = [ + 
migrations.AddField( + model_name='competition', + name='logo_icon', + field=models.ImageField(blank=True, null=True, upload_to=utils.data.PathWrapper('logos', manual_override=True)), + ), + ] diff --git a/src/apps/competitions/migrations/0045_competition_auto_run_submissions.py b/src/apps/competitions/migrations/0045_competition_auto_run_submissions.py new file mode 100644 index 000000000..86161e98c --- /dev/null +++ b/src/apps/competitions/migrations/0045_competition_auto_run_submissions.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.17 on 2024-01-22 10:24 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('competitions', '0044_merge_20231221_1416'), + ] + + operations = [ + migrations.AddField( + model_name='competition', + name='auto_run_submissions', + field=models.BooleanField(default=True), + ), + ] diff --git a/src/apps/competitions/migrations/0046_merge_20240222_1916.py b/src/apps/competitions/migrations/0046_merge_20240222_1916.py new file mode 100644 index 000000000..347e64bc9 --- /dev/null +++ b/src/apps/competitions/migrations/0046_merge_20240222_1916.py @@ -0,0 +1,14 @@ +# Generated by Django 2.2.17 on 2024-02-22 19:16 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('competitions', '0045_competition_auto_run_submissions'), + ('competitions', '0045_auto_20240129_2314'), + ] + + operations = [ + ] diff --git a/src/apps/competitions/migrations/0047_competition_can_participants_make_submissions_public.py b/src/apps/competitions/migrations/0047_competition_can_participants_make_submissions_public.py new file mode 100644 index 000000000..2b750fa02 --- /dev/null +++ b/src/apps/competitions/migrations/0047_competition_can_participants_make_submissions_public.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.17 on 2024-03-28 13:00 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + 
('competitions', '0046_merge_20240222_1916'), + ] + + operations = [ + migrations.AddField( + model_name='competition', + name='can_participants_make_submissions_public', + field=models.BooleanField(default=True), + ), + ] diff --git a/src/apps/competitions/migrations/0048_auto_20240401_1646.py b/src/apps/competitions/migrations/0048_auto_20240401_1646.py new file mode 100644 index 000000000..3ed2ad446 --- /dev/null +++ b/src/apps/competitions/migrations/0048_auto_20240401_1646.py @@ -0,0 +1,23 @@ +# Generated by Django 2.2.17 on 2024-04-01 16:46 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('competitions', '0047_competition_can_participants_make_submissions_public'), + ] + + operations = [ + migrations.AddField( + model_name='competition', + name='show_detailed_results_in_leaderboard', + field=models.BooleanField(default=True), + ), + migrations.AddField( + model_name='competition', + name='show_detailed_results_in_submission_panel', + field=models.BooleanField(default=True), + ), + ] diff --git a/src/apps/competitions/models.py b/src/apps/competitions/models.py index f139ce29b..777dbb86a 100644 --- a/src/apps/competitions/models.py +++ b/src/apps/competitions/models.py @@ -1,9 +1,12 @@ import logging import uuid +import os +import io from django.conf import settings from django.contrib.sites.models import Site from django.contrib.postgres.fields import JSONField +from django.core.files.base import ContentFile from django.db import models from django.db.models import Q from django.urls import reverse @@ -15,6 +18,7 @@ from profiles.models import User, Organization from utils.data import PathWrapper from utils.storage import BundleStorage +from PIL import Image from tasks.models import Task @@ -32,6 +36,7 @@ class Competition(ChaHubSaveMixin, models.Model): title = models.CharField(max_length=256) logo = models.ImageField(upload_to=PathWrapper('logos'), null=True, blank=True) + logo_icon = 
models.ImageField(upload_to=PathWrapper('logos', manual_override=True), null=True, blank=True) created_by = models.ForeignKey(settings.AUTH_USER_MODEL, null=True, blank=True, on_delete=models.SET_NULL, related_name="competitions") created_when = models.DateTimeField(default=now) @@ -44,6 +49,10 @@ class Competition(ChaHubSaveMixin, models.Model): description = models.TextField(null=True, blank=True) docker_image = models.CharField(max_length=128, default="codalab/codalab-legacy:py37") enable_detailed_results = models.BooleanField(default=False) + # If true, show detailed results in submission panel + show_detailed_results_in_submission_panel = models.BooleanField(default=True) + # If true, show detailed results in leaderboard + show_detailed_results_in_leaderboard = models.BooleanField(default=True) make_programs_available = models.BooleanField(default=False) make_input_data_available = models.BooleanField(default=False) @@ -60,6 +69,13 @@ class Competition(ChaHubSaveMixin, models.Model): reward = models.CharField(max_length=256, null=True, blank=True) report = models.CharField(max_length=256, null=True, blank=True) + # if true, submissions are auto-run when submitted + # if false, submissions run will be intiiated by organizer + auto_run_submissions = models.BooleanField(default=True) + + # If true, participants see the make their submissions public + can_participants_make_submissions_public = models.BooleanField(default=True) + def __str__(self): return f"competition-{self.title}-{self.pk}-{self.competition_type}" @@ -214,8 +230,37 @@ def get_chahub_data(self): return self.clean_private_data(data) + def make_logo_icon(self): + if self.logo: + # Read the content of the logo file + self.logo.name + self.logo_icon + icon_dirname_only = os.path.dirname(self.logo.name) # Get just the path + icon_basename_only = os.path.basename(self.logo.name) # Get just the filename + file_name = os.path.splitext(icon_basename_only)[0] + ext = os.path.splitext(icon_basename_only)[1] 
+ new_path = os.path.join(icon_dirname_only, f"{file_name}_icon{ext}") + logo_content = self.logo.read() + original_logo = Image.open(io.BytesIO(logo_content)) + # Resize the image to a smaller size for logo_icon + width, height = original_logo.size + new_width = 100 # Specify the desired width for the logo_icon + new_height = int((new_width / width) * height) + resized_logo = original_logo.resize((new_width, new_height)) + # Create a BytesIO object to save the resized image + icon_content = io.BytesIO() + resized_logo.save(icon_content, format='PNG') + # Save the resized logo as logo_icon + self.logo_icon.save(new_path, ContentFile(icon_content.getvalue()), save=False) + def save(self, *args, **kwargs): super().save(*args, **kwargs) + if not self.logo: + pass + elif not self.logo_icon: + self.make_logo_icon() + elif os.path.dirname(self.logo.name) != os.path.dirname(self.logo_icon.name): + self.make_logo_icon() to_create = User.objects.filter( Q(id=self.created_by_id) | Q(id__in=self.collaborators.all().values_list('id', flat=True)) ).exclude(id__in=self.participants.values_list('user_id', flat=True)).distinct() @@ -280,7 +325,7 @@ class Phase(ChaHubSaveMixin, models.Model): has_been_migrated = models.BooleanField(default=False) hide_output = models.BooleanField(default=False) - has_max_submissions = models.BooleanField(default=False) + has_max_submissions = models.BooleanField(default=True) max_submissions_per_day = models.PositiveIntegerField(default=5, null=True, blank=True) max_submissions_per_person = models.PositiveIntegerField(default=100, null=True, blank=True) @@ -403,9 +448,22 @@ class SubmissionDetails(models.Model): ] name = models.CharField(max_length=50) data_file = models.FileField(upload_to=PathWrapper('submission_details'), storage=BundleStorage) + file_size = models.DecimalField(max_digits=10, decimal_places=2, null=True, blank=True) # in KiB submission = models.ForeignKey('Submission', on_delete=models.CASCADE, related_name='details') is_scoring 
= models.BooleanField(default=False) + def save(self, *args, **kwargs): + if self.data_file and (not self.file_size or self.file_size == -1): + try: + # save file size as KiB + # self.data_file.size returns bytes + self.file_size = self.data_file.size / 1024 + except TypeError: + # file returns a None size, can't divide None / 1024 + # -1 indicates an error + self.file_size = -1 + return super().save(*args, **kwargs) + class Submission(ChaHubSaveMixin, models.Model): NONE = "None" @@ -447,6 +505,10 @@ class Submission(ChaHubSaveMixin, models.Model): detailed_result = models.FileField(upload_to=PathWrapper('detailed_result'), null=True, blank=True, storage=BundleStorage) + prediction_result_file_size = models.DecimalField(max_digits=10, decimal_places=2, null=True, blank=True) # in KiB + scoring_result_file_size = models.DecimalField(max_digits=10, decimal_places=2, null=True, blank=True) # in KiB + detailed_result_file_size = models.DecimalField(max_digits=10, decimal_places=2, null=True, blank=True) # in KiB + secret = models.UUIDField(default=uuid.uuid4) celery_task_id = models.UUIDField(null=True, blank=True) task = models.ForeignKey(Task, on_delete=models.SET_NULL, null=True, blank=True, related_name="submissions") @@ -483,10 +545,17 @@ def __str__(self): return f"{self.phase.competition.title} submission PK={self.pk} by {self.owner.username}" def delete(self, **kwargs): + + # Check if any other submissions are using the same data + other_submissions_using_data = Submission.objects.filter(data=self.data).exclude(pk=self.pk).exists() + + if not other_submissions_using_data: + # If no other submissions are using the same data, delete it + self.data.delete() + # Also clean up details on delete self.details.all().delete() - # Call this here so that the data_file for the submission also gets deleted from storage - self.data.delete() + super().delete(**kwargs) def save(self, ignore_submission_limit=False, **kwargs): @@ -499,31 +568,81 @@ def save(self, 
ignore_submission_limit=False, **kwargs): if self.status == Submission.RUNNING and not self.started_when: self.started_when = now() + files_and_sizes_dict = { + 'prediction_result': 'prediction_result_file_size', + 'scoring_result': 'scoring_result_file_size', + 'detailed_result': 'detailed_result_file_size', + } + for file_path_attr, file_size_attr in files_and_sizes_dict.items(): + if getattr(self, file_path_attr) and (not getattr(self, file_size_attr) or getattr(self, file_size_attr) == -1): + try: + # save file size as KiB + # self.data_file.size returns bytes + setattr(self, file_size_attr, getattr(self, file_path_attr).size / 1024) + except TypeError: + # file returns a None size, can't divide None / 1024 + # -1 indicates an error + setattr(self, file_size_attr, -1) + super().save(**kwargs) def start(self, tasks=None): from .tasks import run_submission run_submission(self.pk, tasks=tasks) + def run(self): + # get tasks from the phase + tasks = self.phase.tasks.all() + # start submission providing the tasks + self.start(tasks=tasks) + return self + def re_run(self, task=None): + + # task to use in the new submission + new_submission_task = task or self.task + + # set is_specific_task_re_run + is_specific_task_re_run = bool(task) + + flag_rerun_specific_task_or_has_no_children = False + # Check if this submission needs to rerun on specific children or has no children + if not self.has_children or is_specific_task_re_run: + flag_rerun_specific_task_or_has_no_children = True + + # Check if task exists in case of specific task rerun or no children + if flag_rerun_specific_task_or_has_no_children and new_submission_task is None: + logger.error(f"Cannot rerun `{self}` because the task is None (deleted)") + return None + else: + children_tasks = self.children.values_list('task', flat=True) + if None in children_tasks: + logger.error(f"Cannot rerun `{self}` because one or more children submission tasks are None (deleted)") + return None + + # Create a new submission 
submission_arg_dict = { 'owner': self.owner, - 'task': task or self.task, + 'task': new_submission_task, 'phase': self.phase, 'data': self.data, 'has_children': self.has_children, - 'is_specific_task_re_run': bool(task), + 'is_specific_task_re_run': is_specific_task_re_run, 'fact_sheet_answers': self.fact_sheet_answers, } sub = Submission(**submission_arg_dict) sub.save(ignore_submission_limit=True) - # No need to rerun on children if this is running on a specific task - if not self.has_children or sub.is_specific_task_re_run: - self.refresh_from_db() + # set tasks for rerunning + if flag_rerun_specific_task_or_has_no_children: + # in case of a submission with no children or specific task rerun + # submission with no children is same as submission with one task tasks = [sub.task] else: + # in case submission has multiple children or multiple task rerun + # tasks are gathered from the children submissions tasks = Task.objects.filter(pk__in=self.children.values_list('task', flat=True)) + sub.start(tasks=tasks) return sub @@ -678,3 +797,15 @@ class CompetitionDump(models.Model): def __str__(self): return f"Comp dump created by {self.dataset.created_by} - {self.status}" + + +# Competition White List Email Model class +# related to Competition Model +# Each Competition can have multiple white list emails +# These are used to auto approve if competition white list has this email +class CompetitionWhiteListEmail(models.Model): + competition = models.ForeignKey(Competition, on_delete=models.CASCADE, related_name='whitelist_emails') + email = models.EmailField() + + def __str__(self): + return f"{self.email} - Competition: {self.competition.title}" diff --git a/src/apps/competitions/statistics.py b/src/apps/competitions/statistics.py new file mode 100644 index 000000000..d77cc624a --- /dev/null +++ b/src/apps/competitions/statistics.py @@ -0,0 +1,111 @@ +# -------------------------------------------------- +# Imports +# -------------------------------------------------- 
+import os +from competitions.models import Competition + +# -------------------------------------------------- +# Setting constants +# -------------------------------------------------- +BASE_URL = "https://www.codabench.org/competitions/" +STATISTICS_DIR = "/app/statistics/" +CSV_FILE_NAME = "codabench_competition_statistics.csv" +CSV_PATH = STATISTICS_DIR + CSV_FILE_NAME + + +def create_codabench_statistics(): + """ + This function prepares a CSV file with all published competitions + """ + + # Create statistics directory if not already created + if not os.path.exists(STATISTICS_DIR): + os.makedirs(STATISTICS_DIR) + + # Write header of the CSV file + with open(CSV_PATH, 'w', newline='') as output_file: + # Header for the csv + header = 'title; description; participants; submissions; year; phases; reward; duration (days); url;\n' + output_file.write(header) + + # loop over published competitions + for comp in Competition.objects.filter(published=True): + + # get title + title = comp.title + title = clean_string(title) + + # get description + desc = comp.description + desc = clean_string(desc) + + # get participants + num_participants = comp.participants.count() + + # get phases + phases = comp.phases.all() + num_phases = len(phases) + + # get submissions + num_submissions = 0 + for phase in phases: + num_submissions += phase.submissions.count() + + # get competition first phase year + year = phases[0].start.year + + # get competition start and end date + start_date = phases[0].start + end_date = phases[num_phases - 1].end + # if last phase has no end date, set end date to last phase start date + if end_date is None: + end_date = phases[num_phases - 1].start + + # compute duration of the competition + duration = (end_date - start_date).days + + # get reward + reward = comp.reward + # set reward to empty string if none + if reward is None: + reward = "" + else: + reward = clean_string(reward) + + # prepare competition url + url = f"{BASE_URL}{comp.id}" + + #
prepare a row with all the computed information for one competition + row = '{}; {}; {}; {}; {}; {}; {}; {}; {}; \n'.format( + title, + desc, + num_participants, + num_submissions, + year, + num_phases, + reward, + duration, + url + ) + + # write row in the CSV file + with open(CSV_PATH, 'a') as output_file: + output_file.write(row) + + +def clean_string(text): + """ + This function cleans an input text + """ + if ";" in text: + text = text.replace(";", ",") + + if '\n' in text: + text = text.replace('\n', ' ') + + if '\r' in text: + text = text.replace('\r', ' ') + + text = ''.join(text.splitlines()) + + return text diff --git a/src/apps/competitions/tests/test_submissions.py b/src/apps/competitions/tests/test_submissions.py index 4e58ebd93..a7ae024f2 100644 --- a/src/apps/competitions/tests/test_submissions.py +++ b/src/apps/competitions/tests/test_submissions.py @@ -155,7 +155,21 @@ def test_only_owner_can_add_submission_to_leaderboard(self): self.client.force_login(different_user) url = reverse('submission-submission-leaderboard-connection', kwargs={'pk': parent_sub.pk}) resp = self.client.post(url) - assert resp.status_code == 404 + assert resp.status_code == 403 + assert resp.data["detail"] == "You cannot perform this action, contact the competition organizer!" + + def test_only_owner_can_remove_submission_from_leaderboard(self): + parent_sub = SubmissionFactory(has_children=True) + leaderboard = LeaderboardFactory() + parent_sub.phase.leaderboard = leaderboard + parent_sub.phase.save() + + different_user = UserFactory() + self.client.force_login(different_user) + url = reverse('submission-submission-leaderboard-connection', kwargs={'pk': parent_sub.pk}) + resp = self.client.delete(url) + assert resp.status_code == 403 + assert resp.data["detail"] == "You cannot perform this action, contact the competition organizer!"
def test_adding_submission_removes_other_submissions_from_owner(self): leaderboard = LeaderboardFactory() diff --git a/src/apps/competitions/unpackers/v1.py b/src/apps/competitions/unpackers/v1.py index 6802002ac..dc476a5c2 100644 --- a/src/apps/competitions/unpackers/v1.py +++ b/src/apps/competitions/unpackers/v1.py @@ -23,6 +23,9 @@ def __init__(self, *args, **kwargs): "description": self.competition_yaml.get("description", ""), "docker_image": docker_image, "enable_detailed_results": self.competition_yaml.get('enable_detailed_results', False), + "show_detailed_results_in_submission_panel": self.competition_yaml.get('show_detailed_results_in_submission_panel', True), + "show_detailed_results_in_leaderboard": self.competition_yaml.get('show_detailed_results_in_leaderboard', True), + "auto_run_submissions": self.competition_yaml.get('auto_run_submissions', True), "make_programs_available": self.competition_yaml.get('make_programs_available', False), "make_input_data_available": self.competition_yaml.get('make_input_data_available', False), "end_date": self.competition_yaml.get('end_date', None), diff --git a/src/apps/competitions/unpackers/v2.py b/src/apps/competitions/unpackers/v2.py index b3c87b2f4..b825e9ee2 100644 --- a/src/apps/competitions/unpackers/v2.py +++ b/src/apps/competitions/unpackers/v2.py @@ -14,6 +14,10 @@ def __init__(self, *args, **kwargs): "registration_auto_approve": self.competition_yaml.get('registration_auto_approve', False), "docker_image": self.competition_yaml.get('docker_image', 'codalab/codalab-legacy:py37'), "enable_detailed_results": self.competition_yaml.get('enable_detailed_results', False), + "show_detailed_results_in_submission_panel": self.competition_yaml.get('show_detailed_results_in_submission_panel', True), + "show_detailed_results_in_leaderboard": self.competition_yaml.get('show_detailed_results_in_leaderboard', True), + "auto_run_submissions": self.competition_yaml.get('auto_run_submissions', True), + 
"can_participants_make_submissions_public": self.competition_yaml.get('can_participants_make_submissions_public', True), "make_programs_available": self.competition_yaml.get('make_programs_available', False), "make_input_data_available": self.competition_yaml.get('make_input_data_available', False), "description": self.competition_yaml.get("description", ""), diff --git a/src/apps/datasets/models.py b/src/apps/datasets/models.py index 48d50f3f3..729e025a1 100644 --- a/src/apps/datasets/models.py +++ b/src/apps/datasets/models.py @@ -52,7 +52,7 @@ class Data(ChaHubSaveMixin, models.Model): key = models.UUIDField(default=uuid.uuid4, blank=True, unique=True) is_public = models.BooleanField(default=False) upload_completed_successfully = models.BooleanField(default=False) - file_size = models.DecimalField(max_digits=10, decimal_places=2, null=True, blank=True) + file_size = models.DecimalField(max_digits=10, decimal_places=2, null=True, blank=True) # in KiB # This is true if the Data model was created as part of unpacking a competition. Competition bundles themselves # are NOT marked True, since they are not created by unpacking! 
@@ -65,13 +65,15 @@ def get_download_url(self): return reverse('datasets:download', kwargs={'key': self.key}) def save(self, *args, **kwargs): - if not self.file_size and self.data_file: + if self.data_file and (not self.file_size or self.file_size == -1): try: - # save file size as kbs + # save file size as KiB + # self.data_file.size returns bytes self.file_size = self.data_file.size / 1024 except TypeError: # file returns a None size, can't divide None / 1024 - self.file_size = 0 + # -1 indicates an error + self.file_size = -1 if not self.name: self.name = f"{self.created_by.username} - {self.type}" return super().save(*args, **kwargs) diff --git a/src/apps/oidc_configurations/__init__.py b/src/apps/oidc_configurations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/apps/oidc_configurations/admin.py b/src/apps/oidc_configurations/admin.py new file mode 100644 index 000000000..5ea6e683f --- /dev/null +++ b/src/apps/oidc_configurations/admin.py @@ -0,0 +1,6 @@ +from django.contrib import admin +from .models import Auth_Organization + +admin.site.register(Auth_Organization) + +# Register your models here. 
diff --git a/src/apps/oidc_configurations/apps.py b/src/apps/oidc_configurations/apps.py new file mode 100644 index 000000000..3d757062b --- /dev/null +++ b/src/apps/oidc_configurations/apps.py @@ -0,0 +1,5 @@ +from django.apps import AppConfig + + +class OidcConfigurationsConfig(AppConfig): + name = 'oidc_configurations' diff --git a/src/apps/oidc_configurations/migrations/0001_initial.py b/src/apps/oidc_configurations/migrations/0001_initial.py new file mode 100644 index 000000000..085e64983 --- /dev/null +++ b/src/apps/oidc_configurations/migrations/0001_initial.py @@ -0,0 +1,29 @@ +# Generated by Django 2.2.17 on 2024-03-04 06:16 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='Auth_Organization', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=255)), + ('client_id', models.CharField(max_length=255)), + ('client_secret', models.CharField(max_length=255)), + ('authorization_url', models.CharField(max_length=255)), + ('token_url', models.CharField(max_length=255)), + ('user_info_url', models.CharField(max_length=255)), + ('redirect_url', models.CharField(max_length=255)), + ('button_bg_color', models.CharField(default='#2C3E4C', max_length=20)), + ('button_text_color', models.CharField(default='#FFFFFF', max_length=20)), + ], + ), + ] diff --git a/src/apps/oidc_configurations/migrations/__init__.py b/src/apps/oidc_configurations/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/apps/oidc_configurations/models.py b/src/apps/oidc_configurations/models.py new file mode 100644 index 000000000..9e2b0c66c --- /dev/null +++ b/src/apps/oidc_configurations/models.py @@ -0,0 +1,14 @@ +# oidc_configurations/models.py +from django.db import models + + +class Auth_Organization(models.Model): + name = 
models.CharField(max_length=255) + client_id = models.CharField(max_length=255) + client_secret = models.CharField(max_length=255) + authorization_url = models.CharField(max_length=255) + token_url = models.CharField(max_length=255) + user_info_url = models.CharField(max_length=255) + redirect_url = models.CharField(max_length=255) + button_bg_color = models.CharField(max_length=20, default='#2C3E4C') + button_text_color = models.CharField(max_length=20, default='#FFFFFF') diff --git a/src/apps/oidc_configurations/urls.py b/src/apps/oidc_configurations/urls.py new file mode 100644 index 000000000..7bfae4f99 --- /dev/null +++ b/src/apps/oidc_configurations/urls.py @@ -0,0 +1,10 @@ +# oidc_configurations/urls.py +from django.urls import path +from .views import organization_oidc_login, oidc_complete + +app_name = 'oidc_configurations' + +urlpatterns = [ + path('organization_oidc_login/', organization_oidc_login, name='organization_oidc_login'), + path('complete/<int:auth_organization_id>/', oidc_complete, name='oidc_complete'), +] diff --git a/src/apps/oidc_configurations/views.py b/src/apps/oidc_configurations/views.py new file mode 100644 index 000000000..6b04be8ff --- /dev/null +++ b/src/apps/oidc_configurations/views.py @@ -0,0 +1,203 @@ +# oidc_configurations/views.py +import base64 +import requests +from django.shortcuts import render, redirect, get_object_or_404 +from .models import Auth_Organization +from django.contrib.auth import get_user_model, login +import re + +User = get_user_model() + +BACKEND = 'django.contrib.auth.backends.ModelBackend' + + +def organization_oidc_login(request): + # Check if this is a post request and it contains organization_oidc_login + if request.method == 'POST' and 'organization_oidc_login' in request.POST: + # Get auth organization id from the request + auth_organization_id = request.POST.get('organization_oidc_login') + + # Get auth organization using its id + organization = get_object_or_404(Auth_Organization, pk=auth_organization_id) + + if
organization: + # Create a redirect url consisting of + # - authorization_url + # - client_id + # - response_type + # - redirect_uri + oidc_auth_url = ( + f"{organization.authorization_url}?" + f"client_id={organization.client_id}&" + "response_type=code&" + "scope=openid profile email&" + f"redirect_uri={organization.redirect_url}" + ) + + # Redirect the user to the OIDC provider's authorization URL + return redirect(oidc_auth_url) + + # Handle other cases or render a different template if needed + return render(request, 'registration/login.html') + + +def oidc_complete(request, auth_organization_id): + + # create empty context + context = {} + + # Get error or authorization code from the query string + error = request.GET.get('error', None) + error_description = request.GET.get('error_description', None) + authorization_code = request.GET.get('code', None) + + if error: + context["error"] = error + + if error_description: + context["error_description"] = error_description + + # Token exchange process + if authorization_code: + + try: + # STEP 1: Get auth organization using its id + organization = get_object_or_404(Auth_Organization, pk=auth_organization_id) + + if organization: + + # STEP 2: Get access token + access_token, token_error = get_access_token(organization, authorization_code) + + if token_error: + context["error"] = token_error + else: + # STEP 3: Get user info + user_info, user_info_error = get_user_info(organization, access_token) + if user_info_error: + context["error"] = user_info_error + else: + + # get email and nickname (username) of the user + user_email = user_info.get("email", None) + user_nickname = user_info.get("nickname", None) + if user_email: + # get user with this email + user = get_user_by_email(user_email) + # STEP 4: If a user with this email already exists, log them in (NOTE: no check is made here that the user was created using oidc or that the oidc organization matches this one) + if user: + login(request, user, backend=BACKEND) + # Redirect the user to the home page + return redirect('pages:home') + else: +
return register_and_authenticate_user(request, user_email, user_nickname, organization) + + else: + context["error"] = "Unable to extract email from user info! Please contact platform" + else: + context["error"] = "Invalid Organization ID!" + except Exception as e: + context["error"] = f"{e}" + + return render(request, 'oidc/oidc_complete.html', context) + + +def get_access_token(organization, authorization_code): + + token_url = organization.token_url + client_id = organization.client_id + client_secret = organization.client_secret + redirect_url = organization.redirect_url + + auth_header = base64.b64encode(f"{client_id}:{client_secret}".encode()).decode("utf-8") + headers = { + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": f"Basic {auth_header}", + } + data = { + "grant_type": "authorization_code", + "code": authorization_code, + "redirect_uri": redirect_url, + } + + try: + response = requests.request("POST", token_url, data=data, headers=headers) + response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx) + token_data = response.json() + access_token = token_data.get('access_token') + return access_token, None + except requests.exceptions.RequestException as e: + print(f"Error during token request: {e}") + return None, e + except Exception as e: + print(f"Error parsing token response: {e}") + return None, e + + +def get_user_info(organization, access_token): + + user_info_url = organization.user_info_url + + headers = { + 'Authorization': f'Bearer {access_token}', + } + + response = requests.get(user_info_url, headers=headers) + + try: + user_info = response.json() + return user_info, None + except Exception as e: + return None, e + + +def register_and_authenticate_user(request, user_email, user_nickname, organization): + + if not user_nickname: + username = re.sub(r'[^a-zA-Z0-9]', '', user_email.split('@')[0]) + else: + username = user_nickname + + # Ensure the username is unique + username = 
create_unique_username(username) + + # Create a new user + user = User.objects.create( + username=username, + email=user_email, + is_created_using_oidc=True, + oidc_organization=organization, + ) + + if user: + # login user + login(request, user, backend=BACKEND) + # Redirect to the home page + return redirect('pages:home') + + else: + # Handle authentication failure i.e. go back to login + return redirect('accounts:login') + + +def create_unique_username(username): + # Check if the username already exists + if User.objects.filter(username=username).exists(): + # If the username already exists, modify it to make it unique + suffix = 1 + new_username = f"{username}_{suffix}" + while User.objects.filter(username=new_username).exists(): + suffix += 1 + new_username = f"{username}_{suffix}" + return new_username + else: + # If the username doesn't exist, use it as is + return username + + +def get_user_by_email(email): + try: + user = User.objects.get(email=email) + return user + except User.DoesNotExist: + return None diff --git a/src/apps/pages/views.py b/src/apps/pages/views.py index a555e80fa..fad6d33cd 100644 --- a/src/apps/pages/views.py +++ b/src/apps/pages/views.py @@ -90,10 +90,19 @@ def get_context_data(self, *args, **kwargs): for submission in context['submissions']: # Get filesize from each submissions's data submission.file_size = self.format_file_size(submission.data.file_size) + # Get queue from each submission - queue_name = "*" if submission.queue is None else submission.queue.name + queue_name = "" + # if submission has parent get queue from parent otherwise from the submission itself + if submission.parent: + queue_name = "*" if submission.parent.queue is None else submission.parent.queue.name + else: + queue_name = "*" if submission.queue is None else submission.queue.name submission.competition_queue = queue_name + # Add submission owner display name + submission.owner_display_name = submission.owner.display_name if submission.owner.display_name
else submission.owner.username + return context def format_file_size(self, file_size): diff --git a/src/apps/profiles/migrations/0012_user_quota.py b/src/apps/profiles/migrations/0012_user_quota.py new file mode 100644 index 000000000..20d3ac2fe --- /dev/null +++ b/src/apps/profiles/migrations/0012_user_quota.py @@ -0,0 +1,19 @@ +# Generated by Django 2.2.17 on 2023-11-22 19:57 + +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('profiles', '0011_auto_20230902_0936'), + ] + + operations = [ + migrations.AddField( + model_name='user', + name='quota', + field=models.BigIntegerField(default=settings.DEFAULT_USER_QUOTA), + ), + ] diff --git a/src/apps/profiles/migrations/0013_auto_20240304_0616.py b/src/apps/profiles/migrations/0013_auto_20240304_0616.py new file mode 100644 index 000000000..121ca477c --- /dev/null +++ b/src/apps/profiles/migrations/0013_auto_20240304_0616.py @@ -0,0 +1,25 @@ +# Generated by Django 2.2.17 on 2024-03-04 06:16 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('oidc_configurations', '0001_initial'), + ('profiles', '0012_user_quota'), + ] + + operations = [ + migrations.AddField( + model_name='user', + name='is_created_using_oidc', + field=models.BooleanField(default=False), + ), + migrations.AddField( + model_name='user', + name='oidc_organization', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='authorized_users', to='oidc_configurations.Auth_Organization'), + ), + ] diff --git a/src/apps/profiles/models.py b/src/apps/profiles/models.py index 67abeb665..b150e54e4 100644 --- a/src/apps/profiles/models.py +++ b/src/apps/profiles/models.py @@ -7,6 +7,16 @@ from django.utils.text import slugify from utils.data import PathWrapper from django.urls import reverse +from django.conf import settings 
+from django.db.models import ( + Sum, + F, + Case, + Value, + When, + DecimalField, +) +from oidc_configurations.models import Auth_Organization PROFILE_DATA_BLACKLIST = [ 'password', @@ -61,6 +71,11 @@ class User(ChaHubSaveMixin, AbstractBaseUser, PermissionsMixin): date_joined = models.DateTimeField(default=now) is_active = models.BooleanField(default=True) is_staff = models.BooleanField(default=False) + quota = models.BigIntegerField(default=settings.DEFAULT_USER_QUOTA, null=False) + + # Fields for OIDC authentication + is_created_using_oidc = models.BooleanField(default=False) + oidc_organization = models.ForeignKey(Auth_Organization, null=True, blank=True, on_delete=models.SET_NULL, related_name="authorized_users") # Notifications organizer_direct_message_updates = models.BooleanField(default=True) @@ -124,6 +139,60 @@ def get_chahub_is_valid(self): # By default, always push return True + def get_used_storage_space(self): + from datasets.models import Data + from competitions.models import Submission, SubmissionDetails + + storage_used = 0 + + # Datasets + users_datasets = Data.objects.filter( + created_by_id=self.id, file_size__gt=0, file_size__isnull=False + ).aggregate(Sum("file_size"))["file_size__sum"] + + storage_used += users_datasets * 1024 if users_datasets else 0 + + # Submissions + users_submissions = Submission.objects.filter(owner_id=self.id).aggregate( + size=Sum( + Case( + When( + prediction_result_file_size__gt=0, + then=F("prediction_result_file_size"), + ), + default=Value(0), + output_field=DecimalField(), + ) + + Case( + When( + scoring_result_file_size__gt=0, + then=F("scoring_result_file_size"), + ), + default=Value(0), + output_field=DecimalField(), + ) + + Case( + When( + detailed_result_file_size__gt=0, + then=F("detailed_result_file_size"), + ), + default=Value(0), + output_field=DecimalField(), + ) + ) + ) + + storage_used += users_submissions["size"] * 1024 if users_submissions["size"] else 0 + + # Submissions details + 
users_submissions_details = SubmissionDetails.objects.filter( + submission__owner_id=self.id, file_size__gt=0, file_size__isnull=False + ).aggregate(Sum("file_size"))["file_size__sum"] + + storage_used += users_submissions_details * 1024 if users_submissions_details else 0 + + return storage_used + class GithubUserInfo(models.Model): # Required Info diff --git a/src/apps/profiles/urls_accounts.py b/src/apps/profiles/urls_accounts.py index 03acf8254..779292962 100644 --- a/src/apps/profiles/urls_accounts.py +++ b/src/apps/profiles/urls_accounts.py @@ -8,10 +8,6 @@ urlpatterns = [ url(r'^signup', views.sign_up, name="signup"), path('login/', views.log_in, name='login'), - # url(r'^user_profile', views.user_profile, name="user_profile"), - # path('login/', auth_views.LoginView.as_view(extra_context=extra_context), name='login'), - # path('login/', views.LoginView.as_view(), name='login'), - # path('logout/', auth_views.LogoutView.as_view(), name='logout'), path('logout/', views.LogoutView.as_view(), name='logout'), path('password_reset/', views.CustomPasswordResetView.as_view(), name='password_reset'), path('password_reset/done/', auth_views.PasswordResetDoneView.as_view(), name='password_reset_done'), diff --git a/src/apps/profiles/views.py b/src/apps/profiles/views.py index 33ab6235d..a6a21cb64 100644 --- a/src/apps/profiles/views.py +++ b/src/apps/profiles/views.py @@ -21,6 +21,7 @@ UserNotificationSerializer from .forms import SignUpForm, LoginForm from .models import User, Organization, Membership +from oidc_configurations.models import Auth_Organization from .tokens import account_activation_token @@ -104,6 +105,12 @@ def activateEmail(request, user, to_email): def sign_up(request): + + # If sign up is not enabled then redirect to login + # this is for security as some users may access sign up page using the url + if not settings.ENABLE_SIGN_UP: + return redirect('accounts:login') + context = {} context['chahub_signup_url'] = 
"{}/profiles/signup?next={}/social/login/chahub".format( settings.SOCIAL_AUTH_CHAHUB_BASE_URL, @@ -172,6 +179,11 @@ def log_in(request): else: context['form'] = form + # Fetch auth_organizations from the database + auth_organizations = Auth_Organization.objects.all() + if auth_organizations: + context['auth_organizations'] = auth_organizations + if not context.get('form'): context['form'] = LoginForm() return render(request, 'registration/login.html', context) diff --git a/src/settings/base.py b/src/settings/base.py index 0c756334d..27e76045a 100644 --- a/src/settings/base.py +++ b/src/settings/base.py @@ -1,6 +1,7 @@ import os import sys from datetime import timedelta +from celery.schedules import crontab import dj_database_url @@ -59,6 +60,7 @@ 'health', 'forums', 'announcements', + 'oidc_configurations', ) INSTALLED_APPS = THIRD_PARTY_APPS + OUR_APPS @@ -223,6 +225,14 @@ 'task': 'competitions.tasks.submission_status_cleanup', 'schedule': timedelta(seconds=3600) }, + 'create_storage_analytics_snapshot': { + 'task': 'analytics.tasks.create_storage_analytics_snapshot', + 'schedule': crontab(hour='2', minute='0', day_of_week='sun') # Every Sunday at 02:00 UTC time + }, + 'reset_computed_storage_analytics': { + 'task': 'analytics.tasks.reset_computed_storage_analytics', + 'schedule': crontab(hour='2', minute='0', day_of_month='1', month_of_year="*/3") # Every 3 month at 02:00 UTC on the 1st + }, } CELERY_TIMEZONE = 'UTC' CELERY_WORKER_PREFETCH_MULTIPLIER = 1 @@ -397,6 +407,9 @@ GS_PRIVATE_BUCKET_NAME = os.environ.get('GS_PRIVATE_BUCKET_NAME') GS_BUCKET_NAME = GS_PUBLIC_BUCKET_NAME # Default bucket set to public bucket +# Quota +DEFAULT_USER_QUOTA = 15 * 1024 * 1024 * 1024 # 15GB + # ============================================================================= # Debug # ============================================================================= @@ -450,3 +463,17 @@ # Django-Su (User impersonation) SU_LOGIN_CALLBACK = 'profiles.admin.su_login_callback' 
AJAX_LOOKUP_CHANNELS = {'django_su': dict(model='profiles.User', search_field='username')} + +# ============================================================================= +# Limit for re-running submission +# This is used to limit users to rerun submissions +# on default queue when number of submissions are < RERUN_SUBMISSION_LIMIT +# ============================================================================= +RERUN_SUBMISSION_LIMIT = int(os.environ.get('RERUN_SUBMISSION_LIMIT') or 30)  # env values are strings; cast so the limit is always an int (30 when unset/empty) + + +# ============================================================================= +# Enable or disable regular email sign-in and sign-up +# ============================================================================= +ENABLE_SIGN_UP = os.environ.get('ENABLE_SIGN_UP', 'True').lower() == 'true' +ENABLE_SIGN_IN = os.environ.get('ENABLE_SIGN_IN', 'True').lower() == 'true' diff --git a/src/static/img/chalearn-logo.svg b/src/static/img/chalearn-logo.svg new file mode 100644 index 000000000..4533182d4 --- /dev/null +++ b/src/static/img/chalearn-logo.svg @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/static/img/cnrs-logo.svg b/src/static/img/cnrs-logo.svg new file mode 100644 index 000000000..692f4aca3 --- /dev/null +++ b/src/static/img/cnrs-logo.svg @@ -0,0 +1,15 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/src/static/img/lisn-logo.svg b/src/static/img/lisn-logo.svg new file mode 100644 index 000000000..e0bfdf4aa --- /dev/null +++ b/src/static/img/lisn-logo.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/src/static/img/paper.png b/src/static/img/paper.png new file mode 100644 index 000000000..8cf0231d8 Binary files /dev/null and b/src/static/img/paper.png differ diff --git a/src/static/img/paris-saclay-logo.svg b/src/static/img/paris-saclay-logo.svg new file mode 100644 index 000000000..83ee67740 --- /dev/null +++ b/src/static/img/paris-saclay-logo.svg @@ -0,0 +1,97 @@ + + + + +logo +Created with Sketch.
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/static/img/partners/4paradigm.jpg b/src/static/img/partners/4paradigm.jpg new file mode 100644 index 000000000..80821eeed Binary files /dev/null and b/src/static/img/partners/4paradigm.jpg differ diff --git a/src/static/img/partners/anr.png b/src/static/img/partners/anr.png new file mode 100644 index 000000000..4665577bc Binary files /dev/null and b/src/static/img/partners/anr.png differ diff --git a/src/static/img/partners/anr2.jpg b/src/static/img/partners/anr2.jpg new file mode 100644 index 000000000..ffeb84ef4 Binary files /dev/null and b/src/static/img/partners/anr2.jpg differ diff --git a/src/static/img/partners/barcelona.png b/src/static/img/partners/barcelona.png new file mode 100644 index 000000000..2ccdf0151 Binary files /dev/null and b/src/static/img/partners/barcelona.png differ diff --git a/src/static/img/partners/chalearn.png b/src/static/img/partners/chalearn.png new file mode 100644 index 000000000..1965dae7a Binary files /dev/null and b/src/static/img/partners/chalearn.png differ diff --git a/src/static/img/partners/cnrs.png b/src/static/img/partners/cnrs.png new file mode 100644 index 000000000..434d6a653 Binary files /dev/null and b/src/static/img/partners/cnrs.png differ diff --git a/src/static/img/partners/eit_health.jpg b/src/static/img/partners/eit_health.jpg new file mode 100644 index 000000000..ab7c32709 Binary files /dev/null and b/src/static/img/partners/eit_health.jpg differ diff --git a/src/static/img/partners/google.jpg b/src/static/img/partners/google.jpg new file mode 100644 index 000000000..133a9dcf7 Binary files /dev/null and b/src/static/img/partners/google.jpg differ diff --git a/src/static/img/partners/idf.jpg b/src/static/img/partners/idf.jpg new file mode 100644 index 000000000..1fafd4bd0 Binary files /dev/null and b/src/static/img/partners/idf.jpg differ diff --git 
a/src/static/img/partners/inria.png b/src/static/img/partners/inria.png new file mode 100644 index 000000000..5632b6e0e Binary files /dev/null and b/src/static/img/partners/inria.png differ diff --git a/src/static/img/partners/lisn.png b/src/static/img/partners/lisn.png new file mode 100644 index 000000000..a9dd600c4 Binary files /dev/null and b/src/static/img/partners/lisn.png differ diff --git a/src/static/img/partners/microsoft.png b/src/static/img/partners/microsoft.png new file mode 100644 index 000000000..4f9e99c3d Binary files /dev/null and b/src/static/img/partners/microsoft.png differ diff --git a/src/static/img/partners/paris-saclay.png b/src/static/img/partners/paris-saclay.png new file mode 100644 index 000000000..9d63b43ad Binary files /dev/null and b/src/static/img/partners/paris-saclay.png differ diff --git a/src/static/img/partners/stanford.png b/src/static/img/partners/stanford.png new file mode 100644 index 000000000..c1d724694 Binary files /dev/null and b/src/static/img/partners/stanford.png differ diff --git a/src/static/js/ours/client.js b/src/static/js/ours/client.js index 764fb109a..9a8801a6e 100644 --- a/src/static/js/ours/client.js +++ b/src/static/js/ours/client.js @@ -102,6 +102,9 @@ CODALAB.api = { cancel_submission: function (id) { return CODALAB.api.request('GET', `${URLS.API}submissions/${id}/cancel_submission/`) }, + run_submission: function (id) { + return CODALAB.api.request('POST', `${URLS.API}submissions/${id}/run_submission/`) + }, re_run_submission: function (id) { return CODALAB.api.request('POST', `${URLS.API}submissions/${id}/re_run_submission/`) }, @@ -165,6 +168,7 @@ CODALAB.api = { // Pass the requested file name for the SAS url metadata.request_sassy_file_name = data_file.name metadata.file_name = data_file.name + metadata.file_size = data_file.size // This will be set on successful dataset creation, then used to complete the dataset upload var dataset = {} @@ -308,12 +312,24 @@ CODALAB.api = { get_analytics: (filters) 
=> { return CODALAB.api.request('GET', `${URLS.API}analytics/`, filters) }, + get_storage_usage_history: (filters) => { + return CODALAB.api.request('GET', `${URLS.API}analytics/storage_usage_history/`, filters); + }, + get_competitions_usage: (filters) => { + return CODALAB.api.request('GET', `${URLS.API}analytics/competitions_usage/`, filters); + }, + get_users_usage: (filters) => { + return CODALAB.api.request('GET', `${URLS.API}analytics/users_usage/`, filters); + }, /*--------------------------------------------------------------------- User Quota and Cleanup ---------------------------------------------------------------------*/ get_user_quota_cleanup: () => { return CODALAB.api.request('GET', `${URLS.API}user_quota_cleanup/`) }, + get_user_quota: () => { + return CODALAB.api.request('GET', `${URLS.API}user_quota/`) + }, delete_unused_tasks: () => { return CODALAB.api.request('DELETE', `${URLS.API}delete_unused_tasks/`) }, diff --git a/src/static/js/ours/latex_markdown_html.js b/src/static/js/ours/latex_markdown_html.js index 4a0e645c2..6ee169f08 100644 --- a/src/static/js/ours/latex_markdown_html.js +++ b/src/static/js/ours/latex_markdown_html.js @@ -1,28 +1,29 @@ // Function to render Markdown, HTML and Latex and return updated content function renderMarkdownWithLatex(content) { - if(content === null){ - return [] - } - const parsedHtml = new DOMParser().parseFromString(marked(content), "text/html") - - const traverseAndRenderLatex = (node) => { - if (node.nodeType === Node.ELEMENT_NODE) { - const latexPattern = /\$\$([\s\S]*?)\$\$|\$([^\$\n]*?)\$/g - const hasLatex = latexPattern.test(node.textContent) - if (hasLatex) { - const tempDiv = document.createElement('div') - tempDiv.innerHTML = node.innerHTML.replace(latexPattern, (_, formula1, formula2) => { - const formula = formula1 || formula2 - return katex.renderToString(formula, { throwOnError: false }) - }); - node.innerHTML = tempDiv.innerHTML - } + if(content === null){ + return [] + } + const 
parsedHtml = new DOMParser().parseFromString(marked(content), "text/html") + + const traverseAndRenderLatex = (node) => { + if (node.nodeType === Node.ELEMENT_NODE) { + const latexPattern = /\$\$([\s\S]*?)\$\$|\$([^\$\n]*?)\$/g + const hasLatex = latexPattern.test(node.textContent) + if (hasLatex) { + const tempDiv = document.createElement('div') + tempDiv.innerHTML = node.innerHTML.replace(latexPattern, (_, formula1, formula2) => { + const formula = formula1 || formula2 + const decodedFormula = formula.replace(/&lt;/g, '<').replace(/&gt;/g, '>') // innerHTML escapes < and >; decode before KaTeX + return katex.renderToString(decodedFormula, { throwOnError: false }) + }); + node.innerHTML = tempDiv.innerHTML } - - node.childNodes.forEach(traverseAndRenderLatex) - }; - - traverseAndRenderLatex(parsedHtml.body) - - return parsedHtml.body.childNodes + } + + node.childNodes.forEach(traverseAndRenderLatex) + }; + + traverseAndRenderLatex(parsedHtml.body) + + return parsedHtml.body.childNodes } \ No newline at end of file diff --git a/src/static/js/ours/utils.js b/src/static/js/ours/utils.js index 7407c5aa2..5a0edc358 100644 --- a/src/static/js/ours/utils.js +++ b/src/static/js/ours/utils.js @@ -89,6 +89,17 @@ function pretty_date(date_string) { } } +function pretty_bytes(bytes, decimal_places=1, suffix="B") { + const units = ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']; + for (const unit of units) { + if (Math.abs(bytes) < 1024.0) { + return bytes.toFixed(decimal_places) + unit + suffix; + } + bytes /= 1024.0; + } + return bytes.toFixed(decimal_places) + "Yi" + suffix; // all eight units exhausted: value is now in Yi +} + /* ---------------------------------------------------------------------------- Form data helpers ----------------------------------------------------------------------------*/ @@ -151,13 +162,23 @@ const easyMDE_rendering_config = { } } -function create_easyMDE(element) { +function create_easyMDE(element, showToolBar = true, showStatusBar = true, editorHeight = '300px') { + + var toolbarIcons = [] + if(showToolBar){ + toolbarIcons = 
["bold", "italic", "heading", "|", "quote", "unordered-list", "ordered-list", "|", "link", "image", "|", "preview", "guide"] + } + let statusItems = ["lines", "words", "cursor"] + + var markdown_editor = new EasyMDE({ element: element, autoRefresh: true, forceSync: true, - hideIcons: ["side-by-side", "fullscreen"], - renderingConfig: easyMDE_rendering_config + toolbar: toolbarIcons, + renderingConfig: easyMDE_rendering_config, + status: showStatusBar ? statusItems : showStatusBar, + minHeight: editorHeight || '300px' // Adjust the height, default is 300 }) element.EASY_MDE = markdown_editor return markdown_editor @@ -300,7 +321,12 @@ function getBase64(file) { debug: $.tablesort.DEBUG, asc: 'sorted ascending', desc: 'sorted descending', - compare: function(a, b) { + compare: function(a, b, settings) { + // Convert the values to numbers for proper sorting + if (!isNaN(parseFloat(a)) && !isNaN(parseFloat(b))) { + var a = parseFloat(a); + var b = parseFloat(b); + } if (a > b) { return 1; } else if (a < b) { diff --git a/src/static/riot/analytics/_competitions_usage.tag b/src/static/riot/analytics/_competitions_usage.tag new file mode 100644 index 000000000..353fb0684 --- /dev/null +++ b/src/static/riot/analytics/_competitions_usage.tag @@ -0,0 +1,574 @@ + +
+ + + +

{lastSnapshotDate ? "Last snapshot date: " + pretty_date(lastSnapshotDate) : "No snapshot has been taken yet"}

+
+
+ +
+
+
+ + +
+
+
+ +
+ + + + + + + + + + + + + + + + + + +
CompetitionOrganizerCreation dateDatasets
{ competitionUsage.title }{ competitionUsage.organizer }{ formatDate(competitionUsage.created_when) }{ formatSize(competitionUsage.datasets) }
+ + + + +
\ No newline at end of file diff --git a/src/static/riot/analytics/_usage_history.tag b/src/static/riot/analytics/_usage_history.tag new file mode 100644 index 000000000..6a75a5e53 --- /dev/null +++ b/src/static/riot/analytics/_usage_history.tag @@ -0,0 +1,193 @@ + +
+ + +

{lastSnapshotDate ? "Last snapshot date: " + pretty_date(lastSnapshotDate) : "No snapshot has been taken yet"}

+
+ +
+ +
+ + + + +
\ No newline at end of file diff --git a/src/static/riot/analytics/_users_usage.tag b/src/static/riot/analytics/_users_usage.tag new file mode 100644 index 000000000..b83a8a52c --- /dev/null +++ b/src/static/riot/analytics/_users_usage.tag @@ -0,0 +1,655 @@ + +
+ + + +

{lastSnapshotDate ? "Last snapshot date: " + pretty_date(lastSnapshotDate) : "No snapshot has been taken yet"}

+
+
+ +
+
+
+ + +
+
+
+
+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + +
UserJoined atDatasetsSubmissionsTotal
{ userUsage.name }{ formatDate(userUsage.date_joined) }{ formatSize(userUsage.datasets) }{ formatSize(userUsage.submissions) }{ formatSize(userUsage.datasets + userUsage.submissions) }
+ + + + +
\ No newline at end of file diff --git a/src/static/riot/analytics/analytics.tag b/src/static/riot/analytics/analytics.tag index a1b89f8a3..f0a8d99b4 100644 --- a/src/static/riot/analytics/analytics.tag +++ b/src/static/riot/analytics/analytics.tag @@ -1,14 +1,19 @@

Analytics

+ +

Date Range

-
-