class TestUploadTask(APITestCase):
    """End-to-end checks for the ``tasks:upload_task`` endpoint.

    Every test logs a user in, posts one of the zip fixtures stored in the
    ``upload_task_test_files`` directory next to this module (or nothing at
    all), and verifies both the HTTP status code and the text returned under
    the ``error`` / ``message`` key of the response payload.
    """

    def setUp(self):
        # Three accounts: a regular owner, one with zero quota, and a second
        # owner whose dataset must stay invisible to the first.
        self.user = UserFactory(username='user', password='password')
        self.user_low_quota = UserFactory(username='user_low_quota', password='password_low_quota', quota=0)
        self.user2 = UserFactory(username='user2', password='password2')

        # Fixed keys: the yaml files inside the zip fixtures refer to them.
        # Each dataset is stored on the attribute named after its type.
        owned_by_user = (
            ('ingestion_program', "96187a93-94ea-40a1-b394-af2e7e3edb2e"),
            ('scoring_program', "a0f80316-8c46-4c04-a5d4-6184904bdb69"),
            ('input_data', "6c3e6dde-d0fa-4c22-af66-030187dbfd4f"),
            ('reference_data', "c4179c3f-498c-486a-8ac5-1e194036a3ed"),
        )
        for data_type, data_key in owned_by_user:
            setattr(self, data_type, DataFactory(created_by=self.user, type=data_type, key=data_key))

        self.ingestion_program_from_user2 = DataFactory(
            created_by=self.user2,
            type='ingestion_program',
            key="f861a11c-36cb-4907-9f82-4aa609b4e822",
        )

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    def _login_default_user(self):
        """Authenticate the test client as ``self.user``."""
        self.client.login(username=self.user.username, password='password')

    def _post(self, payload):
        """POST ``payload`` as multipart form data to the upload endpoint."""
        return self.client.post(reverse('tasks:upload_task'), payload, format='multipart')

    def _post_fixture(self, fixture_name):
        """Upload the named zip fixture and return the response."""
        fixture_path = os.path.join(os.path.dirname(__file__), 'upload_task_test_files', fixture_name)
        with open(fixture_path, 'rb') as archive:
            return self._post({'file': archive})

    def _upload_as_default_user(self, fixture_name):
        """Log in as ``self.user`` and upload the named zip fixture."""
        self._login_default_user()
        return self._post_fixture(fixture_name)

    # ------------------------------------------------------------------
    # Rejected uploads
    # ------------------------------------------------------------------
    def test_file_not_uploaded(self):
        self._login_default_user()

        result = self._post({})
        assert result.status_code == status.HTTP_400_BAD_REQUEST
        assert result.data['error'] == "No attached file found, please try again!"

    def test_quota_not_enough(self):
        self.client.login(username=self.user_low_quota.username, password='password_low_quota')

        result = self._post_fixture('valid_task_with_files.zip')
        assert result.status_code == status.HTTP_507_INSUFFICIENT_STORAGE
        assert result.data['error'] == "Insufficient space! Please free up some space and try again. You can manage your files in the Resources page."

    def test_yaml_not_found_in_zip(self):
        result = self._upload_as_default_user('no_yaml.zip')
        assert result.status_code == status.HTTP_400_BAD_REQUEST
        assert result.data['error'] == "task.yaml not found in the zip file"

    def test_yaml_cannot_be_parsed(self):
        result = self._upload_as_default_user('invalid_yaml.zip')
        assert result.status_code == status.HTTP_400_BAD_REQUEST
        # The parser detail is appended after the prefix, so only the prefix is checked
        assert "Error parsing task.yaml:" in result.data['error']

    def test_yaml_missing_name(self):
        result = self._upload_as_default_user('missing_name.zip')
        assert result.status_code == status.HTTP_400_BAD_REQUEST
        assert result.data['error'] == "Missing: name, task must have a name"

    def test_yaml_missing_description(self):
        result = self._upload_as_default_user('missing_description.zip')
        assert result.status_code == status.HTTP_400_BAD_REQUEST
        assert result.data['error'] == "Missing: description, task must have a description"

    def test_yaml_missing_scoring_program(self):
        result = self._upload_as_default_user('missing_scoring_program.zip')
        assert result.status_code == status.HTTP_400_BAD_REQUEST
        assert result.data['error'] == "Missing: scoring_program, task must have a scoring_program"

    def test_dataset_not_belongs_to_user(self):
        # The key in this fixture is the one given to user2's dataset in setUp
        result = self._upload_as_default_user('invalid_ingestion_key.zip')
        assert result.status_code == status.HTTP_400_BAD_REQUEST
        assert result.data['error'] == "ingestion_program with key 'f861a11c-36cb-4907-9f82-4aa609b4e822' not found."

    def test_missing_key_and_zip_for_scoring_program(self):
        result = self._upload_as_default_user('scoring_program_missing_key_and_zip.zip')
        assert result.status_code == status.HTTP_400_BAD_REQUEST
        assert result.data['error'] == "scoring_program must have either a key or zip"

    def test_dataset_file_missing_in_zip(self):
        result = self._upload_as_default_user('missing_ingestion_zip.zip')
        assert result.status_code == status.HTTP_400_BAD_REQUEST
        assert result.data['error'] == "Dataset file 'iris-ingestion-program.zip' not found in the uploaded zip file."

    def test_dataset_file_not_zip(self):
        result = self._upload_as_default_user('invalid_ingestion_zip.zip')
        assert result.status_code == status.HTTP_400_BAD_REQUEST
        assert result.data['error'] == "Dataset file 'iris-ingestion-program.txt' should be a zip file."

    # ------------------------------------------------------------------
    # Accepted uploads
    # ------------------------------------------------------------------
    def test_task_created_successfully_with_keys(self):
        result = self._upload_as_default_user('valid_task_with_keys.zip')
        assert result.status_code == status.HTTP_201_CREATED
        assert result.data['message'] == "Task 'Iris Task' created successfully!"

    def test_task_created_successfully_with_zips(self):
        result = self._upload_as_default_user('valid_task_with_files.zip')
        assert result.status_code == status.HTTP_201_CREATED
        assert result.data['message'] == "Task 'Iris Task' created successfully!"
files /dev/null and b/src/apps/api/tests/upload_task_test_files/missing_scoring_program.zip differ diff --git a/src/apps/api/tests/upload_task_test_files/no_yaml.zip b/src/apps/api/tests/upload_task_test_files/no_yaml.zip new file mode 100644 index 000000000..76941c5eb Binary files /dev/null and b/src/apps/api/tests/upload_task_test_files/no_yaml.zip differ diff --git a/src/apps/api/tests/upload_task_test_files/scoring_program_missing_key_and_zip.zip b/src/apps/api/tests/upload_task_test_files/scoring_program_missing_key_and_zip.zip new file mode 100644 index 000000000..af8b5c176 Binary files /dev/null and b/src/apps/api/tests/upload_task_test_files/scoring_program_missing_key_and_zip.zip differ diff --git a/src/apps/api/tests/upload_task_test_files/valid_task_with_files.zip b/src/apps/api/tests/upload_task_test_files/valid_task_with_files.zip new file mode 100644 index 000000000..43fbbf0a2 Binary files /dev/null and b/src/apps/api/tests/upload_task_test_files/valid_task_with_files.zip differ diff --git a/src/apps/api/tests/upload_task_test_files/valid_task_with_keys.zip b/src/apps/api/tests/upload_task_test_files/valid_task_with_keys.zip new file mode 100644 index 000000000..acb5a2831 Binary files /dev/null and b/src/apps/api/tests/upload_task_test_files/valid_task_with_keys.zip differ diff --git a/src/apps/api/views/tasks.py b/src/apps/api/views/tasks.py index 71bfb59bd..5ed1a3ffa 100644 --- a/src/apps/api/views/tasks.py +++ b/src/apps/api/views/tasks.py @@ -1,6 +1,10 @@ +import io +import yaml +import zipfile +from django.core.files.uploadedfile import InMemoryUploadedFile from collections import defaultdict - from django.db.models import Q, OuterRef, Subquery +from django.db import transaction from rest_framework import status from rest_framework.decorators import action from rest_framework.exceptions import PermissionDenied @@ -14,6 +18,8 @@ from competitions.models import Submission, Phase from profiles.models import User from tasks.models import Task +from 
@action(detail=False, methods=('POST',))
def upload_task(self, request):
    """
    Create a task from an uploaded zip bundle.

    The bundle is read from the ``file`` entry of ``request.FILES`` and is
    built from the components of the task:
        - task.yaml (required)
        - ingestion_program.zip (optional)
        - scoring_program.zip (optional)
        - input_data.zip (optional)
        - reference_data.zip (optional)

    task.yaml has the following structure:
        name: Task Name
        description: Task Description
        is_public: true/false
        input_data:
            key: Your dataset key
        reference_data:
            key: Your dataset key
        scoring_program:
            zip: scoring_program.zip
        ingestion_program:
            zip: ingestion_program.zip

    Note:
        - A task.yaml on its own is enough when every dataset/program is
          referenced by the key of an existing one.
        - Keys and zips can be mixed, e.g. reuse uploaded input/reference
          data by key while shipping new ingestion/scoring programs as zips.
        - Everything can be shipped as zips without using any key.
        - When an entry has both ``key`` and ``zip``, the key wins and the
          zip is ignored.

    Responses:
        201 ``{"message": ...}`` when the task was created.
        400 ``{"error": ...}`` when no file is attached, the upload is not
            a valid zip, task.yaml is missing/unparsable/not a mapping, a
            required field is missing, or a dataset entry is invalid.
        507 ``{"error": ...}`` when the upload does not fit in the quota.
        500 ``{"error": ...}`` for any other unexpected failure.
    """
    # Function-scope import keeps this method self-contained; Django is
    # already a dependency of this module.
    from django.core.exceptions import ValidationError

    # -----------
    # Check File
    # -----------
    uploaded_file = request.FILES.get('file')
    if not uploaded_file:
        return Response({"error": "No attached file found, please try again!"}, status=status.HTTP_400_BAD_REQUEST)

    # ------------
    # Check Quota
    # ------------
    # NOTE(review): this compares used storage + upload size against the quota
    # directly, so it presumes all three are expressed in the same unit - confirm.
    storage_used = float(request.user.get_used_storage_space())
    quota = float(request.user.quota)
    if storage_used + uploaded_file.size > quota:
        return Response({'error': "Insufficient space! Please free up some space and try again. You can manage your files in the Resources page."}, status=status.HTTP_507_INSUFFICIENT_STORAGE)

    # ----------------------
    # Process Task zip file
    # ----------------------
    try:
        with zipfile.ZipFile(uploaded_file, 'r') as zip_file:
            # Listed once: used for task.yaml and for every dataset entry
            archive_members = set(zip_file.namelist())

            # ------------------
            # Process yaml file
            # ------------------
            if 'task.yaml' not in archive_members:
                return Response({"error": "task.yaml not found in the zip file"}, status=status.HTTP_400_BAD_REQUEST)

            with zip_file.open('task.yaml') as task_file:
                try:
                    task_data = yaml.safe_load(task_file)
                except yaml.YAMLError as e:
                    return Response({"error": f"Error parsing task.yaml: {str(e)}"}, status=status.HTTP_400_BAD_REQUEST)

            # ------------------
            # Yaml file checks
            # ------------------
            # safe_load gives None for an empty file and a list/scalar for
            # non-mapping documents; reject those before looking up fields.
            if not isinstance(task_data, dict):
                return Response({"error": "Error parsing task.yaml: expected a mapping of task fields"}, status=status.HTTP_400_BAD_REQUEST)

            required_fields = (
                ("name", "Missing: name, task must have a name"),
                ("description", "Missing: description, task must have a description"),
                (Data.SCORING_PROGRAM, "Missing: scoring_program, task must have a scoring_program"),
            )
            for field_name, missing_message in required_fields:
                if field_name not in task_data:
                    return Response({"error": missing_message}, status=status.HTTP_400_BAD_REQUEST)

            # ------------------------------
            # Process datasets and programs
            # ------------------------------
            def create_or_get_data(data_type, data_info):
                """Resolve one yaml entry to a ``Data`` row (or ``None``).

                ``data_info`` is the yaml value for ``data_type``: a mapping
                holding ``key`` (existing dataset owned by the requesting
                user) and/or ``zip`` (member of the uploaded archive).
                If both are present, the key is used and the zip ignored.
                Raises ``ValueError`` with a user-facing message on any
                invalid entry.
                """
                # e.g. "scoring_program: program.zip" instead of a mapping
                if data_info and not isinstance(data_info, dict):
                    raise ValueError(f"{data_type} must be a mapping with either a key or zip")

                key = data_info.get('key') if data_info else None
                zip_name = data_info.get('zip') if data_info else None

                if key:
                    # Only datasets of the right type owned by the user are eligible
                    try:
                        return Data.objects.get(key=key, created_by=request.user, type=data_type)
                    except (Data.DoesNotExist, ValidationError) as e:
                        # ValidationError: presumably raised for a malformed
                        # key value - TODO confirm against the Data model.
                        raise ValueError(f"{data_type} with key '{key}' not found.") from e

                if zip_name:
                    if zip_name not in archive_members:
                        raise ValueError(f"Dataset file '{zip_name}' not found in the uploaded zip file.")
                    if not zip_name.endswith(".zip"):
                        raise ValueError(f"Dataset file '{zip_name}' should be a zip file.")
                    try:
                        # The nested archive is read fully into memory and
                        # wrapped as an uploaded file for the data_file field.
                        with zip_file.open(zip_name) as data_zip_file:
                            file_content = data_zip_file.read()
                        data_file = InMemoryUploadedFile(
                            file=io.BytesIO(file_content),
                            field_name='data_file',
                            name=zip_name,
                            content_type='application/zip',
                            size=len(file_content),
                            charset=None
                        )
                        return Data.objects.create(
                            name=zip_name,
                            created_by=request.user,
                            data_file=data_file,
                            type=data_type
                        )
                    except zipfile.BadZipFile as e:
                        raise ValueError(f"{zip_name} is not a valid ZIP file.") from e
                    except Exception as e:
                        raise ValueError(f"Error processing {zip_name}: {str(e)}") from e

                # Neither key nor zip: only acceptable for optional entries
                if data_type == Data.SCORING_PROGRAM:
                    raise ValueError(f"{data_type} must have either a key or zip")
                return None

            # Atomic so that dataset rows created from zips are rolled back
            # if a later entry (or the task itself) fails.
            with transaction.atomic():
                task_kwargs = {
                    'name': task_data.get('name'),
                    'description': task_data.get('description'),
                    'created_by': request.user,
                    'is_public': task_data.get('is_public', False),
                    'ingestion_only_during_scoring': task_data.get('ingestion_only_during_scoring', False),
                }

                # Entries absent from the yaml resolve to None
                datasets_and_programs = (Data.INGESTION_PROGRAM, Data.SCORING_PROGRAM, Data.INPUT_DATA, Data.REFERENCE_DATA)
                for data_type in datasets_and_programs:
                    task_kwargs[data_type] = create_or_get_data(data_type=data_type, data_info=task_data.get(data_type, {}))

                task = Task.objects.create(**task_kwargs)

            return Response({"message": f"Task '{task.name}' created successfully!"}, status=status.HTTP_201_CREATED)

    except zipfile.BadZipFile:
        # The upload itself (not a nested dataset) is not a readable zip
        return Response({"error": "Uploaded file is not a valid zip file"}, status=status.HTTP_400_BAD_REQUEST)

    except ValueError as e:
        # Validation problems raised while resolving datasets/programs
        return Response({"error": str(e)}, status=status.HTTP_400_BAD_REQUEST)

    except Exception as e:
        # Anything unexpected
        return Response({"error": f"An error occurred while creating the task.\n {e}"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
import views +from api.views.tasks import TaskViewSet app_name = "tasks" urlpatterns = [ path('', views.TaskManagement.as_view(), name='task_management'), - path('/', views.TaskDetailView.as_view(), name='detail') + path('/', views.TaskDetailView.as_view(), name='detail'), + path('upload_task/', TaskViewSet.as_view({'post': 'upload_task'}), name='upload_task'), ] diff --git a/src/static/js/ours/client.js b/src/static/js/ours/client.js index 489a01273..c0addcc44 100644 --- a/src/static/js/ours/client.js +++ b/src/static/js/ours/client.js @@ -257,6 +257,30 @@ CODALAB.api = { create_task: (data) => { return CODALAB.api.request('POST', `${URLS.API}tasks/`, data) }, + upload_task: (data_file, progress_update_callback) => { + var form_data = new FormData() + form_data.append('file', data_file) + return $.ajax({ + type: 'POST', + url: URLS.API + 'tasks/upload_task/', + data: form_data, + processData: false, + contentType: false, + xhr: function () { + var xhr = new window.XMLHttpRequest(); + // Track upload progress + xhr.upload.addEventListener('progress', function (event) { + if (event.lengthComputable) { + var percent_complete = (event.loaded / event.total) * 100; + if (progress_update_callback) { + progress_update_callback(percent_complete); + } + } + }, false); + return xhr; + } + }); + }, share_task: (pk, data) => { return CODALAB.api.request('PATCH', `${URLS.API}tasks/${pk}/`, data) }, diff --git a/src/static/riot/tasks/management.tag b/src/static/riot/tasks/management.tag index 927bdad3b..83ceb68de 100644 --- a/src/static/riot/tasks/management.tag +++ b/src/static/riot/tasks/management.tag @@ -7,6 +7,9 @@ +
+ Upload Task +
Create Task
@@ -167,6 +170,38 @@ + + +