Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 140 additions & 1 deletion src/apps/api/tests/test_tasks.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os
from django.urls import reverse
from rest_framework.test import APITestCase
from rest_framework import status

from competitions.models import Submission
from factories import UserFactory, CompetitionFactory, TaskFactory, SolutionFactory, PhaseFactory, SubmissionFactory
from factories import UserFactory, CompetitionFactory, TaskFactory, SolutionFactory, PhaseFactory, SubmissionFactory, DataFactory


class TestTasks(APITestCase):
Expand Down Expand Up @@ -47,3 +49,140 @@ def test_task_shown_as_validated_properly(self):
resp = self.client.get(url)
assert resp.status_code == 200
assert not resp.data["validated"]


class TestUploadTask(APITestCase):
def setUp(self):
self.user = UserFactory(username='user', password='password')
self.user_low_quota = UserFactory(username='user_low_quota', password='password_low_quota', quota=0)
self.user2 = UserFactory(username='user2', password='password2')

uuid1 = "96187a93-94ea-40a1-b394-af2e7e3edb2e"
uuid2 = "a0f80316-8c46-4c04-a5d4-6184904bdb69"
uuid3 = "6c3e6dde-d0fa-4c22-af66-030187dbfd4f"
uuid4 = "c4179c3f-498c-486a-8ac5-1e194036a3ed"
uuid5 = "f861a11c-36cb-4907-9f82-4aa609b4e822"

self.ingestion_program = DataFactory(created_by=self.user, type='ingestion_program', key=uuid1)
self.scoring_program = DataFactory(created_by=self.user, type='scoring_program', key=uuid2)
self.input_data = DataFactory(created_by=self.user, type='input_data', key=uuid3)
self.reference_data = DataFactory(created_by=self.user, type='reference_data', key=uuid4)

self.ingestion_program_from_user2 = DataFactory(created_by=self.user2, type='ingestion_program', key=uuid5)

def test_file_not_uploaded(self):
self.client.login(username=self.user.username, password='password')

response = self.client.post(reverse('tasks:upload_task'), {}, format='multipart')
assert response.status_code == status.HTTP_400_BAD_REQUEST
assert "No attached file found, please try again!" == response.data['error']

def test_quota_not_enough(self):
self.client.login(username=self.user_low_quota.username, password='password_low_quota')

file_path = os.path.join(os.path.dirname(__file__), 'upload_task_test_files', 'valid_task_with_files.zip')
with open(file_path, 'rb') as zip_file:
response = self.client.post(reverse('tasks:upload_task'), {'file': zip_file}, format='multipart')
assert response.status_code == status.HTTP_507_INSUFFICIENT_STORAGE
assert "Insufficient space! Please free up some space and try again. You can manage your files in the Resources page." == response.data['error']

def test_yaml_not_found_in_zip(self):
self.client.login(username=self.user.username, password='password')

file_path = os.path.join(os.path.dirname(__file__), 'upload_task_test_files', 'no_yaml.zip')
with open(file_path, 'rb') as zip_file:
response = self.client.post(reverse('tasks:upload_task'), {'file': zip_file}, format='multipart')
assert response.status_code == status.HTTP_400_BAD_REQUEST
assert "task.yaml not found in the zip file" == response.data['error']

def test_yaml_cannot_be_parsed(self):
self.client.login(username=self.user.username, password='password')

file_path = os.path.join(os.path.dirname(__file__), 'upload_task_test_files', 'invalid_yaml.zip')
with open(file_path, 'rb') as zip_file:
response = self.client.post(reverse('tasks:upload_task'), {'file': zip_file}, format='multipart')
assert response.status_code == status.HTTP_400_BAD_REQUEST
assert "Error parsing task.yaml:" in response.data['error']

def test_yaml_missing_name(self):
self.client.login(username=self.user.username, password='password')

file_path = os.path.join(os.path.dirname(__file__), 'upload_task_test_files', 'missing_name.zip')
with open(file_path, 'rb') as zip_file:
response = self.client.post(reverse('tasks:upload_task'), {'file': zip_file}, format='multipart')
assert response.status_code == status.HTTP_400_BAD_REQUEST
assert "Missing: name, task must have a name" == response.data['error']

def test_yaml_missing_description(self):
self.client.login(username=self.user.username, password='password')

file_path = os.path.join(os.path.dirname(__file__), 'upload_task_test_files', 'missing_description.zip')
with open(file_path, 'rb') as zip_file:
response = self.client.post(reverse('tasks:upload_task'), {'file': zip_file}, format='multipart')
assert response.status_code == status.HTTP_400_BAD_REQUEST
assert "Missing: description, task must have a description" == response.data['error']

def test_yaml_missing_scoring_program(self):
self.client.login(username=self.user.username, password='password')

file_path = os.path.join(os.path.dirname(__file__), 'upload_task_test_files', 'missing_scoring_program.zip')
with open(file_path, 'rb') as zip_file:
response = self.client.post(reverse('tasks:upload_task'), {'file': zip_file}, format='multipart')
assert response.status_code == status.HTTP_400_BAD_REQUEST
assert "Missing: scoring_program, task must have a scoring_program" == response.data['error']

def test_dataset_not_belongs_to_user(self):
self.client.login(username=self.user.username, password='password')

file_path = os.path.join(os.path.dirname(__file__), 'upload_task_test_files', 'invalid_ingestion_key.zip')
with open(file_path, 'rb') as zip_file:
response = self.client.post(reverse('tasks:upload_task'), {'file': zip_file}, format='multipart')
assert response.status_code == status.HTTP_400_BAD_REQUEST
assert "ingestion_program with key 'f861a11c-36cb-4907-9f82-4aa609b4e822' not found." == response.data['error']

def test_missing_key_and_zip_for_scoring_program(self):
self.client.login(username=self.user.username, password='password')

file_path = os.path.join(os.path.dirname(__file__), 'upload_task_test_files', 'scoring_program_missing_key_and_zip.zip')
with open(file_path, 'rb') as zip_file:
response = self.client.post(reverse('tasks:upload_task'), {'file': zip_file}, format='multipart')
assert response.status_code == status.HTTP_400_BAD_REQUEST
assert "scoring_program must have either a key or zip" == response.data['error']

def test_dataset_file_missing_in_zip(self):
self.client.login(username=self.user.username, password='password')

file_path = os.path.join(os.path.dirname(__file__), 'upload_task_test_files', 'missing_ingestion_zip.zip')
with open(file_path, 'rb') as zip_file:
response = self.client.post(reverse('tasks:upload_task'), {'file': zip_file}, format='multipart')
assert response.status_code == status.HTTP_400_BAD_REQUEST
assert "Dataset file 'iris-ingestion-program.zip' not found in the uploaded zip file." == response.data['error']

def test_dataset_file_not_zip(self):
self.client.login(username=self.user.username, password='password')

file_path = os.path.join(os.path.dirname(__file__), 'upload_task_test_files', 'invalid_ingestion_zip.zip')
with open(file_path, 'rb') as zip_file:
response = self.client.post(reverse('tasks:upload_task'), {'file': zip_file}, format='multipart')
assert response.status_code == status.HTTP_400_BAD_REQUEST
assert "Dataset file 'iris-ingestion-program.txt' should be a zip file." == response.data['error']

def test_task_created_successfully_with_keys(self):
self.client.login(username=self.user.username, password='password')

file_path = os.path.join(os.path.dirname(__file__), 'upload_task_test_files', 'valid_task_with_keys.zip')

with open(file_path, 'rb') as zip_file:
response = self.client.post(reverse('tasks:upload_task'), {'file': zip_file}, format='multipart')
assert response.status_code == status.HTTP_201_CREATED
assert "Task 'Iris Task' created successfully!" == response.data['message']

def test_task_created_successfully_with_zips(self):
self.client.login(username=self.user.username, password='password')

file_path = os.path.join(os.path.dirname(__file__), 'upload_task_test_files', 'valid_task_with_files.zip')

with open(file_path, 'rb') as zip_file:
response = self.client.post(reverse('tasks:upload_task'), {'file': zip_file}, format='multipart')
assert response.status_code == status.HTTP_201_CREATED
assert "Task 'Iris Task' created successfully!" == response.data['message']
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
193 changes: 192 additions & 1 deletion src/apps/api/views/tasks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import io
import yaml
import zipfile
from django.core.files.uploadedfile import InMemoryUploadedFile
from collections import defaultdict

from django.db.models import Q, OuterRef, Subquery
from django.db import transaction
from rest_framework import status
from rest_framework.decorators import action
from rest_framework.exceptions import PermissionDenied
Expand All @@ -14,6 +18,8 @@
from competitions.models import Submission, Phase
from profiles.models import User
from tasks.models import Task
from datasets.models import Data
from utils.data import pretty_bytes


# TODO:// TaskViewSimple uses simple serializer from tasks, which exists purely for the use of Select2 on phase modal
Expand Down Expand Up @@ -137,6 +143,191 @@ def delete_many(self, request):
status=status.HTTP_400_BAD_REQUEST if errors else status.HTTP_200_OK
)

@action(detail=False, methods=('POST',))
def upload_task(self, request):

"""
This function is used to upload a task. To upload a task, a zip file is created from the components of the task:
- task.yaml (required)
- ingestion_program.zip (optional)
- scoring_program.zip (optional)
- input_data.zip (optional)
- reference_data.zip (optional)

task.yaml has the following structure:
name: Task Name
description: Task Description
is_public: true/false
input_data:
key: Your dataset key
reference_data:
key: Your dataset key
scoring_program:
zip: scoring_program.zip
ingestion_program:
zip: ingestion_program.zip

Note:
- You can upload a task.yaml file without any other files if you want to create a task from existing datasets/programs using keys
- You can use a mix of key and zip to upload a task e.g. to use already uploaded input data and reference data but upload new ingestion and scoring programs
- You can choose to upload all the datasets and programs without using the key

"""

# Access uploaded file
uploaded_file = request.FILES.get('file')

# -----------
# Check File
# -----------

# Check if a file is provided
if not uploaded_file:
return Response({"error": "No attached file found, please try again!"}, status=status.HTTP_400_BAD_REQUEST)

# ------------
# Check Quota
# ------------

# Check if user has enough quota to proceed
storage_used = float(request.user.get_used_storage_space())
quota = float(request.user.quota)
file_size = uploaded_file.size
if storage_used + file_size > quota:
file_size = pretty_bytes(file_size)
return Response({'error': "Insufficient space! Please free up some space and try again. You can manage your files in the Resources page."}, status=status.HTTP_507_INSUFFICIENT_STORAGE)

# ----------------------
# Process Task zip file
# ----------------------
try:
# Process the zip file
with zipfile.ZipFile(uploaded_file, 'r') as zip_file:

# ------------------
# Process yaml file
# ------------------

# Check if 'task.yaml' exists
if 'task.yaml' not in zip_file.namelist():
return Response({"error": "task.yaml not found in the zip file"}, status=status.HTTP_400_BAD_REQUEST)

# Read the task.yaml file
with zip_file.open('task.yaml') as task_file:
try:
task_data = yaml.safe_load(task_file)
except yaml.YAMLError as e:
return Response({"error": f"Error parsing task.yaml: {str(e)}"}, status=status.HTTP_400_BAD_REQUEST)

# ------------------
# Yaml file checks
# ------------------

# Check if task has a name
if "name" not in task_data:
return Response({"error": f"Missing: name, task must have a name"}, status=status.HTTP_400_BAD_REQUEST)

# Check if task has a description
if "description" not in task_data:
return Response({"error": f"Missing: description, task must have a description"}, status=status.HTTP_400_BAD_REQUEST)

# Check if task has a scoring program
if Data.SCORING_PROGRAM not in task_data:
return Response({"error": f"Missing: scoring_program, task must have a scoring_program"}, status=status.HTTP_400_BAD_REQUEST)

# ------------------------------
# Process datasets and programs
# ------------------------------

# Begin atomic transaction to ensure rollback if any error occurs
with transaction.atomic():
# Initialize task fields
task_kwargs = {
'name': task_data.get('name'),
'description': task_data.get('description'),
'created_by': request.user,
'is_public': task_data.get('is_public', False),
'ingestion_only_during_scoring': task_data.get('ingestion_only_during_scoring', False),
}

# Function to create or get dataset from either zip or key
# If both key and zip are present, key is used and zip is ignored
def create_or_get_data(data_type, data_info):
# Process dataset/program if data_info is not empty i.e. provided in the yaml file
if data_info:
key = data_info.get('key', None)
zip_name = data_info.get('zip', None)

if key:
# Retrieve dataset by key if provided
try:
return Data.objects.get(key=key, created_by=request.user, type=data_type)
except Data.DoesNotExist:
raise ValueError(f"{data_type} with key '{key}' not found.")
elif zip_name:
# Check that the zip file exists in the main zip and create dataset
if zip_name not in zip_file.namelist():
raise ValueError(f"Dataset file '{zip_name}' not found in the uploaded zip file.")
if not zip_name.endswith(".zip"):
raise ValueError(f"Dataset file '{zip_name}' should be a zip file.")
try:
# Createa a new dataset using the zip file for dataset/program
with zip_file.open(zip_name) as data_zip_file:
# Read file content
file_content = data_zip_file.read()

# Get the file size in bytes
file_size = len(file_content)

# Create a BytesIO object for the dataset file
data_file = InMemoryUploadedFile(
file=io.BytesIO(file_content),
field_name='data_file',
name=zip_name,
content_type='application/zip',
size=file_size,
charset=None
)
# Create dataset
dataset = Data.objects.create(
name=zip_name,
created_by=request.user,
data_file=data_file,
type=data_type
)
return dataset
except zipfile.BadZipFile:
raise ValueError(f"{zip_name} is not a valid ZIP file.")
except Exception as e:
raise ValueError(f"Error processing {zip_name}: {str(e)}")

# For scoring program key or zip is required because task must have a scoring program
if data_type == Data.SCORING_PROGRAM:
raise ValueError(f"{data_type} must have either a key or zip")
else:
return None

# Create datasets based on task.yaml contents
# Loop over all possible datasets and programs and create or get that dataset.
# If a dataset is not provided in the yaml, use None value for it
datasets_and_programs = [Data.INGESTION_PROGRAM, Data.SCORING_PROGRAM, Data.INPUT_DATA, Data.REFERENCE_DATA]
for dataset in datasets_and_programs:
task_kwargs[dataset] = create_or_get_data(data_type=dataset, data_info=task_data.get(dataset, {}))

# Create the Task using the task kwrgs created from yaml and datasets/programs
task = Task.objects.create(**task_kwargs)

# Return a success message
return Response({"message": f"Task '{task.name}' created successfully!"}, status=status.HTTP_201_CREATED)

except ValueError as e:
# catch all value errors here
return Response({"error": str(e)}, status=status.HTTP_400_BAD_REQUEST)

except Exception as e:
# catch all other unexpected errors here
return Response({"error": f"An error occurred while creating the task.\n {e}"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

# This function allows for multiple errors when deleting multiple objects
def check_delete_permissions(self, request, task):
if request.user != task.created_by:
Expand Down
Loading