diff --git a/src/apps/api/serializers/competitions.py b/src/apps/api/serializers/competitions.py index a0fb14fda..c1919d847 100644 --- a/src/apps/api/serializers/competitions.py +++ b/src/apps/api/serializers/competitions.py @@ -10,6 +10,7 @@ from api.serializers.submissions import SubmissionScoreSerializer from api.serializers.tasks import PhaseTaskInstanceSerializer from competitions.models import Competition, Phase, Page, CompetitionCreationTaskStatus, CompetitionParticipant, CompetitionWhiteListEmail +from datasets.access import user_can_access_competition_phase_resource from forums.models import Forum from leaderboards.models import Leaderboard from profiles.models import User @@ -104,8 +105,8 @@ def validate_leaderboard(self, value): class PhaseDetailSerializer(serializers.ModelSerializer): tasks = PhaseTaskInstanceSerializer(source='task_instances', many=True) status = serializers.SerializerMethodField() - public_data = DataDetailSerializer(read_only=True) - starting_kit = DataDetailSerializer(read_only=True) + public_data = serializers.SerializerMethodField() + starting_kit = serializers.SerializerMethodField() used_submissions_per_day = serializers.SerializerMethodField() used_submissions_per_person = serializers.SerializerMethodField() @@ -201,6 +202,20 @@ def get_used_submissions_per_person(self, obj): return total_submission_count return 0 + def get_public_data(self, obj): + request = self.context.get("request") + user = getattr(request, "user", None) + if obj.public_data and user_can_access_competition_phase_resource(user, obj): + return DataDetailSerializer(obj.public_data).data + return None + + def get_starting_kit(self, obj): + request = self.context.get("request") + user = getattr(request, "user", None) + if obj.starting_kit and user_can_access_competition_phase_resource(user, obj): + return DataDetailSerializer(obj.starting_kit).data + return None + class PhaseUpdateSerializer(PhaseSerializer): tasks = PhaseTaskInstanceSerializer(source='task_instances', many=True) diff --git a/src/apps/api/serializers/tasks.py b/src/apps/api/serializers/tasks.py index 639f6e594..4f7126dd7 100644 --- a/src/apps/api/serializers/tasks.py +++ b/src/apps/api/serializers/tasks.py @@ -3,6 +3,7 @@ from api.mixins import DefaultUserCreateMixin from api.serializers.datasets import DataDetailSerializer, DataSimpleSerializer from competitions.models import PhaseTaskInstance, Phase +from datasets.access import user_can_access_task_dataset, user_can_access_task_solution from datasets.models import Data from tasks.models import Task, Solution from competitions.models import Competition @@ -212,31 +213,31 @@ class Meta: def get_solutions(self, instance): qs = instance.task.solutions.all() + request = self.context.get("request") + user = getattr(request, "user", None) + qs = [ + solution for solution in qs + if user_can_access_task_solution(user, instance.phase, solution) + ] return SolutionSerializer(qs, many=True).data def get_public_datasets(self, instance): + request = self.context.get("request") + user = getattr(request, "user", None) + + datasets = [ + instance.task.input_data, + instance.task.reference_data, + instance.task.ingestion_program, + instance.task.scoring_program, + ] + datasets = [ + dataset for dataset in datasets + if user_can_access_task_dataset(user, instance.phase, dataset) + ] - input_data = instance.task.input_data - reference_data = instance.task.reference_data - ingestion_program = instance.task.ingestion_program - scoring_program = instance.task.scoring_program - - # Some tasks may not have input data, reference data and ingestion program - # Checking all the datasets and programs and adding them to dataset_list_ids - dataset_list_ids = [] - if input_data: - dataset_list_ids.append(input_data.id) - if reference_data: - dataset_list_ids.append(reference_data.id) - if ingestion_program: - dataset_list_ids.append(ingestion_program.id) - if scoring_program: - dataset_list_ids.append(scoring_program.id) - - # Serializing the datasets try: - qs = Data.objects.filter(id__in=dataset_list_ids) - return DataDetailSerializer(qs, many=True).data + return DataDetailSerializer(datasets, many=True).data except Exception: # No datasets or programs to return return [] diff --git a/src/apps/api/tests/test_competitions.py b/src/apps/api/tests/test_competitions.py index 2c96e78b0..4122a1f68 100644 --- a/src/apps/api/tests/test_competitions.py +++ b/src/apps/api/tests/test_competitions.py @@ -4,13 +4,16 @@ from zipfile import ZipFile from io import StringIO, BytesIO from unittest import mock +from django.contrib.auth.models import AnonymousUser from django.urls import reverse -from rest_framework.test import APITestCase +from rest_framework.test import APITestCase, APIRequestFactory -from api.serializers.competitions import CompetitionSerializer +from api.serializers.competitions import CompetitionSerializer, PhaseDetailSerializer +from api.serializers.tasks import PhaseTaskInstanceSerializer from competitions.models import CompetitionParticipant, Submission, Competition from factories import UserFactory, CompetitionFactory, CompetitionParticipantFactory, PhaseFactory, LeaderboardFactory, \ - ColumnFactory, SubmissionFactory, SubmissionScoreFactory, TaskFactory + ColumnFactory, SubmissionFactory, SubmissionScoreFactory, TaskFactory, DataFactory, SolutionFactory +from datasets.models import Data class CompetitionTests(APITestCase): @@ -347,3 +350,138 @@ def test_competition_fact_sheet_bad_question_type(self): } competition_serializer = CompetitionSerializer(data=new_comp_data) assert not competition_serializer.is_valid() + + +class CompetitionTaskDatasetVisibilityTests(APITestCase): + def setUp(self): + self.factory = APIRequestFactory() + self.creator = UserFactory(username="creator-datasets") + self.organizer = UserFactory(username="organizer-datasets") + self.participant = UserFactory(username="participant-datasets") + + self.competition = CompetitionFactory( + created_by=self.creator, + logo=None, + published=True, + make_input_data_available=True, + make_programs_available=True, + ) + self.competition.collaborators.add(self.organizer) + + CompetitionParticipantFactory( + user=self.participant, + competition=self.competition, + status=CompetitionParticipant.APPROVED, + ) + + hidden_input = DataFactory(created_by=self.creator, type=Data.INPUT_DATA) + hidden_reference = DataFactory(created_by=self.creator, type=Data.REFERENCE_DATA) + hidden_ingestion = DataFactory(created_by=self.creator, type=Data.INGESTION_PROGRAM) + hidden_scoring = DataFactory(created_by=self.creator, type=Data.SCORING_PROGRAM) + hidden_public_data = DataFactory(created_by=self.creator, type=Data.PUBLIC_DATA) + hidden_starting_kit = DataFactory(created_by=self.creator, type=Data.STARTING_KIT) + hidden_solution_data = DataFactory(created_by=self.creator, type=Data.SOLUTION) + visible_input = DataFactory(created_by=self.creator, type=Data.INPUT_DATA) + visible_reference = DataFactory(created_by=self.creator, type=Data.REFERENCE_DATA) + visible_ingestion = DataFactory(created_by=self.creator, type=Data.INGESTION_PROGRAM) + visible_scoring = DataFactory(created_by=self.creator, type=Data.SCORING_PROGRAM) + visible_public_data = DataFactory(created_by=self.creator, type=Data.PUBLIC_DATA) + visible_starting_kit = DataFactory(created_by=self.creator, type=Data.STARTING_KIT) + visible_solution_data = DataFactory(created_by=self.creator, type=Data.SOLUTION) + + hidden_solution = SolutionFactory(data=hidden_solution_data) + visible_solution = SolutionFactory(data=visible_solution_data) + + self.hidden_task = TaskFactory( + created_by=self.creator, + input_data=hidden_input, + reference_data=hidden_reference, + ingestion_program=hidden_ingestion, + scoring_program=hidden_scoring, + solutions=[hidden_solution], + ) + self.visible_task = TaskFactory( + created_by=self.creator, + input_data=visible_input, + reference_data=visible_reference, + ingestion_program=visible_ingestion, + scoring_program=visible_scoring, + solutions=[visible_solution], + ) + + self.hidden_phase = PhaseFactory( + competition=self.competition, + leaderboard=LeaderboardFactory(hidden=True), + hide_output=True, + index=0, + public_data=hidden_public_data, + starting_kit=hidden_starting_kit, + tasks=[self.hidden_task], + ) + self.visible_phase = PhaseFactory( + competition=self.competition, + leaderboard=LeaderboardFactory(hidden=False), + index=1, + public_data=visible_public_data, + starting_kit=visible_starting_kit, + tasks=[self.visible_task], + ) + + self.hidden_task_instance = self.hidden_phase.task_instances.get(task=self.hidden_task) + self.visible_task_instance = self.visible_phase.task_instances.get(task=self.visible_task) + + def _get_public_dataset_types(self, task_instance, user=None): + request = self.factory.get("/") + request.user = user or AnonymousUser() + serializer = PhaseTaskInstanceSerializer(task_instance, context={"request": request}) + return {dataset["type"] for dataset in serializer.data["public_datasets"]} + + def _get_solution_names(self, task_instance, user=None): + request = self.factory.get("/") + request.user = user or AnonymousUser() + serializer = PhaseTaskInstanceSerializer(task_instance, context={"request": request}) + return {solution["name"] for solution in serializer.data["solutions"]} + + def _serialize_phase(self, phase, user=None): + request = self.factory.get("/") + request.user = user or AnonymousUser() + return PhaseDetailSerializer(phase, context={"request": request}).data + + def test_anonymous_users_do_not_receive_hidden_phase_task_datasets(self): + self.assertEqual(self._get_public_dataset_types(self.hidden_task_instance), set()) + + def test_approved_participants_do_not_receive_hidden_phase_task_datasets(self): + self.assertEqual(self._get_public_dataset_types(self.hidden_task_instance, self.participant), set()) + + def test_approved_participants_only_receive_allowed_visible_phase_task_datasets(self): + self.assertEqual( + self._get_public_dataset_types(self.visible_task_instance, self.participant), + {Data.INPUT_DATA, Data.INGESTION_PROGRAM, Data.SCORING_PROGRAM}, + ) + + def test_organizers_receive_hidden_phase_task_datasets(self): + self.assertEqual( + self._get_public_dataset_types(self.hidden_task_instance, self.organizer), + {Data.INPUT_DATA, Data.REFERENCE_DATA, Data.INGESTION_PROGRAM, Data.SCORING_PROGRAM}, + ) + + def test_approved_participants_do_not_receive_hidden_phase_solutions(self): + self.assertEqual(self._get_solution_names(self.hidden_task_instance, self.participant), set()) + + def test_approved_participants_receive_visible_phase_solutions(self): + self.assertEqual(len(self._get_solution_names(self.visible_task_instance, self.participant)), 1) + + def test_approved_participants_do_not_receive_hidden_phase_assets(self): + phase_data = self._serialize_phase(self.hidden_phase, self.participant) + self.assertIsNone(phase_data["public_data"]) + self.assertIsNone(phase_data["starting_kit"]) + + def test_approved_participants_receive_visible_phase_assets(self): + phase_data = self._serialize_phase(self.visible_phase, self.participant) + self.assertEqual(phase_data["public_data"]["type"], Data.PUBLIC_DATA) + self.assertEqual(phase_data["starting_kit"]["type"], Data.STARTING_KIT) + + def test_organizers_receive_hidden_phase_assets(self): + phase_data = self._serialize_phase(self.hidden_phase, self.organizer) + self.assertEqual(phase_data["public_data"]["type"], Data.PUBLIC_DATA) + self.assertEqual(phase_data["starting_kit"]["type"], Data.STARTING_KIT) diff --git a/src/apps/api/tests/test_datasets.py b/src/apps/api/tests/test_datasets.py index 8f676d90a..2dbf79683 100644 --- a/src/apps/api/tests/test_datasets.py +++ b/src/apps/api/tests/test_datasets.py @@ -7,10 +7,14 @@ UserFactory, DataFactory, CompetitionFactory, + CompetitionParticipantFactory, PhaseFactory, + LeaderboardFactory, TaskFactory, - SubmissionFactory + SubmissionFactory, + SolutionFactory, ) +from competitions.models import CompetitionParticipant from utils.data import pretty_bytes, gb_to_bytes from unittest.mock import patch @@ -331,6 +335,170 @@ def test_cannot_create_dataset_unauthenticated(self): self.assertEqual(resp.status_code, 403) +class CompetitionDatasetDownloadAccessTests(TestCase): + def setUp(self): + self.creator = UserFactory(username="creator-downloads") + self.organizer = UserFactory(username="organizer-downloads") + self.participant = UserFactory(username="participant-downloads") + self.client.force_login(self.creator) + + self.competition = CompetitionFactory( + created_by=self.creator, + logo=None, + published=True, + make_input_data_available=True, + make_programs_available=True, + ) + self.competition.collaborators.add(self.organizer) + + CompetitionParticipantFactory( + user=self.participant, + competition=self.competition, + status=CompetitionParticipant.APPROVED, + ) + + self.hidden_input = DataFactory(created_by=self.creator, type=Data.INPUT_DATA) + self.hidden_reference = DataFactory(created_by=self.creator, type=Data.REFERENCE_DATA) + self.hidden_public_data = DataFactory(created_by=self.creator, type=Data.PUBLIC_DATA) + self.hidden_starting_kit = DataFactory(created_by=self.creator, type=Data.STARTING_KIT) + self.hidden_solution_data = DataFactory(created_by=self.creator, type=Data.SOLUTION) + self.visible_input = DataFactory(created_by=self.creator, type=Data.INPUT_DATA) + self.visible_public_data = DataFactory(created_by=self.creator, type=Data.PUBLIC_DATA) + self.visible_solution_data = DataFactory(created_by=self.creator, type=Data.SOLUTION) + + hidden_solution = SolutionFactory(data=self.hidden_solution_data) + visible_solution = SolutionFactory(data=self.visible_solution_data) + + hidden_task = TaskFactory( + created_by=self.creator, + input_data=self.hidden_input, + reference_data=self.hidden_reference, + solutions=[hidden_solution], + ) + visible_task = TaskFactory( + created_by=self.creator, + input_data=self.visible_input, + solutions=[visible_solution], + ) + + PhaseFactory( + competition=self.competition, + leaderboard=LeaderboardFactory(hidden=True), + hide_output=True, + index=0, + public_data=self.hidden_public_data, + starting_kit=self.hidden_starting_kit, + tasks=[hidden_task], + ) + PhaseFactory( + competition=self.competition, + leaderboard=LeaderboardFactory(hidden=False), + index=1, + public_data=self.visible_public_data, + tasks=[visible_task], + ) + + @patch("datasets.views.make_url_sassy") + def test_anonymous_user_cannot_download_hidden_phase_input_data_by_key(self, mock_make_url_sassy): + self.client.logout() + + response = self.client.get(reverse("datasets:download", args=[self.hidden_input.key])) + + self.assertEqual(response.status_code, 404) + mock_make_url_sassy.assert_not_called() + + @patch("datasets.views.make_url_sassy") + def test_approved_participant_cannot_download_hidden_phase_reference_data_by_key(self, mock_make_url_sassy): + self.client.force_login(self.participant) + + response = self.client.get(reverse("datasets:download", args=[self.hidden_reference.key])) + + self.assertEqual(response.status_code, 404) + mock_make_url_sassy.assert_not_called() + + @patch("datasets.views.make_url_sassy") + def test_approved_participant_cannot_download_hidden_phase_public_data_by_key(self, mock_make_url_sassy): + self.client.force_login(self.participant) + + response = self.client.get(reverse("datasets:download", args=[self.hidden_public_data.key])) + + self.assertEqual(response.status_code, 404) + mock_make_url_sassy.assert_not_called() + + @patch("datasets.views.make_url_sassy") + def test_approved_participant_cannot_download_hidden_phase_starting_kit_by_key(self, mock_make_url_sassy): + self.client.force_login(self.participant) + + response = self.client.get(reverse("datasets:download", args=[self.hidden_starting_kit.key])) + + self.assertEqual(response.status_code, 404) + mock_make_url_sassy.assert_not_called() + + @patch("datasets.views.make_url_sassy") + def test_approved_participant_cannot_download_hidden_phase_solution_by_key(self, mock_make_url_sassy): + self.client.force_login(self.participant) + + response = self.client.get(reverse("datasets:download", args=[self.hidden_solution_data.key])) + + self.assertEqual(response.status_code, 404) + mock_make_url_sassy.assert_not_called() + + @patch("datasets.views.make_url_sassy") + def test_approved_participant_can_download_visible_input_data_by_key(self, mock_make_url_sassy): + self.client.force_login(self.participant) + mock_make_url_sassy.return_value = "http://codebench-storage/visible_input.zip" + + response = self.client.get(reverse("datasets:download", args=[self.visible_input.key])) + + self.assertEqual(response.status_code, 302) + self.assertEqual(response["Location"], "http://codebench-storage/visible_input.zip") + mock_make_url_sassy.assert_called_once() + + @patch("datasets.views.make_url_sassy") + def test_approved_participant_can_download_visible_public_data_by_key(self, mock_make_url_sassy): + self.client.force_login(self.participant) + mock_make_url_sassy.return_value = "http://codebench-storage/visible_public_data.zip" + + response = self.client.get(reverse("datasets:download", args=[self.visible_public_data.key])) + + self.assertEqual(response.status_code, 302) + self.assertEqual(response["Location"], "http://codebench-storage/visible_public_data.zip") + mock_make_url_sassy.assert_called_once() + + @patch("datasets.views.make_url_sassy") + def test_approved_participant_can_download_visible_solution_by_key(self, mock_make_url_sassy): + self.client.force_login(self.participant) + mock_make_url_sassy.return_value = "http://codebench-storage/visible_solution.zip" + + response = self.client.get(reverse("datasets:download", args=[self.visible_solution_data.key])) + + self.assertEqual(response.status_code, 302) + self.assertEqual(response["Location"], "http://codebench-storage/visible_solution.zip") + mock_make_url_sassy.assert_called_once() + + @patch("datasets.views.make_url_sassy") + def test_organizer_can_download_hidden_phase_reference_data_by_key(self, mock_make_url_sassy): + self.client.force_login(self.organizer) + mock_make_url_sassy.return_value = "http://codebench-storage/hidden_reference.zip" + + response = self.client.get(reverse("datasets:download", args=[self.hidden_reference.key])) + + self.assertEqual(response.status_code, 302) + self.assertEqual(response["Location"], "http://codebench-storage/hidden_reference.zip") + mock_make_url_sassy.assert_called_once() + + @patch("datasets.views.make_url_sassy") + def test_organizer_can_download_hidden_phase_public_data_by_key(self, mock_make_url_sassy): + self.client.force_login(self.organizer) + mock_make_url_sassy.return_value = "http://codebench-storage/hidden_public_data.zip" + + response = self.client.get(reverse("datasets:download", args=[self.hidden_public_data.key])) + + self.assertEqual(response.status_code, 302) + self.assertEqual(response["Location"], "http://codebench-storage/hidden_public_data.zip") + mock_make_url_sassy.assert_called_once() + + class DatasetDeleteTests(APITestCase): def setUp(self): self.user = UserFactory(username='user', password='user') diff --git a/src/apps/datasets/access.py b/src/apps/datasets/access.py new file mode 100644 index 000000000..f2da0ba1d --- /dev/null +++ b/src/apps/datasets/access.py @@ -0,0 +1,140 @@ +from competitions.models import CompetitionParticipant, Phase +from datasets.models import Data + + +TASK_DATASET_PHASE_LOOKUPS = { + Data.INPUT_DATA: "tasks__input_data", + Data.REFERENCE_DATA: "tasks__reference_data", + Data.INGESTION_PROGRAM: "tasks__ingestion_program", + Data.SCORING_PROGRAM: "tasks__scoring_program", +} + +TASK_DATASET_COMPETITION_FLAGS = { + Data.INPUT_DATA: "make_input_data_available", + Data.INGESTION_PROGRAM: "make_programs_available", + Data.SCORING_PROGRAM: "make_programs_available", +} + + +def phase_is_hidden_from_participants(phase): + leaderboard = getattr(phase, "leaderboard", None) + # Treat blind-output phases as hidden for downloadable assets too. + return bool( + phase.hide_output + or phase.hide_prediction_output + or phase.hide_score_output + or (leaderboard and leaderboard.hidden) + ) + + +def user_is_approved_participant(user, competition): + if competition is None or not getattr(user, "is_authenticated", False): + return False + + participant_status = getattr(competition, "participant_status", None) + if participant_status is not None: + return participant_status == CompetitionParticipant.APPROVED + + return competition.participants.filter( + user=user, + status=CompetitionParticipant.APPROVED, + ).exists() + + +def user_can_access_competition_phase_resource(user, phase): + competition = phase.competition + if competition is None: + return False + + if competition.user_has_admin_permission(user): + return True + + if not user_is_approved_participant(user, competition): + return False + + return not phase_is_hidden_from_participants(phase) + + +def user_can_access_task_solution(user, phase, solution): + if solution is None: + return False + + return user_can_access_competition_phase_resource(user, phase) + + +def user_can_access_task_dataset(user, phase, dataset): + if dataset is None: + return False + + if dataset.is_public: + return True + + if getattr(user, "is_authenticated", False) and dataset.created_by_id == user.id: + return True + + competition = phase.competition + if competition is None: + return False + + if competition.user_has_admin_permission(user): + return True + + if not user_is_approved_participant(user, competition): + return False + + if dataset.type == Data.REFERENCE_DATA: + return False + + if phase_is_hidden_from_participants(phase): + return False + + availability_flag = TASK_DATASET_COMPETITION_FLAGS.get(dataset.type) + if availability_flag is None: + return False + + return getattr(competition, availability_flag, False) + + +def user_can_download_dataset(user, dataset): + if dataset.is_public: + return True + + if getattr(user, "is_authenticated", False) and dataset.created_by_id == user.id: + return True + + for phase in _get_task_dataset_phases(dataset): + if user_can_access_task_dataset(user, phase, dataset): + return True + + for phase in _get_phase_resource_phases(dataset): + if user_can_access_competition_phase_resource(user, phase): + return True + + return False + + +def _get_task_dataset_phases(dataset): + lookup = TASK_DATASET_PHASE_LOOKUPS.get(dataset.type) + if lookup is None: + return Phase.objects.none() + + return Phase.objects.filter(**{lookup: dataset}).select_related( + "competition", + "leaderboard", + ).distinct() + + +def _get_phase_resource_phases(dataset): + if dataset.type == Data.PUBLIC_DATA: + return dataset.phase_public_data.select_related("competition", "leaderboard").all() + + if dataset.type == Data.STARTING_KIT: + return dataset.phase_starting_kit.select_related("competition", "leaderboard").all() + + if dataset.type == Data.SOLUTION: + return Phase.objects.filter(tasks__solutions__data=dataset).select_related( + "competition", + "leaderboard", + ).distinct() + + return Phase.objects.none() diff --git a/src/apps/datasets/views.py b/src/apps/datasets/views.py index b6c312970..6143e9b30 100644 --- a/src/apps/datasets/views.py +++ b/src/apps/datasets/views.py @@ -3,6 +3,7 @@ from django.shortcuts import get_object_or_404 from django.views.generic import TemplateView, DetailView +from datasets.access import user_can_download_dataset from datasets.models import Data from utils.data import make_url_sassy from api.serializers.datasets import DatasetSerializer @@ -47,13 +48,17 @@ def get_context_data(self, **kwargs): def download(request, key): data = get_object_or_404(Data, key=key) + + if not user_can_download_dataset(request.user, data): + raise Http404() + return HttpResponseRedirect(make_url_sassy(data.data_file.name)) def download_by_pk(request, pk): dataset = get_object_or_404(Data, pk=pk) - if dataset.is_public or dataset.created_by == request.user: + if user_can_download_dataset(request.user, dataset): # Increment download count dataset.downloads = (dataset.downloads or 0) + 1 dataset.save(update_fields=["downloads"])