Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
c1884de
code removed that was copying submission files to predictions dir
ihsaan-ullah May 5, 2025
632111a
Caddy image update
May 22, 2025
79091f4
fix Caddyfile indentation
curious-broccoli May 27, 2025
450b70a
Merge pull request #1863 from curious-broccoli/format-caddyfile
Didayolo May 27, 2025
16d73c3
Merge pull request #1841 from codalab/submission_duplicate_in_predict…
Didayolo Jun 5, 2025
3aff869
Merge pull request #1864 from codalab/format-caddyfile
Didayolo Jun 5, 2025
e67088a
Changed base image from python3.9 to Fedora 42, reducing size from 1.…
Jun 18, 2025
abcbfe2
Merge pull request #1860 from codalab/caddyUpdate
Didayolo Jun 19, 2025
c2d57ad
Do not allow signup with email with `*` (#1882)
ihsaan-ullah Jun 20, 2025
ac30787
User model filters - remove `deleted` (#1887)
ihsaan-ullah Jun 20, 2025
587442f
Filters added in public competitions page (#1896)
ihsaan-ullah Jun 26, 2025
d237a83
Delete non active users (#1885)
ihsaan-ullah Jun 26, 2025
f092601
public competitions api documentation added
ihsaan-ullah Jun 27, 2025
1d28f63
Merge pull request #1897 from codalab/documentation_public_competitions
Didayolo Jun 30, 2025
0ba2994
option added to download all participants
ihsaan-ullah Jul 3, 2025
a5d5230
code commented for clarity
ihsaan-ullah Jul 3, 2025
e163cf5
latex rendering problems fixed
ihsaan-ullah Jul 4, 2025
b1cdb57
Put back copy of submission files
Didayolo Jul 11, 2025
dc19e4a
Merge pull request #1923 from codalab/revert-copy-compute-worker
Didayolo Jul 14, 2025
026c8be
Update mc command from "config host add" to "alias set"
Didayolo Jul 14, 2025
60055ea
clamp length of competition search results description
curious-broccoli Jul 14, 2025
aaff5c8
Merge pull request #1928 from curious-broccoli/clamp-search-description
Didayolo Jul 15, 2025
00d3c45
Merge pull request #1929 from codalab/clamp-search-description
Didayolo Jul 15, 2025
850c1f9
Removed `num entries` from leaderboard (#1912)
ihsaan-ullah Jul 15, 2025
9f790f1
Merge pull request #1910 from codalab/latex_rendering
Didayolo Jul 15, 2025
b34376f
Merge pull request #1903 from codalab/download_participants
Didayolo Jul 15, 2025
645c852
Improved organization delete error + Remove organization reference fr…
ihsaan-ullah Jul 15, 2025
9a44d2b
File size for submissions clarified (#1925)
ihsaan-ullah Jul 15, 2025
39b9c2a
Compute worker - Allow to unzip files with parent directory (#1905)
Didayolo Jul 15, 2025
d98abf9
Merge pull request #1880 from codalab/workerUpdates
Didayolo Jul 15, 2025
054b8f4
Merge pull request #1927 from codalab/minio-alias-command
Didayolo Jul 16, 2025
0f91e85
Django to 3.0 (#1730)
bbearce Jul 17, 2025
7032f1a
set page titles on some pages
curious-broccoli Jul 20, 2025
92407f7
Set page titles on some pages (#1939)
Didayolo Jul 22, 2025
875a552
fix missing username in email template, improve message
curious-broccoli Jul 22, 2025
c91b32a
Revert "Compute worker - Allow to unzip files with parent directory (…
ihsaan-ullah Jul 23, 2025
c65ce85
Merge pull request #1944 from curious-broccoli/fix-email-template
Didayolo Jul 24, 2025
52ec752
Merge pull request #1943 from codalab/set-page-titles
Didayolo Jul 25, 2025
6b6877c
Merge pull request #1950 from codalab/fix-email-template
Didayolo Jul 25, 2025
27489d0
Merge pull request #1946 from codalab/revert-1905-unzip_ingestion_sco…
Didayolo Jul 31, 2025
3df6f00
Merge pull request #1948 from codalab/versionbump
ObadaS Aug 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ jobs:
docker compose -f docker-compose.yml -f docker-compose.selenium.yml exec django python manage.py collectstatic --noinput

- run: docker-compose exec django flake8 src/
- run: docker pull codalab/codalab-legacy:py37 # not available unless the "not e2e" tests have run, since those pull it ahead of time
- run: docker pull codalab/codalab-legacy:py3 # not available unless the "not e2e" tests have run, since those pull it ahead of time
- run: docker pull vergilgxw/autotable:v2 # not available unless the "not e2e" tests have run, since those pull it ahead of time

- run:
name: pytest
Expand All @@ -48,6 +51,13 @@ jobs:
command: docker compose -f docker-compose.yml -f docker-compose.selenium.yml exec django py.test src/tests/functional/ -m e2e
no_output_timeout: 60m

# Example of running a specific set of tests (for debugging individual tests from a batch)
# - run:
# name: e2e tests - competitions
# command: docker compose -f docker-compose.yml -f docker-compose.selenium.yml exec django py.test src/tests/functional/test_competitions.py -m e2e
# no_output_timeout: 60m


- store_artifacts:
path: artifacts/

Expand Down
13 changes: 6 additions & 7 deletions Caddyfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# HTTPS Options
tls {$TLS_EMAIL}


# Test HTTPS setup
# tls {$TLS_EMAIL} {
# ca https://acme-staging-v02.api.letsencrypt.org/directory
Expand All @@ -20,23 +19,23 @@
root /srv
try_files maintenance.on
}
handle @maintenanceModeActive {
handle @maintenanceModeActive {
root * /srv
redir @noRedirect /maintenance.html
file_server {
file_server {
status 503
}
}
}

# Serves static files, should be the same as `STATIC_ROOT` setting:
root * /var/www/django
root * /var/www/django
file_server

@noStatic {
not path /static/*
not path /media/*
}


# Serving dynamic requests:
reverse_proxy @noStatic django:8000
Expand Down
15 changes: 11 additions & 4 deletions Dockerfile.compute_worker
Original file line number Diff line number Diff line change
@@ -1,21 +1,28 @@
FROM --platform=linux/amd64 python:3.9
FROM --platform=linux/amd64 fedora:42

# Disable output buffering so results show up in stdout immediately
ENV PYTHONUNBUFFERED 1

# Install Docker
RUN apt-get update && curl -fsSL https://get.docker.com | sh
RUN dnf -y install dnf-plugins-core && \
dnf-3 config-manager --add-repo https://download.docker.com/linux/fedora/docker-ce.repo && \
dnf -y update && \
dnf install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin && \
dnf install -y python3.9 && \
dnf clean all && \
rm -rf /var/cache /var/log/dnf* /var/log/yum.*


RUN curl -sSL https://install.python-poetry.org | python3 - --version 1.8.3
RUN curl -sSL https://install.python-poetry.org | python3.9 - --version 1.8.3
# Poetry location so future commands (below) work
ENV PATH $PATH:/root/.local/bin
# Want poetry to use system python of docker container
RUN poetry config virtualenvs.create false
RUN poetry config virtualenvs.in-project false
COPY ./compute_worker/pyproject.toml ./
COPY ./compute_worker/poetry.lock ./
RUN poetry install
# To use python3.9 instead of system python
RUN poetry config virtualenvs.prefer-active-python true && poetry install

ADD compute_worker .

Expand Down
40 changes: 7 additions & 33 deletions Dockerfile.compute_worker_gpu
Original file line number Diff line number Diff line change
@@ -1,38 +1,12 @@
FROM --platform=linux/amd64 python:3.9

# This makes output not buffer and return immediately, nice for seeing results in stdout
ENV PYTHONUNBUFFERED 1

# Install Docker
RUN apt-get update && curl -fsSL https://get.docker.com | sh


FROM --platform=linux/amd64 codalab/competitions-v2-compute-worker:latest
# Nvidia Container Toolkit for cuda use with docker
# [source](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
RUN curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
RUN apt-get update -y;
RUN apt-get install -y nvidia-container-toolkit
# Include deps
RUN dnf -y config-manager addrepo --from-repofile=https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo && \
dnf -y update && \
dnf -y install nvidia-container-runtime nvidia-container-toolkit --exclude container-selinux && \
dnf clean all && \
rm -rf /var/cache /var/log/dnf* /var/log/yum.*
# Make it explicit that we're using GPUs
# BB - not convinced we need this
ENV USE_GPU 1

RUN curl -sSL https://install.python-poetry.org | python3 - --version 1.8.3
# Poetry location so future commands (below) work
ENV PATH $PATH:/root/.local/bin
# Want poetry to use system python of docker container
RUN poetry config virtualenvs.create false
RUN poetry config virtualenvs.in-project false
COPY ./compute_worker/pyproject.toml ./
COPY ./compute_worker/poetry.lock ./
RUN poetry install

ADD compute_worker .

CMD celery -A compute_worker worker \
-l info \
-Q compute-worker \
-n compute-worker@%n \
--concurrency=1
55 changes: 38 additions & 17 deletions compute_worker/compute_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,9 @@ def get_detailed_results_file_path(self):
async def send_detailed_results(self, file_path):
logger.info(f"Updating detailed results {file_path} - {self.detailed_results_url}")
self._put_file(self.detailed_results_url, file=file_path, content_type='text/html')
async with websockets.connect(self.websocket_url) as websocket:
websocket_url = f"{self.websocket_url}?kind=detailed_results"
logger.info(f"Connecting to {websocket_url} for detailed results")
async with websockets.connect(websocket_url) as websocket:
await websocket.send(json.dumps({
"kind": 'detailed_result_update',
}))
Expand Down Expand Up @@ -390,10 +392,14 @@ async def _send_data_through_socket(self, error_message):
- Docker image pull failure logs
- Execution time limit exceeded logs
"""
logger.info(f"Connecting to {self.websocket_url} to send docker image pull error")
# Create a unique websocket URL for error messages
websocket_url = f"{self.websocket_url}?kind=error_logs"
logger.info(f"Connecting to {websocket_url} to send error message")

logger.info(f"Connecting to {websocket_url} to send docker image pull error")

# connect to web socket
websocket = await websockets.connect(self.websocket_url)
websocket = await websockets.connect(websocket_url)

# define websocket errors
websocket_errors = (socket.gaierror, websockets.WebSocketException, websockets.ConnectionClosedError, ConnectionRefusedError)
Expand All @@ -416,7 +422,7 @@ async def _send_data_through_socket(self, error_message):
# no error in websocket message sending
logger.info(f"Error sent successfully through websocket")

logger.info(f"Disconnecting from websocket {self.websocket_url}")
logger.info(f"Disconnecting from websocket {websocket_url}")

# close websocket
await websocket.close()
Expand Down Expand Up @@ -500,8 +506,11 @@ async def _run_container_engine_cmd(self, engine_cmd, kind):
}

# Start websocket, it will reconnect in the stdout/stderr listener loop below
logger.info(f"Connecting to {self.websocket_url}")
websocket = await websockets.connect(self.websocket_url)
# This ensures each task has its own independent WebSocket connection
websocket_url = f"{self.websocket_url}?kind={kind}"
logger.debug(f"WORKER_MARKER: Connecting to {websocket_url}")
websocket = await websockets.connect(websocket_url)
# websocket = await websockets.connect(self.websocket_url) # old BB
websocket_errors = (socket.gaierror, websockets.WebSocketException, websockets.ConnectionClosedError, ConnectionRefusedError)

# Function to read a line, if the line is larger than the buffer size we will
Expand All @@ -522,7 +531,7 @@ async def _readline_or_chunk(stream):
logs = [self.logs[kind][key] for key in ('stdout', 'stderr')]
for value in logs:
try:
out = await asyncio.wait_for(_readline_or_chunk(value["stream"]), timeout=.1)
out = await asyncio.wait_for(_readline_or_chunk(value["stream"]), timeout=0.1)
if out:
value["data"] += out
print("WS: " + str(out))
Expand All @@ -535,32 +544,36 @@ async def _readline_or_chunk(stream):
except asyncio.TimeoutError:
continue
except websocket_errors:
logger.debug("\n\nWebsocket error (line 538)\n\n")
try:
# Do we need to await websocket.close() on the old socket before making a new one? Probably not.
await websocket.close()
except Exception as e:
logger.error(e)
logger.info(e)
# TODO: catch the proper exceptions here! What can go wrong when closing fails?
pass

# try to reconnect a few times
tries = 0
while tries < 3 and not websocket.open:
try:
websocket = await websockets.connect(self.websocket_url)
logger.debug(f"\n\nAttempting to reconnect in 2 seconds (attempt {tries+1}/3)")
websocket = await websockets.connect(websocket_url)
logger.debug(f"\n\nSuccessfully reconnected to {websocket_url}")
except websocket_errors:
logger.error(f"\n\nReconnection attempt {tries+1} failed: {websocket_errors}")
await asyncio.sleep(2)
tries += 1

self.logs[kind]["end"] = time.time()

logger.info(f"Process exited with {proc.returncode}")
logger.info(f"Disconnecting from websocket {self.websocket_url}")
logger.debug(f"Process exited with {proc.returncode}")
logger.debug(f"Disconnecting from websocket {websocket_url}")

# Communicate that the program is closing
self.completed_program_counter += 1

logger.debug(f"WORKER_MARKER: Disconnecting from {websocket_url}, program counter = {self.completed_program_counter}")
await websocket.close()

def _get_host_path(self, *paths):
Expand All @@ -583,7 +596,14 @@ def _get_host_path(self, *paths):

return path

async def _run_program_directory(self, program_dir, kind, can_be_output=False):
async def _run_program_directory(self, program_dir, kind):
"""
Function responsible for running program directory

Args:
- program_dir : can be either ingestion program or program/submission
- kind : either `program` or `ingestion`
"""
# If the directory doesn't even exist, move on
if not os.path.exists(program_dir):
logger.info(f"{program_dir} not found, no program to execute")
Expand All @@ -597,12 +617,13 @@ async def _run_program_directory(self, program_dir, kind, can_be_output=False):
elif os.path.exists(os.path.join(program_dir, "metadata")):
metadata_path = 'metadata'
else:
if can_be_output:
# Display a warning in logs when there is no metadata file in submission/program dir
if kind == "program":
logger.info(
"Program directory missing metadata, assuming it's going to be handled by ingestion "
"program so move it to output"
"Program directory missing metadata, assuming it's going to be handled by ingestion"
)
# Copying so that we don't move a code submission w/out a metadata command
# Copy submission files into prediction output
# This is useful for results submissions but wrongly uses storage
shutil.copytree(program_dir, self.output_dir)
return
else:
Expand Down Expand Up @@ -815,7 +836,7 @@ def start(self):
logger.info("Running scoring program, and then ingestion program")
loop = asyncio.new_event_loop()
gathered_tasks = asyncio.gather(
self._run_program_directory(program_dir, kind='program', can_be_output=True),
self._run_program_directory(program_dir, kind='program'),
self._run_program_directory(ingestion_program_dir, kind='ingestion'),
self.watch_detailed_results(),
loop=loop,
Expand Down
3 changes: 2 additions & 1 deletion docker-compose.selenium.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ services:
environment:
- SELENIUM_HOSTNAME=selenium
- SUBMISSIONS_API_URL=http://django:36475/api
- WEBSOCKET_ALLOWED_ORIGINS=*
ports:
- 36475:36475

selenium:
image: selenium/standalone-firefox:124.0
image: selenium/standalone-firefox:120.0
volumes:
- ./src/tests/functional/test_files:/test_files/
- ./artifacts:/artifacts/:z
Expand Down
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ services:
# Web Services
#-----------------------------------------------
caddy:
image: caddy:2.8.4
image: caddy:2.10.0
env_file: .env
environment:
- ACME_AGREE=true
Expand Down Expand Up @@ -78,7 +78,7 @@ services:
/bin/sh -c "
set -x;
if [ -n \"$MINIO_ACCESS_KEY\" ] && [ -n \"$MINIO_SECRET_KEY\" ] && [ -n \"$MINIO_PORT\" ]; then
until /usr/bin/mc config host add minio_docker http://minio:$MINIO_PORT $MINIO_ACCESS_KEY $MINIO_SECRET_KEY && break; do
until /usr/bin/mc alias set minio_docker http://minio:$MINIO_PORT $MINIO_ACCESS_KEY $MINIO_SECRET_KEY && break; do
echo '...waiting...' && sleep 5;
done;
/usr/bin/mc mb minio_docker/$AWS_STORAGE_BUCKET_NAME || echo 'Bucket $AWS_STORAGE_BUCKET_NAME already exists.';
Expand Down
Loading