diff --git a/.devcontainer/caption-image/devcontainer.json b/.devcontainer/caption-image/devcontainer.json new file mode 100644 index 00000000..d515086f --- /dev/null +++ b/.devcontainer/caption-image/devcontainer.json @@ -0,0 +1,30 @@ +{ + "name": "caption-image", + "dockerComposeFile": [ + "docker-compose.yml" + ], + "service": "caption-image", + "workspaceFolder": "/workflows", + "customizations": { + "vscode": { + "extensions": [ + "ms-python.python", + "ms-python.pylint", + "ms-python.black-formatter", + "ms-toolsai.jupyter" + ] + } + }, + "settings": { + "python.defaultInterpreterPath": "/opt/venv/bin/python", + "python.formatting.provider": "black", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "files.exclude": { + "**/__pycache__": true, + "**/*.pyc": true + } + }, + "initializeCommand": "./initcommand.sh", + "postCreateCommand": "./postinstall.sh" +} \ No newline at end of file diff --git a/.devcontainer/caption-image/docker-compose.yml b/.devcontainer/caption-image/docker-compose.yml new file mode 100644 index 00000000..0c372911 --- /dev/null +++ b/.devcontainer/caption-image/docker-compose.yml @@ -0,0 +1,99 @@ +name: aperturedb-local-linux + +services: + ca: + image: alpine/openssl + restart: on-failure + command: req -x509 -newkey rsa:4096 -days 3650 -nodes -keyout /cert/tls.key -out /cert/tls.crt -subj "/C=US/O=ApertureData Inc./CN=localhost" + volumes: + - ./aperturedb/certificate:/cert + + lenz: + depends_on: + ca: + condition: service_completed_successfully + aperturedb: + condition: service_started + image: aperturedata/lenz:latest + ports: + - ${ADB_PORT}:55551 + restart: always + environment: + LNZ_HEALTH_PORT: 58085 + LNZ_TCP_PORT: 55551 + LNZ_HTTP_PORT: 8080 + LNZ_ADB_BACKENDS: '["aperturedb:55553"]' + LNZ_REPLICAS: 1 + LNZ_ADB_MAX_CONCURRENCY: 48 + LNZ_FORCE_SSL: false + LNZ_CERTIFICATE_PATH: /etc/lenz/certificate/tls.crt + LNZ_PRIVATE_KEY_PATH: /etc/lenz/certificate/tls.key + volumes: + - 
./aperturedb/certificate:/etc/lenz/certificate + + aperturedb: + image: aperturedata/aperturedb-community:latest + volumes: + - ./aperturedb/db:/aperturedb/db + - ./aperturedb/logs:/aperturedb/logs + restart: always + environment: + ADB_KVGD_DB_SIZE: "204800" + ADB_LOG_PATH: "logs" + ADB_ENABLE_DEBUG: 1 + ADB_MASTER_KEY: "admin" + ADB_PORT: 55553 + ADB_FORCE_SSL: false + + webui: + image: aperturedata/aperturedata-platform-web-private:latest + restart: always + + nginx: + depends_on: + ca: + condition: service_completed_successfully + image: nginx + restart: always + ports: + - 8081:80 + - 8443:443 + configs: + - source: nginx.conf + target: /etc/nginx/conf.d/default.conf + volumes: + - ./aperturedb/certificate:/etc/nginx/certificate + + caption-image: + build: + context: ../../apps/caption-image + volumes: + - ../../:/workflows + environment: + WF_LOGS_AWS_CREDENTIALS: "aws-credentials" + DB_HOST: lenz + DB_PORT: 55551 + PORT: 8080 + PROMETHEUS_PORT: 8001 + command: bash -c "while true; do sleep 1000; done" + depends_on: + aperturedb: + condition: service_started + +configs: + nginx.conf: + content: | + server { + listen 80; + listen 443 ssl; + client_max_body_size 256m; + ssl_certificate /etc/nginx/certificate/tls.crt; + ssl_certificate_key /etc/nginx/certificate/tls.key; + location / { + proxy_pass http://webui; + } + location /api/ { + proxy_pass http://lenz:8080; + } + } + diff --git a/.devcontainer/crawl-website/devcontainer.json b/.devcontainer/crawl-website/devcontainer.json new file mode 100644 index 00000000..a36eb044 --- /dev/null +++ b/.devcontainer/crawl-website/devcontainer.json @@ -0,0 +1,30 @@ +{ + "name": "crawl-website", + "dockerComposeFile": [ + "docker-compose.yml" + ], + "service": "crawl-website", + "workspaceFolder": "/workflows", + "customizations": { + "vscode": { + "extensions": [ + "ms-python.python", + "ms-python.pylint", + "ms-python.black-formatter", + "ms-toolsai.jupyter" + ] + } + }, + "settings": { + 
"python.defaultInterpreterPath": "/opt/venv/bin/python", + "python.formatting.provider": "black", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "files.exclude": { + "**/__pycache__": true, + "**/*.pyc": true + } + }, + "initializeCommand": "./initcommand.sh", + "postCreateCommand": "./postinstall.sh" +} \ No newline at end of file diff --git a/.devcontainer/crawl-website/docker-compose.yml b/.devcontainer/crawl-website/docker-compose.yml new file mode 100644 index 00000000..87ae1bc8 --- /dev/null +++ b/.devcontainer/crawl-website/docker-compose.yml @@ -0,0 +1,99 @@ +name: aperturedb-local-linux + +services: + ca: + image: alpine/openssl + restart: on-failure + command: req -x509 -newkey rsa:4096 -days 3650 -nodes -keyout /cert/tls.key -out /cert/tls.crt -subj "/C=US/O=ApertureData Inc./CN=localhost" + volumes: + - ./aperturedb/certificate:/cert + + lenz: + depends_on: + ca: + condition: service_completed_successfully + aperturedb: + condition: service_started + image: aperturedata/lenz:latest + ports: + - ${ADB_PORT}:55551 + restart: always + environment: + LNZ_HEALTH_PORT: 58085 + LNZ_TCP_PORT: 55551 + LNZ_HTTP_PORT: 8080 + LNZ_ADB_BACKENDS: '["aperturedb:55553"]' + LNZ_REPLICAS: 1 + LNZ_ADB_MAX_CONCURRENCY: 48 + LNZ_FORCE_SSL: false + LNZ_CERTIFICATE_PATH: /etc/lenz/certificate/tls.crt + LNZ_PRIVATE_KEY_PATH: /etc/lenz/certificate/tls.key + volumes: + - ./aperturedb/certificate:/etc/lenz/certificate + + aperturedb: + image: aperturedata/aperturedb-community:latest + volumes: + - ./aperturedb/db:/aperturedb/db + - ./aperturedb/logs:/aperturedb/logs + restart: always + environment: + ADB_KVGD_DB_SIZE: "204800" + ADB_LOG_PATH: "logs" + ADB_ENABLE_DEBUG: 1 + ADB_MASTER_KEY: "admin" + ADB_PORT: 55553 + ADB_FORCE_SSL: false + + webui: + image: aperturedata/aperturedata-platform-web-private:latest + restart: always + + nginx: + depends_on: + ca: + condition: service_completed_successfully + image: nginx + restart: always + ports: + - 
8081:80 + - 8443:443 + configs: + - source: nginx.conf + target: /etc/nginx/conf.d/default.conf + volumes: + - ./aperturedb/certificate:/etc/nginx/certificate + + crawl-website: + build: + context: ../../apps/crawl-website + volumes: + - ../../:/workflows + environment: + WF_LOGS_AWS_CREDENTIALS: "aws-credentials" + DB_HOST: lenz + DB_PORT: 55551 + PORT: 8080 + PROMETHEUS_PORT: 8001 + command: bash -c "while true; do sleep 1000; done" + depends_on: + aperturedb: + condition: service_started + +configs: + nginx.conf: + content: | + server { + listen 80; + listen 443 ssl; + client_max_body_size 256m; + ssl_certificate /etc/nginx/certificate/tls.crt; + ssl_certificate_key /etc/nginx/certificate/tls.key; + location / { + proxy_pass http://webui; + } + location /api/ { + proxy_pass http://lenz:8080; + } + } + diff --git a/.devcontainer/dataset-ingestion/devcontainer.json b/.devcontainer/dataset-ingestion/devcontainer.json new file mode 100644 index 00000000..223ded36 --- /dev/null +++ b/.devcontainer/dataset-ingestion/devcontainer.json @@ -0,0 +1,30 @@ +{ + "name": "dataset-ingestion", + "dockerComposeFile": [ + "docker-compose.yml" + ], + "service": "dataset-ingestion", + "workspaceFolder": "/workflows", + "customizations": { + "vscode": { + "extensions": [ + "ms-python.python", + "ms-python.pylint", + "ms-python.black-formatter", + "ms-toolsai.jupyter" + ] + } + }, + "settings": { + "python.defaultInterpreterPath": "/opt/venv/bin/python", + "python.formatting.provider": "black", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "files.exclude": { + "**/__pycache__": true, + "**/*.pyc": true + } + }, + "initializeCommand": "./initcommand.sh", + "postCreateCommand": "./postinstall.sh" +} \ No newline at end of file diff --git a/.devcontainer/dataset-ingestion/docker-compose.yml b/.devcontainer/dataset-ingestion/docker-compose.yml new file mode 100644 index 00000000..f050eafa --- /dev/null +++ b/.devcontainer/dataset-ingestion/docker-compose.yml 
@@ -0,0 +1,99 @@ +name: aperturedb-local-linux + +services: + ca: + image: alpine/openssl + restart: on-failure + command: req -x509 -newkey rsa:4096 -days 3650 -nodes -keyout /cert/tls.key -out /cert/tls.crt -subj "/C=US/O=ApertureData Inc./CN=localhost" + volumes: + - ./aperturedb/certificate:/cert + + lenz: + depends_on: + ca: + condition: service_completed_successfully + aperturedb: + condition: service_started + image: aperturedata/lenz:latest + ports: + - ${ADB_PORT}:55551 + restart: always + environment: + LNZ_HEALTH_PORT: 58085 + LNZ_TCP_PORT: 55551 + LNZ_HTTP_PORT: 8080 + LNZ_ADB_BACKENDS: '["aperturedb:55553"]' + LNZ_REPLICAS: 1 + LNZ_ADB_MAX_CONCURRENCY: 48 + LNZ_FORCE_SSL: false + LNZ_CERTIFICATE_PATH: /etc/lenz/certificate/tls.crt + LNZ_PRIVATE_KEY_PATH: /etc/lenz/certificate/tls.key + volumes: + - ./aperturedb/certificate:/etc/lenz/certificate + + aperturedb: + image: aperturedata/aperturedb-community:latest + volumes: + - ./aperturedb/db:/aperturedb/db + - ./aperturedb/logs:/aperturedb/logs + restart: always + environment: + ADB_KVGD_DB_SIZE: "204800" + ADB_LOG_PATH: "logs" + ADB_ENABLE_DEBUG: 1 + ADB_MASTER_KEY: "admin" + ADB_PORT: 55553 + ADB_FORCE_SSL: false + + webui: + image: aperturedata/aperturedata-platform-web-private:latest + restart: always + + nginx: + depends_on: + ca: + condition: service_completed_successfully + image: nginx + restart: always + ports: + - 8081:80 + - 8443:443 + configs: + - source: nginx.conf + target: /etc/nginx/conf.d/default.conf + volumes: + - ./aperturedb/certificate:/etc/nginx/certificate + + dataset-ingestion: + build: + context: ../../apps/dataset-ingestion + volumes: + - ../../:/workflows + environment: + WF_DATA_SOURCE_GCP_BUCKET: "ad-demos-datasets" + WF_LOGS_AWS_CREDENTIALS: "aws-credentials" + DB_HOST: lenz + DB_PORT: 55551 + PORT: 8080 + PROMETHEUS_PORT: 8001 + command: bash -c "while true; do sleep 1000; done" + depends_on: + aperturedb: + condition: service_started + +configs: + nginx.conf: + content: | 
+ server { + listen 80; + listen 443 ssl; + client_max_body_size 256m; + ssl_certificate /etc/nginx/certificate/tls.crt; + ssl_certificate_key /etc/nginx/certificate/tls.key; + location / { + proxy_pass http://webui; + } + location /api/ { + proxy_pass http://lenz:8080; + } + } diff --git a/.gitignore b/.gitignore index d73f198c..dfefecac 100644 --- a/.gitignore +++ b/.gitignore @@ -173,3 +173,4 @@ cython_debug/ apps/dataset-ingestion/input log.txt input/ +aperturedb/ \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..ff8c469c --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,16 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + + { + "name": "Python Debugger: Current File with Arguments", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} \ No newline at end of file diff --git a/apps/caption-image/README.md b/apps/caption-image/README.md index 4f202533..c13166a9 100644 --- a/apps/caption-image/README.md +++ b/apps/caption-image/README.md @@ -1,6 +1,66 @@ -# Example App +# Caption Image App This workflow retrieves all images from ApertureDB that have not been analyzed before, and runs them through a [BLIP (Bootstrapping Language-Image Pre-training)](https://github.com/salesforce/BLIP) model to generate a caption for each image. + +The workflow will run once and process all uncaptioned images. + +## Database details + +```mermaid +sequenceDiagram + participant W as Caption Image + participant A as ApertureDB instance + + W->>A: FindImage + A-->>W: count + loop Until done + W->>A: FindImage + A-->>W: images + W->>A: UpdateImage + end +``` + +Each image is updated with a caption property (`wf_caption_image`) containing the generated caption text. 
The BLIP model processes each image to generate descriptive text that describes the visual content of the image. + +## Running in Docker + +``` +docker run \ + -e RUN_NAME=my_testing_run \ + -e DB_HOST=workflowstesting.gcp.cloud.aperturedata.dev \ + -e DB_PASS="password" \ + -e NUM_WORKERS=4 \ + -e BATCH_SIZE=32 \ + -e LOG_LEVEL=INFO \ + aperturedata/workflows-caption-image +``` + +Parameters: +* **`NUM_WORKERS`**: Specifies the number of worker threads that will be running simultaneously, +retrieving and processing images in parallel. Default is `1`. +* **`BATCH_SIZE`**: Specifies the batch size for processing images. Default is `1`. +* **`LOG_LEVEL`**: Set log level for workflow code. Available options: DEBUG, INFO, WARNING, ERROR. Default is `INFO`. + +See [Common Parameters](../../README.md#common-parameters) for common parameters. + +## Cleaning up + +To clean all captions generated by this workflow, simply run the following query: + +``` +q = [ + { + "UpdateImage": { + "constraints": { + "wf_caption_image": ["!=", null] + }, + "remove_props": ["wf_caption_image"] + } + } + ] +``` + +or manually remove the `wf_caption_image` property from images that have been processed. 
"""Emit devcontainer configuration parameters as ``KEY=VALUE`` lines.

The output is meant to be redirected into a ``.env`` file that
docker-compose reads (see ``initcommand.sh``).
"""
import platform

# Host port published for the ApertureDB endpoint on non-macOS hosts.
DEFAULT_ADB_PORT = 55555
# Host port used on macOS.
# NOTE(review): presumably avoids a port clash on macOS hosts -- confirm.
MAC_ADB_PORT = 55557


def is_mac(system=None):
    """Return True when *system* names macOS.

    Args:
        system: Platform name as reported by ``platform.system()``.
            Defaults to the platform of the running interpreter.
    """
    if system is None:
        system = platform.system()
    return system == "Darwin"


def adb_port(system=None):
    """Return the host port to publish for ApertureDB on *system*.

    Args:
        system: Platform name as reported by ``platform.system()``.
            Defaults to the platform of the running interpreter.
    """
    return MAC_ADB_PORT if is_mac(system) else DEFAULT_ADB_PORT


def main():
    """Print the ``ADB_PORT`` assignment for the current platform."""
    print(f"ADB_PORT={adb_port()}")


if __name__ == "__main__":
    main()
"""Minimal Prefect example flow that reports GitHub star counts."""
from prefect import flow, task
import httpx


@task(log_prints=True)
def get_stars(repo: str):
    """Fetch and print the stargazer count of a GitHub repository.

    Args:
        repo: Repository in ``owner/name`` form.

    Returns:
        The value of ``stargazers_count`` from the GitHub API response.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
    """
    url = f"https://api.github.com/repos/{repo}"
    response = httpx.get(url)
    # Fail with the real HTTP error (404, rate limit, ...) instead of an
    # opaque KeyError on the missing "stargazers_count" key.
    response.raise_for_status()
    count = response.json()["stargazers_count"]
    print(f"{repo} has {count} stars!")
    return count


@flow(name="GitHub Stars")
def github_stars(repos: list[str]):
    """Run ``get_stars`` for every repository in *repos*.

    Args:
        repos: Repositories in ``owner/name`` form.

    Returns:
        A dict mapping each repository to its star count.
    """
    return {repo: get_stars(repo) for repo in repos}


# run the flow!
if __name__ == "__main__":
    github_stars(["PrefectHQ/Prefect"])