diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..110b8b8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +symlink/ +symlink \ No newline at end of file diff --git a/README.md b/README.md index 970b089..8b63e64 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,32 @@ # PAM Personal Assistance Machine +This artificial intelligence consists of a large language model for tropical storm simulations. + +# Large Language Model (LLM) Files (GGUF) +The system is locally hosted. The prerequisites are the associated GGUF files. Please create a directory called `symlink` in the root that directs the orchestrator to find the LLM files. To create the symbolic link, use the following command, replacing `/path/to/ggufs` with the path to the LLM files. + +`ln -s /path/to/ggufs ./symlink` + +## Updating Model +If we are switching the LLM or otherwise updating it, we need to ensure that the llama.cpp server configuration reflects it. In the `docker/llm` directory there is an `entrypoint.sh` file that contains the initial configuration. Please update the GGUF filename with the first in the sequence for the `--model` flag. + +## Downloading Models +Currently, the design supports command line interfaces such as the [huggingface-cli](https://huggingface.co/docs/huggingface_hub/en/guides/cli). Please reference the following command to understand how to download an LLM to a locally specified directory. + +``` +sudo huggingface-cli download DevQuasar/swiss-ai.Apertus-70B-Instruct-2509-GGUF --include "*Q8*" --local-dir . +``` + +# Trusted Platform Module (TPM) +The TPM can be configured so that the application complies with rigorous security standards and can also be used for data science purposes such as random number generation. The following command shows how to expose a TPM chip to a container. + +`docker run --device /dev/tpm0:/dev/tpm0 --device /dev/tpmrm0:/dev/tpmrm0` # Quickstart 1. `docker build -t pam docker/` 2. 
`sudo docker run -d -v .:/data/ -p 10000:10000 pam` + +# Networking Quickstart + +The reverse proxy is configured externally. For example, [this configuration file](https://github.com/hammad93/hurricane-server/blob/main/docker/proxy/conf.d/open-webui.conf) shows how to configure it. \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..7443f2f --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,18 @@ +services: + llm: + build: + context: ./docker/llm + ports: + - "10000:10000" + volumes: + - ./symlink:/data + restart: unless-stopped + open-webui: + image: ghcr.io/open-webui/open-webui:main + container_name: open-webui + ports: + - "8080:8080" + volumes: + - /var/lib/docker/volumes/open-webui/_data:/app/backend/data + network_mode: host + restart: unless-stopped \ No newline at end of file diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh deleted file mode 100644 index 2607f05..0000000 --- a/docker/entrypoint.sh +++ /dev/null @@ -1,7 +0,0 @@ -cd /llama.cpp/build/bin/ -./llama-server \ - --model /data/DeepSeek-R1-GGUF/DeepSeek-R1-UD-Q2_K_XL/DeepSeek-R1-UD-Q2_K_XL-00001-of-00005.gguf \ - --port 10000 \ - --ctx-size 8192 \ - --threads 64 \ - --host 0.0.0.0 diff --git a/docker/Dockerfile b/docker/llm/Dockerfile similarity index 65% rename from docker/Dockerfile rename to docker/llm/Dockerfile index 1ad5d5d..9c09ae6 100644 --- a/docker/Dockerfile +++ b/docker/llm/Dockerfile @@ -1,11 +1,12 @@ FROM continuumio/anaconda3 RUN apt-get update && apt-get install -y build-essential +# RUN apt-get install -y tpm2-tools libtss2-dev # Install llama.cpp -RUN git clone https://github.com/ggerganov/llama.cpp.git +RUN git clone -b apertus-implementation https://github.com/pwilkin/llama.cpp.git WORKDIR llama.cpp RUN apt install cmake -y -RUN cmake -B build +RUN cmake -B build -DLLAMA_CURL=OFF RUN cmake --build build --config Release # Entrypoint diff --git a/docker/llm/entrypoint.sh 
b/docker/llm/entrypoint.sh new file mode 100644 index 0000000..d32236f --- /dev/null +++ b/docker/llm/entrypoint.sh @@ -0,0 +1,10 @@ +cd /llama.cpp/build/bin/ +./llama-server \ + --model /data/swiss-ai.Apertus-70B-Instruct-2509.Q8_0-00001-of-00006.gguf \ + --port 10000 \ + --ctx-size 4096 \ + --threads 64 \ + --host 0.0.0.0 \ + --mlock \ + --no-mmap \ + --offline diff --git a/start.sh b/start.sh new file mode 100644 index 0000000..4c0fc0c --- /dev/null +++ b/start.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# Check if the symbolic link exists +if [[ -L "./symlink" ]]; then + echo "Symbolic link exists: ./symlink with files:" + ls -la symlink/ +else + echo "Error: Symbolic link does not exist: ./symlink" >&2 + exit 1 +fi + +docker compose build --no-cache +docker compose up -d \ No newline at end of file diff --git a/stop.sh b/stop.sh new file mode 100644 index 0000000..dd327ba --- /dev/null +++ b/stop.sh @@ -0,0 +1,2 @@ +#!/bin/bash +docker compose down \ No newline at end of file