diff --git a/.gitignore b/.gitignore index a781c98..b0bf348 100644 --- a/.gitignore +++ b/.gitignore @@ -327,3 +327,9 @@ dist # Ignore master key for decrypting credentials and more. /config/master.key + +pyrightconfig.json + + +meta-llama-8b-instruct-q4_K_M.gguf +llama-model.gguf diff --git a/llama-demo/Dockerfile.gcp b/llama-demo/Dockerfile.gcp new file mode 100644 index 0000000..944afbc --- /dev/null +++ b/llama-demo/Dockerfile.gcp @@ -0,0 +1,19 @@ +FROM python:3.11-slim + +WORKDIR /code + +RUN apt-get update && apt-get install -y --no-install-recommends g++ gcc + +COPY ./main.py /code/main.py +COPY ./infra.py /code/infra.py +COPY ./llama_demo /code/llama_demo +COPY ./templates /code/templates +COPY ./requirements.txt /code/requirements.txt + +RUN pip install uv +RUN uv pip install --system -r requirements.txt + +ENV PORT=80 +EXPOSE $PORT + +CMD fastapi run main.py --host 0.0.0.0 --port $PORT diff --git a/llama-demo/infra.py b/llama-demo/infra.py new file mode 100644 index 0000000..dbd8b7c --- /dev/null +++ b/llama-demo/infra.py @@ -0,0 +1,50 @@ +"""infra.py + +This file is used to customize the infrastructure your application deploys to. + +Create your cloud infrastructure with: + lf create + +Deploy your application with: + lf deploy + +""" + +import launchflow as lf + +if lf.environment == "lf-llama-gcp": + llama_service = lf.gcp.ComputeEngineService( + "launchflow-llama-service", + dockerfile="Dockerfile.gcp", # Path to your Dockerfile + machine_type="e2-standard-4", + build_directory="llama_server", + disk_size_gb=50, + ) + model_bucket = lf.gcp.GCSBucket("launchflow-llama-demo") +elif lf.environment == "lf-llama-aws": + llama_service = lf.aws.ECSFargateService( + "launchflow-llama-service", + dockerfile="Dockerfile.aws", # Path to your Dockerfile + build_directory="llama_server", + cpu=8192, # 8 cpus are required for GPU support + memory=16384, # 16 GB of memory are required for GPU support + # load_balancer=lf.aws.alb.InternalHTTP(), + ) + serving_service = lf.aws.LambdaService( + "launchflow-llama-serving-demo", + handler="main.handler", + build_ignore=[ + "llama_server", + "Dockerfile.*", + "requirements*", + "launchflow.yaml", + ], + runtime=lf.aws.lambda_service.PythonRuntime( + requirements_txt_path="requirements-aws.txt" + ), + timeout_seconds=900, + env={"LLAMA_SERVER_ADDRESS": lf.Depends(llama_service).service_url}, # type: ignore + ) + model_bucket = lf.aws.S3Bucket("launchflow-llama-demo") +else: + raise ValueError(f"Unknown environment: {lf.environment}") diff --git a/llama-demo/launchflow.yaml b/llama-demo/launchflow.yaml new file mode 100644 index 0000000..ff44589 --- /dev/null +++ b/llama-demo/launchflow.yaml @@ -0,0 +1,2 @@ +project: llama-demo +backend: lf://default diff --git a/llama-demo/llama_demo/chat_router.py b/llama-demo/llama_demo/chat_router.py new file mode 100644 index 0000000..7a524ec --- /dev/null +++ b/llama-demo/llama_demo/chat_router.py @@ -0,0 +1,56 @@ +from typing import Annotated +from fastapi import APIRouter +from fastapi.params import Depends + +from fastapi.responses import JSONResponse +from pydantic import BaseModel +from starlette.responses import StreamingResponse + +from llama_demo.settings import settings +from llama_demo.schemas import Chat +from llama_demo.llama_client import LlamaClient + +router = APIRouter(prefix="/v1", tags=["v1"]) + + +class ChatResponse(BaseModel): + content: str + + +@router.post("/chat", response_model=None) +async def chat( + chat: Chat, model: Annotated[LlamaClient, Depends(LlamaClient)] +) -> StreamingResponse | JSONResponse: + messages = [] + for message in chat.context: + messages.append({"role": message.role, "content": message.content}) + + context_string = "".join([msg["content"] for msg in messages]) + if len(context_string) > 512: + trimmed_context = [] + current_length = 0 + for msg in reversed(messages): + msg_length = len(msg["content"]) + if current_length + msg_length <= settings.context_window: + trimmed_context.append(msg) + current_length += msg_length + else: + num_to_append = msg_length - (settings.context_window - current_length) + msg["content"] = msg["content"][-num_to_append:] + trimmed_context.append(msg) + break + messages = list(reversed(trimmed_context)) + + completion = model.chat(messages=messages) + + def iter_content(): + for item in completion: + yield item + + if settings.streaming: + return StreamingResponse(iter_content()) + + full_content = "" + for item in iter_content(): + full_content += item + return JSONResponse(content={"content": full_content}) diff --git a/llama-demo/llama_demo/llama_client.py b/llama-demo/llama_demo/llama_client.py new file mode 100644 index 0000000..4df9e22 --- /dev/null +++ b/llama-demo/llama_demo/llama_client.py @@ -0,0 +1,30 @@ +import httpx +from openai.types.chat import ChatCompletionMessageParam +from llama_demo.settings import settings +import openai +from typing import Dict, Any, Iterable + + +async def httpx_client(): + async with httpx.AsyncClient() as client: + yield client + + +class LlamaClient: + def __init__(self): + self.client = openai.Client( + base_url=settings.llama_server_address, api_key="no-api-key" + ) + + def chat(self, messages: Iterable[ChatCompletionMessageParam]): + result = self.client.chat.completions.create( + messages=messages, + model="gpt-3.5-turbo", + stream=True, + max_tokens=int(settings.context_window / 2), + timeout=600, + ) + for r in result: + content = r.choices[0].delta.content + if content is not None: + yield content diff --git a/llama-demo/llama_demo/schemas.py b/llama-demo/llama_demo/schemas.py new file mode 100644 index 0000000..94c6e08 --- /dev/null +++ b/llama-demo/llama_demo/schemas.py @@ -0,0 +1,13 @@ +from typing import Literal + +from pydantic import BaseModel + + +class ChatMessage(BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +class Chat(BaseModel): + message: str + context: list[ChatMessage] diff --git a/llama-demo/llama_demo/settings.py b/llama-demo/llama_demo/settings.py new file mode 100644 index 0000000..cf187c2 --- /dev/null +++ b/llama-demo/llama_demo/settings.py @@ -0,0 +1,17 @@ +import os + +from dotenv import load_dotenv + +load_dotenv() + + +class Settings: + context_window: int = int(os.environ.get("CONTEXT_WINDOW", 5000)) + llama_server_address: str = os.environ.get( + "LLAMA_SERVER_ADDRESS", + "http://launchflow-llama-service-l-79dfc-799820891.us-east-1.elb.amazonaws.com", + ) + streaming: bool = os.environ.get("LAUNCHFLOW_ENVIRONMENT") == "lf-llama-gcp" + + +settings = Settings() diff --git a/llama-demo/llama_server/Dockerfile.aws b/llama-demo/llama_server/Dockerfile.aws new file mode 100644 index 0000000..7022a1d --- /dev/null +++ b/llama-demo/llama_server/Dockerfile.aws @@ -0,0 +1,28 @@ +# Use the existing Dockerfile from the repo as the base +FROM public.ecr.aws/docker/library/ubuntu:22.04 + +# Install necessary dependencies +ARG DEBIAN_FRONTEND=noninteractive +ENV TZ=Etc/UTC +RUN apt-get update && apt-get install -y \ + git build-essential cmake awscli wget unzip \ + && rm -rf /var/lib/apt/lists/* + +# Clone the llama.cpp repository +RUN git clone https://github.com/ggerganov/llama.cpp.git /app/llama.cpp +WORKDIR /app/llama.cpp + +# Build server +RUN make llama-server + +# Expose necessary port for the server +EXPOSE 80 + +# Copy a custom script that handles the download at runtime +COPY download_and_run.sh /app/download_and_run.sh +RUN chmod +x /app/download_and_run.sh + +WORKDIR /app + +# Start the server through the custom script +ENTRYPOINT ["/app/download_and_run.sh"] diff --git a/llama-demo/llama_server/Dockerfile.gcp b/llama-demo/llama_server/Dockerfile.gcp new file mode 100644 index 0000000..5cc8ae5 --- /dev/null +++ b/llama-demo/llama_server/Dockerfile.gcp @@ -0,0 +1,32 @@ +# Use the existing Dockerfile from the repo as the base +FROM ubuntu:22.04 + +# Install necessary dependencies +RUN apt-get update && apt-get install -y \ + git build-essential cmake wget unzip curl python3 python3-distutils \ + && rm -rf /var/lib/apt/lists/* + +# Install Google Cloud SDK (for gsutil) +RUN curl -sSL https://sdk.cloud.google.com | bash && \ + /root/google-cloud-sdk/install.sh + +ENV PATH=$PATH:/root/google-cloud-sdk/bin + +# Clone the llama.cpp repository +RUN git clone https://github.com/ggerganov/llama.cpp.git /app/llama.cpp +WORKDIR /app/llama.cpp + +# Build server +RUN make llama-server + +# Expose necessary port for the server +EXPOSE 80 + +# Copy a custom script that handles the download at runtime +COPY download_and_run.sh /app/download_and_run.sh +RUN chmod +x /app/download_and_run.sh + +WORKDIR /app + +# Start the server through the custom script +ENTRYPOINT ["/app/download_and_run.sh"] diff --git a/llama-demo/llama_server/download_and_run.sh b/llama-demo/llama_server/download_and_run.sh new file mode 100644 index 0000000..34efc01 --- /dev/null +++ b/llama-demo/llama_server/download_and_run.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# Define S3 bucket and model file +LOCAL_MODAL_PATH="/models/llama.gguf" +REMOTE_MODEL_FILE_NAME="meta-llama-8b-instruct-q4_K_M.gguf" + + + +if [ "${LAUNCHFLOW_ENVIRONMENT}" = "lf-llama-aws" ]; then + bucket_url="s3://launchflow-llama-demo" + echo "Downloading model from ${S3_BUCKET_URL}/${MODEL_FILENAME}..." + aws s3 cp ${bucket_url}/${REMOTE_MODEL_FILE_NAME} $LOCAL_MODAL_PATH +elif [ "${LAUNCHFLOW_ENVIRONMENT}" = "lf-llama-gcp" ]; then + bucket_url="gs://launchflow-llama-demo" + echo "Downloading model from ${bucket_url}/${REMOTE_MODEL_FILE_NAME}..." + gsutil cp ${bucket_url}/${REMOTE_MODEL_FILE_NAME} $LOCAL_MODAL_PATH +fi + +# Start the server with the downloaded model + +cd llama.cpp +./llama-server --model $LOCAL_MODAL_PATH --ctx-size 5000 --port 80 --host 0.0.0.0 diff --git a/llama-demo/main.py b/llama-demo/main.py new file mode 100644 index 0000000..1cee5fd --- /dev/null +++ b/llama-demo/main.py @@ -0,0 +1,34 @@ +from fastapi import FastAPI +from fastapi.requests import Request +from fastapi.templating import Jinja2Templates +from llama_demo.chat_router import router +from mangum import Mangum + +from llama_demo.settings import settings + +app = FastAPI() + +app.include_router(router) + +templates = Jinja2Templates(directory="templates") + + +@app.get("/") +def index(request: Request): + return templates.TemplateResponse( + request=request, + name="index.html", + context={ + "navigation": [ + { + "caption": "Llama Chat Demo", + "href": "/", + "active": True, + } + ], + "streaming": settings.streaming, + }, + ) + + +handler = Mangum(app, lifespan="off") diff --git a/llama-demo/requirements-aws.in b/llama-demo/requirements-aws.in new file mode 100644 index 0000000..466a46c --- /dev/null +++ b/llama-demo/requirements-aws.in @@ -0,0 +1,7 @@ +fastapi +launchflow[aws] +pydantic +openai +mangum +durationpy==0.6 +python-dotenv diff --git a/llama-demo/requirements-aws.txt b/llama-demo/requirements-aws.txt new file mode 100644 index 0000000..5f7b0bb --- /dev/null +++ b/llama-demo/requirements-aws.txt @@ -0,0 +1,181 @@ +# This file was autogenerated by uv v0.1.1 via the following command: +# uv pip compile requirements-aws.in -o requirements-aws.txt +annotated-types==0.7.0 + # via pydantic +anyio==4.6.0 + # via + # httpx + # openai + # starlette +backoff==2.2.1 + # via posthog +beaupy==3.9.2 + # via launchflow +boto3==1.35.27 + # via launchflow +botocore==1.35.27 + # via + # boto3 + # s3transfer +cachetools==5.5.0 + # via google-auth +certifi==2024.8.30 + # via + # httpcore + # httpx + # kubernetes + # requests +charset-normalizer==3.3.2 + # via requests +click==8.1.7 + # via typer +deepdiff==8.0.1 + # via launchflow +distro==1.9.0 + # via openai +docker==7.1.0 + # via launchflow +durationpy==0.6 +emoji==2.13.2 + # via beaupy +fastapi==0.115.0 +google-auth==2.35.0 + # via kubernetes +h11==0.14.0 + # via httpcore +httpcore==1.0.5 + # via httpx +httpx==0.27.2 + # via + # launchflow + # openai +idna==3.10 + # via + # anyio + # httpx + # requests +jinja2==3.1.4 + # via launchflow +jiter==0.5.0 + # via openai +jmespath==1.0.1 + # via + # boto3 + # botocore +kubernetes==30.1.0 + # via launchflow +launchflow==0.4.12 +mangum==0.18.0 +markdown-it-py==3.0.0 + # via rich +markupsafe==2.1.5 + # via jinja2 +mdurl==0.1.2 + # via markdown-it-py +monotonic==1.6 + # via posthog +oauthlib==3.2.2 + # via + # kubernetes + # requests-oauthlib +openai==1.48.0 +orderly-set==5.2.2 + # via deepdiff +pathspec==0.12.1 + # via launchflow +posthog==3.6.6 + # via launchflow +pyasn1==0.6.1 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.1 + # via google-auth +pydantic==2.9.2 + # via + # fastapi + # launchflow + # openai +pydantic-core==2.23.4 + # via pydantic +pygments==2.18.0 + # via rich +pyjwt==2.9.0 + # via launchflow +python-dateutil==2.9.0.post0 + # via + # botocore + # kubernetes + # posthog +python-dotenv==1.0.1 +python-yakh==0.3.2 + # via + # beaupy + # questo +pyyaml==6.0.2 + # via + # kubernetes + # launchflow +questo==0.3.0 + # via beaupy +requests==2.32.3 + # via + # docker + # kubernetes + # launchflow + # posthog + # requests-oauthlib +requests-oauthlib==2.0.0 + # via kubernetes +rich==13.8.1 + # via + # beaupy + # launchflow + # questo + # typer +rsa==4.9 + # via google-auth +s3transfer==0.10.2 + # via boto3 +setuptools==75.1.0 + # via launchflow +shellingham==1.5.4 + # via typer +six==1.16.0 + # via + # kubernetes + # posthog + # python-dateutil +sniffio==1.3.1 + # via + # anyio + # httpx + # openai +starlette==0.38.6 + # via fastapi +terminaltexteffects==0.11.0 + # via launchflow +toml==0.10.2 + # via launchflow +tqdm==4.66.5 + # via openai +typer==0.12.5 + # via launchflow +typing-extensions==4.12.2 + # via + # fastapi + # mangum + # openai + # pydantic + # pydantic-core + # typer +urllib3==2.2.3 + # via + # botocore + # docker + # kubernetes + # requests +uvloop==0.20.0 + # via launchflow +websocket-client==1.8.0 + # via kubernetes diff --git a/llama-demo/requirements-gcp.in b/llama-demo/requirements-gcp.in new file mode 100644 index 0000000..b6cbbe8 --- /dev/null +++ b/llama-demo/requirements-gcp.in @@ -0,0 +1,8 @@ +fastapi[standard] +launchflow[gcp] +pydantic +pydantic-settings +openai +mangum +durationpy==0.6 +python-dotenv diff --git a/llama-demo/requirements-gcp.txt b/llama-demo/requirements-gcp.txt new file mode 100644 index 0000000..8d49ead --- /dev/null +++ b/llama-demo/requirements-gcp.txt @@ -0,0 +1,391 @@ +# This file was autogenerated by uv v0.1.1 via the following command: +# uv pip compile requirements.in -o requirements.txt +aiofiles==24.1.0 + # via cloud-sql-python-connector +aiohappyeyeballs==2.4.0 + # via aiohttp +aiohttp==3.10.6 + # via cloud-sql-python-connector +aiosignal==1.3.1 + # via aiohttp +annotated-types==0.7.0 + # via pydantic +anyio==4.6.0 + # via + # httpx + # openai + # starlette + # watchfiles +attrs==24.2.0 + # via aiohttp +backoff==2.2.1 + # via posthog +beaupy==3.9.2 + # via launchflow +boto3==1.35.26 + # via launchflow +botocore==1.35.26 + # via + # boto3 + # s3transfer +cachetools==5.5.0 + # via google-auth +certifi==2024.8.30 + # via + # httpcore + # httpx + # kubernetes + # requests +cffi==1.17.1 + # via cryptography +charset-normalizer==3.3.2 + # via requests +click==8.1.7 + # via + # typer + # uvicorn +cloud-sql-python-connector==1.12.1 + # via launchflow +cryptography==43.0.1 + # via cloud-sql-python-connector +deepdiff==8.0.1 + # via launchflow +distro==1.9.0 + # via openai +dnspython==2.6.1 + # via email-validator +docker==7.1.0 + # via launchflow +durationpy==0.6 +email-validator==2.2.0 + # via fastapi +emoji==2.13.2 + # via beaupy +fastapi==0.115.0 +fastapi-cli==0.0.5 + # via fastapi +frozenlist==1.4.1 + # via + # aiohttp + # aiosignal +google-api-core==2.20.0 + # via + # google-api-python-client + # google-cloud-bigquery + # google-cloud-billing + # google-cloud-build + # google-cloud-compute + # google-cloud-container + # google-cloud-core + # google-cloud-pubsub + # google-cloud-resource-manager + # google-cloud-run + # google-cloud-secret-manager + # google-cloud-service-usage + # google-cloud-storage + # google-cloud-tasks +google-api-python-client==2.147.0 + # via launchflow +google-auth==2.35.0 + # via + # cloud-sql-python-connector + # google-api-core + # google-api-python-client + # google-auth-httplib2 + # google-cloud-bigquery + # google-cloud-billing + # google-cloud-build + # google-cloud-compute + # google-cloud-container + # google-cloud-core + # google-cloud-pubsub + # google-cloud-resource-manager + # google-cloud-run + # google-cloud-secret-manager + # google-cloud-service-usage + # google-cloud-storage + # google-cloud-tasks + # kubernetes +google-auth-httplib2==0.2.0 + # via google-api-python-client +google-cloud-bigquery==3.25.0 + # via launchflow +google-cloud-billing==1.13.6 + # via launchflow +google-cloud-build==3.25.0 + # via launchflow +google-cloud-compute==1.19.2 + # via launchflow +google-cloud-container==2.51.0 + # via launchflow +google-cloud-core==2.4.1 + # via + # google-cloud-bigquery + # google-cloud-storage +google-cloud-pubsub==2.23.1 + # via launchflow +google-cloud-resource-manager==1.12.5 + # via launchflow +google-cloud-run==0.10.8 + # via launchflow +google-cloud-secret-manager==2.20.2 + # via launchflow +google-cloud-service-usage==1.10.5 + # via launchflow +google-cloud-storage==2.18.2 + # via launchflow +google-cloud-tasks==2.16.5 + # via launchflow +google-crc32c==1.6.0 + # via + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.7.2 + # via + # google-cloud-bigquery + # google-cloud-storage +googleapis-common-protos==1.65.0 + # via + # google-api-core + # grpc-google-iam-v1 + # grpcio-status +grpc-google-iam-v1==0.13.1 + # via + # google-cloud-billing + # google-cloud-build + # google-cloud-pubsub + # google-cloud-resource-manager + # google-cloud-run + # google-cloud-secret-manager + # google-cloud-tasks +grpcio==1.66.1 + # via + # google-api-core + # google-cloud-pubsub + # googleapis-common-protos + # grpc-google-iam-v1 + # grpcio-status +grpcio-status==1.66.1 + # via + # google-api-core + # google-cloud-pubsub +h11==0.14.0 + # via + # httpcore + # uvicorn +httpcore==1.0.5 + # via httpx +httplib2==0.22.0 + # via + # google-api-python-client + # google-auth-httplib2 +httptools==0.6.1 + # via uvicorn +httpx==0.27.2 + # via + # fastapi + # launchflow + # openai +idna==3.10 + # via + # anyio + # email-validator + # httpx + # requests + # yarl +jinja2==3.1.4 + # via + # fastapi + # launchflow +jiter==0.5.0 + # via openai +jmespath==1.0.1 + # via + # boto3 + # botocore +kubernetes==30.1.0 + # via launchflow +launchflow==0.4.12 +mangum==0.18.0 +markdown-it-py==3.0.0 + # via rich +markupsafe==2.1.5 + # via jinja2 +mdurl==0.1.2 + # via markdown-it-py +monotonic==1.6 + # via posthog +multidict==6.1.0 + # via + # aiohttp + # yarl +oauthlib==3.2.2 + # via + # kubernetes + # requests-oauthlib +openai==1.48.0 +orderly-set==5.2.2 + # via deepdiff +packaging==24.1 + # via google-cloud-bigquery +pathspec==0.12.1 + # via launchflow +posthog==3.6.6 + # via launchflow +proto-plus==1.24.0 + # via + # google-api-core + # google-cloud-billing + # google-cloud-build + # google-cloud-compute + # google-cloud-container + # google-cloud-pubsub + # google-cloud-resource-manager + # google-cloud-run + # google-cloud-secret-manager + # google-cloud-service-usage + # google-cloud-tasks +protobuf==5.28.2 + # via + # google-api-core + # google-cloud-billing + # google-cloud-build + # google-cloud-compute + # google-cloud-container + # google-cloud-pubsub + # google-cloud-resource-manager + # google-cloud-run + # google-cloud-secret-manager + # google-cloud-service-usage + # google-cloud-tasks + # googleapis-common-protos + # grpc-google-iam-v1 + # grpcio-status + # proto-plus +pyasn1==0.6.1 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.1 + # via google-auth +pycparser==2.22 + # via cffi +pydantic==2.9.2 + # via + # fastapi + # launchflow + # openai + # pydantic-settings +pydantic-core==2.23.4 + # via pydantic +pydantic-settings==2.5.2 +pygments==2.18.0 + # via rich +pyjwt==2.9.0 + # via launchflow +pyparsing==3.1.4 + # via httplib2 +python-dateutil==2.9.0.post0 + # via + # botocore + # google-cloud-bigquery + # kubernetes + # posthog +python-dotenv==1.0.1 + # via + # pydantic-settings + # uvicorn +python-multipart==0.0.10 + # via fastapi +python-yakh==0.3.2 + # via + # beaupy + # questo +pyyaml==6.0.2 + # via + # kubernetes + # launchflow + # uvicorn +questo==0.3.0 + # via beaupy +requests==2.32.3 + # via + # cloud-sql-python-connector + # docker + # google-api-core + # google-cloud-bigquery + # google-cloud-storage + # kubernetes + # launchflow + # posthog + # requests-oauthlib +requests-oauthlib==2.0.0 + # via kubernetes +rich==13.8.1 + # via + # beaupy + # launchflow + # questo + # typer +rsa==4.9 + # via google-auth +s3transfer==0.10.2 + # via boto3 +setuptools==75.1.0 + # via launchflow +shellingham==1.5.4 + # via typer +six==1.16.0 + # via + # kubernetes + # posthog + # python-dateutil +sniffio==1.3.1 + # via + # anyio + # httpx + # openai +starlette==0.38.6 + # via fastapi +terminaltexteffects==0.11.0 + # via launchflow +toml==0.10.2 + # via launchflow +tqdm==4.66.5 + # via openai +typer==0.12.5 + # via + # fastapi-cli + # launchflow +typing-extensions==4.12.2 + # via + # fastapi + # mangum + # openai + # pydantic + # pydantic-core + # typer +uritemplate==4.1.1 + # via google-api-python-client +urllib3==2.2.3 + # via + # botocore + # docker + # kubernetes + # requests +uvicorn==0.30.6 + # via + # fastapi + # fastapi-cli +uvloop==0.20.0 + # via + # launchflow + # uvicorn +watchfiles==0.24.0 + # via uvicorn +websocket-client==1.8.0 + # via kubernetes +websockets==13.1 + # via uvicorn +yarl==1.12.1 + # via aiohttp diff --git a/llama-demo/templates/index.html b/llama-demo/templates/index.html new file mode 100644 index 0000000..c1bf2be --- /dev/null +++ b/llama-demo/templates/index.html @@ -0,0 +1,249 @@ + + +
+ + +