Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
.dockerignore
.gitignore
*.md
Dockerfile

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

docker

compose.yml
data/
30 changes: 30 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04

# Install Python 3.13 from the deadsnakes PPA (Ubuntu 24.04 ships 3.12).
# update + install are combined per layer and apt lists removed in the same
# layer so stale package caches never persist in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        software-properties-common \
        wget \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        ninja-build \
        python3.13 \
        python3.13-dev \
        python3.13-venv \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Sharp and dependencies into an isolated virtualenv.
# Only dependency manifests and package sources are copied first, so the
# expensive pip layers stay cached when unrelated files change.
WORKDIR /app
COPY pyproject.toml requirements.txt requirements.in /app/
COPY src/ /app/src/
RUN python3.13 -m venv .venv

# Build CUDA extensions for the common GPU architectures at image-build time.
ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9;9.0+PTX"
ENV FORCE_CUDA="1"
RUN .venv/bin/pip install --no-cache-dir ninja
RUN .venv/bin/pip install --no-cache-dir -r requirements.txt
RUN .venv/bin/pip install --no-cache-dir gradio
RUN ln -s /app/.venv/bin/sharp /usr/local/bin/sharp

# Smoke test: run one prediction, which also pre-downloads the model weights
# into the image. Download, run, and clean up in a single layer so the
# temporary files do not remain baked into an earlier layer.
RUN wget https://apple.github.io/ml-sharp/thumbnails/Unsplash_-5wkyNA2BPc_0000-0001.jpg -O /tmp/test.jpg \
    && sharp predict -i /tmp/test.jpg -o /tmp/test \
    && rm -rf /tmp/test.jpg /tmp/test

# Copy the remaining project files (filtered by .dockerignore).
COPY . /app

# Gradio web server port (documentation only; published via compose.yml).
EXPOSE 7860

# Start the Gradio web server (exec form; absolute interpreter path;
# -u keeps Python output unbuffered so logs appear immediately).
CMD ["/app/.venv/bin/python3.13", "-u", "/app/gradio_web.py"]
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,17 @@ sharp predict -i /path/to/input/images -o /path/to/output/gaussians --render
sharp render -i /path/to/output/gaussians -o /path/to/output/renderings
```

## Using Docker

We provide a Docker image to run the code. You can start the Gradio app with:

```
docker compose up --build --remove-orphans
```

The app will be available at `http://localhost:7860`.
To use the Docker image, you need Docker installed with NVIDIA GPU (CUDA) support.

## Evaluation

Please refer to the paper for both quantitative and qualitative evaluations.
Expand Down
16 changes: 16 additions & 0 deletions compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Compose service for the Sharp Gradio demo (requires the NVIDIA Container
# Toolkit so the container can reserve a GPU).
services:
  sharp:
    build:
      context: .
      dockerfile: Dockerfile
    volumes:
      # Persist input images and rendered outputs on the host.
      - ./data:/app/data
    ports:
      # Gradio web UI.
      - "7860:7860"
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for inference.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
61 changes: 61 additions & 0 deletions gradio_web.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@

import gradio as gr
import subprocess
import os
import shutil
import time
import glob

def predict(image):
    """Run ``sharp predict`` on an uploaded image and return rendered videos.

    Args:
        image: Filesystem path to the uploaded image. Gradio's
            ``gr.Image(type="filepath")`` supplies a temp-file path.

    Returns:
        Tuple ``(rgb_video_path, depth_video_path)``. Either element is
        ``None`` when the corresponding output file was not produced.
    """
    # Ensure the (volume-mounted) data directory exists.
    os.makedirs("/app/data", exist_ok=True)

    # NOTE(review): a fixed input path means concurrent requests clobber each
    # other's files -- acceptable for a single-user demo. The basename
    # ("input") also determines the output video names checked below.
    input_path = "/app/data/input.jpg"

    # The gradio-provided path is a temp file; copy it to the shared volume.
    shutil.copy(image, input_path)

    # Equivalent CLI: sharp predict -i /app/data/input.jpg -o /app/data/output --render
    cmd = [
        "sharp", "predict",
        "-i", input_path,
        "-o", "/app/data/output",
        "--render",
    ]

    try:
        t = time.time()
        print("Sharp started")
        subprocess.run(cmd, check=True, capture_output=True)
        print(f"Sharp command took {round(time.time() - t, 3)} seconds")
    except subprocess.CalledProcessError as e:
        print(f"Error running sharp: {e}")
        print(f"Stdout: {e.stdout.decode()}")
        print(f"Stderr: {e.stderr.decode()}")
        # Return a 2-tuple to match the interface's two video outputs
        # (the original returned a bare None here, mismatching the arity
        # of the success paths below).
        return None, None

    # Output file names are derived by sharp from the input basename.
    rgb_video = "/app/data/output/input.mp4"
    depth_video = "/app/data/output/input.depth.mp4"

    if os.path.exists(rgb_video) and os.path.exists(depth_video):
        return rgb_video, depth_video
    elif os.path.exists(rgb_video):
        return rgb_video, None

    return None, None

# Single-image interface: one image in, RGB and depth videos out.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="filepath", label="Input Image"),
    outputs=[gr.Video(label="RGB Video"), gr.Video(label="Depth Video")],
    title="Sharp 3D View Synthesis",
    description="Upload an image to generate a 3D view synthesis video.",
)

if __name__ == "__main__":
    print("Sharp Monocular View Synthesis in Less Than a Second (https://github.com/apple/ml-sharp)")
    # Bind to all interfaces so the server is reachable from outside the container.
    demo.launch(server_name="0.0.0.0", server_port=7860)