ServiceNow · tscholak · Oct 21, 2024 · Oct 16, 2024 · Oct 17, 2024 · Oct 17, 2024
diff --git a/.github/workflows/build_documentation.yaml b/.github/workflows/build_documentation.yaml
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -0,0 +1,85 @@
+# Continuous integration workflow.
+name: CI
+
+on:
+  # Daily at 10:00 (GitHub evaluates cron schedules in UTC).
+  schedule:
+    - cron: '0 10 * * *'
+  # Pushes to main and to tags matching v*.*.*.
+  push:
+    branches: [main]
+    tags: ['v*.*.*']
+  # Pull requests targeting main.
+  pull_request:
+    branches: [main]
+
+jobs:
+  test:
+    # Installs the package with all of its extras and runs the pytest suite.
+    name: Test
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      # Python 3.12, with setup-python's pip cache enabled.
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+
+      # torch is installed first because the editable install that follows
+      # runs with --no-build-isolation. The FLASH_ATTENTION_* variables are
+      # presumably read by the flash-attn build to skip CUDA compilation on
+      # this GPU-less runner -- TODO confirm.
+      - name: Install dependencies
+        run: |
+          pip install "torch>=2.2.2"
+          FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE FLASH_ATTENTION_FORCE_BUILD=TRUE pip install --no-build-isolation -e ".[CORE,OPTIONAL,DEV,DOCS]"
+
+      - name: Run tests
+        run: pytest .
+
+  docker:
+    # Builds the container image and pushes it to GHCR after `test` succeeds.
+    name: Docker
+    runs-on: ubuntu-latest
+    needs: test
+    # The GHCR login and push below authenticate with GITHUB_TOKEN, which
+    # needs package write access; checkout only needs to read the contents.
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      # Remove large preinstalled toolchains to free runner disk space
+      # before the image build.
+      - name: Clean unused files
+        run: |
+          sudo rm -rf /usr/local/lib/android || true  # will release about 10 GB
+          sudo rm -rf /usr/share/dotnet || true  # will release about 20GB
+          sudo rm -rf /opt/ghc || true
+          sudo rm -rf /usr/local/.ghcup || true
+
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      # Derive image tags and labels from the triggering event. The raw
+      # `latest` rule uses the action's `enable` attribute with a GitHub
+      # expression so that it only applies to refs/heads/main; an
+      # unrecognized attribute name would leave the rule always on.
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ghcr.io/servicenow/fast-llm
+          tags: |
+            type=schedule
+            type=ref,event=branch
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=semver,pattern={{major}}
+            type=sha
+            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Build layers are cached in the registry under the `cache` tag.
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          # NOTE(review): the image is pushed on every trigger, pull requests
+          # included; the event-based condition below is left disabled as found.
+          # push: ${{ github.event_name != 'pull_request' }}
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=registry,ref=ghcr.io/servicenow/fast-llm:cache
+          cache-to: type=registry,ref=ghcr.io/servicenow/fast-llm:cache,mode=max
diff --git a/.github/workflows/deploy_documentation.yaml b/.github/workflows/deploy_documentation.yaml
diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
@@ -0,0 +1,59 @@
+# Builds the MkDocs site on every change and publishes it on pushes to main.
+name: Documentation
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+# Workflow-wide token permission: write access to the repository contents.
+permissions:
+  contents: write
+
+jobs:
+  build:
+    # Installs the package with its extras and checks that the site builds.
+    name: Build
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: pip
+      # %V is the ISO week number, so the cache key below changes weekly.
+      - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
+      - uses: actions/cache@v4
+        with:
+          path: .cache
+          key: mkdocs-material-${{ env.cache_id }}
+          restore-keys: |
+            mkdocs-material-
+      # torch is installed first because the editable install that follows
+      # runs with --no-build-isolation.
+      - run: |
+          pip install "torch>=2.2.2"
+          FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE FLASH_ATTENTION_FORCE_BUILD=TRUE pip install --no-build-isolation -e ".[CORE,OPTIONAL,DEV,DOCS]"
+      - name: Build the documentation
+        run: mkdocs build
+
+  deploy:
+    # Publishes the site; runs only for push events and only after `build`.
+    name: Deploy
+    needs: build
+    if: github.event_name == 'push'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: pip
+      # %V is the ISO week number, so the cache key below changes weekly.
+      - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
+      - uses: actions/cache@v4
+        with:
+          path: .cache
+          key: mkdocs-material-${{ env.cache_id }}
+          restore-keys: |
+            mkdocs-material-
+      # Same installation sequence as the build job: torch first, then the
+      # editable install with build isolation disabled.
+      - run: |
+          pip install "torch>=2.2.2"
+          FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE FLASH_ATTENTION_FORCE_BUILD=TRUE pip install --no-build-isolation -e ".[CORE,OPTIONAL,DEV,DOCS]"
+      - name: Publish the documentation
+        run: mkdocs gh-deploy --force --dirty
diff --git a/README.md b/README.md
@@ -0,0 +1,161 @@
+<div align="center" style="margin-bottom: 1em;">
+
+<img width="50%" src="docs/assets/images/logo.png" alt="Fast-LLM Logo" />
+
+[![Docker][ci-badge]][ci-workflow]
+[![Documentation][docs-badge]][docs-workflow]
+[![License][license-badge]][license]
+
+*Accelerating your LLM training to full speed*
+
+Made with ❤️ by [ServiceNow Research][servicenow-research]
+
+</div>
+
+## Overview
+
+Fast-LLM is a new open-source library for training large language models, built on [PyTorch][pytorch] and [Triton][triton]. It is extremely fast, scales to large clusters, supports a wide range of model architectures, and is easy to use. Unlike commercial frameworks such as Megatron-LM, which are largely closed off and fragmented across forks, Fast-LLM is fully open-source and encourages community-driven development. Researchers can freely customize and optimize as needed, making it a flexible and hackable alternative that combines the speed of specialized tools with the openness of libraries like [Hugging Face Transformers][transformers].
+
+> [!NOTE]
+> Fast-LLM is not affiliated with Fast.AI, FastHTML, FastAPI, FastText, or other similarly named projects. Our library's name refers to its speed and efficiency in language model training.
+
+## Why Fast-LLM?
+
+1. 🚀 **Fast-LLM is Blazingly Fast**:
+    - ⚡️ Optimized kernel efficiency and reduced overheads.
+    - 🔋 Optimized memory usage for best performance.
+    - ⏳ Minimizes training time and cost.
+
+2. 📈 **Fast-LLM is Highly Scalable**:
+    - 📡 Distributed training across multiple GPUs and nodes using 3D parallelism (Data, Tensor, and Pipeline).
+    - 🔗 Supports sequence length parallelism to handle longer sequences effectively.
+    - 🧠 ZeRO-1, ZeRO-2, and ZeRO-3 implementations for improved memory efficiency.
+    - 🎛️ Mixed precision training support for better performance.
+    - 🏋️‍♂️ Large batch training and gradient accumulation support.
+    - 🔄 Reproducible training with deterministic behavior.
+
+3. 🎨 **Fast-LLM is Incredibly Flexible**:
+    - 🤖 Compatible with all common language model architectures in a unified class.
+    - ⚡ Efficient dropless Mixture-of-Experts (MoE) implementation with SoTA performance.
+    - 🧩 Customizable language model architectures, data loaders, loss functions, and optimizers (in progress).
+    - 🤗 Seamless integration with [Hugging Face Transformers][transformers].
+
+4. 🎯 **Fast-LLM is Super Easy to Use**:
+    - 📦 [Pre-built Docker images](https://github.com/ServiceNow/Fast-LLM/pkgs/container/fast-llm) for quick deployment.
+    - 📝 Simple YAML configuration for hassle-free setup.
+    - 💻 Command-line interface for easy launches.
+    - 📊 Detailed logging and real-time monitoring features.
+    - 📚 Extensive [documentation][docs] and practical tutorials (in progress).
+
+5. 🌐 **Fast-LLM is Truly Open Source**:
+    - ⚖️ Licensed under [Apache 2.0][license] for maximum freedom to use Fast-LLM at work, in your projects, or for research.
+    - 💻 Fully developed on GitHub with a public [roadmap][roadmap] and transparent [issue tracking][issues].
+    - 🤝 Contributions and collaboration are always welcome!
+
+## Usage
+
+We'll walk you through how to use Fast-LLM to train a large language model on a cluster with multiple nodes and GPUs. We'll show an example setup using a Slurm cluster and a Kubernetes cluster.
+
+For this demo, we will train a Mistral-7B model from scratch for 100 steps on random data. The config file `examples/mistral-4-node-benchmark.yaml` is pre-configured for a multi-node setup with 4 DGX nodes, each with 8 A100-80GB or H100-80GB GPUs.
+
+> [!NOTE]
+> Fast-LLM scales from a single GPU to large clusters. You can start small and expand based on your resources.
+
+Expect to see a significant speedup in training time compared to other libraries! For training Mistral-7B, Fast-LLM is expected to achieve a throughput of **9,800 tokens/s/H100** (batch size 32, sequence length 8k) on a 4-node cluster with 32 H100s.
+
+### Running Fast-LLM on a Slurm Cluster
+
+#### Prerequisites
+
+- A [Slurm](https://slurm.schedmd.com/) cluster with at least 4 DGX nodes with 8 A100-80GB or H100-80GB GPUs each.
+- CUDA 12.1 or higher.
+- Dependencies: [PyTorch][pytorch], [Triton][triton], and [Apex](https://github.com/NVIDIA/apex) installed on all nodes.
+
+#### Steps
+
+1. Deploy the [nvcr.io/nvidia/pytorch:24.07-py3](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch) Docker image to all nodes (recommended), because it contains all the necessary dependencies.
+2. Install Fast-LLM on all nodes:
+
+    ```bash
+    sbatch <<EOF
+    #!/bin/bash
+    #SBATCH --nodes=$(scontrol show node | grep -c NodeName)
+    #SBATCH --ntasks-per-node=1
+    #SBATCH --ntasks=$(scontrol show node | grep -c NodeName)
+    #SBATCH --exclusive
+
+    srun bash -c 'pip install --no-cache-dir -e "git+https://github.com/ServiceNow/Fast-LLM.git#egg=llm[CORE,OPTIONAL,DEV]"'
+    EOF
+    ```
+
+3. Use the example Slurm job script [examples/fast-llm.sbat](examples/fast-llm.sbat) to submit the job to the cluster:
+
+    ```bash
+    sbatch examples/fast-llm.sbat
+    ```
+
+4. Monitor the job's progress:
+
+    - Logs: Follow `job_output.log` and `job_error.log` in your working directory.
+    - Status: Use `squeue -u $USER` to see the job status.
+
+Now, you can sit back and relax while Fast-LLM trains your model at full speed! ☕
+
+### Running Fast-LLM on a Kubernetes Cluster
+
+#### Prerequisites
+
+- A [Kubernetes](https://kubernetes.io/) cluster with at least 4 DGX nodes with 8 A100-80GB or H100-80GB GPUs each.
+- [Kubeflow](https://www.kubeflow.org/) installed.
+- Locked memory limit set to unlimited at the host level on all nodes. Ask your cluster admin to do this if needed.
+
+#### Steps
+
+1. Create a Kubernetes [PersistentVolumeClaim](https://kubernetes.io/docs/concepts/storage/persistent-volumes/) (PVC) named `fast-llm-home` that will be mounted to `/home/fast-llm` in the container using [examples/fast-llm-pvc.yaml](examples/fast-llm-pvc.yaml):
+
+    ```bash
+    kubectl apply -f examples/fast-llm-pvc.yaml
+    ```
+
+2. Create a [PyTorchJob](https://www.kubeflow.org/docs/components/training/user-guides/pytorch/) resource using the example configuration file [examples/fast-llm.pytorchjob.yaml](examples/fast-llm.pytorchjob.yaml):
+
+    ```bash
+    kubectl apply -f examples/fast-llm.pytorchjob.yaml
+    ```
+
+3. Monitor the job status:
+
+    - Use `kubectl get pytorchjobs` to see the job status.
+    - Use `kubectl logs -f fast-llm-master-0 -c pytorch` to follow the logs.
+
+That's it! You're now up and running with Fast-LLM on Kubernetes. 🚀
+
+## Next Steps
+
+📖 **Want to learn more?** Check out our [documentation][docs] for more information on how to use Fast-LLM.
+
+🔨 **We welcome contributions to Fast-LLM!** Have a look at our [contribution guidelines](CONTRIBUTING.md).
+
+🐞 **Something doesn't work?** Open an [issue](https://github.com/ServiceNow/Fast-LLM/issues)!
+
+## License
+
+Fast-LLM is licensed by ServiceNow, Inc. under the Apache 2.0 License. See [LICENSE][license] for more information.
+
+## Vulnerability Reporting
+
+For security issues, email [disclosure@servicenow.com](mailto:disclosure@servicenow.com). See our [security policy](SECURITY.md).
+
+[roadmap]: https://github.com/ServiceNow/Fast-LLM/milestones
+[issues]: https://github.com/ServiceNow/Fast-LLM/issues
+[ci-badge]: https://github.com/ServiceNow/Fast-LLM/actions/workflows/ci.yaml/badge.svg
+[ci-workflow]: https://github.com/ServiceNow/Fast-LLM/actions/workflows/ci.yaml
+[docs-badge]: https://github.com/ServiceNow/Fast-LLM/actions/workflows/docs.yaml/badge.svg
+[docs-workflow]: https://github.com/ServiceNow/Fast-LLM/actions/workflows/docs.yaml
+[docs]: https://servicenow.github.io/Fast-LLM
+[license-badge]: https://img.shields.io/badge/License-Apache%202.0-blue.svg
+[license]: LICENSE
+[servicenow-research]: https://www.servicenow.com/research/
+[pytorch]: https://pytorch.org/
+[triton]: https://triton-lang.org
+[transformers]: https://huggingface.co/transformers
diff --git a/SECURITY.md b/SECURITY.md
@@ -0,0 +1,27 @@
+# Security Policy
+
+## Supported Versions
+
+The Fast-LLM project is currently in a pre-release state. There are no officially released versions supported for security updates at this time. This section will be updated once formal releases are made.
+
+<!-- | Version | Supported          |
+| ------- | ------------------ |
+| 0.1.x   | :white_check_mark: |
+| < 0.1.0 | :x:                | -->
+
+## Reporting a Vulnerability
+
+To report a security vulnerability in Fast-LLM, please email our [Product Security Incident Response Team (PSIRT)](https://securitylab.servicenow.com) at [disclosure@servicenow.com](mailto:disclosure@servicenow.com). Include a detailed description of the issue, steps to reproduce it, and any relevant information that may help in investigating the matter.
+
+## Guidelines
+
+Please follow the guidelines below when [disclosing vulnerabilities](https://www.servicenow.com/company/trust/privacy/responsible-disclosure.html):
+
+- Report any potential security issue as soon as possible. ServiceNow will make every effort to quickly resolve the issue.
+- Provide sufficient detail to reproduce the vulnerability, including proof of concept. The use of ReproNow to demonstrate reproducibility is encouraged but not required.
+- Please do not disclose an issue to the public or any third party until ServiceNow has resolved it.
+- Make a good faith effort to avoid privacy violations, data destruction, and interruption or degradation of our services. Only interact with accounts you own or have explicit permission from the account holder to access.
+- Redact any language or images that may identify the program or ServiceNow customers from information about a resolved vulnerability.
+- Do not engage in disruptive testing (such as Denial of Service attacks) or any action that could impact the confidentiality, integrity, or availability of information and systems.
+- Do not engage in social engineering or phishing against customers or employees.
+- Please do not request compensation for time, materials, or discovered vulnerabilities through the Responsible Disclosure Program.