Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/infinicore/nn.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@

#include "nn/embedding.hpp"
#include "nn/linear.hpp"
#include "nn/mrope.hpp"
#include "nn/rmsnorm.hpp"
59 changes: 59 additions & 0 deletions include/infinicore/nn/mrope.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#pragma once

#include "../context/context.hpp"
#include "../tensor.hpp"
#include "module.hpp"
#include <array>
#include <string>
#include <utility>

namespace infinicore::nn {

/// Module declaring a multimodal rotary position embedding (MRoPE) layer.
///
/// Stores the configuration passed to the constructor and exposes it through
/// read-only accessors. The definitions of the constructor, both `forward`
/// overloads, `extra_repr` and `initialize_cache` are not in this header.
class MRoPE : public Module {
public:
/// Constructs the module from its configuration.
///
/// @param head_dim     Reported by `head_dim()`.
/// @param rotary_dim   Reported by `rotary_dim()`.
/// @param max_seq_len  Reported by `max_seq_len()`.
/// @param theta        Reported by `theta()`.
/// @param section      Three integers reported by `section()`.
/// @param interleaved  Reported by `interleaved()`.
/// @param dtype        Reported by `dtype()`.
/// @param device       Target device; no accessor in this class exposes it.
///
/// NOTE(review): presumably `rotary_dim <= head_dim` and `section` splits the
/// rotary half-dimension between three position axes — confirm in the .cpp.
MRoPE(size_t head_dim,
size_t rotary_dim,
size_t max_seq_len,
double theta,
std::array<int, 3> section,
bool interleaved,
const DataType &dtype,
const Device &device);

/// Applies the embedding to `q` and `k` for the given `positions` and returns
/// a pair of tensors.
/// NOTE(review): presumably the pair is (rotated q, rotated k) — confirm.
std::pair<Tensor, Tensor> forward(const Tensor &q,
const Tensor &k,
const Tensor &positions) const;

/// Overload that additionally receives `q_out` and `k_out`.
/// NOTE(review): presumably the results are written into `q_out`/`k_out` and
/// those same tensors are returned — confirm against the implementation.
std::pair<Tensor, Tensor> forward(const Tensor &q_out,
const Tensor &k_out,
const Tensor &q,
const Tensor &k,
const Tensor &positions) const;

// Read-only accessors for the stored configuration.
size_t rotary_dim() const { return rotary_dim_; }
size_t head_dim() const { return head_dim_; }
size_t max_seq_len() const { return max_seq_len_; }
double theta() const { return theta_; }
const std::array<int, 3> &section() const { return section_; }
bool interleaved() const { return interleaved_; }
DataType dtype() const { return dtype_; }

/// Returns a string describing the module.
/// NOTE(review): exact format is defined in the .cpp — not visible here.
std::string extra_repr() const;

protected:
// Buffers declared through a project macro defined elsewhere.
// NOTE(review): presumably these hold the precomputed sin/cos tables filled
// by initialize_cache() — confirm against the macro and the .cpp.
INFINICORE_NN_BUFFER(sin_cache);
INFINICORE_NN_BUFFER(cos_cache);

private:
// Defined out of line; presumably populates sin_cache/cos_cache — TODO confirm.
void initialize_cache();

// Configuration members. Declaration order is also the member
// initialisation order, so keep it in sync with the constructor's
// initialiser list.
size_t head_dim_;
size_t rotary_dim_;
size_t max_seq_len_;
double theta_;
std::array<int, 3> section_;
bool interleaved_;
DataType dtype_;
};

} // namespace infinicore::nn
2 changes: 2 additions & 0 deletions python/infinicore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@
moore_mate_flash_attn_decode,
moore_mate_flash_attn_prefill,
)
from infinicore.ops.mrope import mrope
from infinicore.ops.mul import mul
from infinicore.ops.narrow import narrow
from infinicore.ops.nrm2 import nrm2
Expand Down Expand Up @@ -215,6 +216,7 @@
"addbmm",
"floor",
"attention",
"mrope",
"block_diag",
"kron",
"bitwise_right_shift",
Expand Down
3 changes: 2 additions & 1 deletion python/infinicore/nn/modules/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from .container import InfiniCoreModuleList as ModuleList
from .linear import Linear
from .module import InfiniCoreModule as Module
from .mrope import MRoPE
from .normalization import RMSNorm
from .rope import RoPE
from .sparse import Embedding

__all__ = ["Linear", "RMSNorm", "Embedding", "RoPE", "ModuleList", "Module"]
__all__ = ["Linear", "RMSNorm", "Embedding", "RoPE", "MRoPE", "ModuleList", "Module"]
105 changes: 105 additions & 0 deletions python/infinicore/nn/modules/mrope.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import numpy as np

import infinicore
from infinicore.ops.mrope import mrope

from ...tensor import Tensor
from .module import InfiniCoreModule as Module


def create_sin_cos_table_numpy(max_position, rotary_dim, theta=10000.0):
    """Build float32 sine and cosine lookup tables for rotary embeddings.

    Entry ``[p, i]`` of each table is ``sin``/``cos`` of
    ``p / theta ** (2 * i / rotary_dim)``.

    Args:
        max_position: Number of rows; positions ``0 .. max_position - 1``.
        rotary_dim: Rotary dimension; must be even.
        theta: Base of the frequency progression.

    Returns:
        ``(sin_table, cos_table)``: two float32 arrays with one row per
        position and ``rotary_dim // 2`` columns.

    Raises:
        ValueError: If ``rotary_dim`` is odd.
    """
    if rotary_dim % 2:
        raise ValueError("rotary_dim must be even")

    half_dim = rotary_dim // 2
    # Exponent 2*i / rotary_dim for every rotated pair i.
    exponents = np.arange(0, rotary_dim, 2)[:half_dim].astype(float) / rotary_dim
    inv_freq = 1.0 / (theta ** exponents)

    # Outer product position x inverse frequency, spelled via broadcasting.
    positions = np.arange(0, max_position)
    phase = positions[:, np.newaxis] * inv_freq[np.newaxis, :]

    # The float32 ufunc loop is requested explicitly, as in the table contract.
    return np.sin(phase, dtype=np.float32), np.cos(phase, dtype=np.float32)


def create_sin_cos_table(
    max_position, rotary_dim, theta=10000.0, device=None, dtype=None
):
    """Build the sin/cos tables and wrap them with ``infinicore.from_numpy``.

    Args:
        max_position: Forwarded to ``create_sin_cos_table_numpy``.
        rotary_dim: Forwarded to ``create_sin_cos_table_numpy``.
        theta: Forwarded to ``create_sin_cos_table_numpy``.
        device: Passed as ``device=`` to ``infinicore.from_numpy``.
        dtype: Passed as ``dtype=`` to ``infinicore.from_numpy``.

    Returns:
        ``(sin_table, cos_table)`` as returned by ``infinicore.from_numpy``.
    """
    sin_np, cos_np = create_sin_cos_table_numpy(max_position, rotary_dim, theta)
    # Convert sine first, then cosine, so the conversion order is unchanged.
    sin_tensor = infinicore.from_numpy(sin_np, dtype=dtype, device=device)
    cos_tensor = infinicore.from_numpy(cos_np, dtype=dtype, device=device)
    return sin_tensor, cos_tensor


class MRoPE(Module):
    r"""Multimodal rotary position embedding with vLLM-style 2D sin/cos cache.

    The sin/cos tables are built once in ``__init__`` with
    ``create_sin_cos_table`` and passed to the ``mrope`` op on every
    ``forward`` call.
    """

    __constants__ = [
        "max_position_embeddings",
        "rope_theta",
        "head_dim",
        "rotary_dim",
        "section",
        "interleaved",
    ]

    def __init__(
        self,
        max_position_embeddings: int,
        rope_theta: float,
        head_dim: int,
        rotary_dim: int,
        section: tuple[int, int, int],
        interleaved: bool = False,
        device=None,
        dtype=None,
    ):
        """Validate the configuration and build the sin/cos tables.

        Args:
            max_position_embeddings: Number of rows in the sin/cos tables.
            rope_theta: Frequency base used for the tables.
            head_dim: Head size forwarded to the ``mrope`` op.
            rotary_dim: Rotary dimension; positive, even and ``<= head_dim``.
            section: Three non-negative integers whose sum is
                ``rotary_dim / 2``.
            interleaved: Forwarded unchanged to the ``mrope`` op.
            device: Table device; defaults to ``infinicore.device("cpu", 0)``.
            dtype: Table dtype; defaults to ``infinicore.float32``.

        Raises:
            ValueError: If ``rotary_dim`` or ``section`` violates the
                constraints above.
        """
        super().__init__()
        if rotary_dim <= 0 or rotary_dim > head_dim or rotary_dim % 2 != 0:
            raise ValueError("rotary_dim must be positive, even, and <= head_dim")

        # Normalise once so validation and storage see the same values.
        section = tuple(section)
        if len(section) != 3 or 2 * sum(section) != rotary_dim:
            raise ValueError("section must contain 3 values and sum to rotary_dim / 2")
        # The sum check alone accepts e.g. (-2, 10, 8); a negative width is
        # never a valid split, so reject it here instead of in the kernel.
        if any(width < 0 for width in section):
            raise ValueError("section values must be non-negative")

        factory_kwargs = {
            "device": infinicore.device("cpu", 0) if device is None else device,
            "dtype": infinicore.float32 if dtype is None else dtype,
        }

        self.max_position_embeddings = max_position_embeddings
        self.rope_theta = rope_theta
        self.head_dim = head_dim
        self.rotary_dim = rotary_dim
        self.section = section
        self.interleaved = interleaved

        self._sin_table, self._cos_table = create_sin_cos_table(
            self.max_position_embeddings,
            self.rotary_dim,
            self.rope_theta,
            **factory_kwargs,
        )

    def forward(
        self,
        q: Tensor,
        k: Tensor,
        positions: Tensor,
        *,
        out: tuple[Tensor, Tensor] | None = None,
    ) -> tuple[Tensor, Tensor]:
        """Call the ``mrope`` op on ``q`` and ``k`` with the cached tables.

        Args:
            q: Query tensor.
            k: Key tensor.
            positions: Position tensor forwarded to the op.
            out: Optional ``(q_out, k_out)`` pair forwarded as ``out=``.

        Returns:
            The pair of tensors returned by ``mrope``.
        """
        # The op takes the cosine table before the sine table.
        return mrope(
            q,
            k,
            self._cos_table,
            self._sin_table,
            positions,
            self.head_dim,
            self.rotary_dim,
            self.section[0],
            self.section[1],
            self.section[2],
            self.interleaved,
            out=out,
        )
52 changes: 52 additions & 0 deletions python/infinicore/ops/mrope.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def mrope(
    q: Tensor,
    k: Tensor,
    cos: Tensor,
    sin: Tensor,
    positions: Tensor,
    head_size: int,
    rotary_dim: int,
    section_t: int,
    section_h: int,
    section_w: int,
    interleaved: bool,
    *,
    out=None,
) -> tuple[Tensor, Tensor]:
    """Dispatch to the native ``mrope`` / ``mrope_`` binding.

    Args:
        q, k: Input tensors.
        cos, sin: Cosine and sine tables, in that order.
        positions: Position tensor.
        head_size, rotary_dim: Integers forwarded to the binding.
        section_t, section_h, section_w: The three section sizes.
        interleaved: Flag forwarded to the binding.
        out: Optional ``(q_out, k_out)`` pair. When given, ``mrope_`` is
            called with these as destinations and the same objects are
            returned.

    Returns:
        ``(q_out, k_out)``: new ``Tensor`` wrappers when ``out`` is ``None``,
        otherwise the two tensors supplied in ``out``.
    """
    if out is not None:
        # Destination variant: outputs are passed ahead of the inputs.
        q_dst, k_dst = out
        _infinicore.mrope_(
            q_dst._underlying,
            k_dst._underlying,
            q._underlying,
            k._underlying,
            cos._underlying,
            sin._underlying,
            positions._underlying,
            head_size,
            rotary_dim,
            section_t,
            section_h,
            section_w,
            interleaved,
        )
        return q_dst, k_dst

    # Allocating variant: wrap the raw results in Tensor objects.
    raw_q, raw_k = _infinicore.mrope(
        q._underlying,
        k._underlying,
        cos._underlying,
        sin._underlying,
        positions._underlying,
        head_size,
        rotary_dim,
        section_t,
        section_h,
        section_w,
        interleaved,
    )
    return Tensor(raw_q), Tensor(raw_k)
Loading
Loading