Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<p align="center">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="./docs/assets/logo.png">
<img alt="llmaz" src="./docs/assets/logo.png" width=55%>
<img alt="llmaz" src="https://github.com/InftyAI/llmaz/blob/main/docs/assets/logo.png" width=55%>
</picture>
</p>

Expand Down
41 changes: 24 additions & 17 deletions api/core/v1alpha1/model_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,28 +120,35 @@ type ModelClaim struct {
InferenceFlavors []FlavorName `json:"inferenceFlavors,omitempty"`
}

type InferenceMode string
type ModelRole string

const (
Standard InferenceMode = "Standard"
SpeculativeDecoding InferenceMode = "SpeculativeDecoding"
// MainRole represents the main model. If only one model is required,
// it must be the main model. Only one main model is allowed.
MainRole ModelRole = "main"
// DraftRole represents the draft model in speculative decoding;
// the main model is then the target model.
DraftRole ModelRole = "draft"
)

// MultiModelsClaim represents claiming for multiple models with different claimModes,
// like standard or speculative-decoding to support different inference scenarios.
type MultiModelsClaim struct {
// ModelNames represents a list of models, there maybe multiple models here
// to support state-of-the-art technologies like speculative decoding.
// If the composedMode is SpeculativeDecoding, the first model is the target model,
// and the second model is the draft model.
// +kubebuilder:validation:MinItems=1
ModelNames []ModelName `json:"modelNames,omitempty"`
// Mode represents the paradigm to serve the model, whether via a standard way
// or via an advanced technique like SpeculativeDecoding.
// +kubebuilder:default=Standard
// +kubebuilder:validation:Enum={Standard,SpeculativeDecoding}
type ModelRepresentative struct {
// Name represents the model name.
Name ModelName `json:"name"`
// Role represents the model role once more than one model is required.
// +kubebuilder:validation:Enum={main,draft}
// +kubebuilder:default=main
// +optional
InferenceMode InferenceMode `json:"inferenceMode,omitempty"`
Role *ModelRole `json:"role,omitempty"`
}

// ModelClaims represents multiple claims for different models.
type ModelClaims struct {
// Models represents a list of models with roles specified. There may be
// multiple models here to support state-of-the-art technologies like
// speculative decoding, in which case one model is the main (target) model
// and another one is the draft model.
// +kubebuilder:validation:MinItems=1
Models []ModelRepresentative `json:"models,omitempty"`
// InferenceFlavors represents a list of flavors with fungibility supported
// to serve the model.
// - If not set, always apply with the 0-index model by default.
Expand Down
72 changes: 47 additions & 25 deletions api/core/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 7 additions & 8 deletions api/inference/v1alpha1/playground_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,16 @@ type PlaygroundSpec struct {
// +kubebuilder:default=1
// +optional
Replicas *int32 `json:"replicas,omitempty"`
// ModelClaim represents claiming for one model, it's the standard claimMode
// of multiModelsClaim compared to other modes like SpeculativeDecoding.
// Most of the time, modelClaim is enough.
// ModelClaim and multiModelsClaim are exclusive configured.
// ModelClaim represents claiming for one model, it's a simplified use case
// of modelClaims. Most of the time, modelClaim is enough.
// ModelClaim and modelClaims are mutually exclusive.
// +optional
ModelClaim *coreapi.ModelClaim `json:"modelClaim,omitempty"`
// MultiModelsClaim represents claiming for multiple models with different claimModes,
// like standard or speculative-decoding to support different inference scenarios.
// ModelClaim and multiModelsClaim are exclusive configured.
// ModelClaims represents claiming for multiple models for more complicated
// use cases like speculative-decoding.
// ModelClaims and modelClaim are mutually exclusive.
// +optional
MultiModelsClaim *coreapi.MultiModelsClaim `json:"multiModelsClaim,omitempty"`
ModelClaims *coreapi.ModelClaims `json:"modelClaims,omitempty"`
// BackendConfig represents the inference backend configuration
// under the hood, e.g. vLLM, which is the default backend.
// +optional
Expand Down
5 changes: 2 additions & 3 deletions api/inference/v1alpha1/service_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@ import (
// Service controller will maintain multi-flavor of workloads with
// different accelerators for cost or performance considerations.
type ServiceSpec struct {
// MultiModelsClaim represents claiming for multiple models with different claimModes,
// like standard or speculative-decoding to support different inference scenarios.
MultiModelsClaim coreapi.MultiModelsClaim `json:"multiModelsClaim,omitempty"`
// ModelClaims represents multiple claims for different models.
ModelClaims coreapi.ModelClaims `json:"modelClaims,omitempty"`
// WorkloadTemplate defines the underlying workload layout and configuration.
// Note: the LWS spec might be twisted with various LWS instances to support
// accelerator fungibility or other cutting-edge research.
Expand Down
8 changes: 4 additions & 4 deletions api/inference/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

58 changes: 58 additions & 0 deletions client-go/applyconfiguration/core/v1alpha1/modelclaims.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

51 changes: 51 additions & 0 deletions client-go/applyconfiguration/core/v1alpha1/modelrepresentative.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

64 changes: 0 additions & 64 deletions client-go/applyconfiguration/core/v1alpha1/multimodelsclaim.go

This file was deleted.

Loading