InftyAI · InftyAI-Agent · Sep 9, 2024 · Sep 9, 2024
diff --git a/api/inference/v1alpha1/config_types.go b/api/inference/v1alpha1/config_types.go
@@ -38,17 +38,16 @@ type BackendConfig struct {
 	// from the default version.
 	// +optional
 	Version *string `json:"version,omitempty"`
-	// Args represents the arguments passed to the backend.
+	// Args represents the arguments appended to the backend.
 	// You can add new args or overwrite the default args.
 	// +optional
 	Args []string `json:"args,omitempty"`
 	// Envs represents the environments set to the container.
 	// +optional
 	Envs []corev1.EnvVar `json:"envs,omitempty"`
 	// Resources represents the resource requirements for backend, like cpu/mem,
-	// accelerators like GPU should not be defined here, but at the Model flavors,
-	// or the same accelerator requirements defined there will be covered and
-	// the workload will lose the fungibility capacity.
+	// accelerators like GPU should not be defined here, but at the model flavors,
+	// or the values here will be covered.
 	Resources *ResourceRequirements `json:"resources,omitempty"`
 }
 

diff --git a/config/crd/bases/inference.llmaz.io_playgrounds.yaml b/config/crd/bases/inference.llmaz.io_playgrounds.yaml
@@ -46,7 +46,7 @@ spec:
                 properties:
                   args:
                     description: |-
-                      Args represents the arguments passed to the backend.
+                      Args represents the arguments appended to the backend.
                       You can add new args or overwrite the default args.
                     items:
                       type: string
@@ -187,9 +187,8 @@ spec:
                   resources:
                     description: |-
                       Resources represents the resource requirements for backend, like cpu/mem,
-                      accelerators like GPU should not be defined here, but at the Model flavors,
-                      or the same accelerator requirements defined there will be covered and
-                      the workload will lose the fungibility capacity.
+                      accelerators like GPU should not be defined here, but at the model flavors,
+                      or the values here will be covered.
                     properties:
                       limits:
                         additionalProperties:

diff --git a/docs/assets/arch.png b/docs/assets/arch.png
diff --git a/pkg/controller/inference/playground_controller.go b/pkg/controller/inference/playground_controller.go
@@ -291,6 +291,15 @@ func buildWorkerTemplate(models []*coreapi.OpenModel, playground *inferenceapi.P
 			Limits:   limits,
 			Requests: requests,
 		}
+
+		// Make sure the cpu/memory limits are never less than the requests.
+		for k, v := range resources.Limits {
+			if k == corev1.ResourceCPU || k == corev1.ResourceMemory {
+				if v.Cmp(requests[k]) == -1 {
+					resources.Limits[k] = requests[k]
+				}
+			}
+		}
 	}
 
 	template := corev1.PodTemplateSpec{

diff --git a/pkg/controller/inference/service_controller.go b/pkg/controller/inference/service_controller.go
@@ -86,7 +86,14 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
 		if err := r.Get(ctx, types.NamespacedName{Name: string(mr.Name)}, model); err != nil {
 			return ctrl.Result{}, err
 		}
-		models = append(models, model)
+		// Make sure the main model is always the 0-index model.
+		// We only have one main model right now; if this changes,
+		// the logic here may need to change as well.
+		if *mr.Role == coreapi.MainRole {
+			models = append([]*coreapi.OpenModel{model}, models...)
+		} else {
+			models = append(models, model)
+		}
 	}
 
 	workloadApplyConfiguration := buildWorkloadApplyConfiguration(service, models)
@@ -152,8 +159,7 @@ func injectModelProperties(template *applyconfigurationv1.LeaderWorkerTemplateAp
 		source.InjectModelLoader(template.WorkerTemplate, i)
 	}
 
-	// We treat the 0-index model as the main model, we only consider the main model's requirements,
-	// like label, flavor. Note: this may change in the future, let's see.
+	// We only consider the main model's requirements for now.
 	template.WorkerTemplate.Labels = util.MergeKVs(template.WorkerTemplate.Labels, modelLabels(models[0]))
 	injectModelFlavor(template, models[0])
 }

diff --git a/test/integration/controller/inference/playground_test.go b/test/integration/controller/inference/playground_test.go
@@ -18,6 +18,7 @@ package inference
 
 import (
 	"context"
+	"fmt"
 
 	"github.com/onsi/ginkgo/v2"
 	"github.com/onsi/gomega"
@@ -323,5 +324,33 @@ var _ = ginkgo.Describe("playground controller test", func() {
 				},
 			},
 		}),
+		ginkgo.Entry("Playground with backendConfig's resource requests greater than limits", &testValidatingCase{
+			makePlayground: func() *inferenceapi.Playground {
+				return wrapper.MakePlayground("playground", ns.Name).ModelClaim(model.Name).Label(coreapi.ModelNameLabelKey, model.Name).BackendRequest("cpu", "10").Obj()
+			},
+			updates: []*update{
+				{
+					updateFunc: func(playground *inferenceapi.Playground) {
+						gomega.Expect(k8sClient.Create(ctx, playground)).To(gomega.Succeed())
+					},
+					checkFunc: func(ctx context.Context, k8sClient client.Client, playground *inferenceapi.Playground) {
+						gomega.Eventually(func() error {
+							service := inferenceapi.Service{}
+							if err := k8sClient.Get(ctx, types.NamespacedName{Name: playground.Name, Namespace: playground.Namespace}, &service); err != nil {
+								return err
+							}
+							// Report the limit with Value(): Quantity.Size() is the protobuf-encoded length, not the CPU amount.
+							cpuLimit := service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Resources.Limits.Cpu()
+							if cpuLimit.CmpInt64(10) != 0 {
+								return fmt.Errorf("unexpected Cpu limit value, want %d, got %d", 10, cpuLimit.Value())
+							}
+							return nil
+						}).Should(gomega.Succeed())
+						validation.ValidatePlayground(ctx, k8sClient, playground)
+						validation.ValidatePlaygroundStatusEqualTo(ctx, k8sClient, playground, inferenceapi.PlaygroundProgressing, "Pending", metav1.ConditionTrue)
+					},
+				},
+			},
+		}),
 	)
 })