Skip to content

Commit 7704d32

Browse files
authored
feat(Flow Control)/Expand Flow Control capacity limits schema(resource.Quantity) (kubernetes-sigs#2492)
* feat(conf): add config `Limits` in EndpointPickerConfig - add struct CapacityLimits - add Limits Config in PriorityBandConfig and FlowControlConfig * feat(config): Modify the MaxBytes configuration logic within the `NewConfigFromAPI` function: - Add New Logic: Implement logic to write the new configuration field `Limit.MaxBytes` to a `uint64` variable, including the necessary type conversion. - Remove Old Logic: Delete the existing logic responsible for writing the legacy `MaxBytes` configuration. * chore(conf): supplement the comment for new conf * feat(conf): delete the old config `MaxBytes` - use the new conf when printing logs * refactor(config): extract `resolveMaxBytes` helper and improve Limits comments * feat(test): change the `MaxBytes` in test to `Limit.MaxBytes` * feat(test): add `ShouldSucceed_WithKubernetesQuantityFormat` to test the k8s case * chore(deepcopy): regenerate `deepcopy.go` * revert(config): flatten the `Limits` * chore(make): after `make` * feat(docs): supplement flow control docs with a description of the `quantity format`
1 parent 874aae8 commit 7704d32

File tree

7 files changed

+86
-46
lines changed

7 files changed

+86
-46
lines changed

apix/config/v1alpha1/endpointpickerconfig_types.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222
"strings"
2323

24+
"k8s.io/apimachinery/pkg/api/resource"
2425
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2526
)
2627

@@ -271,13 +272,11 @@ type ParserConfig struct {
271272
// FlowControlConfig configures the Flow Control layer.
272273
type FlowControlConfig struct {
273274
// +optional
274-
// MaxBytes defines the global capacity limit for the Flow Control system.
275-
// It represents the maximum aggregate byte size of all active requests across all priority
276-
// levels. If this limit is exceeded, new requests will be rejected even if their specific
277-
// priority band has capacity.
278-
// If 0 or omitted, no global limit is enforced (unlimited).
279-
// Default: 0 (unlimited).
280-
MaxBytes *int64 `json:"maxBytes,omitempty"`
275+
// MaxBytes is the global maximum number of bytes allowed across all priority levels.
276+
// If exceeded, new requests will be rejected even if their specific priority band has capacity.
277+
// Accepts standard Kubernetes resource quantities (e.g., "1Gi", "500M").
278+
// If 0 or not specified, no global limit is enforced (unlimited).
279+
MaxBytes *resource.Quantity `json:"maxBytes,omitempty"`
281280

282281
// +optional
283282
// DefaultRequestTTL serves as a fallback timeout for requests that do not specify their own
@@ -315,9 +314,10 @@ type PriorityBandConfig struct {
315314

316315
// +optional
317316
// MaxBytes is the maximum number of bytes allowed for this priority band.
318-
// If 0 or omitted, the system default is used.
319-
// Default: 1 GB.
320-
MaxBytes *int64 `json:"maxBytes,omitempty"`
317+
// If exceeded, new requests at this priority will be shed.
318+
// Accepts standard Kubernetes resource quantities (e.g., "1Gi", "500M").
319+
// If 0 or not specified, the system default (1 GB) is used.
320+
MaxBytes *resource.Quantity `json:"maxBytes,omitempty"`
321321

322322
// +optional
323323
// FairnessPolicyRef specifies the name of the policy that governs flow selection.

apix/config/v1alpha1/zz_generated.deepcopy.go

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/epp/config/loader/testdata_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ schedulingProfiles:
156156
featureGates:
157157
- flowControl
158158
flowControl:
159-
maxBytes: 1024
159+
maxBytes: "1024"
160160
defaultRequestTTL: 1m
161161
`
162162

@@ -172,7 +172,7 @@ schedulingProfiles:
172172
- pluginRef: maxScore
173173
featureGates: [] # Explicitly empty
174174
flowControl:
175-
maxBytes: 1024
175+
maxBytes: "1024"
176176
`
177177

178178
// successComplexFlowControlConfigText tests that Flow Control configuration with custom plugins is correctly loaded.

pkg/epp/flowcontrol/config_test.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222

2323
"github.com/stretchr/testify/assert"
2424
"github.com/stretchr/testify/require"
25+
"k8s.io/apimachinery/pkg/api/resource"
2526
"k8s.io/utils/ptr"
2627

2728
configapi "sigs.k8s.io/gateway-api-inference-extension/apix/config/v1alpha1"
@@ -71,11 +72,11 @@ func TestNewConfigFromAPI(t *testing.T) {
7172
{
7273
name: "Success - Explicit Values",
7374
apiConfig: &configapi.FlowControlConfig{
74-
MaxBytes: ptr.To(int64(2048)),
75+
MaxBytes: ptr.To(resource.MustParse("2048")),
7576
},
7677
assertion: func(t *testing.T, cfg *Config) {
7778
assert.Equal(t, uint64(2048), cfg.Registry.MaxBytes,
78-
"MaxBytes should be correctly translated from int64 in API to uint64 in internal config")
79+
"MaxBytes should be correctly translated from resource.Quantity in API to uint64 in internal config")
7980
},
8081
},
8182
}

pkg/epp/flowcontrol/registry/config.go

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"slices"
2323
"time"
2424

25+
"k8s.io/apimachinery/pkg/api/resource"
2526
"k8s.io/apimachinery/pkg/util/sets"
2627

2728
configapi "sigs.k8s.io/gateway-api-inference-extension/apix/config/v1alpha1"
@@ -347,6 +348,19 @@ func WithBandMaxBytes(maxBytes uint64) PriorityBandConfigOption {
347348

348349
// --- Constructors ---
349350

351+
// resolveMaxBytes converts an optional resource.Quantity into a byte count, rejecting negative values.
352+
// A nil maxBytes yields (0, nil); the callers in this file skip the MaxBytes option when the result is 0.
353+
func resolveMaxBytes(maxBytes *resource.Quantity) (uint64, error) {
354+
if maxBytes == nil {
355+
return 0, nil
356+
}
357+
v := maxBytes.Value()
358+
if v < 0 {
359+
return 0, fmt.Errorf("MaxBytes must be non-negative, got %d", v)
360+
}
361+
return uint64(v), nil
362+
}
363+
350364
// NewConfigFromAPI creates a new Config by translating the API configuration.
351365
func NewConfigFromAPI(apiConfig *configapi.FlowControlConfig, handle plugin.Handle) (*Config, error) {
352366
if apiConfig == nil {
@@ -355,11 +369,12 @@ func NewConfigFromAPI(apiConfig *configapi.FlowControlConfig, handle plugin.Hand
355369

356370
opts := make([]ConfigOption, 0, len(apiConfig.PriorityBands)+3)
357371

358-
if apiConfig.MaxBytes != nil {
359-
if *apiConfig.MaxBytes < 0 {
360-
return nil, fmt.Errorf("MaxBytes must be non-negative, got %d", *apiConfig.MaxBytes)
361-
}
362-
opts = append(opts, WithMaxBytes(uint64(*apiConfig.MaxBytes)))
372+
maxBytes, err := resolveMaxBytes(apiConfig.MaxBytes)
373+
if err != nil {
374+
return nil, fmt.Errorf("global %w", err)
375+
}
376+
if maxBytes > 0 {
377+
opts = append(opts, WithMaxBytes(maxBytes))
363378
}
364379

365380
if apiConfig.DefaultPriorityBand != nil {
@@ -386,11 +401,12 @@ func buildDefaultPriorityBandTemplate(
386401
apiBand *configapi.PriorityBandConfig,
387402
) (*PriorityBandConfig, error) {
388403
bandOpts := make([]PriorityBandConfigOption, 0, 3)
389-
if apiBand.MaxBytes != nil {
390-
if *apiBand.MaxBytes < 0 {
391-
return nil, fmt.Errorf("DefaultPriorityBand MaxBytes must be non-negative, got %d", *apiBand.MaxBytes)
392-
}
393-
bandOpts = append(bandOpts, WithBandMaxBytes(uint64(*apiBand.MaxBytes)))
404+
maxBytes, err := resolveMaxBytes(apiBand.MaxBytes)
405+
if err != nil {
406+
return nil, fmt.Errorf("DefaultPriorityBand %w", err)
407+
}
408+
if maxBytes > 0 {
409+
bandOpts = append(bandOpts, WithBandMaxBytes(maxBytes))
394410
}
395411
if apiBand.OrderingPolicyRef != "" {
396412
bandOpts = append(bandOpts, WithOrderingPolicy(apiBand.OrderingPolicyRef, handle))
@@ -409,11 +425,12 @@ func buildDefaultPriorityBandTemplate(
409425

410426
func buildPriorityBand(handle plugin.Handle, band configapi.PriorityBandConfig) (*PriorityBandConfig, error) {
411427
bandOpts := make([]PriorityBandConfigOption, 0, 3)
412-
if band.MaxBytes != nil {
413-
if *band.MaxBytes < 0 {
414-
return nil, fmt.Errorf("priority band %d MaxBytes must be non-negative, got %d", band.Priority, *band.MaxBytes)
415-
}
416-
bandOpts = append(bandOpts, WithBandMaxBytes(uint64(*band.MaxBytes)))
428+
maxBytes, err := resolveMaxBytes(band.MaxBytes)
429+
if err != nil {
430+
return nil, fmt.Errorf("priority band %d %w", band.Priority, err)
431+
}
432+
if maxBytes > 0 {
433+
bandOpts = append(bandOpts, WithBandMaxBytes(maxBytes))
417434
}
418435
if band.OrderingPolicyRef != "" {
419436
bandOpts = append(bandOpts, WithOrderingPolicy(band.OrderingPolicyRef, handle))

pkg/epp/flowcontrol/registry/config_test.go

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222

2323
"github.com/stretchr/testify/assert"
2424
"github.com/stretchr/testify/require"
25+
"k8s.io/apimachinery/pkg/api/resource"
2526
"k8s.io/utils/ptr"
2627

2728
configapi "sigs.k8s.io/gateway-api-inference-extension/apix/config/v1alpha1"
@@ -529,15 +530,15 @@ func TestNewConfigFromAPI(t *testing.T) {
529530
{
530531
name: "ShouldSucceed_WithFullConfiguration",
531532
apiConfig: &configapi.FlowControlConfig{
532-
MaxBytes: ptr.To(int64(100)),
533+
MaxBytes: ptr.To(resource.MustParse("100")),
533534
PriorityBands: []configapi.PriorityBandConfig{
534535
{
535536
Priority: 1,
536-
MaxBytes: ptr.To(int64(50)),
537+
MaxBytes: ptr.To(resource.MustParse("50")),
537538
},
538539
},
539540
DefaultPriorityBand: &configapi.PriorityBandConfig{
540-
MaxBytes: ptr.To(int64(10)),
541+
MaxBytes: ptr.To(resource.MustParse("10")),
541542
},
542543
},
543544
assertion: func(t *testing.T, cfg *Config) {
@@ -555,6 +556,25 @@ func TestNewConfigFromAPI(t *testing.T) {
555556
"DefaultPriorityBand template MaxBytes should be translated")
556557
},
557558
},
559+
{
560+
name: "ShouldSucceed_WithKubernetesQuantityFormat",
561+
apiConfig: &configapi.FlowControlConfig{
562+
MaxBytes: ptr.To(resource.MustParse("1Gi")),
563+
PriorityBands: []configapi.PriorityBandConfig{
564+
{
565+
Priority: 1,
566+
MaxBytes: ptr.To(resource.MustParse("500Mi")),
567+
},
568+
},
569+
},
570+
assertion: func(t *testing.T, cfg *Config) {
571+
assert.Equal(t, uint64(1073741824), cfg.MaxBytes,
572+
"1Gi should be correctly parsed as 1073741824 bytes")
573+
require.Contains(t, cfg.PriorityBands, 1)
574+
assert.Equal(t, uint64(524288000), cfg.PriorityBands[1].MaxBytes,
575+
"500Mi should be correctly parsed as 524288000 bytes")
576+
},
577+
},
558578
{
559579
name: "ShouldSucceed_WithPolicyReferences",
560580
apiConfig: &configapi.FlowControlConfig{
@@ -626,7 +646,7 @@ func TestNewConfigFromAPI(t *testing.T) {
626646
PriorityBands: []configapi.PriorityBandConfig{
627647
{
628648
Priority: 1,
629-
MaxBytes: ptr.To(int64(0)), // Explicitly zero
649+
MaxBytes: ptr.To(resource.MustParse("0")), // Explicitly zero
630650
},
631651
},
632652
},
@@ -640,7 +660,7 @@ func TestNewConfigFromAPI(t *testing.T) {
640660
name: "ShouldApplyDefault_WhenDefaultPriorityBandMaxBytesIsZero",
641661
apiConfig: &configapi.FlowControlConfig{
642662
DefaultPriorityBand: &configapi.PriorityBandConfig{
643-
MaxBytes: ptr.To(int64(0)), // Explicitly zero
663+
MaxBytes: ptr.To(resource.MustParse("0")), // Explicitly zero
644664
},
645665
},
646666
assertion: func(t *testing.T, cfg *Config) {
@@ -654,17 +674,17 @@ func TestNewConfigFromAPI(t *testing.T) {
654674
{
655675
name: "ShouldError_WithNegativeGlobalMaxBytes",
656676
apiConfig: &configapi.FlowControlConfig{
657-
MaxBytes: ptr.To(int64(-1)),
677+
MaxBytes: ptr.To(resource.MustParse("-1")),
658678
},
659-
expectedErr: "MaxBytes must be non-negative",
679+
expectedErr: "global MaxBytes must be non-negative",
660680
},
661681
{
662682
name: "ShouldError_WithNegativePriorityBandMaxBytes",
663683
apiConfig: &configapi.FlowControlConfig{
664684
PriorityBands: []configapi.PriorityBandConfig{
665685
{
666686
Priority: 1,
667-
MaxBytes: ptr.To(int64(-100)),
687+
MaxBytes: ptr.To(resource.MustParse("-100")),
668688
},
669689
},
670690
},
@@ -674,7 +694,7 @@ func TestNewConfigFromAPI(t *testing.T) {
674694
name: "ShouldError_WithNegativeDefaultPriorityBandMaxBytes",
675695
apiConfig: &configapi.FlowControlConfig{
676696
DefaultPriorityBand: &configapi.PriorityBandConfig{
677-
MaxBytes: ptr.To(int64(-5)),
697+
MaxBytes: ptr.To(resource.MustParse("-5")),
678698
},
679699
},
680700
expectedErr: "DefaultPriorityBand MaxBytes must be non-negative",

site-src/guides/epp-configuration/config-text.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -400,20 +400,21 @@ form:
400400

401401
```yaml
402402
flowControl:
403-
maxBytes: 10737418240 # 10 GB
403+
maxBytes: 10Gi # 10737418240 bytes
404404
defaultRequestTTL: 60s
405405
defaultPriorityBand:
406-
maxBytes: 1073741824
406+
maxBytes: 1Gi
407407
priorityBands:
408408
- priority: 100
409-
maxBytes: 5368709120
409+
maxBytes: 5Gi
410410
orderingPolicyRef: fcfs-ordering-policy
411411
fairnessPolicyRef: global-strict-fairness-policy
412412
```
413413

414414
The fields in the `flowControl` section are:
415415

416-
- `maxBytes`: Defines the global capacity limit (in bytes) for all active requests across all priority levels.
416+
- `maxBytes`: Defines the global capacity limit for all active requests across all priority levels.
417+
- Supports Kubernetes quantity format (e.g., `10Gi`, `512Mi`, `1048576Ki`) as well as plain integers (in bytes).
417418
- If `0` or omitted, no global limit is enforced (unlimited), though individual priority band limits still apply.
418419
- `defaultRequestTTL`: A fallback timeout for requests that do not specify their own deadline.
419420
- If `0` or omitted, it defaults to the client context deadline, meaning requests may wait indefinitely unless cancelled by the client.
@@ -427,6 +428,7 @@ Both the `defaultPriorityBand` template and the entries in `priorityBands` use t
427428

428429
- `priority`: (Required for `priorityBands` entries) The integer priority level. Higher values indicate higher priority.
429430
- `maxBytes`: The maximum aggregate byte size allowed for this specific priority band.
431+
- Supports Kubernetes quantity format (e.g., `5Gi`, `512Mi`) as well as plain integers (in bytes).
430432
- If `0` or omitted, the system default (1 GB) is used.
431433
- `orderingPolicyRef`: The name of the Ordering Policy plugin to use (e.g., `fcfs-ordering-policy`).
432434
- Defaults to `fcfs-ordering-policy` if omitted.

0 commit comments

Comments
 (0)