Skip to content

Commit 35c4f1c

Browse files
committed
fix: scheduling types field order and e2e embeddings constant
- types.go: move Embeddings field with other API request fields, drop extra newline
- e2e_test.go: add apiEmbeddings constant for goconst (replace /embeddings literals)

Signed-off-by: Sergey Bekkerman <sbekkerm@redhat.com>
1 parent a07d605 commit 35c4f1c

File tree

2 files changed

+10
-10
lines changed

2 files changed

+10
-10
lines changed

pkg/epp/framework/interface/scheduling/types.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,13 +84,12 @@ type LLMRequestBody struct {
8484
Responses *ResponsesRequest `json:"responses,omitempty"`
8585
// ConversationsRequest is the representation of the OpenAI /v1/conversations request body.
8686
Conversations *ConversationsRequest `json:"conversations,omitempty"`
87-
87+
// EmbeddingsRequest is the representation of the OpenAI /v1/embeddings request body.
88+
Embeddings *EmbeddingsRequest `json:"embeddings,omitempty"`
8889
// ParsedBody contains the unmarshaled request payload.
8990
// Note: Because this handles multiple protocols, this field is strictly expected
9091
// to be either a map[string]any (for HTTP/JSON) or a proto.Message (for gRPC).
9192
ParsedBody any `json:"-"`
92-
// EmbeddingsRequest is the representation of the OpenAI /v1/embeddings request body.
93-
Embeddings *EmbeddingsRequest `json:"embeddings,omitempty"`
9493
}
9594

9695
// PromptText returns a plain-text representation of the prompt from whichever

test/e2e/epp/e2e_test.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ const (
4848
maxRetries = 5
4949
backoff = 5 * time.Second
5050
batches = 20
51+
apiEmbeddings = "/embeddings"
5152
)
5253

5354
var _ = ginkgo.Describe("InferencePool", func() {
@@ -321,22 +322,22 @@ func verifyTrafficRouting() {
321322
},
322323
},
323324
{
324-
api: "/embeddings",
325+
api: apiEmbeddings,
325326
promptOrMessages: "The food was delicious and the service was great.",
326327
},
327328
{
328-
api: "/embeddings",
329+
api: apiEmbeddings,
329330
promptOrMessages: []string{"First sentence to embed.", "Second sentence to embed."},
330331
},
331332
} {
332333
ginkgo.By(fmt.Sprintf("Verifying connectivity through the inference extension with %s api and prompt/messages: %v", t.api, t.promptOrMessages))
333334

334-
// Skip /embeddings if server returns 404 (not all models support embeddings).
335-
if t.api == "/embeddings" {
335+
// Skip embeddings API if server returns 404 (not all models support embeddings).
336+
if t.api == apiEmbeddings {
336337
probeCmd := getCurlCommand(envoyName, testConfig.NsName, envoyPort, modelName, curlTimeout, t.api, t.promptOrMessages, false)
337338
probeResp, probeErr := testutils.ExecCommandInPod(testConfig, "curl", "curl", probeCmd)
338339
if probeErr == nil && strings.Contains(probeResp, "404") {
339-
ginkgo.Skip("Skipping /embeddings: server returned 404 (embeddings may not be supported by this model)")
340+
ginkgo.Skip("Skipping " + apiEmbeddings + ": server returned 404 (embeddings may not be supported by this model)")
340341
}
341342
}
342343

@@ -576,12 +577,12 @@ func getCurlCommand(name, ns, port, model string, timeout time.Duration, api str
576577
body["prompt"] = promptOrMessages
577578
case "/chat/completions":
578579
body["messages"] = promptOrMessages
579-
case "/embeddings":
580+
case apiEmbeddings:
580581
body["input"] = promptOrMessages
581582
delete(body, "max_tokens")
582583
delete(body, "temperature")
583584
}
584-
if streaming && api != "/embeddings" {
585+
if streaming && api != apiEmbeddings {
585586
body["stream"] = true
586587
body["stream_options"] = map[string]any{
587588
"include_usage": true,

0 commit comments

Comments
 (0)