fix: scheduling types field order and e2e embeddings constant

sbekkerm · sbekkerm · commit 35c4f1c96562 · 2026-03-19T10:41:27.000+01:00
- types.go: move the Embeddings field up next to the other API request fields (ahead of ParsedBody) and drop the stray blank line
- e2e_test.go: add an apiEmbeddings constant to satisfy the goconst linter (replaces the repeated "/embeddings" literals)

Signed-off-by: Sergey Bekkerman <sbekkerm@redhat.com>
diff --git a/pkg/epp/framework/interface/scheduling/types.go b/pkg/epp/framework/interface/scheduling/types.go
@@ -84,13 +84,12 @@ type LLMRequestBody struct {
 	Responses *ResponsesRequest `json:"responses,omitempty"`
 	// ConversationsRequest is the representation of the OpenAI /v1/conversations request body.
 	Conversations *ConversationsRequest `json:"conversations,omitempty"`
-
+	// EmbeddingsRequest is the representation of the OpenAI /v1/embeddings request body.
+	Embeddings *EmbeddingsRequest `json:"embeddings,omitempty"`
 	// ParsedBody contains the unmarshaled request payload.
 	// Note: Because this handles multiple protocols, this field is strictly expected
 	// to be either a map[string]any (for HTTP/JSON) or a proto.Message (for gRPC).
 	ParsedBody any `json:"-"`
-	// EmbeddingsRequest is the representation of the OpenAI /v1/embeddings request body.
-	Embeddings *EmbeddingsRequest `json:"embeddings,omitempty"`
 }
 
 // PromptText returns a plain-text representation of the prompt from whichever
diff --git a/test/e2e/epp/e2e_test.go b/test/e2e/epp/e2e_test.go
@@ -48,6 +48,7 @@ const (
 	maxRetries            = 5
 	backoff               = 5 * time.Second
 	batches               = 20
+	apiEmbeddings         = "/embeddings"
 )
 
 var _ = ginkgo.Describe("InferencePool", func() {
@@ -321,22 +322,22 @@ func verifyTrafficRouting() {
 			},
 		},
 		{
-			api:              "/embeddings",
+			api:              apiEmbeddings,
 			promptOrMessages: "The food was delicious and the service was great.",
 		},
 		{
-			api:              "/embeddings",
+			api:              apiEmbeddings,
 			promptOrMessages: []string{"First sentence to embed.", "Second sentence to embed."},
 		},
 	} {
 		ginkgo.By(fmt.Sprintf("Verifying connectivity through the inference extension with %s api and prompt/messages: %v", t.api, t.promptOrMessages))
 
-		// Skip /embeddings if server returns 404 (not all models support embeddings).
-		if t.api == "/embeddings" {
+		// Skip embeddings API if server returns 404 (not all models support embeddings).
+		if t.api == apiEmbeddings {
 			probeCmd := getCurlCommand(envoyName, testConfig.NsName, envoyPort, modelName, curlTimeout, t.api, t.promptOrMessages, false)
 			probeResp, probeErr := testutils.ExecCommandInPod(testConfig, "curl", "curl", probeCmd)
 			if probeErr == nil && strings.Contains(probeResp, "404") {
-				ginkgo.Skip("Skipping /embeddings: server returned 404 (embeddings may not be supported by this model)")
+				ginkgo.Skip("Skipping " + apiEmbeddings + ": server returned 404 (embeddings may not be supported by this model)")
 			}
 		}
 
@@ -576,12 +577,12 @@ func getCurlCommand(name, ns, port, model string, timeout time.Duration, api str
 		body["prompt"] = promptOrMessages
 	case "/chat/completions":
 		body["messages"] = promptOrMessages
-	case "/embeddings":
+	case apiEmbeddings:
 		body["input"] = promptOrMessages
 		delete(body, "max_tokens")
 		delete(body, "temperature")
 	}
-	if streaming && api != "/embeddings" {
+	if streaming && api != apiEmbeddings {
 		body["stream"] = true
 		body["stream_options"] = map[string]any{
 			"include_usage": true,