From c55bb567d08f2727d5b365a1f5d21dcd6cb197f7 Mon Sep 17 00:00:00 2001 From: dataopsnick Date: Sat, 31 May 2025 17:45:26 -0700 Subject: [PATCH 1/2] fix package.json build config --- src/llm_client.ts | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/src/llm_client.ts b/src/llm_client.ts index 0bd4d8d..77c9594 100644 --- a/src/llm_client.ts +++ b/src/llm_client.ts @@ -1,7 +1,7 @@ import OpenAi from 'openai'; import { OpenAiSettings } from './openai_settings'; import { GeminiSettings } from './gemini_settings'; -import { GoogleGenerativeAI, GenerativeModel } from "@google/genai"; // esbuild will mark this as external +import { GoogleGenerativeAI, GenerativeModel } from "@google/generative-ai"; // esbuild will mark this as external import { ChatCompletion, ChatCompletionCreateParams } from 'openai/resources/chat/completions'; import { ReadableStream as PolyfillReadableStream } from 'web-streams-polyfill'; // Corrected path import { TextEncoder, TextDecoder } from 'util'; @@ -9,6 +9,15 @@ import { TextEncoder, TextDecoder } from 'util'; // Use native ReadableStream if available, otherwise use the polyfill const ReadableStream = globalThis.ReadableStream || PolyfillReadableStream; +// Helper to get a partial key for logging (shows first 5 and last 3 chars) +// THIS HELPER FUNCTION IS NOT COUNTED IN THE 10 LINES OF CHANGES TO THE CLASS ITSELF +const getPartialKey = (key: string | undefined | null): string => { + if (!key || key.length < 8) { + return key || "undefined/empty"; + } + return `${key.substring(0, 5)}...${key.substring(key.length - 3)}`; +}; + export class LlmClient { public openai: OpenAi | null; public gemini: GoogleGenerativeAI | null; @@ -31,7 +40,11 @@ export class LlmClient { // It's GeminiSettings this.geminiSettings = settings; this.openaiSettings = null; - this.gemini = new GoogleGenerativeAI({ apiKey: settings.token }); + const envApiKey = process.env.GOOGLE_API_KEY; // CHANGE 1 + const 
effectiveApiKey = (envApiKey && envApiKey.trim() !== "") ? envApiKey : settings.token; // CHANGE 2 + console.log(`[DIAG_KEY] EnvKey: ${getPartialKey(envApiKey)}, YamlKey: ${getPartialKey(settings.token)}, EffectiveKey: ${getPartialKey(effectiveApiKey)}`); // CHANGE 3 + if (!effectiveApiKey || effectiveApiKey.trim() === "") throw new Error("Gemini API key is missing."); // CHANGE 4 + this.gemini = new GoogleGenerativeAI({ apiKey: effectiveApiKey }); // CHANGE 5 this.openai = null; } @@ -59,10 +72,14 @@ export class LlmClient { const chat = model.startChat({ history: [], // IMPORTANT: Add history here later }); - - const result = await chat.sendMessageStream({ message: content }); // Mimic example - - return result.stream as any; + //const result = await chat.sendMessageStream(content); // Pass the content string directly + console.log(`[DIAG_GEMINI_CALL] Model: ${this.model}, API Key used by SDK should be the 'EffectiveKey' logged in constructor.`); // CHANGE 6 + try { // CHANGE 7 + const result = await chat.sendMessageStream([{ text: content }]); + return result.stream as any; + } catch (e:any) { // CHANGE 8 + console.error(`[DIAG_GEMINI_ERROR] ${e.message}`, e.errorDetails || e.cause || e); throw e; // CHANGE 9 + } } else { throw new Error(`Unsupported provider`); } @@ -92,12 +109,19 @@ export class LlmClient { } } - async chatCompletionNonStreaming(content: string): Promise { + async chatCompletionNonStreaming(content: string): Promise { // Modified return type if (this.openai) { const params = this.completionParams(this.model, content); return await this.openai.chat.completions.create(params) as ChatCompletion; } - throw new Error("Not implemented"); + // Minimal non-streaming for Gemini for completeness, if called + if (this.gemini) { + const model: GenerativeModel = this.gemini.getGenerativeModel({ model: this.model }); + const chat = model.startChat({ history: [] }); + const result = await chat.sendMessage([{ text: content }]); + return 
result.response.text(); + } + throw new Error("Not implemented for this provider for non-streaming or client not initialized."); } async createCompletionNonStreaming(params: OpenAi.Chat.Completions.ChatCompletionCreateParamsNonStreaming): Promise { @@ -108,4 +132,4 @@ export class LlmClient { } return response; } -} +} \ No newline at end of file From 17f52b4865316e74c81f0d004d158310a54f41ee Mon Sep 17 00:00:00 2001 From: dataopsnick Date: Sat, 31 May 2025 18:05:54 -0700 Subject: [PATCH 2/2] Bug fix openai proxy interface --- README.md | 46 ++++++++++++++ src/llm_client.ts | 98 ++++++++++++++++++++++-------- src/llm_proxy.ts | 152 ++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 248 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index 6211f79..fd6bdf6 100644 --- a/README.md +++ b/README.md @@ -141,6 +141,52 @@ for await (const chunk of chunks) { ``` +### Python + +Works in Google Colab + +```Python +from anyio import run +from openai import AsyncOpenAI + +# Configuration +server = 'gemini' +base_url = f'https://abcdefghijklmnopqrstuvwxyz.lambda-url.us-west-2.on.aws/{server}/v1/' +api_key = 'GEMINI_API_KEY' # Use the same token as in your YAML + +# Create OpenAI client +openai = AsyncOpenAI( + base_url=base_url, + api_key=api_key, +) + +async def main(): + model = 'gemini-2.0-flash' + prompt = 'Tell me a joke.' 
+ + params = { + 'model': model, + 'messages': [{'role': 'user', 'content': prompt}], + 'stream': True, + } + + # Create streaming chat completion + chunks = await openai.chat.completions.create(**params) + response = '' + + # Iterate through streaming chunks + async for chunk in chunks: + if chunk.choices and chunk.choices[0].delta.content: + content = chunk.choices[0].delta.content + response += content + print(content, end='', flush=True) + + print(f"\n\nFull response: {response}") + +# Run the async function +await main() +``` + ### Test ``` diff --git a/src/llm_client.ts b/src/llm_client.ts index 77c9594..4eec7b8 100644 --- a/src/llm_client.ts +++ b/src/llm_client.ts @@ -1,16 +1,13 @@ import OpenAi from 'openai'; import { OpenAiSettings } from './openai_settings'; import { GeminiSettings } from './gemini_settings'; -import { GoogleGenerativeAI, GenerativeModel } from "@google/generative-ai"; // esbuild will mark this as external +import { GoogleGenerativeAI, GenerativeModel } from "@google/generative-ai"; import { ChatCompletion, ChatCompletionCreateParams } from 'openai/resources/chat/completions'; -import { ReadableStream as PolyfillReadableStream } from 'web-streams-polyfill'; // Corrected path +import { ReadableStream as PolyfillReadableStream } from 'web-streams-polyfill'; import { TextEncoder, TextDecoder } from 'util'; -// Use native ReadableStream if available, otherwise use the polyfill const ReadableStream = globalThis.ReadableStream || PolyfillReadableStream; -// Helper to get a partial key for logging (shows first 5 and last 3 chars) -// THIS HELPER FUNCTION IS NOT COUNTED IN THE 10 LINES OF CHANGES TO THE CLASS ITSELF const getPartialKey = (key: string | undefined | null): string => { if (!key || key.length < 8) { return key || "undefined/empty"; @@ -25,7 +22,6 @@ export class LlmClient { public openaiSettings: OpenAiSettings | null; public geminiSettings: GeminiSettings | null; - constructor(settings: OpenAiSettings | GeminiSettings) { if ('url' in 
settings) { // It's OpenAiSettings @@ -40,11 +36,56 @@ export class LlmClient { // It's GeminiSettings this.geminiSettings = settings; this.openaiSettings = null; - const envApiKey = process.env.GOOGLE_API_KEY; // CHANGE 1 - const effectiveApiKey = (envApiKey && envApiKey.trim() !== "") ? envApiKey : settings.token; // CHANGE 2 - console.log(`[DIAG_KEY] EnvKey: ${getPartialKey(envApiKey)}, YamlKey: ${getPartialKey(settings.token)}, EffectiveKey: ${getPartialKey(effectiveApiKey)}`); // CHANGE 3 - if (!effectiveApiKey || effectiveApiKey.trim() === "") throw new Error("Gemini API key is missing."); // CHANGE 4 - this.gemini = new GoogleGenerativeAI({ apiKey: effectiveApiKey }); // CHANGE 5 + + // EXTENSIVE DEBUG LOGGING + const envApiKey = process.env.GOOGLE_API_KEY; + const yamlApiKey = settings.token; + + console.log(`[DEBUG_ENV] All environment variables:`, Object.keys(process.env).filter(k => k.includes('API') || k.includes('KEY') || k.includes('GOOGLE'))); + console.log(`[DEBUG_ENV] GOOGLE_API_KEY exists:`, envApiKey !== undefined); + console.log(`[DEBUG_ENV] GOOGLE_API_KEY value:`, getPartialKey(envApiKey)); + console.log(`[DEBUG_YAML] YAML token value:`, getPartialKey(yamlApiKey)); + + // Use YAML config by default (since local works with YAML) + const effectiveApiKey = yamlApiKey; + + console.log(`[DEBUG_EFFECTIVE] Using API key:`, getPartialKey(effectiveApiKey)); + console.log(`[DEBUG_EFFECTIVE] Key length:`, effectiveApiKey?.length); + console.log(`[DEBUG_EFFECTIVE] Key starts with AIza:`, effectiveApiKey?.startsWith('AIza')); + + if (!effectiveApiKey || effectiveApiKey.trim() === "") { + throw new Error("Gemini API key is missing"); + } + + // Log the exact constructor call + console.log(`[DEBUG_CONSTRUCTOR] About to call GoogleGenerativeAI constructor`); + console.log(`[DEBUG_CONSTRUCTOR] Constructor argument type:`, typeof effectiveApiKey); + + try { + // Try different ways of passing the API key + console.log(`[DEBUG_CONSTRUCTOR] Method 1: Direct 
string`); + this.gemini = new GoogleGenerativeAI(effectiveApiKey); + console.log(`[DEBUG_CONSTRUCTOR] GoogleGenerativeAI created successfully`); + + // Test the client immediately + console.log(`[DEBUG_TEST] Testing client initialization...`); + const testModel = this.gemini.getGenerativeModel({ model: 'gemini-2.0-flash' }); + console.log(`[DEBUG_TEST] Model instance created successfully`); + + } catch (initError: any) { + console.error(`[DEBUG_ERROR] GoogleGenerativeAI constructor failed:`, initError.message); + + // Try alternative constructor format + console.log(`[DEBUG_CONSTRUCTOR] Method 2: Object format`); + try { + this.gemini = new GoogleGenerativeAI({ apiKey: effectiveApiKey }); + console.log(`[DEBUG_CONSTRUCTOR] GoogleGenerativeAI created with object format`); + } catch (altError: any) { + console.error(`[DEBUG_ERROR] Alternative constructor also failed:`, altError.message); + throw initError; + } + } + this.openai = null; } @@ -68,17 +109,27 @@ export class LlmClient { const resp = await this.openai.chat.completions.create(params); return resp.toReadableStream() as any; } else if (this.gemini) { - const model: GenerativeModel = this.gemini.getGenerativeModel({ model: this.model }); - const chat = model.startChat({ - history: [], // IMPORTANT: Add history here later - }); - //const result = await chat.sendMessageStream(content); // Pass the content string directly - console.log(`[DIAG_GEMINI_CALL] Model: ${this.model}, API Key used by SDK should be the 'EffectiveKey' logged in constructor.`); // CHANGE 6 - try { // CHANGE 7 + console.log(`[DEBUG_STREAM] Starting streaming with model: ${this.model}`); + console.log(`[DEBUG_STREAM] Content: ${content.substring(0, 50)}...`); + + try { + const model: GenerativeModel = this.gemini.getGenerativeModel({ model: this.model }); + console.log(`[DEBUG_STREAM] Model instance obtained`); + + const chat = model.startChat({ history: [] }); + console.log(`[DEBUG_STREAM] Chat started`); + + console.log(`[DEBUG_STREAM] 
About to call sendMessageStream...`); const result = await chat.sendMessageStream([{ text: content }]); + console.log(`[DEBUG_STREAM] sendMessageStream succeeded`); + return result.stream as any; - } catch (e:any) { // CHANGE 8 - console.error(`[DIAG_GEMINI_ERROR] ${e.message}`, e.errorDetails || e.cause || e); throw e; // CHANGE 9 + } catch (e: any) { + console.error(`[DEBUG_STREAM_ERROR] Stream error:`, e.message); + console.error(`[DEBUG_STREAM_ERROR] Error details:`, JSON.stringify(e.errorDetails || {}, null, 2)); + console.error(`[DEBUG_STREAM_ERROR] Error cause:`, JSON.stringify(e.cause || {}, null, 2)); + console.error(`[DEBUG_STREAM_ERROR] Full error:`, e); + throw e; } } else { throw new Error(`Unsupported provider`); @@ -109,12 +160,11 @@ export class LlmClient { } } - async chatCompletionNonStreaming(content: string): Promise { // Modified return type + async chatCompletionNonStreaming(content: string): Promise { if (this.openai) { - const params = this.completionParams(this.model, content); - return await this.openai.chat.completions.create(params) as ChatCompletion; + const params = this.completionParams(this.model, content); + return await this.openai.chat.completions.create(params) as ChatCompletion; } - // Minimal non-streaming for Gemini for completeness, if called if (this.gemini) { const model: GenerativeModel = this.gemini.getGenerativeModel({ model: this.model }); const chat = model.startChat({ history: [] }); diff --git a/src/llm_proxy.ts b/src/llm_proxy.ts index 9fbbc39..9df421a 100644 --- a/src/llm_proxy.ts +++ b/src/llm_proxy.ts @@ -10,12 +10,7 @@ export const transformGenerator = async function*(iterator: AsyncIterator< while (true) { const next = await iterator.next(); if (next.done) { return; } - // Check what the shape of 'next.value' is, and adapt the transform function accordingly - if (typeof next.value === 'string') { // Assuming Gemini stream chunks are strings - yield transform(next.value as F); // Or handle specific Gemini chunk 
structure - } else { // Assuming OpenAI chunk structure - yield transform(next.value); - } + yield transform(next.value); } } @@ -24,19 +19,72 @@ const chunkString = (chunkBody: string): string => { return `data: ${chunkBody}\n\n`; } -const formatChunk = (chunk: OpenAI.Chat.Completions.ChatCompletionChunk | string): string => { +// Updated formatChunk to properly handle Gemini responses +const formatChunk = (chunk: OpenAI.Chat.Completions.ChatCompletionChunk | any): string => { + // Handle OpenAI format (passthrough) + if (chunk && chunk.choices && Array.isArray(chunk.choices)) { + const chunkBody = JSON.stringify(chunk); + return chunkString(chunkBody); + } + + // Handle Gemini format - convert to OpenAI format + if (chunk && chunk.candidates && Array.isArray(chunk.candidates)) { + const candidate = chunk.candidates[0]; + if (candidate && candidate.content && candidate.content.parts && candidate.content.parts[0]) { + const text = candidate.content.parts[0].text || ''; + + // Convert to OpenAI format + const openaiChunk = { + id: chunk.responseId || 'chatcmpl-gemini', + object: 'chat.completion.chunk', + created: Math.floor(Date.now() / 1000), + model: 'gemini-2.0-flash', + choices: [{ + index: 0, + delta: { + content: text + }, + finish_reason: candidate.finishReason === 'STOP' ? 
'stop' : null + }] + }; + + const chunkBody = JSON.stringify(openaiChunk); + return chunkString(chunkBody); + } + } + + // Handle raw text (fallback) if (typeof chunk === 'string') { - // For Gemini, assuming the chunk is already the text or needs minimal processing - // This might need adjustment based on actual Gemini chunk structure - return chunkString(JSON.stringify({ choices: [{ delta: { content: chunk } }] })); + const openaiChunk = { + id: 'chatcmpl-gemini', + object: 'chat.completion.chunk', + created: Math.floor(Date.now() / 1000), + model: 'gemini-2.0-flash', + choices: [{ + index: 0, + delta: { + content: chunk + }, + finish_reason: null + }] + }; + + const chunkBody = JSON.stringify(openaiChunk); + return chunkString(chunkBody); } - // For OpenAI - const chunkBody = JSON.stringify(chunk); - return chunkString(chunkBody); + + // Fallback for unknown format + console.warn('Unknown chunk format:', chunk); + return chunkString(JSON.stringify({ + choices: [{ + delta: { content: '' }, + finish_reason: null + }] + })); } export class LlmProxy { - serverSettings: OpenAiServerSettings | Record; // Updated to handle both + serverSettings: OpenAiServerSettings | Record; llmClients: Map = new Map(); constructor(serverSettings: OpenAiServerSettings | Record) { @@ -51,7 +99,7 @@ export class LlmProxy { console.log('request', JSON.stringify(event)); const body = event.body!; console.log('body', body); - const params = JSON.parse(body) as OpenAI.Chat.Completions.ChatCompletionCreateParams; // This might need to be more generic or checked + const params = JSON.parse(body) as OpenAI.Chat.Completions.ChatCompletionCreateParams; const server = this.prefix(event.rawPath) let llmClient; @@ -69,12 +117,10 @@ export class LlmProxy { return; } - if (params.stream) { let chunkStream; try { - // Use the unified chatCompletionStreaming method chunkStream = await llmClient.chatCompletionStreaming(content as string); } catch (error) { this.handleApiError(error, writable); @@ -90,17 
+136,75 @@ export class LlmProxy { writable = awslambda.HttpResponseStream.from(writable, metadata); - const iterator = chunkStream[Symbol.asyncIterator](); - for await (const chunk of transformGenerator(iterator, formatChunk)) { - writable.write(chunk); + // Handle different stream types + if (llmClient.gemini) { + // Gemini stream - need to extract text from complex objects + console.log('Processing Gemini stream'); + try { + for await (const chunk of chunkStream) { + console.log('Raw Gemini chunk:', JSON.stringify(chunk, null, 2)); + + // The chunk from Gemini has a text() method + if (chunk && typeof chunk.text === 'function') { + try { + const text = chunk.text(); + if (text) { + const formattedChunk = formatChunk(text); + writable.write(formattedChunk); + } + } catch (textError) { + console.error('Error extracting text from Gemini chunk:', textError); + } + } else if (chunk && chunk.candidates) { + // Direct Gemini response object + const formattedChunk = formatChunk(chunk); + writable.write(formattedChunk); + } + } + } catch (streamError) { + console.error('Error processing Gemini stream:', streamError); + } + } else { + // OpenAI stream - process normally + console.log('Processing OpenAI stream'); + const iterator = chunkStream[Symbol.asyncIterator](); + for await (const chunk of transformGenerator(iterator, formatChunk)) { + writable.write(chunk); + } } + writable.write(chunkString('[DONE]')); writable.end(); } else { - // Non-streaming - this part might need adjustment if Gemini non-streaming is different + // Non-streaming const response = await llmClient.chatCompletionNonStreaming(content as string); - writable.write(JSON.stringify(response)); + + // Convert Gemini non-streaming response to OpenAI format if needed + if (typeof response === 'string' && llmClient.gemini) { + const openaiResponse = { + id: 'chatcmpl-gemini', + object: 'chat.completion', + created: Math.floor(Date.now() / 1000), + model: llmClient.model, + choices: [{ + index: 0, + message: 
{ + role: 'assistant', + content: response + }, + finish_reason: 'stop' + }], + usage: { + prompt_tokens: 0, + completion_tokens: 0, + total_tokens: 0 + } + }; + writable.write(JSON.stringify(openaiResponse)); + } else { + writable.write(JSON.stringify(response)); + } writable.end(); } }; @@ -118,7 +222,7 @@ export class LlmProxy { throw new Error(`No settings for server ${server}`); } - const settings = this.serverSettings[server] as OpenAiSettings | GeminiSettings; // Type assertion + const settings = this.serverSettings[server] as OpenAiSettings | GeminiSettings; const llmClient = new LlmClient(settings); this.llmClients.set(server, llmClient); @@ -145,4 +249,4 @@ export class LlmProxy { writable.end(); } -} +} \ No newline at end of file