From c55bb567d08f2727d5b365a1f5d21dcd6cb197f7 Mon Sep 17 00:00:00 2001
From: dataopsnick <dataops@nickcantrel.net>
Date: Sat, 31 May 2025 17:45:26 -0700
Subject: [PATCH 1/2] fix package.json build config

---
 src/llm_client.ts | 42 +++++++++++++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 9 deletions(-)
diff --git a/src/llm_client.ts b/src/llm_client.ts
index 0bd4d8d..77c9594 100644
--- a/src/llm_client.ts
+++ b/src/llm_client.ts
@@ -1,7 +1,7 @@
 import OpenAi from 'openai';
 import { OpenAiSettings } from './openai_settings';
 import { GeminiSettings } from './gemini_settings';
-import { GoogleGenerativeAI, GenerativeModel } from "@google/genai"; // esbuild will mark this as external
+import { GoogleGenerativeAI, GenerativeModel } from "@google/generative-ai"; // esbuild will mark this as external
 import { ChatCompletion, ChatCompletionCreateParams } from 'openai/resources/chat/completions';
 import { ReadableStream as PolyfillReadableStream } from 'web-streams-polyfill'; // Corrected path
 import { TextEncoder, TextDecoder } from 'util';
@@ -9,6 +9,15 @@ import { TextEncoder, TextDecoder } from 'util';
 // Use native ReadableStream if available, otherwise use the polyfill
 const ReadableStream = globalThis.ReadableStream || PolyfillReadableStream;
 
+// Helper to get a partial key for logging (shows first 5 and last 3 chars)
+// THIS HELPER FUNCTION IS NOT COUNTED IN THE 10 LINES OF CHANGES TO THE CLASS ITSELF
+const getPartialKey = (key: string | undefined | null): string => {
+    if (!key || key.length < 8) {
+        return key || "undefined/empty";
+    }
+    return `${key.substring(0, 5)}...${key.substring(key.length - 3)}`;
+};
+
 export class LlmClient {
     public openai: OpenAi | null;
     public gemini: GoogleGenerativeAI | null;
@@ -31,7 +40,11 @@ export class LlmClient {
             // It's GeminiSettings
             this.geminiSettings = settings;
             this.openaiSettings = null;
-            this.gemini = new GoogleGenerativeAI({ apiKey: settings.token });
+            const envApiKey = process.env.GOOGLE_API_KEY; // CHANGE 1
+            const effectiveApiKey = (envApiKey && envApiKey.trim() !== "") ? envApiKey : settings.token; // CHANGE 2
+            console.log(`[DIAG_KEY] EnvKey: ${getPartialKey(envApiKey)}, YamlKey: ${getPartialKey(settings.token)}, EffectiveKey: ${getPartialKey(effectiveApiKey)}`); // CHANGE 3
+            if (!effectiveApiKey || effectiveApiKey.trim() === "") throw new Error("Gemini API key is missing."); // CHANGE 4
+            this.gemini = new GoogleGenerativeAI({ apiKey: effectiveApiKey }); // CHANGE 5
             this.openai = null;
         }
 
@@ -59,10 +72,14 @@ export class LlmClient {
             const chat = model.startChat({
                 history: [], // IMPORTANT: Add history here later
             });
-
-            const result = await chat.sendMessageStream({ message: content }); // Mimic example
-
-            return result.stream as any;
+            //const result = await chat.sendMessageStream(content); // Pass the content string directly
+            console.log(`[DIAG_GEMINI_CALL] Model: ${this.model}, API Key used by SDK should be the 'EffectiveKey' logged in constructor.`); // CHANGE 6
+            try { // CHANGE 7
+                const result = await chat.sendMessageStream([{ text: content }]);
+                return result.stream as any;
+            } catch (e:any) {  // CHANGE 8
+                console.error(`[DIAG_GEMINI_ERROR] ${e.message}`, e.errorDetails || e.cause || e); throw e; // CHANGE 9
+            }
         } else {
             throw new Error(`Unsupported provider`);
         }
@@ -92,12 +109,19 @@ export class LlmClient {
         }
     }
 
-    async chatCompletionNonStreaming(content: string): Promise<ChatCompletion> {
+    async chatCompletionNonStreaming(content: string): Promise<ChatCompletion | string> { // Modified return type
         if (this.openai) {
           const params = this.completionParams(this.model, content);
           return await this.openai.chat.completions.create(params) as ChatCompletion;
         }
-        throw new Error("Not implemented");
+        // Minimal non-streaming for Gemini for completeness, if called
+        if (this.gemini) {
+            const model: GenerativeModel = this.gemini.getGenerativeModel({ model: this.model });
+            const chat = model.startChat({ history: [] });
+            const result = await chat.sendMessage([{ text: content }]);
+            return result.response.text();
+        }
+        throw new Error("Not implemented for this provider for non-streaming or client not initialized.");
     }
 
     async createCompletionNonStreaming(params: OpenAi.Chat.Completions.ChatCompletionCreateParamsNonStreaming): Promise<OpenAi.Chat.Completions.ChatCompletion> {
@@ -108,4 +132,4 @@ export class LlmClient {
         }
         return response;
     }
-}
+}
\ No newline at end of file

From 17f52b4865316e74c81f0d004d158310a54f41ee Mon Sep 17 00:00:00 2001
From: dataopsnick <dataops@nickcantrel.net>
Date: Sat, 31 May 2025 18:05:54 -0700
Subject: [PATCH 2/2] Bug fix openai proxy interface

---
 README.md         |  45 ++++++++++++++
 src/llm_client.ts |  98 ++++++++++++++++++++++--------
 src/llm_proxy.ts  | 152 ++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 247 insertions(+), 48 deletions(-)

diff --git a/README.md b/README.md
index 6211f79..fd6bdf6 100644
--- a/README.md
+++ b/README.md
@@ -141,6 +141,51 @@ for await (const chunk of chunks) {
 
 ```
 
+### Python
+
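+Works in Google Colab and other notebooks that support top-level `await`; in a plain Python script, replace the final `await main()` with `run(main)`.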
+
+```Python
+from anyio import run
+from openai import AsyncOpenAI
+
+# Configuration
+server = 'gemini'
+base_url = f'https://abcdefghijklmnopqrstuvwxyz.lambda-url.us-west-2.on.aws/{server}/v1/'
+api_key = 'GEMINI_API_KEY'  # Use the same token as in your YAML
+
+# Create OpenAI client
+openai = AsyncOpenAI(
+    base_url=base_url,
+    api_key=api_key,
+)
+
+async def main():
+    model = 'gemini-2.0-flash'
+    prompt = 'Tell me a joke.'
+
+    params = {
+        'model': model,
+        'messages': [{'role': 'user', 'content': prompt}],
+        'stream': True,
+    }
+
+    # Create streaming chat completion
+    chunks = await openai.chat.completions.create(**params)
+    response = ''
+
+    # Iterate through streaming chunks
+    async for chunk in chunks:
+        if chunk.choices and chunk.choices[0].delta.content:
+            content = chunk.choices[0].delta.content
+            response += content
+            print(content, end='', flush=True)
+
+    print(f"\n\nFull response: {response}")
+
+await main()  # Run the async function
+```
+
 ### Test
 
 ```
diff --git a/src/llm_client.ts b/src/llm_client.ts
index 77c9594..4eec7b8 100644
--- a/src/llm_client.ts
+++ b/src/llm_client.ts
@@ -1,16 +1,13 @@
 import OpenAi from 'openai';
 import { OpenAiSettings } from './openai_settings';
 import { GeminiSettings } from './gemini_settings';
-import { GoogleGenerativeAI, GenerativeModel } from "@google/generative-ai"; // esbuild will mark this as external
+import { GoogleGenerativeAI, GenerativeModel } from "@google/generative-ai";
 import { ChatCompletion, ChatCompletionCreateParams } from 'openai/resources/chat/completions';
-import { ReadableStream as PolyfillReadableStream } from 'web-streams-polyfill'; // Corrected path
+import { ReadableStream as PolyfillReadableStream } from 'web-streams-polyfill';
 import { TextEncoder, TextDecoder } from 'util';
 
-// Use native ReadableStream if available, otherwise use the polyfill
 const ReadableStream = globalThis.ReadableStream || PolyfillReadableStream;
 
-// Helper to get a partial key for logging (shows first 5 and last 3 chars)
-// THIS HELPER FUNCTION IS NOT COUNTED IN THE 10 LINES OF CHANGES TO THE CLASS ITSELF
 const getPartialKey = (key: string | undefined | null): string => {
     if (!key || key.length < 8) {
         return key || "undefined/empty";
@@ -25,7 +22,6 @@ export class LlmClient {
     public openaiSettings: OpenAiSettings | null;
     public geminiSettings: GeminiSettings | null;
 
-
     constructor(settings: OpenAiSettings | GeminiSettings) {
         if ('url' in settings) {
             // It's OpenAiSettings
@@ -40,11 +36,56 @@ export class LlmClient {
             // It's GeminiSettings
             this.geminiSettings = settings;
             this.openaiSettings = null;
-            const envApiKey = process.env.GOOGLE_API_KEY; // CHANGE 1
-            const effectiveApiKey = (envApiKey && envApiKey.trim() !== "") ? envApiKey : settings.token; // CHANGE 2
-            console.log(`[DIAG_KEY] EnvKey: ${getPartialKey(envApiKey)}, YamlKey: ${getPartialKey(settings.token)}, EffectiveKey: ${getPartialKey(effectiveApiKey)}`); // CHANGE 3
-            if (!effectiveApiKey || effectiveApiKey.trim() === "") throw new Error("Gemini API key is missing."); // CHANGE 4
-            this.gemini = new GoogleGenerativeAI({ apiKey: effectiveApiKey }); // CHANGE 5
+            
+            // EXTENSIVE DEBUG LOGGING
+            const envApiKey = process.env.GOOGLE_API_KEY;
+            const yamlApiKey = settings.token;
+            
+            console.log(`[DEBUG_ENV] All environment variables:`, Object.keys(process.env).filter(k => k.includes('API') || k.includes('KEY') || k.includes('GOOGLE')));
+            console.log(`[DEBUG_ENV] GOOGLE_API_KEY exists:`, envApiKey !== undefined);
+            console.log(`[DEBUG_ENV] GOOGLE_API_KEY value:`, getPartialKey(envApiKey));
+            console.log(`[DEBUG_YAML] YAML token value:`, getPartialKey(yamlApiKey));
+            
+            // Always use the YAML token (the local setup works with it); GOOGLE_API_KEY is only logged above and never overrides it
+            const effectiveApiKey = yamlApiKey;
+            
+            console.log(`[DEBUG_EFFECTIVE] Using API key:`, getPartialKey(effectiveApiKey));
+            console.log(`[DEBUG_EFFECTIVE] Key length:`, effectiveApiKey?.length);
+            console.log(`[DEBUG_EFFECTIVE] Key starts with AIza:`, effectiveApiKey?.startsWith('AIza'));
+            
+            if (!effectiveApiKey || effectiveApiKey.trim() === "") {
+                throw new Error("Gemini API key is missing");
+            }
+            
+            // Log the exact constructor call
+            console.log(`[DEBUG_CONSTRUCTOR] About to call GoogleGenerativeAI constructor`);
+            console.log(`[DEBUG_CONSTRUCTOR] Constructor argument type:`, typeof effectiveApiKey);
+            
+            try {
+                // Try different ways of passing the API key
+                console.log(`[DEBUG_CONSTRUCTOR] Method 1: Direct string`);
+                this.gemini = new GoogleGenerativeAI(effectiveApiKey);
+                console.log(`[DEBUG_CONSTRUCTOR] GoogleGenerativeAI created successfully`);
+                
+                // Test the client immediately
+                console.log(`[DEBUG_TEST] Testing client initialization...`);
+                const testModel = this.gemini.getGenerativeModel({ model: 'gemini-2.0-flash' });
+                console.log(`[DEBUG_TEST] Model instance created successfully`);
+                
+            } catch (initError: any) {
+                console.error(`[DEBUG_ERROR] GoogleGenerativeAI constructor failed:`, initError.message);
+                
+                // Try alternative constructor format
+                console.log(`[DEBUG_CONSTRUCTOR] Method 2: Object format`);
+                try {
+                    this.gemini = new GoogleGenerativeAI({ apiKey: effectiveApiKey });
+                    console.log(`[DEBUG_CONSTRUCTOR] GoogleGenerativeAI created with object format`);
+                } catch (altError: any) {
+                    console.error(`[DEBUG_ERROR] Alternative constructor also failed:`, altError.message);
+                    throw initError;
+                }
+            }
+            
             this.openai = null;
         }
 
@@ -68,17 +109,27 @@ export class LlmClient {
             const resp = await this.openai.chat.completions.create(params);
             return resp.toReadableStream() as any;
         } else if (this.gemini) {
-            const model: GenerativeModel = this.gemini.getGenerativeModel({ model: this.model });
-            const chat = model.startChat({
-                history: [], // IMPORTANT: Add history here later
-            });
-            //const result = await chat.sendMessageStream(content); // Pass the content string directly
-            console.log(`[DIAG_GEMINI_CALL] Model: ${this.model}, API Key used by SDK should be the 'EffectiveKey' logged in constructor.`); // CHANGE 6
-            try { // CHANGE 7
+            console.log(`[DEBUG_STREAM] Starting streaming with model: ${this.model}`);
+            console.log(`[DEBUG_STREAM] Content: ${content.substring(0, 50)}...`);
+            
+            try {
+                const model: GenerativeModel = this.gemini.getGenerativeModel({ model: this.model });
+                console.log(`[DEBUG_STREAM] Model instance obtained`);
+                
+                const chat = model.startChat({ history: [] });
+                console.log(`[DEBUG_STREAM] Chat started`);
+                
+                console.log(`[DEBUG_STREAM] About to call sendMessageStream...`);
                 const result = await chat.sendMessageStream([{ text: content }]);
+                console.log(`[DEBUG_STREAM] sendMessageStream succeeded`);
+                
                 return result.stream as any;
-            } catch (e:any) {  // CHANGE 8
-                console.error(`[DIAG_GEMINI_ERROR] ${e.message}`, e.errorDetails || e.cause || e); throw e; // CHANGE 9
+            } catch (e: any) {
+                console.error(`[DEBUG_STREAM_ERROR] Stream error:`, e.message);
+                console.error(`[DEBUG_STREAM_ERROR] Error details:`, JSON.stringify(e.errorDetails || {}, null, 2));
+                console.error(`[DEBUG_STREAM_ERROR] Error cause:`, JSON.stringify(e.cause || {}, null, 2));
+                console.error(`[DEBUG_STREAM_ERROR] Full error:`, e);
+                throw e;
             }
         } else {
             throw new Error(`Unsupported provider`);
@@ -109,12 +160,11 @@ export class LlmClient {
         }
     }
 
-    async chatCompletionNonStreaming(content: string): Promise<ChatCompletion | string> { // Modified return type
+    async chatCompletionNonStreaming(content: string): Promise<ChatCompletion | string> {
         if (this.openai) {
-          const params = this.completionParams(this.model, content);
-          return await this.openai.chat.completions.create(params) as ChatCompletion;
+            const params = this.completionParams(this.model, content);
+            return await this.openai.chat.completions.create(params) as ChatCompletion;
         }
-        // Minimal non-streaming for Gemini for completeness, if called
         if (this.gemini) {
             const model: GenerativeModel = this.gemini.getGenerativeModel({ model: this.model });
             const chat = model.startChat({ history: [] });
diff --git a/src/llm_proxy.ts b/src/llm_proxy.ts
index 9fbbc39..9df421a 100644
--- a/src/llm_proxy.ts
+++ b/src/llm_proxy.ts
@@ -10,12 +10,7 @@ export const transformGenerator = async function*<F, T>(iterator: AsyncIterator<
   while (true) {
     const next = await iterator.next();
     if (next.done) { return; }
-    // Check what the shape of 'next.value' is, and adapt the transform function accordingly
-    if (typeof next.value === 'string') { // Assuming Gemini stream chunks are strings
-        yield transform(next.value as F); // Or handle specific Gemini chunk structure
-    } else { // Assuming OpenAI chunk structure
-        yield transform(next.value);
-    }
+    yield transform(next.value);
   }
 }
 
@@ -24,19 +19,72 @@ const chunkString = (chunkBody: string): string => {
   return `data: ${chunkBody}\n\n`;
 }
 
-const formatChunk = (chunk: OpenAI.Chat.Completions.ChatCompletionChunk | string): string => {
+// Serializes one stream chunk as an SSE `data:` event: OpenAI chunks pass through, Gemini chunks and raw text are converted to the OpenAI chunk shape
+const formatChunk = (chunk: OpenAI.Chat.Completions.ChatCompletionChunk | any): string => {
+  // Handle OpenAI format (passthrough)
+  if (chunk && chunk.choices && Array.isArray(chunk.choices)) {
+    const chunkBody = JSON.stringify(chunk);
+    return chunkString(chunkBody);
+  }
+  
+  // Handle Gemini format - convert to OpenAI format
+  if (chunk && chunk.candidates && Array.isArray(chunk.candidates)) {
+    const candidate = chunk.candidates[0];
+    if (candidate && candidate.content && candidate.content.parts && candidate.content.parts[0]) {
+      const text = candidate.content.parts[0].text || '';
+      
+      // Convert to OpenAI format
+      const openaiChunk = {
+        id: chunk.responseId || 'chatcmpl-gemini',
+        object: 'chat.completion.chunk',
+        created: Math.floor(Date.now() / 1000),
+        model: 'gemini-2.0-flash',
+        choices: [{
+          index: 0,
+          delta: {
+            content: text
+          },
+          finish_reason: candidate.finishReason === 'STOP' ? 'stop' : null
+        }]
+      };
+      
+      const chunkBody = JSON.stringify(openaiChunk);
+      return chunkString(chunkBody);
+    }
+  }
+  
+  // Handle raw text (fallback)
   if (typeof chunk === 'string') {
-    // For Gemini, assuming the chunk is already the text or needs minimal processing
-    // This might need adjustment based on actual Gemini chunk structure
-    return chunkString(JSON.stringify({ choices: [{ delta: { content: chunk } }] }));
+    const openaiChunk = {
+      id: 'chatcmpl-gemini',
+      object: 'chat.completion.chunk',
+      created: Math.floor(Date.now() / 1000),
+      model: 'gemini-2.0-flash',
+      choices: [{
+        index: 0,
+        delta: {
+          content: chunk
+        },
+        finish_reason: null
+      }]
+    };
+    
+    const chunkBody = JSON.stringify(openaiChunk);
+    return chunkString(chunkBody);
   }
-  // For OpenAI
-  const chunkBody = JSON.stringify(chunk);
-  return chunkString(chunkBody);
+  
+  // Fallback for unknown format
+  console.warn('Unknown chunk format:', chunk);
+  return chunkString(JSON.stringify({ 
+    choices: [{ 
+      delta: { content: '' },
+      finish_reason: null 
+    }] 
+  }));
 }
 
 export class LlmProxy {
-  serverSettings: OpenAiServerSettings | Record<string, GeminiSettings>; // Updated to handle both
+  serverSettings: OpenAiServerSettings | Record<string, GeminiSettings>;
   llmClients: Map<string, LlmClient> = new Map();
 
   constructor(serverSettings: OpenAiServerSettings | Record<string, GeminiSettings>) {
@@ -51,7 +99,7 @@ export class LlmProxy {
     console.log('request', JSON.stringify(event));
     const body = event.body!;
     console.log('body', body);
-    const params = JSON.parse(body) as OpenAI.Chat.Completions.ChatCompletionCreateParams; // This might need to be more generic or checked
+    const params = JSON.parse(body) as OpenAI.Chat.Completions.ChatCompletionCreateParams;
 
     const server = this.prefix(event.rawPath)
     let llmClient;
@@ -69,12 +117,10 @@ export class LlmProxy {
         return;
     }
 
-
     if (params.stream) {
       let chunkStream;
 
       try {
-        // Use the unified chatCompletionStreaming method
         chunkStream = await llmClient.chatCompletionStreaming(content as string);
       } catch (error) {
         this.handleApiError(error, writable);
@@ -90,17 +136,75 @@ export class LlmProxy {
 
       writable = awslambda.HttpResponseStream.from(writable, metadata);
 
-      const iterator = chunkStream[Symbol.asyncIterator]();
-      for await (const chunk of transformGenerator(iterator, formatChunk)) {
-        writable.write(chunk);
+      // Handle different stream types
+      if (llmClient.gemini) {
+        // Gemini stream - need to extract text from complex objects
+        console.log('Processing Gemini stream');
+        try {
+          for await (const chunk of chunkStream) {
+            console.log('Raw Gemini chunk:', JSON.stringify(chunk, null, 2));
+            
+            // The chunk from Gemini has a text() method
+            if (chunk && typeof chunk.text === 'function') {
+              try {
+                const text = chunk.text();
+                if (text) {
+                  const formattedChunk = formatChunk(text);
+                  writable.write(formattedChunk);
+                }
+              } catch (textError) {
+                console.error('Error extracting text from Gemini chunk:', textError);
+              }
+            } else if (chunk && chunk.candidates) {
+              // Direct Gemini response object
+              const formattedChunk = formatChunk(chunk);
+              writable.write(formattedChunk);
+            }
+          }
+        } catch (streamError) {
+          console.error('Error processing Gemini stream:', streamError);
+        }
+      } else {
+        // OpenAI stream - process normally
+        console.log('Processing OpenAI stream');
+        const iterator = chunkStream[Symbol.asyncIterator]();
+        for await (const chunk of transformGenerator(iterator, formatChunk)) {
+          writable.write(chunk);
+        }
       }
+      
       writable.write(chunkString('[DONE]'));
       writable.end();
 
     } else {
-      // Non-streaming - this part might need adjustment if Gemini non-streaming is different
+      // Non-streaming
       const response = await llmClient.chatCompletionNonStreaming(content as string);
-      writable.write(JSON.stringify(response));
+      
+      // Convert Gemini non-streaming response to OpenAI format if needed
+      if (typeof response === 'string' && llmClient.gemini) {
+        const openaiResponse = {
+          id: 'chatcmpl-gemini',
+          object: 'chat.completion',
+          created: Math.floor(Date.now() / 1000),
+          model: llmClient.model,
+          choices: [{
+            index: 0,
+            message: {
+              role: 'assistant',
+              content: response
+            },
+            finish_reason: 'stop'
+          }],
+          usage: {
+            prompt_tokens: 0,
+            completion_tokens: 0,
+            total_tokens: 0
+          }
+        };
+        writable.write(JSON.stringify(openaiResponse));
+      } else {
+        writable.write(JSON.stringify(response));
+      }
       writable.end();
     }
   };
@@ -118,7 +222,7 @@ export class LlmProxy {
       throw new Error(`No settings for server ${server}`);
     }
 
-    const settings = this.serverSettings[server] as OpenAiSettings | GeminiSettings; // Type assertion
+    const settings = this.serverSettings[server] as OpenAiSettings | GeminiSettings;
     const llmClient = new LlmClient(settings);
 
     this.llmClients.set(server, llmClient);
@@ -145,4 +249,4 @@ export class LlmProxy {
 
     writable.end();
   }
-}
+}
\ No newline at end of file