diff --git a/src/llm_client.ts b/src/llm_client.ts index 4eec7b8..f3a0a3e 100644 --- a/src/llm_client.ts +++ b/src/llm_client.ts @@ -4,16 +4,26 @@ import { GeminiSettings } from './gemini_settings'; import { GoogleGenerativeAI, GenerativeModel } from "@google/generative-ai"; import { ChatCompletion, ChatCompletionCreateParams } from 'openai/resources/chat/completions'; import { ReadableStream as PolyfillReadableStream } from 'web-streams-polyfill'; -import { TextEncoder, TextDecoder } from 'util'; +import fs from 'fs'; const ReadableStream = globalThis.ReadableStream || PolyfillReadableStream; -const getPartialKey = (key: string | undefined | null): string => { - if (!key || key.length < 8) { - return key || "undefined/empty"; - } - return `${key.substring(0, 5)}...${key.substring(key.length - 3)}`; -}; +// Types for conversation history +interface ConversationMessage { + role: string; + content: string; +} + +interface GeminiHistoryItem { + role: string; + parts: Array<{ text: string }>; +} + +interface ConversationFile { + conversation_history: ConversationMessage[] | any[]; + format: string; + version: string; +} export class LlmClient { public openai: OpenAi | null; @@ -21,10 +31,11 @@ export class LlmClient { public model: string; public openaiSettings: OpenAiSettings | null; public geminiSettings: GeminiSettings | null; + private conversationHistory: ConversationMessage[] = []; constructor(settings: OpenAiSettings | GeminiSettings) { if ('url' in settings) { - // It's OpenAiSettings + // OpenAI Settings this.openaiSettings = settings; this.geminiSettings = null; this.openai = new OpenAi({ @@ -33,102 +44,109 @@ export class LlmClient { }); this.gemini = null; } else { - // It's GeminiSettings + // Gemini Settings this.geminiSettings = settings; this.openaiSettings = null; - - // EXTENSIVE DEBUG LOGGING - const envApiKey = process.env.GOOGLE_API_KEY; - const yamlApiKey = settings.token; - - console.log(`[DEBUG_ENV] All environment variables:`, Object.keys(process.env).filter(k => k.includes('API') || k.includes('KEY') || k.includes('GOOGLE'))); - console.log(`[DEBUG_ENV] GOOGLE_API_KEY exists:`, envApiKey !== undefined); - console.log(`[DEBUG_ENV] GOOGLE_API_KEY value:`, getPartialKey(envApiKey)); - console.log(`[DEBUG_YAML] YAML token value:`, getPartialKey(yamlApiKey)); - - // Use YAML config by default (since local works with YAML) - const effectiveApiKey = yamlApiKey; - - console.log(`[DEBUG_EFFECTIVE] Using API key:`, getPartialKey(effectiveApiKey)); - console.log(`[DEBUG_EFFECTIVE] Key length:`, effectiveApiKey?.length); - console.log(`[DEBUG_EFFECTIVE] Key starts with AIza:`, effectiveApiKey?.startsWith('AIza')); - - if (!effectiveApiKey || effectiveApiKey.trim() === "") { - throw new Error("Gemini API key is missing"); + this.gemini = new GoogleGenerativeAI(settings.token); + this.openai = null; + } + this.model = settings.model; + } + + /** + * Load conversation history from a file + */ + loadConversationHistory(filePath: string): void { + try { + if (!fs.existsSync(filePath)) { + console.log(`[CONVERSATION] History file not found: ${filePath}`); + return; } + + const fileContent = fs.readFileSync(filePath, 'utf-8'); + const conversationFile: ConversationFile = JSON.parse(fileContent); - // Log the exact constructor call - console.log(`[DEBUG_CONSTRUCTOR] About to call GoogleGenerativeAI constructor`); - console.log(`[DEBUG_CONSTRUCTOR] Constructor argument type:`, typeof effectiveApiKey); - - try { - // Try different ways of passing the API key - console.log(`[DEBUG_CONSTRUCTOR] Method 1: Direct string`); - this.gemini = new GoogleGenerativeAI(effectiveApiKey); - console.log(`[DEBUG_CONSTRUCTOR] GoogleGenerativeAI created successfully`); - - // Test the client immediately - console.log(`[DEBUG_TEST] Testing client initialization...`); - const testModel = this.gemini.getGenerativeModel({ model: 'gemini-2.0-flash' }); - console.log(`[DEBUG_TEST] Model instance created successfully`); - - } catch (initError: any) { - console.error(`[DEBUG_ERROR] GoogleGenerativeAI constructor failed:`, initError.message); - - // Try alternative constructor format - console.log(`[DEBUG_CONSTRUCTOR] Method 2: Object format`); - try { - this.gemini = new GoogleGenerativeAI({ apiKey: effectiveApiKey }); - console.log(`[DEBUG_CONSTRUCTOR] GoogleGenerativeAI created with object format`); - } catch (altError: any) { - console.error(`[DEBUG_ERROR] Alternative constructor also failed:`, altError.message); - throw initError; - } + console.log(`[CONVERSATION] Loading history format: ${conversationFile.format}`); + console.log(`[CONVERSATION] History messages: ${conversationFile.conversation_history.length}`); + + if (conversationFile.format === 'openai_conversation') { + this.conversationHistory = conversationFile.conversation_history as ConversationMessage[]; + } else if (conversationFile.format === 'gemini_conversation') { + // Convert Gemini format to standardized format + this.conversationHistory = this.convertGeminiToStandardHistory(conversationFile.conversation_history); } - - this.openai = null; + + console.log(`[CONVERSATION] Loaded ${this.conversationHistory.length} messages`); + } catch (error) { + console.error(`[CONVERSATION] Error loading history: ${error}`); + this.conversationHistory = []; } + } - this.model = settings.model; + /** + * Convert Gemini conversation format to standard format + */ + private convertGeminiToStandardHistory(geminiHistory: any[]): ConversationMessage[] { + return geminiHistory.map(item => { + // Extract text from parts array + const textParts = item.parts + ?.filter((part: any) => part.type === 'text') + ?.map((part: any) => part.content) || []; + + return { + role: item.role === 'model' ? 'assistant' : item.role, + content: textParts.join('\n\n') + }; + }).filter(msg => msg.content.trim().length > 0); + } + + /** + * Convert standard history to Gemini format + */ + private convertToGeminiHistory(messages: ConversationMessage[]): GeminiHistoryItem[] { + return messages.map(msg => ({ + role: msg.role === 'assistant' ? 'model' : 'user', + parts: [{ text: msg.content }] + })); } - private completionParams(model: string, content: string): any { - return { - model: model, - messages: [{ role: 'user', content: content }], - }; + /** + * Merge conversation history with new messages + */ + private mergeWithHistory(newMessages: ConversationMessage[]): ConversationMessage[] { + return [...this.conversationHistory, ...newMessages]; } - async chatCompletionStreaming(content: string): Promise { + async chatCompletionStreaming(messages: ConversationMessage[]): Promise { + const fullConversation = this.mergeWithHistory(messages); + console.log(`[CONVERSATION] Total messages (with history): ${fullConversation.length}`); + if (this.openai) { const params: ChatCompletionCreateParams = { model: this.model, - messages: [{ role: 'user', content: content }], + messages: fullConversation as any, stream: true }; const resp = await this.openai.chat.completions.create(params); return resp.toReadableStream() as any; } else if (this.gemini) { - console.log(`[DEBUG_STREAM] Starting streaming with model: ${this.model}`); - console.log(`[DEBUG_STREAM] Content: ${content.substring(0, 50)}...`); - try { const model: GenerativeModel = this.gemini.getGenerativeModel({ model: this.model }); - console.log(`[DEBUG_STREAM] Model instance obtained`); - const chat = model.startChat({ history: [] }); - console.log(`[DEBUG_STREAM] Chat started`); + // Convert to Gemini format and separate history from new message + const geminiHistory = this.convertToGeminiHistory(fullConversation.slice(0, -1)); + const lastMessage = fullConversation[fullConversation.length - 1]; + + console.log(`[GEMINI] Starting chat with ${geminiHistory.length} history messages`); - console.log(`[DEBUG_STREAM] About to call sendMessageStream...`); - const result = await chat.sendMessageStream([{ text: content }]); - console.log(`[DEBUG_STREAM] sendMessageStream succeeded`); + const chat = model.startChat({ + history: geminiHistory + }); + const result = await chat.sendMessageStream([{ text: lastMessage.content }]); return result.stream as any; } catch (e: any) { - console.error(`[DEBUG_STREAM_ERROR] Stream error:`, e.message); - console.error(`[DEBUG_STREAM_ERROR] Error details:`, JSON.stringify(e.errorDetails || {}, null, 2)); - console.error(`[DEBUG_STREAM_ERROR] Error cause:`, JSON.stringify(e.cause || {}, null, 2)); - console.error(`[DEBUG_STREAM_ERROR] Full error:`, e); + console.error(`[GEMINI] Streaming error:`, e.message); throw e; } } else { @@ -136,50 +154,66 @@ export class LlmClient { } } - async createCompletionStreaming(params: any): Promise { - try { - const resp = await this.openai?.chat.completions.create(params); - if (!resp) { - return null; - } - return new ReadableStream({ - async start(controller: any) { - if (resp.choices) { - for await (const chunk of resp) { - if (chunk.choices && chunk.choices[0] && chunk.choices[0].delta && chunk.choices[0].delta.content) { - controller.enqueue(new TextEncoder().encode(chunk.choices[0].delta.content)); - } - } - } - controller.close(); - } - }); - } catch (error) { - console.error("Error in createCompletionStreaming:", error); - return null; - } - } + async chatCompletionNonStreaming(messages: ConversationMessage[]): Promise { + const fullConversation = this.mergeWithHistory(messages); - async chatCompletionNonStreaming(content: string): Promise { if (this.openai) { - const params = this.completionParams(this.model, content); + const params = { + model: this.model, + messages: fullConversation, + stream: false + } as OpenAi.Chat.Completions.ChatCompletionCreateParamsNonStreaming; return await this.openai.chat.completions.create(params) as ChatCompletion; + } else if (this.gemini) { + try { + const model: GenerativeModel = this.gemini.getGenerativeModel({ model: this.model }); + + const geminiHistory = this.convertToGeminiHistory(fullConversation.slice(0, -1)); + const lastMessage = fullConversation[fullConversation.length - 1]; + + const chat = model.startChat({ + history: geminiHistory + }); + + const result = await chat.sendMessage([{ text: lastMessage.content }]); + return result.response.text(); + } catch (e: any) { + console.error(`[GEMINI] Non-streaming error:`, e.message); + throw e; + } } - if (this.gemini) { - const model: GenerativeModel = this.gemini.getGenerativeModel({ model: this.model }); - const chat = model.startChat({ history: [] }); - const result = await chat.sendMessage([{ text: content }]); - return result.response.text(); - } - throw new Error("Not implemented for this provider for non-streaming or client not initialized."); + throw new Error("Provider not supported"); } - async createCompletionNonStreaming(params: OpenAi.Chat.Completions.ChatCompletionCreateParamsNonStreaming): Promise { - let response = await this.openai.chat.completions.create(params); - if (typeof response === 'string' || response instanceof String) { - let responseStr = response as unknown as string; - response = JSON.parse(responseStr); - } - return response; + /** + * Clear conversation history + */ + clearHistory(): void { + this.conversationHistory = []; + console.log(`[CONVERSATION] History cleared`); + } + + /** + * Add message to conversation history + */ + addToHistory(role: string, content: string): void { + this.conversationHistory.push({ role, content }); + console.log(`[CONVERSATION] Added ${role} message to history`); + } + + /** + * Get current conversation history + */ + getHistory(): ConversationMessage[] { + return [...this.conversationHistory]; + } + + // Legacy methods for backward compatibility + async chatCompletionStreamingLegacy(content: string): Promise { + return this.chatCompletionStreaming([{ role: 'user', content }]); + } + + async chatCompletionNonStreamingLegacy(content: string): Promise { + return this.chatCompletionNonStreaming([{ role: 'user', content }]); } } \ No newline at end of file diff --git a/src/llm_proxy.ts b/src/llm_proxy.ts index 9df421a..643e9f2 100644 --- a/src/llm_proxy.ts +++ b/src/llm_proxy.ts @@ -19,7 +19,6 @@ const chunkString = (chunkBody: string): string => { return `data: ${chunkBody}\n\n`; } -// Updated formatChunk to properly handle Gemini responses const formatChunk = (chunk: OpenAI.Chat.Completions.ChatCompletionChunk | any): string => { // Handle OpenAI format (passthrough) if (chunk && chunk.choices && Array.isArray(chunk.choices)) { @@ -33,7 +32,6 @@ const formatChunk = (chunk: OpenAI.Chat.Completions.ChatCompletionChunk | any): if (candidate && candidate.content && candidate.content.parts && candidate.content.parts[0]) { const text = candidate.content.parts[0].text || ''; - // Convert to OpenAI format const openaiChunk = { id: chunk.responseId || 'chatcmpl-gemini', object: 'chat.completion.chunk', @@ -73,7 +71,6 @@ const formatChunk = (chunk: OpenAI.Chat.Completions.ChatCompletionChunk | any): return chunkString(chunkBody); } - // Fallback for unknown format console.warn('Unknown chunk format:', chunk); return chunkString(JSON.stringify({ choices: [{ @@ -99,73 +96,200 @@ export class LlmProxy { console.log('request', JSON.stringify(event)); const body = event.body!; console.log('body', body); - const params = JSON.parse(body) as OpenAI.Chat.Completions.ChatCompletionCreateParams; - const server = this.prefix(event.rawPath) - let llmClient; + // Parse the request path to determine the action + const pathParts = event.rawPath.split('/'); + const server = pathParts[1]; + const action = pathParts[2]; // 'v1' for chat, 'conversation' for conversation management + let llmClient; try { llmClient = this.getLlmClient(server); } catch (error) { - this.addErrorResponse(400, writable); + this.addErrorResponse(400, writable, `Server ${server} not configured`); return; } - const content = params.messages && params.messages.length > 0 ? params.messages[params.messages.length -1].content : ""; - if (typeof content !== 'string') { + // Handle conversation management endpoints + if (action === 'conversation') { + return this.handleConversationManagement(event, writable, llmClient); + } + + // Handle regular chat completions + if (action === 'v1' && pathParts[3] === 'chat' && pathParts[4] === 'completions') { + return this.handleChatCompletion(event, writable, llmClient); + } + + this.addErrorResponse(404, writable, 'Endpoint not found'); + }; + + private handleConversationManagement = async ( + event: APIGatewayProxyEventV2, + writable: Writable, + llmClient: LlmClient + ) => { + const pathParts = event.rawPath.split('/'); + const operation = pathParts[3]; // load, clear, status + + const metadata = { + statusCode: 200, + headers: { + "Content-Type": "application/json", + }, + }; + + writable = awslambda.HttpResponseStream.from(writable, metadata); + + try { + const body = event.body ? JSON.parse(event.body) : {}; + + switch (operation) { + case 'load': + // Load conversation from file + const filePath = body.filePath || './conversations/default.json'; + llmClient.loadConversationHistory(filePath); + writable.write(JSON.stringify({ + success: true, + message: `Conversation loaded from ${filePath}`, + historyLength: llmClient.getHistory().length + })); + break; + + case 'clear': + // Clear conversation history + llmClient.clearHistory(); + writable.write(JSON.stringify({ + success: true, + message: 'Conversation history cleared' + })); + break; + + case 'status': + // Get conversation status + const history = llmClient.getHistory(); + writable.write(JSON.stringify({ + success: true, + historyLength: history.length, + lastMessages: history.slice(-3) // Last 3 messages for preview + })); + break; + + case 'add': + // Add message to history + const { role, content } = body; + if (!role || !content) { + throw new Error('Role and content are required'); + } + llmClient.addToHistory(role, content); + writable.write(JSON.stringify({ + success: true, + message: 'Message added to history', + historyLength: llmClient.getHistory().length + })); + break; + + default: + throw new Error(`Unknown conversation operation: ${operation}`); + } + } catch (error) { + writable.write(JSON.stringify({ + success: false, + error: error instanceof Error ? error.message : 'Unknown error' + })); + } + + writable.end(); + }; + + private handleChatCompletion = async ( + event: APIGatewayProxyEventV2, + writable: Writable, + llmClient: LlmClient + ) => { + const body = JSON.parse(event.body!); + const params = body as OpenAI.Chat.Completions.ChatCompletionCreateParams; + + // Extract messages - support both single message and full conversation + let messages: Array<{role: string, content: string}>; + + if (params.messages && Array.isArray(params.messages)) { + // Full conversation provided + messages = params.messages.filter(msg => + typeof msg.content === 'string' + ).map(msg => ({ + role: msg.role, + content: msg.content as string + })); + console.log(`[CHAT] Processing ${messages.length} messages`); + } else { + // Legacy: extract last message only + const lastMessage = params.messages?.[params.messages.length - 1]; + if (!lastMessage || typeof lastMessage.content !== 'string') { this.addErrorResponse(400, writable, "Invalid message content"); return; + } + messages = [{ + role: lastMessage.role, + content: lastMessage.content + }]; + console.log(`[CHAT] Processing single message (legacy mode)`); } if (params.stream) { - let chunkStream; + await this.handleStreamingResponse(params, messages, writable, llmClient); + } else { + await this.handleNonStreamingResponse(messages, writable, llmClient); + } + }; - try { - chunkStream = await llmClient.chatCompletionStreaming(content as string); - } catch (error) { - this.handleApiError(error, writable); - return; - } + private handleStreamingResponse = async ( + params: any, + messages: Array<{role: string, content: string}>, + writable: Writable, + llmClient: LlmClient + ) => { + let chunkStream; - const metadata = { - statusCode: 200, - headers: { - "Content-Type": "text/event-stream", - }, - }; + try { + chunkStream = await llmClient.chatCompletionStreaming(messages); + } catch (error) { + this.handleApiError(error, writable); + return; + } - writable = awslambda.HttpResponseStream.from(writable, metadata); + const metadata = { + statusCode: 200, + headers: { + "Content-Type": "text/event-stream", + }, + }; - // Handle different stream types + writable = awslambda.HttpResponseStream.from(writable, metadata); + + try { if (llmClient.gemini) { - // Gemini stream - need to extract text from complex objects + // Gemini stream processing console.log('Processing Gemini stream'); - try { - for await (const chunk of chunkStream) { - console.log('Raw Gemini chunk:', JSON.stringify(chunk, null, 2)); - - // The chunk from Gemini has a text() method - if (chunk && typeof chunk.text === 'function') { - try { - const text = chunk.text(); - if (text) { - const formattedChunk = formatChunk(text); - writable.write(formattedChunk); - } - } catch (textError) { - console.error('Error extracting text from Gemini chunk:', textError); + for await (const chunk of chunkStream) { + console.log('Raw Gemini chunk:', JSON.stringify(chunk, null, 2)); + + if (chunk && typeof chunk.text === 'function') { + try { + const text = chunk.text(); + if (text) { + const formattedChunk = formatChunk(text); + writable.write(formattedChunk); } - } else if (chunk && chunk.candidates) { - // Direct Gemini response object - const formattedChunk = formatChunk(chunk); - writable.write(formattedChunk); + } catch (textError) { + console.error('Error extracting text from Gemini chunk:', textError); } + } else if (chunk && chunk.candidates) { + const formattedChunk = formatChunk(chunk); + writable.write(formattedChunk); } - } catch (streamError) { - console.error('Error processing Gemini stream:', streamError); } } else { - // OpenAI stream - process normally + // OpenAI stream processing console.log('Processing OpenAI stream'); const iterator = chunkStream[Symbol.asyncIterator](); for await (const chunk of transformGenerator(iterator, formatChunk)) { @@ -174,13 +298,22 @@ export class LlmProxy { } writable.write(chunkString('[DONE]')); - writable.end(); + } catch (streamError) { + console.error('Error processing stream:', streamError); + } + + writable.end(); + }; - } else { - // Non-streaming - const response = await llmClient.chatCompletionNonStreaming(content as string); + private handleNonStreamingResponse = async ( + messages: Array<{role: string, content: string}>, + writable: Writable, + llmClient: LlmClient + ) => { + try { + const response = await llmClient.chatCompletionNonStreaming(messages); - // Convert Gemini non-streaming response to OpenAI format if needed + // Convert Gemini response to OpenAI format if needed if (typeof response === 'string' && llmClient.gemini) { const openaiResponse = { id: 'chatcmpl-gemini', @@ -205,14 +338,14 @@ export class LlmProxy { } else { writable.write(JSON.stringify(response)); } - writable.end(); + } catch (error) { + this.handleApiError(error, writable); + return; } + + writable.end(); }; - prefix(rawPath: String): string { - return rawPath.split('/')[1]; - } - getLlmClient(server: string): LlmClient { if (this.llmClients.has(server)) { return this.llmClients.get(server)!; @@ -246,7 +379,6 @@ export class LlmProxy { writable = awslambda.HttpResponseStream.from(writable, metadata); writable.write(message); - writable.end(); } } \ No newline at end of file diff --git a/template.yaml b/template.yaml index 2543536..0ef0b7d 100644 --- a/template.yaml +++ b/template.yaml @@ -1,16 +1,18 @@ -# This AWS SAM template has been generated from your function's configuration. If -# your function has one or more triggers, note that the AWS resources associated -# with these triggers aren't fully specified in this template and include -# placeholder values. Open this template in AWS Application Composer or your -# favorite IDE and modify it to specify a serverless application with other AWS -# resources. AWSTemplateFormatVersion: '2010-09-09' Transform: AWS::Serverless-2016-10-31 Description: OpenAI compatible API proxy running on AWS Lambda + +Parameters: + FunctionName: + Type: String + Default: lambda-llm-proxy + Description: Name for the Lambda function + Resources: openaiProxy: Type: AWS::Serverless::Function Properties: + FunctionName: !Ref FunctionName CodeUri: dist Description: 'OpenAI compatible API proxy running on AWS Lambda' MemorySize: 128 @@ -52,4 +54,17 @@ Resources: SnapStart: ApplyOn: None RuntimeManagementConfig: - UpdateRuntimeOn: Auto \ No newline at end of file + UpdateRuntimeOn: Auto + +Outputs: + FunctionUrl: + Description: 'Lambda Function URL' + Value: !Ref openaiProxyUrl + Export: + Name: !Sub '${AWS::StackName}-FunctionUrl' + + FunctionName: + Description: 'Lambda Function Name' + Value: !Ref FunctionName + Export: + Name: !Sub '${AWS::StackName}-FunctionName' \ No newline at end of file