|
| 1 | +package utils |
| 2 | + |
| 3 | +import ( |
| 4 | + "strings" |
| 5 | +) |
| 6 | + |
| 7 | +// SplitMessage splits long messages into chunks, preserving code block integrity. |
| 8 | +// The function reserves a buffer (10% of maxLen, min 50) to leave room for closing code blocks, |
| 9 | +// but may extend to maxLen when needed. |
| 10 | +// Call SplitMessage with the full text content and the maximum allowed length of a single message; |
| 11 | +// it returns a slice of message chunks that each respect maxLen and avoid splitting fenced code blocks. |
| 12 | +func SplitMessage(content string, maxLen int) []string { |
| 13 | + var messages []string |
| 14 | + |
| 15 | + // Dynamic buffer: 10% of maxLen, but at least 50 chars if possible |
| 16 | + codeBlockBuffer := maxLen / 10 |
| 17 | + if codeBlockBuffer < 50 { |
| 18 | + codeBlockBuffer = 50 |
| 19 | + } |
| 20 | + if codeBlockBuffer > maxLen/2 { |
| 21 | + codeBlockBuffer = maxLen / 2 |
| 22 | + } |
| 23 | + |
| 24 | + for len(content) > 0 { |
| 25 | + if len(content) <= maxLen { |
| 26 | + messages = append(messages, content) |
| 27 | + break |
| 28 | + } |
| 29 | + |
| 30 | + // Effective split point: maxLen minus buffer, to leave room for code blocks |
| 31 | + effectiveLimit := maxLen - codeBlockBuffer |
| 32 | + if effectiveLimit < maxLen/2 { |
| 33 | + effectiveLimit = maxLen / 2 |
| 34 | + } |
| 35 | + |
| 36 | + // Find natural split point within the effective limit |
| 37 | + msgEnd := findLastNewline(content[:effectiveLimit], 200) |
| 38 | + if msgEnd <= 0 { |
| 39 | + msgEnd = findLastSpace(content[:effectiveLimit], 100) |
| 40 | + } |
| 41 | + if msgEnd <= 0 { |
| 42 | + msgEnd = effectiveLimit |
| 43 | + } |
| 44 | + |
| 45 | + // Check if this would end with an incomplete code block |
| 46 | + candidate := content[:msgEnd] |
| 47 | + unclosedIdx := findLastUnclosedCodeBlock(candidate) |
| 48 | + |
| 49 | + if unclosedIdx >= 0 { |
| 50 | + // Message would end with incomplete code block |
| 51 | + // Try to extend up to maxLen to include the closing ``` |
| 52 | + if len(content) > msgEnd { |
| 53 | + closingIdx := findNextClosingCodeBlock(content, msgEnd) |
| 54 | + if closingIdx > 0 && closingIdx <= maxLen { |
| 55 | + // Extend to include the closing ``` |
| 56 | + msgEnd = closingIdx |
| 57 | + } else { |
| 58 | + // Code block is too long to fit in one chunk or missing closing fence. |
| 59 | + // Try to split inside by injecting closing and reopening fences. |
| 60 | + headerEnd := strings.Index(content[unclosedIdx:], "\n") |
| 61 | + if headerEnd == -1 { |
| 62 | + headerEnd = unclosedIdx + 3 |
| 63 | + } else { |
| 64 | + headerEnd += unclosedIdx |
| 65 | + } |
| 66 | + header := strings.TrimSpace(content[unclosedIdx:headerEnd]) |
| 67 | + |
| 68 | + // If we have a reasonable amount of content after the header, split inside |
| 69 | + if msgEnd > headerEnd+20 { |
| 70 | + // Find a better split point closer to maxLen |
| 71 | + innerLimit := maxLen - 5 // Leave room for "\n```" |
| 72 | + betterEnd := findLastNewline(content[:innerLimit], 200) |
| 73 | + if betterEnd > headerEnd { |
| 74 | + msgEnd = betterEnd |
| 75 | + } else { |
| 76 | + msgEnd = innerLimit |
| 77 | + } |
| 78 | + messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```") |
| 79 | + content = strings.TrimSpace(header + "\n" + content[msgEnd:]) |
| 80 | + continue |
| 81 | + } |
| 82 | + |
| 83 | + // Otherwise, try to split before the code block starts |
| 84 | + newEnd := findLastNewline(content[:unclosedIdx], 200) |
| 85 | + if newEnd <= 0 { |
| 86 | + newEnd = findLastSpace(content[:unclosedIdx], 100) |
| 87 | + } |
| 88 | + if newEnd > 0 { |
| 89 | + msgEnd = newEnd |
| 90 | + } else { |
| 91 | + // If we can't split before, we MUST split inside (last resort) |
| 92 | + if unclosedIdx > 20 { |
| 93 | + msgEnd = unclosedIdx |
| 94 | + } else { |
| 95 | + msgEnd = maxLen - 5 |
| 96 | + messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```") |
| 97 | + content = strings.TrimSpace(header + "\n" + content[msgEnd:]) |
| 98 | + continue |
| 99 | + } |
| 100 | + } |
| 101 | + } |
| 102 | + } |
| 103 | + } |
| 104 | + |
| 105 | + if msgEnd <= 0 { |
| 106 | + msgEnd = effectiveLimit |
| 107 | + } |
| 108 | + |
| 109 | + messages = append(messages, content[:msgEnd]) |
| 110 | + content = strings.TrimSpace(content[msgEnd:]) |
| 111 | + } |
| 112 | + |
| 113 | + return messages |
| 114 | +} |
| 115 | + |
// findLastUnclosedCodeBlock reports where the still-open code fence in text
// begins. Fences (```) are matched left to right without overlapping, and each
// one alternately opens and closes a block. The result is the byte offset of
// the opening fence that has no partner, or -1 when every fence is paired.
func findLastUnclosedCodeBlock(text string) int {
	const fence = "```"

	openAt := -1 // offset of the currently open fence, -1 while outside a block
	offset := 0
	for {
		rel := strings.Index(text[offset:], fence)
		if rel < 0 {
			break
		}
		fenceAt := offset + rel
		if openAt < 0 {
			openAt = fenceAt
		} else {
			openAt = -1
		}
		// Resume after this fence so its backticks are not counted twice.
		offset = fenceAt + len(fence)
	}
	return openAt
}
| 139 | + |
// findNextClosingCodeBlock looks for the first ``` fence in text at or after
// startIdx. It returns the offset just past that fence, or -1 when there is
// none.
func findNextClosingCodeBlock(text string, startIdx int) int {
	const fence = "```"

	// A fence cannot start later than this offset and still fit in text.
	lastStart := len(text) - len(fence)
	for pos := startIdx; pos <= lastStart; pos++ {
		if text[pos:pos+len(fence)] == fence {
			return pos + len(fence)
		}
	}
	return -1
}
| 150 | + |
// findLastNewline searches the final searchWindow bytes of s for a newline.
// It returns the offset (relative to the start of s) of the last newline in
// that window, or -1 when the window contains none.
func findLastNewline(s string, searchWindow int) int {
	from := len(s) - searchWindow
	if from < 0 {
		from = 0
	}
	// An empty window (including a non-positive searchWindow) matches nothing.
	if from >= len(s) {
		return -1
	}
	if rel := strings.LastIndexByte(s[from:], '\n'); rel >= 0 {
		return from + rel
	}
	return -1
}
| 165 | + |
// findLastSpace searches the final searchWindow bytes of s for a space or tab.
// It returns the offset (relative to the start of s) of the last such byte in
// that window, or -1 when the window contains none.
func findLastSpace(s string, searchWindow int) int {
	from := len(s) - searchWindow
	if from < 0 {
		from = 0
	}
	// An empty window (including a non-positive searchWindow) matches nothing.
	if from >= len(s) {
		return -1
	}
	if rel := strings.LastIndexAny(s[from:], " \t"); rel >= 0 {
		return from + rel
	}
	return -1
}
0 commit comments