Skip to content

Commit 59fd391

Browse files
authored
Merge pull request #436 from Huaaudio/feat/base-layer-message-split
Refactor/base layer message split from #143
2 parents 048cd08 + 0d6b22f commit 59fd391

File tree

3 files changed

+331
-128
lines changed

3 files changed

+331
-128
lines changed

pkg/channels/discord.go

Lines changed: 1 addition & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"context"
55
"fmt"
66
"os"
7-
"strings"
87
"time"
98

109
"github.com/bwmarrin/discordgo"
@@ -106,7 +105,7 @@ func (c *DiscordChannel) Send(ctx context.Context, msg bus.OutboundMessage) erro
106105
return nil
107106
}
108107

109-
chunks := splitMessage(msg.Content, 1500) // Discord has a limit of 2000 characters per message, leave 500 for natural split e.g. code blocks
108+
chunks := utils.SplitMessage(msg.Content, 2000) // Split messages into chunks, Discord length limit: 2000 chars
110109

111110
for _, chunk := range chunks {
112111
if err := c.sendChunk(ctx, channelID, chunk); err != nil {
@@ -117,132 +116,6 @@ func (c *DiscordChannel) Send(ctx context.Context, msg bus.OutboundMessage) erro
117116
return nil
118117
}
119118

120-
// splitMessage splits long messages into chunks, preserving code block integrity
121-
// Uses natural boundaries (newlines, spaces) and extends messages slightly to avoid breaking code blocks
//
// content is the full text; limit is the preferred chunk size. All lengths are
// byte counts (len on a string), not rune counts. A chunk may exceed limit by
// up to 500 bytes when that lets it include the closing ``` of a code block.
// Each remainder is passed through strings.TrimSpace, so whitespace at chunk
// boundaries is dropped. Returns nil for empty content.
//
// NOTE(review): a non-positive limit is not guarded; content[:limit] is
// evaluated as soon as content is longer than limit.
122-
func splitMessage(content string, limit int) []string {
123-
var messages []string
124-
125-
for len(content) > 0 {
126-
if len(content) <= limit {
127-
messages = append(messages, content)
128-
break
129-
}
130-
131-
// Initial value only; it is overwritten by the search just below.
msgEnd := limit
132-
133-
// Find natural split point within the limit
// (last newline in the final 200 bytes, else last space/tab in the final 100 bytes;
// an index of 0 is treated the same as "not found")
134-
msgEnd = findLastNewline(content[:limit], 200)
135-
if msgEnd <= 0 {
136-
msgEnd = findLastSpace(content[:limit], 100)
137-
}
138-
if msgEnd <= 0 {
139-
msgEnd = limit
140-
}
141-
142-
// Check if this would end with an incomplete code block
143-
candidate := content[:msgEnd]
144-
unclosedIdx := findLastUnclosedCodeBlock(candidate)
145-
146-
if unclosedIdx >= 0 {
147-
// Message would end with incomplete code block
148-
// Try to extend to include the closing ``` (with some buffer)
149-
extendedLimit := limit + 500 // Allow 500 char buffer for code blocks
150-
if len(content) > extendedLimit {
151-
closingIdx := findNextClosingCodeBlock(content, msgEnd)
152-
if closingIdx > 0 && closingIdx <= extendedLimit {
153-
// Extend to include the closing ```
154-
msgEnd = closingIdx
155-
} else {
156-
// Can't find closing, split before the code block
// (if the fence sits at index 0, msgEnd becomes 0 here and the
// fallback below cuts at limit, i.e. inside the code block)
157-
msgEnd = findLastNewline(content[:unclosedIdx], 200)
158-
if msgEnd <= 0 {
159-
msgEnd = findLastSpace(content[:unclosedIdx], 100)
160-
}
161-
if msgEnd <= 0 {
162-
msgEnd = unclosedIdx
163-
}
164-
}
165-
} else {
166-
// Remaining content fits within extended limit
// so everything left is emitted as one final chunk of up to limit+500 bytes
167-
msgEnd = len(content)
168-
}
169-
}
170-
171-
// Last-resort guard so the slice below always consumes something.
if msgEnd <= 0 {
172-
msgEnd = limit
173-
}
174-
175-
messages = append(messages, content[:msgEnd])
176-
content = strings.TrimSpace(content[msgEnd:])
177-
}
178-
179-
return messages
180-
}
181-
182-
// findLastUnclosedCodeBlock finds the last opening ``` that doesn't have a closing ```
183-
// Returns the position of the opening ``` or -1 if all code blocks are complete
//
// Fences are counted left to right without overlap; an odd count means the
// text ends inside a code block.
//
// NOTE(review): lastOpenIdx is assigned only while count == 0, so it always
// holds the index of the FIRST fence. With three or more fences and an odd
// count, the value returned is the first fence, not the unmatched last one.
184-
func findLastUnclosedCodeBlock(text string) int {
185-
count := 0
186-
lastOpenIdx := -1
187-
188-
for i := 0; i < len(text); i++ {
189-
if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' {
190-
if count == 0 {
191-
lastOpenIdx = i
192-
}
193-
count++
194-
// Skip the rest of this fence; the loop's i++ moves past the third backtick.
i += 2
195-
}
196-
}
197-
198-
// If odd number of ``` markers, last one is unclosed
199-
if count%2 == 1 {
200-
return lastOpenIdx
201-
}
202-
return -1
203-
}
204-
205-
// findNextClosingCodeBlock finds the next closing ``` starting from a position
205-
// Returns the position after the closing ``` or -1 if not found
//
// The scan does not track open/closed state: the first ``` at or after
// startIdx is taken as the closing fence. A startIdx at or beyond len(text)
// yields -1.
206-
func findNextClosingCodeBlock(text string, startIdx int) int {
207-
for i := startIdx; i < len(text); i++ {
208-
if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' {
209-
// Index just past the third backtick.
return i + 3
210-
}
211-
}
212-
return -1
213-
}
215-
216-
// findLastNewline finds the last newline character within the last N characters
217-
// Returns the position of the newline or -1 if not found
//
// searchWindow is N, counted in bytes from the end of s; the window is clamped
// to the start of s when s is shorter than N. The returned position is a byte
// index into s.
218-
func findLastNewline(s string, searchWindow int) int {
219-
searchStart := len(s) - searchWindow
220-
if searchStart < 0 {
221-
searchStart = 0
222-
}
223-
// Walk backwards so the first hit is the newline closest to the end.
for i := len(s) - 1; i >= searchStart; i-- {
224-
if s[i] == '\n' {
225-
return i
226-
}
227-
}
228-
return -1
229-
}
230-
231-
// findLastSpace finds the last space character within the last N characters
232-
// Returns the position of the space or -1 if not found
//
// Both ' ' and '\t' count as a space. searchWindow is N, counted in bytes from
// the end of s and clamped to the start of s. The returned position is a byte
// index into s.
233-
func findLastSpace(s string, searchWindow int) int {
234-
searchStart := len(s) - searchWindow
235-
if searchStart < 0 {
236-
searchStart = 0
237-
}
238-
// Walk backwards so the first hit is the space or tab closest to the end.
for i := len(s) - 1; i >= searchStart; i-- {
239-
if s[i] == ' ' || s[i] == '\t' {
240-
return i
241-
}
242-
}
243-
return -1
244-
}
245-
246119
func (c *DiscordChannel) sendChunk(ctx context.Context, channelID, content string) error {
247120
// 使用传入的 ctx 进行超时控制
248121
sendCtx, cancel := context.WithTimeout(ctx, sendTimeout)

pkg/utils/message.go

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
package utils
2+
3+
import (
4+
"strings"
5+
)
6+
7+
// SplitMessage splits long messages into chunks, preserving code block integrity.
8+
// The function reserves a buffer (10% of maxLen, min 50) to leave room for closing code blocks,
9+
// but may extend to maxLen when needed.
10+
// Call SplitMessage with the full text content and the maximum allowed length of a single message;
11+
// it returns a slice of message chunks that each respect maxLen and avoid splitting fenced code blocks.
12+
func SplitMessage(content string, maxLen int) []string {
13+
var messages []string
14+
15+
// Dynamic buffer: 10% of maxLen, but at least 50 chars if possible
16+
codeBlockBuffer := maxLen / 10
17+
if codeBlockBuffer < 50 {
18+
codeBlockBuffer = 50
19+
}
20+
if codeBlockBuffer > maxLen/2 {
21+
codeBlockBuffer = maxLen / 2
22+
}
23+
24+
for len(content) > 0 {
25+
if len(content) <= maxLen {
26+
messages = append(messages, content)
27+
break
28+
}
29+
30+
// Effective split point: maxLen minus buffer, to leave room for code blocks
31+
effectiveLimit := maxLen - codeBlockBuffer
32+
if effectiveLimit < maxLen/2 {
33+
effectiveLimit = maxLen / 2
34+
}
35+
36+
// Find natural split point within the effective limit
37+
msgEnd := findLastNewline(content[:effectiveLimit], 200)
38+
if msgEnd <= 0 {
39+
msgEnd = findLastSpace(content[:effectiveLimit], 100)
40+
}
41+
if msgEnd <= 0 {
42+
msgEnd = effectiveLimit
43+
}
44+
45+
// Check if this would end with an incomplete code block
46+
candidate := content[:msgEnd]
47+
unclosedIdx := findLastUnclosedCodeBlock(candidate)
48+
49+
if unclosedIdx >= 0 {
50+
// Message would end with incomplete code block
51+
// Try to extend up to maxLen to include the closing ```
52+
if len(content) > msgEnd {
53+
closingIdx := findNextClosingCodeBlock(content, msgEnd)
54+
if closingIdx > 0 && closingIdx <= maxLen {
55+
// Extend to include the closing ```
56+
msgEnd = closingIdx
57+
} else {
58+
// Code block is too long to fit in one chunk or missing closing fence.
59+
// Try to split inside by injecting closing and reopening fences.
60+
headerEnd := strings.Index(content[unclosedIdx:], "\n")
61+
if headerEnd == -1 {
62+
headerEnd = unclosedIdx + 3
63+
} else {
64+
headerEnd += unclosedIdx
65+
}
66+
header := strings.TrimSpace(content[unclosedIdx:headerEnd])
67+
68+
// If we have a reasonable amount of content after the header, split inside
69+
if msgEnd > headerEnd+20 {
70+
// Find a better split point closer to maxLen
71+
innerLimit := maxLen - 5 // Leave room for "\n```"
72+
betterEnd := findLastNewline(content[:innerLimit], 200)
73+
if betterEnd > headerEnd {
74+
msgEnd = betterEnd
75+
} else {
76+
msgEnd = innerLimit
77+
}
78+
messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```")
79+
content = strings.TrimSpace(header + "\n" + content[msgEnd:])
80+
continue
81+
}
82+
83+
// Otherwise, try to split before the code block starts
84+
newEnd := findLastNewline(content[:unclosedIdx], 200)
85+
if newEnd <= 0 {
86+
newEnd = findLastSpace(content[:unclosedIdx], 100)
87+
}
88+
if newEnd > 0 {
89+
msgEnd = newEnd
90+
} else {
91+
// If we can't split before, we MUST split inside (last resort)
92+
if unclosedIdx > 20 {
93+
msgEnd = unclosedIdx
94+
} else {
95+
msgEnd = maxLen - 5
96+
messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```")
97+
content = strings.TrimSpace(header + "\n" + content[msgEnd:])
98+
continue
99+
}
100+
}
101+
}
102+
}
103+
}
104+
105+
if msgEnd <= 0 {
106+
msgEnd = effectiveLimit
107+
}
108+
109+
messages = append(messages, content[:msgEnd])
110+
content = strings.TrimSpace(content[msgEnd:])
111+
}
112+
113+
return messages
114+
}
115+
116+
// findLastUnclosedCodeBlock reports where the code block that is still open at
// the end of text begins.
//
// Fences (```) are matched left to right without overlap; each one toggles
// between "outside" and "inside" a block. The result is the byte index of the
// opening fence when text ends inside a block, or -1 when every block is closed.
func findLastUnclosedCodeBlock(text string) int {
	const fence = "```"

	openAt := -1 // index of the currently open fence, -1 while outside a block
	for pos := 0; ; {
		rel := strings.Index(text[pos:], fence)
		if rel < 0 {
			break
		}
		at := pos + rel
		if openAt < 0 {
			openAt = at
		} else {
			openAt = -1
		}
		pos = at + len(fence)
	}
	return openAt
}
139+
140+
// findNextClosingCodeBlock looks for the first ``` that begins at or after
// startIdx in text.
//
// It returns the byte index immediately after that fence, or -1 when there is
// none.
func findNextClosingCodeBlock(text string, startIdx int) int {
	const fence = "```"

	for pos := startIdx; pos+len(fence) <= len(text); pos++ {
		if text[pos:pos+len(fence)] == fence {
			return pos + len(fence)
		}
	}
	return -1
}
150+
151+
// findLastNewline searches the trailing searchWindow bytes of s for a newline.
//
// It returns the byte index (relative to s) of the newline closest to the end,
// or -1 when the window contains none. A window larger than s covers all of s.
func findLastNewline(s string, searchWindow int) int {
	windowStart := len(s) - searchWindow
	switch {
	case windowStart < 0:
		windowStart = 0
	case windowStart > len(s):
		// Negative window: there is nothing to search.
		return -1
	}

	if rel := strings.LastIndexByte(s[windowStart:], '\n'); rel >= 0 {
		return windowStart + rel
	}
	return -1
}
165+
166+
// findLastSpace searches the trailing searchWindow bytes of s for a space or
// tab.
//
// It returns the byte index (relative to s) of the match closest to the end,
// or -1 when the window contains none. A window larger than s covers all of s.
func findLastSpace(s string, searchWindow int) int {
	windowStart := len(s) - searchWindow
	switch {
	case windowStart < 0:
		windowStart = 0
	case windowStart > len(s):
		// Negative window: there is nothing to search.
		return -1
	}

	if rel := strings.LastIndexAny(s[windowStart:], " \t"); rel >= 0 {
		return windowStart + rel
	}
	return -1
}

0 commit comments

Comments
 (0)