Skip to content

Commit eca2294

Browse files
perf(loki.source.file): Reduce allocations during file reading [backport] (#5416)
## Backport of #5405 This PR backports #5405 to release/v1.13. ### Original PR Author @kalleep ### Description ### Pull Request Details There are a lot of allocations being performed while reading files. I can clearly see this being a hot spot in our internal clusters. Two of the biggest issues are: 1. We allocate a new pending buffer as soon as we have completed a line 2. We use the `ReadBytes` API, causing a new allocation for every call that we just move to pending. Instead, we can use `ReadSlice`; this API will not allocate but returns a slice pointing to the internal buffer while still advancing the read position. We copy it into `pending`, so this is fine, and we remove the intermediate allocation done by `ReadBytes`. We can also reset the length of `pending` and reuse this buffer. This is fine because we never read past the newline boundary. ### Issue(s) fixed by this Pull Request <!-- Fixes #issue_id --> ### Notes to the Reviewer <!-- Add any relevant notes for the reviewers and testers of this PR. --> ### PR Checklist <!-- Remove items that do not apply. For completed items, change [ ] to [x]. --> - [ ] Documentation added - [x] Tests updated - [ ] Config converters updated --- *This backport was created automatically.* Co-authored-by: Karl Persson <23356117+kalleep@users.noreply.github.com>
1 parent 311662f commit eca2294

File tree

2 files changed

+50
-21
lines changed

2 files changed

+50
-21
lines changed

internal/component/loki/source/file/internal/tail/file_test.go

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package tail
22

33
import (
4+
"bytes"
45
"compress/gzip"
56
"compress/zlib"
67
"context"
@@ -472,9 +473,9 @@ func compressionTest(t *testing.T, name, compression string, enc *encoding.Encod
472473
})
473474
}
474475

475-
func createFile(t *testing.T, name, content string) string {
476-
path := t.TempDir() + "/" + name
477-
require.NoError(t, os.WriteFile(path, []byte(content), 0600))
476+
func createFile(tb testing.TB, name, content string) string {
477+
path := tb.TempDir() + "/" + name
478+
require.NoError(tb, os.WriteFile(path, []byte(content), 0600))
478479
return path
479480
}
480481

@@ -494,8 +495,8 @@ func truncateFile(t *testing.T, name, content string) {
494495
require.NoError(t, err)
495496
}
496497

497-
func removeFile(t *testing.T, name string) {
498-
require.NoError(t, os.Remove(name))
498+
func removeFile(tb testing.TB, name string) {
499+
require.NoError(tb, os.Remove(name))
499500
}
500501

501502
func rotateFile(t *testing.T, name, newContent string) {
@@ -548,3 +549,36 @@ func verifyResult(t *testing.T, f *File, expectedLine *Line, expectedErr error)
548549
require.Equal(t, expectedLine.Offset, line.Offset)
549550
}
550551
}
552+
553+
var benchLine *Line
554+
555+
func BenchmarkFile(b *testing.B) {
556+
// we create a file with 1000 lines and each line is 500 bytes.
557+
line := bytes.Repeat([]byte{'a'}, 500)
558+
lines := strings.Repeat(string(line)+"\n", 1000)
559+
name := createFile(b, "benchfile", lines)
560+
defer removeFile(b, name)
561+
562+
b.ReportAllocs()
563+
564+
for b.Loop() {
565+
file, err := NewFile(log.NewNopLogger(), &Config{
566+
Filename: name,
567+
WatcherConfig: WatcherConfig{},
568+
})
569+
require.NoError(b, err)
570+
// Disable waiting at EOF so Next returns io.EOF after the file is fully consumed.
571+
file.waitAtEOF = false
572+
573+
for {
574+
var err error
575+
benchLine, err = file.Next()
576+
if errors.Is(err, io.EOF) {
577+
break
578+
}
579+
require.NoError(b, err)
580+
}
581+
582+
file.Stop()
583+
}
584+
}

internal/component/loki/source/file/internal/tail/reader.go

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"compress/bzip2"
77
"compress/gzip"
88
"compress/zlib"
9+
"errors"
910
"io"
1011
"os"
1112
"unsafe"
@@ -79,14 +80,9 @@ type reader struct {
7980
// next reads and returns the next complete line from the file.
8081
// It will return EOF if there is no more data to read.
8182
func (r *reader) next() (string, error) {
82-
// First we check if we already have a full line buffered.
83-
if line, ok := r.consumeLine(); ok {
84-
return r.decode(line)
85-
}
86-
8783
for {
8884
// Read more data up until the last byte of nl.
89-
chunk, err := r.br.ReadBytes(r.lastNl)
85+
chunk, err := r.br.ReadSlice(r.lastNl)
9086
if len(chunk) > 0 {
9187
r.pending = append(r.pending, chunk...)
9288

@@ -95,13 +91,12 @@ func (r *reader) next() (string, error) {
9591
}
9692
}
9793

98-
// If we did not get an error and did not find a full line we
99-
// need to read more data.
100-
if err == nil {
101-
continue
102-
}
94+
// ReadSlice does not allocate; it returns a slice into bufio's buffer and advances
95+
// the read position. If we did not find a full line or got ErrBufferFull, loop and call again.
96+
if err != nil && !errors.Is(err, bufio.ErrBufferFull) {
97+
return "", err
10398

104-
return "", err
99+
}
105100
}
106101
}
107102

@@ -115,7 +110,7 @@ func (r *reader) flush() (string, error) {
115110

116111
line := r.pending[:]
117112
r.pos += int64(len(line))
118-
r.pending = make([]byte, 0, defaultBufSize)
113+
r.pending = r.pending[:0]
119114
return r.decode(bytes.TrimSuffix(line, r.nl))
120115
}
121116

@@ -142,9 +137,9 @@ func (r *reader) consumeLine() ([]byte, bool) {
142137

143138
// Extract everything up until newline.
144139
line := r.pending[:i]
145-
// Keep everything except the line we extracted and newline.
146-
rem := r.pending[i+len(r.nl):]
147-
r.pending = append(make([]byte, 0, defaultBufSize), rem...)
140+
141+
// Reset pending. We never buffer beyond newline so it is safe to reset.
142+
r.pending = r.pending[:0]
148143

149144
// Advance the position on bytes we have consumed as a full line.
150145
r.pos += int64(len(line) + len(r.nl))

0 commit comments

Comments
 (0)