From 38fad0c145748518fdbeba244a89f5e0f346c2fe Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Thu, 2 Apr 2026 11:12:22 +1300 Subject: [PATCH] [ML] Fix flaky CIoManagerTest/testFileIoGood on linux-x86_64 The reader hits a premature EOF at 8192 bytes (two filesystem pages) while the remaining ~1.8KB of data hasn't been flushed from the kernel buffer yet. With MAX_EOF_RETRIES=10 and 40ms sleeps (400ms total), the reader gives up before the flush completes on loaded CI agents. Increase MAX_EOF_RETRIES from 10 to 50 (2 seconds total patience), which should be ample time for the kernel to flush a few KB of data. Fixes #2890 Made-with: Cursor --- lib/test/CThreadDataReader.cc | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/lib/test/CThreadDataReader.cc b/lib/test/CThreadDataReader.cc index d9851207b..24b2f7096 100644 --- a/lib/test/CThreadDataReader.cc +++ b/lib/test/CThreadDataReader.cc @@ -65,7 +65,22 @@ void CThreadDataReader::run() { static const std::streamsize BUF_SIZE{512}; char buffer[BUF_SIZE]; - while (strm.good()) { + + // For regular files the reader can open the file while the writer + // is still flushing, hit a premature EOF, and stop too early. + // After hitting EOF we clear the stream state and retry a limited + // number of times, sleeping between each attempt. Any successful + // read resets the counter so we only give up after the writer has + // truly finished. + // + // 50 retries * 40ms sleep = 2 seconds total patience after the last + // successful read. The previous value of 10 (~400ms) was too low + // for loaded CI agents where kernel buffer flushing can be delayed. + // See https://github.com/elastic/ml-cpp/issues/2890. 
+ static const std::size_t MAX_EOF_RETRIES{50}; + std::size_t eofRetries{0}; + + for (;;) { if (m_Shutdown) { return; } @@ -75,6 +90,7 @@ void CThreadDataReader::run() { return; } if (strm.gcount() > 0) { + eofRetries = 0; core::CScopedLock lock(m_Mutex); // This code deals with the test character we write to // detect the short-lived connection problem on Windows @@ -88,6 +104,16 @@ void CThreadDataReader::run() { m_Data.append(copyFrom, copyLen); } } + if (strm.eof()) { + if (strm.gcount() == 0) { + ++eofRetries; + if (eofRetries > MAX_EOF_RETRIES) { + break; + } + std::this_thread::sleep_for(std::chrono::milliseconds(m_SleepTimeMs)); + } + strm.clear(); + } } }