Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion source/common/filesystem/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,12 @@ envoy_cc_library(
"//source/common/common:assert_lib",
"//source/common/common:linked_object",
"//source/common/common:minimal_logger_lib",
"//source/common/common:thread_lib",
"//source/common/common:utility_lib",
"//source/common/network:default_socket_interface_lib",
] + select({
"//bazel:windows_x86_64": [
"//source/common/api:os_sys_calls_lib",
"//source/common/common:thread_lib",
],
"//conditions:default": [],
}),
Expand Down
27 changes: 25 additions & 2 deletions source/common/filesystem/inotify/watcher_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "source/common/common/assert.h"
#include "source/common/common/fmt.h"
#include "source/common/common/thread.h"
#include "source/common/common/utility.h"
#include "source/common/filesystem/watcher_impl.h"

Expand Down Expand Up @@ -52,6 +53,28 @@ absl::Status WatcherImpl::addWatch(absl::string_view path, uint32_t events, OnCh
return absl::OkStatus();
}

void WatcherImpl::callAndLogOnError(OnChangedCb& cb, uint32_t events, const std::string& file) {
TRY_ASSERT_MAIN_THREAD {
const absl::Status status = cb(events);
if (!status.ok()) {
// Use ENVOY_LOG_EVERY_POW_2 to avoid log spam if a callback keeps failing.
ENVOY_LOG_EVERY_POW_2(warn, "Filesystem watch callback for '{}' returned error: {}", file,
status.message());
}
}
END_TRY
MULTI_CATCH(
const std::exception& e,
{
ENVOY_LOG_EVERY_POW_2(warn, "Filesystem watch callback for '{}' threw exception: {}", file,
e.what());
},
{
ENVOY_LOG_EVERY_POW_2(warn, "Filesystem watch callback for '{}' threw unknown exception",
file);
});
}

absl::Status WatcherImpl::onInotifyEvent() {
while (true) {
// The buffer needs to be suitably aligned to store the first inotify_event structure.
Expand Down Expand Up @@ -90,10 +113,10 @@ absl::Status WatcherImpl::onInotifyEvent() {
if (watch.events_ & events) {
if (watch.file_ == file) {
ENVOY_LOG(debug, "matched callback: file: {}", file);
RETURN_IF_NOT_OK(watch.cb_(events));
callAndLogOnError(watch.cb_, events, file);
} else if (watch.file_.empty()) {
ENVOY_LOG(debug, "matched callback: directory: {}", file);
RETURN_IF_NOT_OK(watch.cb_(events));
callAndLogOnError(watch.cb_, events, file);
}
}
}
Expand Down
1 change: 1 addition & 0 deletions source/common/filesystem/inotify/watcher_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class WatcherImpl : public Watcher, Logger::Loggable<Logger::Id::file> {
};

absl::Status onInotifyEvent();
void callAndLogOnError(OnChangedCb& cb, uint32_t events, const std::string& file);

Filesystem::Instance& file_system_;
int inotify_fd_;
Expand Down
51 changes: 45 additions & 6 deletions source/common/filesystem/kqueue/watcher_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "source/common/common/assert.h"
#include "source/common/common/fmt.h"
#include "source/common/common/thread.h"
#include "source/common/common/utility.h"
#include "source/common/filesystem/watcher_impl.h"

Expand Down Expand Up @@ -116,20 +117,31 @@ absl::Status WatcherImpl::onKqueueEvent() {

absl::StatusOr<PathSplitResult> pathname_or_error =
file_system_.splitPathFromFilename(file->file_);
RETURN_IF_NOT_OK_REF(pathname_or_error.status());
if (!pathname_or_error.ok()) {
Copy link
Member

@botengyao botengyao Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A pathname failure is not temporary, will continuing mean you may warn forever on every event and never recover?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm, I think a better fix here would be to remove the broken watch.

// Path split failure is permanent and we can't recover.
// We remove the broken watch to avoid repeated failures.
ENVOY_LOG(warn, "Failed to split path '{}', removing watch: {}", file->file_,
pathname_or_error.status().message());
removeWatch(file);
continue;
}
PathSplitResult& pathname = pathname_or_error.value();

if (file->watching_dir_) {
if (event.fflags & NOTE_DELETE) {
// directory was deleted
// Directory was deleted.
removeWatch(file);
return absl::OkStatus();
}

if (event.fflags & NOTE_WRITE) {
// directory was written -- check if the file we're actually watching appeared
// Directory was written -- check if the file we're actually watching appeared.
auto file_or_error = addWatch(file->file_, file->events_, file->callback_, true);
RETURN_IF_NOT_OK_REF(file_or_error.status());
if (!file_or_error.ok()) {
ENVOY_LOG_EVERY_POW_2(warn, "Failed to re-add watch for '{}': {}", file->file_,
file_or_error.status().message());
continue;
}
FileWatchPtr new_file = file_or_error.value();
if (new_file != nullptr) {
removeWatch(file);
Expand All @@ -150,7 +162,11 @@ absl::Status WatcherImpl::onKqueueEvent() {
removeWatch(file);

auto file_or_error = addWatch(file->file_, file->events_, file->callback_, true);
RETURN_IF_NOT_OK_REF(file_or_error.status());
if (!file_or_error.ok()) {
ENVOY_LOG_EVERY_POW_2(warn, "Failed to re-add watch for '{}': {}", file->file_,
file_or_error.status().message());
continue;
}
FileWatchPtr new_file = file_or_error.value();
if (new_file == nullptr) {
return absl::OkStatus();
Expand All @@ -173,11 +189,34 @@ absl::Status WatcherImpl::onKqueueEvent() {

if (events & file->events_) {
ENVOY_LOG(debug, "matched callback: file: {}", file->file_);
RETURN_IF_NOT_OK(file->callback_(events));
callAndLogOnError(file->callback_, events, file->file_);
}
}
return absl::OkStatus();
}

void WatcherImpl::callAndLogOnError(Watcher::OnChangedCb& cb, uint32_t events,
const std::string& file) {
TRY_ASSERT_MAIN_THREAD {
const absl::Status status = cb(events);
if (!status.ok()) {
// Use ENVOY_LOG_EVERY_POW_2 to avoid log spam if a callback keeps failing.
ENVOY_LOG_EVERY_POW_2(warn, "Filesystem watch callback for '{}' returned error: {}", file,
status.message());
}
}
END_TRY
MULTI_CATCH(
const std::exception& e,
{
ENVOY_LOG_EVERY_POW_2(warn, "Filesystem watch callback for '{}' threw exception: {}", file,
e.what());
},
{
ENVOY_LOG_EVERY_POW_2(warn, "Filesystem watch callback for '{}' threw unknown exception",
file);
});
}

} // namespace Filesystem
} // namespace Envoy
1 change: 1 addition & 0 deletions source/common/filesystem/kqueue/watcher_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class WatcherImpl : public Watcher, Logger::Loggable<Logger::Id::file> {
absl::StatusOr<FileWatchPtr> addWatch(absl::string_view path, uint32_t events,
Watcher::OnChangedCb cb, bool pathMustExist);
void removeWatch(FileWatchPtr& watch);
void callAndLogOnError(OnChangedCb& cb, uint32_t events, const std::string& file);

Filesystem::Instance& file_system_;
int queue_;
Expand Down
2 changes: 2 additions & 0 deletions test/common/filesystem/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,12 @@ envoy_cc_test(
srcs = ["watcher_impl_test.cc"],
rbe_pool = "6gig",
deps = [
"//envoy/common:exception_lib",
"//source/common/common:assert_lib",
"//source/common/event:dispatcher_includes",
"//source/common/event:dispatcher_lib",
"//source/common/filesystem:watcher_lib",
"//test/test_common:environment_lib",
"//test/test_common:logging_lib",
],
)
90 changes: 90 additions & 0 deletions test/common/filesystem/watcher_impl_test.cc
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
#include <cstdint>
#include <fstream>

#include "envoy/common/exception.h"

#include "source/common/common/assert.h"
#include "source/common/event/dispatcher_impl.h"
#include "source/common/filesystem/watcher_impl.h"

#include "test/test_common/environment.h"
#include "test/test_common/logging.h"
#include "test/test_common/utility.h"

#include "gmock/gmock.h"
Expand Down Expand Up @@ -214,5 +217,92 @@ TEST_F(WatcherImplTest, SymlinkAtomicRename) {
}
#endif

// Test that callback returning error status is logged and doesn't crash.
TEST_F(WatcherImplTest, CallbackReturnsErrorStatus) {
Filesystem::WatcherPtr watcher = dispatcher_->createFilesystemWatcher();

TestEnvironment::createPath(TestEnvironment::temporaryPath("envoy_test"));
std::ofstream file(TestEnvironment::temporaryPath("envoy_test/watcher_target"));

WatchCallback callback;
EXPECT_CALL(callback, called(Watcher::Events::Modified));
ASSERT_TRUE(watcher
->addWatch(TestEnvironment::temporaryPath("envoy_test/watcher_target"),
Watcher::Events::Modified,
[&](uint32_t events) {
callback.called(events);
dispatcher_->exit();
// Return an error status - should be logged but not crash.
return absl::InternalError("simulated callback error");
})
.ok());
dispatcher_->run(Event::Dispatcher::RunType::NonBlock);

EXPECT_LOG_CONTAINS("warn", "Filesystem watch callback for", file << "text" << std::flush;
file.close(); dispatcher_->run(Event::Dispatcher::RunType::Block););
}

// Test that callback throwing exception is caught and logged.
TEST_F(WatcherImplTest, CallbackThrowsException) {
Filesystem::WatcherPtr watcher = dispatcher_->createFilesystemWatcher();

TestEnvironment::createPath(TestEnvironment::temporaryPath("envoy_test"));
std::ofstream file(TestEnvironment::temporaryPath("envoy_test/watcher_target"));

WatchCallback callback;
EXPECT_CALL(callback, called(Watcher::Events::Modified));
ASSERT_TRUE(watcher
->addWatch(TestEnvironment::temporaryPath("envoy_test/watcher_target"),
Watcher::Events::Modified,
[&](uint32_t events) -> absl::Status {
callback.called(events);
dispatcher_->exit();
// Throw an exception - should be caught and logged.
throw EnvoyException("simulated callback exception");
})
.ok());
dispatcher_->run(Event::Dispatcher::RunType::NonBlock);

EXPECT_LOG_CONTAINS("warn", "threw exception", file << "text" << std::flush; file.close();
dispatcher_->run(Event::Dispatcher::RunType::Block););
}

// Test that multiple callbacks can fail without affecting each other.
TEST_F(WatcherImplTest, MultipleCallbacksWithErrors) {
Filesystem::WatcherPtr watcher = dispatcher_->createFilesystemWatcher();

TestEnvironment::createPath(TestEnvironment::temporaryPath("envoy_test"));
std::ofstream file(TestEnvironment::temporaryPath("envoy_test/watcher_target"));

int callback_count = 0;
ASSERT_TRUE(watcher
->addWatch(TestEnvironment::temporaryPath("envoy_test/watcher_target"),
Watcher::Events::Modified,
[&](uint32_t) {
callback_count++;
if (callback_count >= 2) {
dispatcher_->exit();
}
// First callback returns error, second returns OK.
if (callback_count == 1) {
return absl::InternalError("first callback error");
}
return absl::OkStatus();
})
.ok());
dispatcher_->run(Event::Dispatcher::RunType::NonBlock);

// Trigger first modification. The first callback returns error, but watcher continues.
file << "text1" << std::flush;
dispatcher_->run(Event::Dispatcher::RunType::NonBlock);

// Trigger second modification. It should still work.
file << "text2" << std::flush;
file.close();
dispatcher_->run(Event::Dispatcher::RunType::Block);

EXPECT_EQ(2, callback_count);
}

} // namespace Filesystem
} // namespace Envoy