Skip to content

Commit 3a81fd0

Browse files
committed
io_uring: enable LOOKUP_CACHED path resolution for filename lookups
Instead of being pessimistic and assuming that path lookup will block, use LOOKUP_CACHED to attempt just a cached lookup. This ensures that the fast path is always done inline, and we only punt to async context if IO is needed to satisfy the lookup.

For forced nonblock open attempts, mark the file O_NONBLOCK over the actual ->open() call as well. We can safely clear this again before doing fd_install(), so it'll never be user visible that we fiddled with it.

This greatly improves the performance of file open where the dentry is already cached:

Cached   5.10-git    5.10-git+LOOKUP_CACHED   Speedup
---------------------------------------------------------------
 33%     1,014,975   900,474                  1.1x
 89%       545,466   292,937                  1.9x
100%       435,636   151,475                  2.9x

The more cache hot we are, the faster the inline LOOKUP_CACHED optimization helps. This is unsurprising and expected, as a thread offload becomes a more dominant part of the total overhead. If we look at io_uring tracing, doing an IORING_OP_OPENAT on a file that isn't in the dentry cache will yield:

275.550481: io_uring_create: ring 00000000ddda6278, fd 3 sq size 8, cq size 16, flags 0
275.550491: io_uring_submit_sqe: ring 00000000ddda6278, op 18, data 0x0, non block 1, sq_thread 0
275.550498: io_uring_queue_async_work: ring 00000000ddda6278, request 00000000c0267d17, flags 69760, normal queue, work 000000003d683991
275.550502: io_uring_cqring_wait: ring 00000000ddda6278, min_events 1
275.550556: io_uring_complete: ring 00000000ddda6278, user_data 0x0, result 4

which shows a failed nonblock lookup, then punt to worker, and then we complete with fd == 4. This takes 65 usec in total. Re-running the same test case again:

281.253956: io_uring_create: ring 0000000008207252, fd 3 sq size 8, cq size 16, flags 0
281.253967: io_uring_submit_sqe: ring 0000000008207252, op 18, data 0x0, non block 1, sq_thread 0
281.253973: io_uring_complete: ring 0000000008207252, user_data 0x0, result 4

shows the same request completing inline, also returning fd == 4.
This takes 6 usec. Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent b2d86c7 commit 3a81fd0

File tree

1 file changed

+27
-20
lines changed

1 file changed

+27
-20
lines changed

fs/io_uring.c

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,6 @@ struct io_sr_msg {
489489
struct io_open {
490490
struct file *file;
491491
int dfd;
492-
bool ignore_nonblock;
493492
struct filename *filename;
494493
struct open_how how;
495494
unsigned long nofile;
@@ -4054,7 +4053,6 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
40544053
return ret;
40554054
}
40564055
req->open.nofile = rlimit(RLIMIT_NOFILE);
4057-
req->open.ignore_nonblock = false;
40584056
req->flags |= REQ_F_NEED_CLEANUP;
40594057
return 0;
40604058
}
@@ -4096,39 +4094,48 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock)
40964094
{
40974095
struct open_flags op;
40984096
struct file *file;
4097+
bool nonblock_set;
4098+
bool resolve_nonblock;
40994099
int ret;
41004100

4101-
if (force_nonblock && !req->open.ignore_nonblock)
4102-
return -EAGAIN;
4103-
41044101
ret = build_open_flags(&req->open.how, &op);
41054102
if (ret)
41064103
goto err;
4104+
nonblock_set = op.open_flag & O_NONBLOCK;
4105+
resolve_nonblock = req->open.how.resolve & RESOLVE_CACHED;
4106+
if (force_nonblock) {
4107+
/*
4108+
* Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open,
4109+
* it'll always -EAGAIN
4110+
*/
4111+
if (req->open.how.flags & (O_TRUNC | O_CREAT | O_TMPFILE))
4112+
return -EAGAIN;
4113+
op.lookup_flags |= LOOKUP_CACHED;
4114+
op.open_flag |= O_NONBLOCK;
4115+
}
41074116

41084117
ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
41094118
if (ret < 0)
41104119
goto err;
41114120

41124121
file = do_filp_open(req->open.dfd, req->open.filename, &op);
4122+
/* only retry if RESOLVE_CACHED wasn't already set by application */
4123+
if ((!resolve_nonblock && force_nonblock) && file == ERR_PTR(-EAGAIN)) {
4124+
/*
4125+
* We could hang on to this 'fd', but seems like marginal
4126+
* gain for something that is now known to be a slower path.
4127+
* So just put it, and we'll get a new one when we retry.
4128+
*/
4129+
put_unused_fd(ret);
4130+
return -EAGAIN;
4131+
}
4132+
41134133
if (IS_ERR(file)) {
41144134
put_unused_fd(ret);
41154135
ret = PTR_ERR(file);
4116-
/*
4117-
* A work-around to ensure that /proc/self works that way
4118-
* that it should - if we get -EOPNOTSUPP back, then assume
4119-
* that proc_self_get_link() failed us because we're in async
4120-
* context. We should be safe to retry this from the task
4121-
* itself with force_nonblock == false set, as it should not
4122-
* block on lookup. Would be nice to know this upfront and
4123-
* avoid the async dance, but doesn't seem feasible.
4124-
*/
4125-
if (ret == -EOPNOTSUPP && io_wq_current_is_worker()) {
4126-
req->open.ignore_nonblock = true;
4127-
refcount_inc(&req->refs);
4128-
io_req_task_queue(req);
4129-
return 0;
4130-
}
41314136
} else {
4137+
if (force_nonblock && !nonblock_set)
4138+
file->f_flags &= ~O_NONBLOCK;
41324139
fsnotify_open(file);
41334140
fd_install(ret, file);
41344141
}

0 commit comments

Comments
 (0)