Skip to content

Commit 253ca86

Browse files
torvaldsbrauner
authored andcommitted
Improve __fget_files_rcu() code generation (and thus __fget_light())
Commit 0ede61d ("file: convert to SLAB_TYPESAFE_BY_RCU") caused a performance regression as reported by the kernel test robot. The __fget_light() function is one of those critical ones for some loads, and the code generation was unnecessarily impacted. Let's just write that function to better. Reported-by: kernel test robot <oliver.sang@intel.com> Cc: Christian Brauner <brauner@kernel.org> Cc: Jann Horn <jannh@google.com> Cc: Mateusz Guzik <mjguzik@gmail.com> Closes: https://lore.kernel.org/oe-lkp/202311201406.2022ca3f-oliver.sang@intel.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Link: https://lore.kernel.org/r/CAHk-=wiCJtLbFWNURB34b9a_R_unaH3CiMRXfkR0-iihB_z68A@mail.gmail.com Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent 7cb537b commit 253ca86

File tree

2 files changed

+44
-24
lines changed

2 files changed

+44
-24
lines changed

fs/file.c

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -959,39 +959,54 @@ static inline struct file *__fget_files_rcu(struct files_struct *files,
959959
struct file *file;
960960
struct fdtable *fdt = rcu_dereference_raw(files->fdt);
961961
struct file __rcu **fdentry;
962+
unsigned long nospec_mask;
962963

963-
if (unlikely(fd >= fdt->max_fds))
964-
return NULL;
965-
966-
fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds);
964+
/* Mask is a 0 for invalid fd's, ~0 for valid ones */
965+
nospec_mask = array_index_mask_nospec(fd, fdt->max_fds);
967966

968967
/*
969-
* Ok, we have a file pointer. However, because we do
970-
* this all locklessly under RCU, we may be racing with
971-
* that file being closed.
972-
*
973-
* Such a race can take two forms:
974-
*
975-
* (a) the file ref already went down to zero and the
976-
* file hasn't been reused yet or the file count
977-
* isn't zero but the file has already been reused.
968+
* fdentry points to the 'fd' offset, or fdt->fd[0].
969+
* Loading from fdt->fd[0] is always safe, because the
970+
* array always exists.
978971
*/
979-
file = __get_file_rcu(fdentry);
972+
fdentry = fdt->fd + (fd & nospec_mask);
973+
974+
/* Do the load, then mask any invalid result */
975+
file = rcu_dereference_raw(*fdentry);
976+
file = (void *)(nospec_mask & (unsigned long)file);
980977
if (unlikely(!file))
981978
return NULL;
982979

983-
if (unlikely(IS_ERR(file)))
980+
/*
981+
* Ok, we have a file pointer that was valid at
982+
* some point, but it might have become stale since.
983+
*
984+
* We need to confirm it by incrementing the refcount
985+
* and then check the lookup again.
986+
*
987+
* atomic_long_inc_not_zero() gives us a full memory
988+
* barrier. We only really need an 'acquire' one to
989+
* protect the loads below, but we don't have that.
990+
*/
991+
if (unlikely(!atomic_long_inc_not_zero(&file->f_count)))
984992
continue;
985993

986994
/*
995+
* Such a race can take two forms:
996+
*
997+
* (a) the file ref already went down to zero and the
998+
* file hasn't been reused yet or the file count
999+
* isn't zero but the file has already been reused.
1000+
*
9871001
* (b) the file table entry has changed under us.
9881002
* Note that we don't need to re-check the 'fdt->fd'
9891003
* pointer having changed, because it always goes
9901004
* hand-in-hand with 'fdt'.
9911005
*
9921006
* If so, we need to put our ref and try again.
9931007
*/
994-
if (unlikely(rcu_dereference_raw(files->fdt) != fdt)) {
1008+
if (unlikely(file != rcu_dereference_raw(*fdentry)) ||
1009+
unlikely(rcu_dereference_raw(files->fdt) != fdt)) {
9951010
fput(file);
9961011
continue;
9971012
}
@@ -1128,13 +1143,13 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
11281143
* atomic_read_acquire() pairs with atomic_dec_and_test() in
11291144
* put_files_struct().
11301145
*/
1131-
if (atomic_read_acquire(&files->count) == 1) {
1146+
if (likely(atomic_read_acquire(&files->count) == 1)) {
11321147
file = files_lookup_fd_raw(files, fd);
11331148
if (!file || unlikely(file->f_mode & mask))
11341149
return 0;
11351150
return (unsigned long)file;
11361151
} else {
1137-
file = __fget(fd, mask);
1152+
file = __fget_files(files, fd, mask);
11381153
if (!file)
11391154
return 0;
11401155
return FDPUT_FPUT | (unsigned long)file;

include/linux/fdtable.h

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,12 +83,17 @@ struct dentry;
8383
static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd)
8484
{
8585
struct fdtable *fdt = rcu_dereference_raw(files->fdt);
86-
87-
if (fd < fdt->max_fds) {
88-
fd = array_index_nospec(fd, fdt->max_fds);
89-
return rcu_dereference_raw(fdt->fd[fd]);
90-
}
91-
return NULL;
86+
unsigned long mask = array_index_mask_nospec(fd, fdt->max_fds);
87+
struct file *needs_masking;
88+
89+
/*
90+
* 'mask' is zero for an out-of-bounds fd, all ones for ok.
91+
* 'fd&mask' is 'fd' for ok, or 0 for out of bounds.
92+
*
93+
* Accessing fdt->fd[0] is ok, but needs masking of the result.
94+
*/
95+
needs_masking = rcu_dereference_raw(fdt->fd[fd&mask]);
96+
return (struct file *)(mask & (unsigned long)needs_masking);
9297
}
9398

9499
static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd)

0 commit comments

Comments
 (0)