Skip to content

Commit a27628f

Browse files
mjguzikbrauner
authored andcommitted
fs: rework I_NEW handling to operate without fences
In the inode hash code grab the state while ->i_lock is held. If found to be set, synchronize the sleep once more with the lock held. In the real world the flag is not set most of the time. Apart from being simpler to reason about, it comes with a minor speed up as now clearing the flag does not require the smp_mb() fence. While here rename wait_on_inode() to wait_on_new_inode() to line it up with __wait_on_freeing_inode(). Christian Brauner <brauner@kernel.org> says: As per the discussion in [1] I folded in the diff sent in [2]. Link: https://lore.kernel.org/69238e4d.a70a0220.d98e3.006e.GAE@google.com [1] Link: https://lore.kernel.org/c2kpawomkbvtahjm7y5mposbhckb7wxthi3iqy5yr22ggpucrm@ufvxwy233qxo [2] Signed-off-by: Mateusz Guzik <mjguzik@gmail.com> Link: https://patch.msgid.link/20251010221737.1403539-1-mjguzik@gmail.com Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent 11f2af2 commit a27628f

File tree

5 files changed

+66
-60
lines changed

5 files changed

+66
-60
lines changed

fs/afs/dir.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -779,7 +779,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
779779
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
780780
struct inode *inode = NULL, *ti;
781781
afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
782-
bool supports_ibulk;
782+
bool supports_ibulk, isnew;
783783
long ret;
784784
int i;
785785

@@ -850,7 +850,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
850850
* callback counters.
851851
*/
852852
ti = ilookup5_nowait(dir->i_sb, vp->fid.vnode,
853-
afs_ilookup5_test_by_fid, &vp->fid);
853+
afs_ilookup5_test_by_fid, &vp->fid, &isnew);
854854
if (!IS_ERR_OR_NULL(ti)) {
855855
vnode = AFS_FS_I(ti);
856856
vp->dv_before = vnode->status.data_version;

fs/dcache.c

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1981,17 +1981,7 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
19811981
spin_lock(&inode->i_lock);
19821982
__d_instantiate(entry, inode);
19831983
WARN_ON(!(inode_state_read(inode) & I_NEW));
1984-
/*
1985-
* Pairs with smp_rmb in wait_on_inode().
1986-
*/
1987-
smp_wmb();
19881984
inode_state_clear(inode, I_NEW | I_CREATING);
1989-
/*
1990-
* Pairs with the barrier in prepare_to_wait_event() to make sure
1991-
* ___wait_var_event() either sees the bit cleared or
1992-
* waitqueue_active() check in wake_up_var() sees the waiter.
1993-
*/
1994-
smp_mb();
19951985
inode_wake_up_bit(inode, __I_NEW);
19961986
spin_unlock(&inode->i_lock);
19971987
}

fs/gfs2/glock.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -957,7 +957,7 @@ static struct gfs2_inode *gfs2_grab_existing_inode(struct gfs2_glock *gl)
957957
ip = NULL;
958958
spin_unlock(&gl->gl_lockref.lock);
959959
if (ip) {
960-
wait_on_inode(&ip->i_inode);
960+
wait_on_new_inode(&ip->i_inode);
961961
if (is_bad_inode(&ip->i_inode)) {
962962
iput(&ip->i_inode);
963963
ip = NULL;

fs/inode.c

Lines changed: 61 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,32 @@ struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
558558
}
559559
EXPORT_SYMBOL(inode_bit_waitqueue);
560560

561+
void wait_on_new_inode(struct inode *inode)
562+
{
563+
struct wait_bit_queue_entry wqe;
564+
struct wait_queue_head *wq_head;
565+
566+
spin_lock(&inode->i_lock);
567+
if (!(inode_state_read(inode) & I_NEW)) {
568+
spin_unlock(&inode->i_lock);
569+
return;
570+
}
571+
572+
wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
573+
for (;;) {
574+
prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
575+
if (!(inode_state_read(inode) & I_NEW))
576+
break;
577+
spin_unlock(&inode->i_lock);
578+
schedule();
579+
spin_lock(&inode->i_lock);
580+
}
581+
finish_wait(wq_head, &wqe.wq_entry);
582+
WARN_ON(inode_state_read(inode) & I_NEW);
583+
spin_unlock(&inode->i_lock);
584+
}
585+
EXPORT_SYMBOL(wait_on_new_inode);
586+
561587
/*
562588
* Add inode to LRU if needed (inode is unused and clean).
563589
*
@@ -1008,7 +1034,8 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
10081034
static struct inode *find_inode(struct super_block *sb,
10091035
struct hlist_head *head,
10101036
int (*test)(struct inode *, void *),
1011-
void *data, bool is_inode_hash_locked)
1037+
void *data, bool is_inode_hash_locked,
1038+
bool *isnew)
10121039
{
10131040
struct inode *inode = NULL;
10141041

@@ -1035,6 +1062,7 @@ static struct inode *find_inode(struct super_block *sb,
10351062
return ERR_PTR(-ESTALE);
10361063
}
10371064
__iget(inode);
1065+
*isnew = !!(inode_state_read(inode) & I_NEW);
10381066
spin_unlock(&inode->i_lock);
10391067
rcu_read_unlock();
10401068
return inode;
@@ -1049,7 +1077,7 @@ static struct inode *find_inode(struct super_block *sb,
10491077
*/
10501078
static struct inode *find_inode_fast(struct super_block *sb,
10511079
struct hlist_head *head, unsigned long ino,
1052-
bool is_inode_hash_locked)
1080+
bool is_inode_hash_locked, bool *isnew)
10531081
{
10541082
struct inode *inode = NULL;
10551083

@@ -1076,6 +1104,7 @@ static struct inode *find_inode_fast(struct super_block *sb,
10761104
return ERR_PTR(-ESTALE);
10771105
}
10781106
__iget(inode);
1107+
*isnew = !!(inode_state_read(inode) & I_NEW);
10791108
spin_unlock(&inode->i_lock);
10801109
rcu_read_unlock();
10811110
return inode;
@@ -1181,17 +1210,7 @@ void unlock_new_inode(struct inode *inode)
11811210
lockdep_annotate_inode_mutex_key(inode);
11821211
spin_lock(&inode->i_lock);
11831212
WARN_ON(!(inode_state_read(inode) & I_NEW));
1184-
/*
1185-
* Pairs with smp_rmb in wait_on_inode().
1186-
*/
1187-
smp_wmb();
11881213
inode_state_clear(inode, I_NEW | I_CREATING);
1189-
/*
1190-
* Pairs with the barrier in prepare_to_wait_event() to make sure
1191-
* ___wait_var_event() either sees the bit cleared or
1192-
* waitqueue_active() check in wake_up_var() sees the waiter.
1193-
*/
1194-
smp_mb();
11951214
inode_wake_up_bit(inode, __I_NEW);
11961215
spin_unlock(&inode->i_lock);
11971216
}
@@ -1202,17 +1221,7 @@ void discard_new_inode(struct inode *inode)
12021221
lockdep_annotate_inode_mutex_key(inode);
12031222
spin_lock(&inode->i_lock);
12041223
WARN_ON(!(inode_state_read(inode) & I_NEW));
1205-
/*
1206-
* Pairs with smp_rmb in wait_on_inode().
1207-
*/
1208-
smp_wmb();
12091224
inode_state_clear(inode, I_NEW);
1210-
/*
1211-
* Pairs with the barrier in prepare_to_wait_event() to make sure
1212-
* ___wait_var_event() either sees the bit cleared or
1213-
* waitqueue_active() check in wake_up_var() sees the waiter.
1214-
*/
1215-
smp_mb();
12161225
inode_wake_up_bit(inode, __I_NEW);
12171226
spin_unlock(&inode->i_lock);
12181227
iput(inode);
@@ -1268,6 +1277,7 @@ EXPORT_SYMBOL(unlock_two_nondirectories);
12681277
* @test: callback used for comparisons between inodes
12691278
* @set: callback used to initialize a new struct inode
12701279
* @data: opaque data pointer to pass to @test and @set
1280+
* @isnew: pointer to a bool which will indicate whether I_NEW is set
12711281
*
12721282
* Search for the inode specified by @hashval and @data in the inode cache,
12731283
* and if present return it with an increased reference count. This is a
@@ -1286,12 +1296,13 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
12861296
{
12871297
struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
12881298
struct inode *old;
1299+
bool isnew;
12891300

12901301
might_sleep();
12911302

12921303
again:
12931304
spin_lock(&inode_hash_lock);
1294-
old = find_inode(inode->i_sb, head, test, data, true);
1305+
old = find_inode(inode->i_sb, head, test, data, true, &isnew);
12951306
if (unlikely(old)) {
12961307
/*
12971308
* Uhhuh, somebody else created the same inode under us.
@@ -1300,7 +1311,8 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
13001311
spin_unlock(&inode_hash_lock);
13011312
if (IS_ERR(old))
13021313
return NULL;
1303-
wait_on_inode(old);
1314+
if (unlikely(isnew))
1315+
wait_on_new_inode(old);
13041316
if (unlikely(inode_unhashed(old))) {
13051317
iput(old);
13061318
goto again;
@@ -1391,15 +1403,17 @@ struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval,
13911403
{
13921404
struct hlist_head *head = inode_hashtable + hash(sb, hashval);
13931405
struct inode *inode, *new;
1406+
bool isnew;
13941407

13951408
might_sleep();
13961409

13971410
again:
1398-
inode = find_inode(sb, head, test, data, false);
1411+
inode = find_inode(sb, head, test, data, false, &isnew);
13991412
if (inode) {
14001413
if (IS_ERR(inode))
14011414
return NULL;
1402-
wait_on_inode(inode);
1415+
if (unlikely(isnew))
1416+
wait_on_new_inode(inode);
14031417
if (unlikely(inode_unhashed(inode))) {
14041418
iput(inode);
14051419
goto again;
@@ -1434,15 +1448,17 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
14341448
{
14351449
struct hlist_head *head = inode_hashtable + hash(sb, ino);
14361450
struct inode *inode;
1451+
bool isnew;
14371452

14381453
might_sleep();
14391454

14401455
again:
1441-
inode = find_inode_fast(sb, head, ino, false);
1456+
inode = find_inode_fast(sb, head, ino, false, &isnew);
14421457
if (inode) {
14431458
if (IS_ERR(inode))
14441459
return NULL;
1445-
wait_on_inode(inode);
1460+
if (unlikely(isnew))
1461+
wait_on_new_inode(inode);
14461462
if (unlikely(inode_unhashed(inode))) {
14471463
iput(inode);
14481464
goto again;
@@ -1456,7 +1472,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
14561472

14571473
spin_lock(&inode_hash_lock);
14581474
/* We released the lock, so.. */
1459-
old = find_inode_fast(sb, head, ino, true);
1475+
old = find_inode_fast(sb, head, ino, true, &isnew);
14601476
if (!old) {
14611477
inode->i_ino = ino;
14621478
spin_lock(&inode->i_lock);
@@ -1482,7 +1498,8 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
14821498
if (IS_ERR(old))
14831499
return NULL;
14841500
inode = old;
1485-
wait_on_inode(inode);
1501+
if (unlikely(isnew))
1502+
wait_on_new_inode(inode);
14861503
if (unlikely(inode_unhashed(inode))) {
14871504
iput(inode);
14881505
goto again;
@@ -1586,13 +1603,13 @@ EXPORT_SYMBOL(igrab);
15861603
* Note2: @test is called with the inode_hash_lock held, so can't sleep.
15871604
*/
15881605
struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
1589-
int (*test)(struct inode *, void *), void *data)
1606+
int (*test)(struct inode *, void *), void *data, bool *isnew)
15901607
{
15911608
struct hlist_head *head = inode_hashtable + hash(sb, hashval);
15921609
struct inode *inode;
15931610

15941611
spin_lock(&inode_hash_lock);
1595-
inode = find_inode(sb, head, test, data, true);
1612+
inode = find_inode(sb, head, test, data, true, isnew);
15961613
spin_unlock(&inode_hash_lock);
15971614

15981615
return IS_ERR(inode) ? NULL : inode;
@@ -1620,13 +1637,15 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
16201637
int (*test)(struct inode *, void *), void *data)
16211638
{
16221639
struct inode *inode;
1640+
bool isnew;
16231641

16241642
might_sleep();
16251643

16261644
again:
1627-
inode = ilookup5_nowait(sb, hashval, test, data);
1645+
inode = ilookup5_nowait(sb, hashval, test, data, &isnew);
16281646
if (inode) {
1629-
wait_on_inode(inode);
1647+
if (unlikely(isnew))
1648+
wait_on_new_inode(inode);
16301649
if (unlikely(inode_unhashed(inode))) {
16311650
iput(inode);
16321651
goto again;
@@ -1648,16 +1667,18 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
16481667
{
16491668
struct hlist_head *head = inode_hashtable + hash(sb, ino);
16501669
struct inode *inode;
1670+
bool isnew;
16511671

16521672
might_sleep();
16531673

16541674
again:
1655-
inode = find_inode_fast(sb, head, ino, false);
1675+
inode = find_inode_fast(sb, head, ino, false, &isnew);
16561676

16571677
if (inode) {
16581678
if (IS_ERR(inode))
16591679
return NULL;
1660-
wait_on_inode(inode);
1680+
if (unlikely(isnew))
1681+
wait_on_new_inode(inode);
16611682
if (unlikely(inode_unhashed(inode))) {
16621683
iput(inode);
16631684
goto again;
@@ -1800,6 +1821,7 @@ int insert_inode_locked(struct inode *inode)
18001821
struct super_block *sb = inode->i_sb;
18011822
ino_t ino = inode->i_ino;
18021823
struct hlist_head *head = inode_hashtable + hash(sb, ino);
1824+
bool isnew;
18031825

18041826
might_sleep();
18051827

@@ -1832,9 +1854,11 @@ int insert_inode_locked(struct inode *inode)
18321854
return -EBUSY;
18331855
}
18341856
__iget(old);
1857+
isnew = !!(inode_state_read(old) & I_NEW);
18351858
spin_unlock(&old->i_lock);
18361859
spin_unlock(&inode_hash_lock);
1837-
wait_on_inode(old);
1860+
if (isnew)
1861+
wait_on_new_inode(old);
18381862
if (unlikely(!inode_unhashed(old))) {
18391863
iput(old);
18401864
return -EBUSY;

include/linux/fs.h

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1030,15 +1030,7 @@ static inline void inode_fake_hash(struct inode *inode)
10301030
hlist_add_fake(&inode->i_hash);
10311031
}
10321032

1033-
static inline void wait_on_inode(struct inode *inode)
1034-
{
1035-
wait_var_event(inode_state_wait_address(inode, __I_NEW),
1036-
!(inode_state_read_once(inode) & I_NEW));
1037-
/*
1038-
* Pairs with routines clearing I_NEW.
1039-
*/
1040-
smp_rmb();
1041-
}
1033+
void wait_on_new_inode(struct inode *inode);
10421034

10431035
/*
10441036
* inode->i_rwsem nesting subclasses for the lock validator:
@@ -3417,7 +3409,7 @@ extern void d_mark_dontcache(struct inode *inode);
34173409

34183410
extern struct inode *ilookup5_nowait(struct super_block *sb,
34193411
unsigned long hashval, int (*test)(struct inode *, void *),
3420-
void *data);
3412+
void *data, bool *isnew);
34213413
extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
34223414
int (*test)(struct inode *, void *), void *data);
34233415
extern struct inode *ilookup(struct super_block *sb, unsigned long ino);

0 commit comments

Comments
 (0)