2323#include <linux/coredump.h>
2424#include <linux/rhashtable.h>
2525#include <linux/xattr.h>
26+ #include <linux/cookie.h>
2627
2728#include "internal.h"
2829#include "mount.h"
@@ -65,7 +66,39 @@ static const struct rhashtable_params pidfs_ino_ht_params = {
6566 .automatic_shrinking = true,
6667};
6768
69+ /*
70+ * inode number handling
71+ *
72+ * On 64 bit nothing special happens. The 64 bit number assigned
73+ * to struct pid is the inode number.
74+ *
75+ * On 32 bit the 64 bit number assigned to struct pid is split
76+ * into two 32 bit numbers. The lower 32 bits are used as the
77+ * inode number and the upper 32 bits are used as the inode
78+ * generation number.
79+ *
80+ * On 32 bit pidfs_ino() will return the lower 32 bits. When
81+ * pidfs_ino() returns zero a wraparound happened. When a
82+ * wraparound happens the 64 bit number will be incremented by 1
83+ * so inode numbering starts at 1 again.
84+ *
85+ * On 64 bit comparing two pidfds is as simple as comparing
86+ * inode numbers.
87+ *
88+ * When a wraparound happens on 32 bit multiple pidfds with the
89+ * same inode number are likely to exist. This isn't a problem
90+ * since, before pidfs, pidfds used the anonymous inode, meaning
91+ * all pidfds had the same inode number. To compare pidfds
92+ * userspace can reconstruct the 64 bit identifier by retrieving
93+ * both the inode number and the inode generation number, or
94+ * use file handles.
95+ */
96+
6897#if BITS_PER_LONG == 32
98+
99+ DEFINE_SPINLOCK (pidfs_ino_lock );
100+ static u64 pidfs_ino_nr = 1 ;
101+
69102static inline unsigned long pidfs_ino (u64 ino )
70103{
71104 return lower_32_bits (ino );
@@ -77,6 +110,18 @@ static inline u32 pidfs_gen(u64 ino)
77110 return upper_32_bits (ino );
78111}
79112
113+ static inline u64 pidfs_alloc_ino (void )
114+ {
115+ u64 ino ;
116+
117+ spin_lock (& pidfs_ino_lock );
118+ if (pidfs_ino (pidfs_ino_nr ) == 0 )
119+ pidfs_ino_nr ++ ;
120+ ino = pidfs_ino_nr ++ ;
121+ spin_unlock (& pidfs_ino_lock );
122+ return ino ;
123+ }
124+
80125#else
81126
82127/* On 64 bit simply return ino. */
@@ -90,61 +135,47 @@ static inline u32 pidfs_gen(u64 ino)
90135{
91136 return 0 ;
92137}
93- #endif
94138
95- /*
96- * Allocate inode number and initialize pidfs fields.
97- * Called with pidmap_lock held.
98- */
99- void pidfs_prepare_pid (struct pid * pid )
139+ DEFINE_COOKIE (pidfs_ino_cookie );
140+
141+ static u64 pidfs_alloc_ino (void )
100142{
101- static u64 pidfs_ino_nr = 2 ;
143+ u64 ino ;
102144
103- /*
104- * On 64 bit nothing special happens. The 64bit number assigned
105- * to struct pid is the inode number.
106- *
107- * On 32 bit the 64 bit number assigned to struct pid is split
108- * into two 32 bit numbers. The lower 32 bits are used as the
109- * inode number and the upper 32 bits are used as the inode
110- * generation number.
111- *
112- * On 32 bit pidfs_ino() will return the lower 32 bit. When
113- * pidfs_ino() returns zero a wrap around happened. When a
114- * wraparound happens the 64 bit number will be incremented by 2
115- * so inode numbering starts at 2 again.
116- *
117- * On 64 bit comparing two pidfds is as simple as comparing
118- * inode numbers.
119- *
120- * When a wraparound happens on 32 bit multiple pidfds with the
121- * same inode number are likely to exist (This isn't a problem
122- * since before pidfs pidfds used the anonymous inode meaning
123- * all pidfds had the same inode number.). Userspace can
124- * reconstruct the 64 bit identifier by retrieving both the
125- * inode number and the inode generation number to compare or
126- * use file handles.
127- */
128- if (pidfs_ino (pidfs_ino_nr ) == 0 )
129- pidfs_ino_nr += 2 ;
145+ preempt_disable ();
146+ ino = gen_cookie_next (& pidfs_ino_cookie );
147+ preempt_enable ();
148+
149+ VFS_WARN_ON_ONCE (ino < 1 );
150+ return ino ;
151+ }
152+
153+ #endif
130154
131- pid -> ino = pidfs_ino_nr ;
132- pid -> pidfs_hash . next = NULL ;
155+ void pidfs_prepare_pid ( struct pid * pid )
156+ {
133157 pid -> stashed = NULL ;
134158 pid -> attr = NULL ;
135- pidfs_ino_nr ++ ;
159+ pid -> ino = 0 ;
136160}
137161
138162int pidfs_add_pid (struct pid * pid )
139163{
140- return rhashtable_insert_fast (& pidfs_ino_ht , & pid -> pidfs_hash ,
141- pidfs_ino_ht_params );
164+ int ret ;
165+
166+ pid -> ino = pidfs_alloc_ino ();
167+ ret = rhashtable_insert_fast (& pidfs_ino_ht , & pid -> pidfs_hash ,
168+ pidfs_ino_ht_params );
169+ if (unlikely (ret ))
170+ pid -> ino = 0 ;
171+ return ret ;
142172}
143173
144174void pidfs_remove_pid (struct pid * pid )
145175{
146- rhashtable_remove_fast (& pidfs_ino_ht , & pid -> pidfs_hash ,
147- pidfs_ino_ht_params );
176+ if (likely (pid -> ino ))
177+ rhashtable_remove_fast (& pidfs_ino_ht , & pid -> pidfs_hash ,
178+ pidfs_ino_ht_params );
148179}
149180
150181void pidfs_free_pid (struct pid * pid )
0 commit comments