Skip to content

Commit 5d7ddc5

Browse files
committed
selinux: reduce path walk overhead
Reduce the SELinux performance overhead during path walks through the use of a per-task directory access cache and some minor code optimizations. The directory access cache is per-task because it allows for a lockless cache while also fitting well with a common application pattern of heavily accessing a relatively small number of SELinux directory labels. The cache is inherited by child processes when the child runs with the same SELinux domain as the parent, and invalidated on changes to the task's SELinux domain or the loaded SELinux policy. A cache of four entries was chosen based on testing with the Fedora "targeted" policy, a SELinux Reference Policy variant, and 'make allmodconfig' on Linux v6.14. Code optimizations include better use of inline functions to reduce function calls in the common case, especially in the inode revalidation code paths, and elimination of redundant checks between the LSM and SELinux layers. As mentioned briefly above, aside from general use and regression testing with the selinux-testsuite, performance was measured using 'make allmodconfig' with Linux v6.14 as a base reference. As expected, there were variations from one test run to another, but the measurements below are a good representation of the test results seen on my test system. * Linux v6.14 REF 1.26% [k] __d_lookup_rcu SELINUX (1.31%) 0.58% [k] selinux_inode_permission 0.29% [k] avc_lookup 0.25% [k] avc_has_perm_noaudit 0.19% [k] __inode_security_revalidate * Linux v6.14 + patch REF 1.41% [k] __d_lookup_rcu SELINUX (0.89%) 0.65% [k] selinux_inode_permission 0.15% [k] avc_lookup 0.05% [k] avc_has_perm_noaudit 0.04% [k] avc_policy_seqno X.XX% [k] __inode_security_revalidate (now inline) In both cases the __d_lookup_rcu() function was used as a reference point to establish a context for the SELinux-related functions. On an unpatched Linux v6.14 system we see the time spent in the combined SELinux functions exceeded that of __d_lookup_rcu(), 1.31% compared to 1.26%. 
However, with this patch applied the time spent in the combined SELinux functions dropped to roughly 65% of the time spent in __d_lookup_rcu(), 0.89% compared to 1.41%. Aside from the significant decrease in time spent in the SELinux AVC, it appears that any additional time spent searching and updating the cache is offset by other code improvements, e.g. time spent in selinux_inode_permission() + __inode_security_revalidate() + avc_policy_seqno() is less on the patched kernel than the unpatched kernel. It is worth noting that in this patch the use of the per-task cache is limited to the security_inode_permission() LSM callback, selinux_inode_permission(), but future work could expand the cache into inode_has_perm(), likely through consolidation of the two functions. While this would likely have little to no impact on path walks, it may benefit other operations. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Tested-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
1 parent 8716451 commit 5d7ddc5

File tree

2 files changed

+185
-54
lines changed

2 files changed

+185
-54
lines changed

security/selinux/hooks.c

Lines changed: 171 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,10 @@ static void cred_init_security(void)
213213
{
214214
struct task_security_struct *tsec;
215215

216+
/* NOTE: the lsm framework zeros out the buffer on allocation */
217+
216218
tsec = selinux_cred(unrcu_pointer(current->real_cred));
217-
tsec->osid = tsec->sid = SECINITSID_KERNEL;
219+
tsec->osid = tsec->sid = tsec->avdcache.sid = SECINITSID_KERNEL;
218220
}
219221

220222
/*
@@ -278,27 +280,21 @@ static int __inode_security_revalidate(struct inode *inode,
278280
struct dentry *dentry,
279281
bool may_sleep)
280282
{
281-
struct inode_security_struct *isec = selinux_inode(inode);
283+
if (!selinux_initialized())
284+
return 0;
282285

283-
might_sleep_if(may_sleep);
286+
if (may_sleep)
287+
might_sleep();
288+
else
289+
return -ECHILD;
284290

285291
/*
286-
* The check of isec->initialized below is racy but
287-
* inode_doinit_with_dentry() will recheck with
288-
* isec->lock held.
292+
* Check to ensure that an inode's SELinux state is valid and try
293+
* reloading the inode security label if necessary. This will fail if
294+
* @dentry is NULL and no dentry for this inode can be found; in that
295+
* case, continue using the old label.
289296
*/
290-
if (selinux_initialized() &&
291-
data_race(isec->initialized != LABEL_INITIALIZED)) {
292-
if (!may_sleep)
293-
return -ECHILD;
294-
295-
/*
296-
* Try reloading the inode security label. This will fail if
297-
* @opt_dentry is NULL and no dentry for this inode can be
298-
* found; in that case, continue using the old label.
299-
*/
300-
inode_doinit_with_dentry(inode, dentry);
301-
}
297+
inode_doinit_with_dentry(inode, dentry);
302298
return 0;
303299
}
304300

@@ -307,41 +303,53 @@ static struct inode_security_struct *inode_security_novalidate(struct inode *ino
307303
return selinux_inode(inode);
308304
}
309305

310-
static struct inode_security_struct *inode_security_rcu(struct inode *inode, bool rcu)
306+
static inline struct inode_security_struct *inode_security_rcu(struct inode *inode,
307+
bool rcu)
311308
{
312-
int error;
309+
int rc;
310+
struct inode_security_struct *isec = selinux_inode(inode);
313311

314-
error = __inode_security_revalidate(inode, NULL, !rcu);
315-
if (error)
316-
return ERR_PTR(error);
317-
return selinux_inode(inode);
312+
/* check below is racy, but revalidate will recheck with lock held */
313+
if (data_race(likely(isec->initialized == LABEL_INITIALIZED)))
314+
return isec;
315+
rc = __inode_security_revalidate(inode, NULL, !rcu);
316+
if (rc)
317+
return ERR_PTR(rc);
318+
return isec;
318319
}
319320

320321
/*
321322
* Get the security label of an inode.
322323
*/
323-
static struct inode_security_struct *inode_security(struct inode *inode)
324+
static inline struct inode_security_struct *inode_security(struct inode *inode)
324325
{
326+
struct inode_security_struct *isec = selinux_inode(inode);
327+
328+
/* check below is racy, but revalidate will recheck with lock held */
329+
if (data_race(likely(isec->initialized == LABEL_INITIALIZED)))
330+
return isec;
325331
__inode_security_revalidate(inode, NULL, true);
326-
return selinux_inode(inode);
332+
return isec;
327333
}
328334

329-
static struct inode_security_struct *backing_inode_security_novalidate(struct dentry *dentry)
335+
static inline struct inode_security_struct *backing_inode_security_novalidate(struct dentry *dentry)
330336
{
331-
struct inode *inode = d_backing_inode(dentry);
332-
333-
return selinux_inode(inode);
337+
return selinux_inode(d_backing_inode(dentry));
334338
}
335339

336340
/*
337341
* Get the security label of a dentry's backing inode.
338342
*/
339-
static struct inode_security_struct *backing_inode_security(struct dentry *dentry)
343+
static inline struct inode_security_struct *backing_inode_security(struct dentry *dentry)
340344
{
341345
struct inode *inode = d_backing_inode(dentry);
346+
struct inode_security_struct *isec = selinux_inode(inode);
342347

348+
/* check below is racy, but revalidate will recheck with lock held */
349+
if (data_race(likely(isec->initialized == LABEL_INITIALIZED)))
350+
return isec;
343351
__inode_security_revalidate(inode, dentry, true);
344-
return selinux_inode(inode);
352+
return isec;
345353
}
346354

347355
static void inode_free_security(struct inode *inode)
@@ -1683,12 +1691,15 @@ static inline int dentry_has_perm(const struct cred *cred,
16831691
struct dentry *dentry,
16841692
u32 av)
16851693
{
1686-
struct inode *inode = d_backing_inode(dentry);
16871694
struct common_audit_data ad;
1695+
struct inode *inode = d_backing_inode(dentry);
1696+
struct inode_security_struct *isec = selinux_inode(inode);
16881697

16891698
ad.type = LSM_AUDIT_DATA_DENTRY;
16901699
ad.u.dentry = dentry;
1691-
__inode_security_revalidate(inode, dentry, true);
1700+
/* check below is racy, but revalidate will recheck with lock held */
1701+
if (data_race(unlikely(isec->initialized != LABEL_INITIALIZED)))
1702+
__inode_security_revalidate(inode, dentry, true);
16921703
return inode_has_perm(cred, inode, av, &ad);
16931704
}
16941705

@@ -1699,12 +1710,15 @@ static inline int path_has_perm(const struct cred *cred,
16991710
const struct path *path,
17001711
u32 av)
17011712
{
1702-
struct inode *inode = d_backing_inode(path->dentry);
17031713
struct common_audit_data ad;
1714+
struct inode *inode = d_backing_inode(path->dentry);
1715+
struct inode_security_struct *isec = selinux_inode(inode);
17041716

17051717
ad.type = LSM_AUDIT_DATA_PATH;
17061718
ad.u.path = *path;
1707-
__inode_security_revalidate(inode, path->dentry, true);
1719+
/* check below is racy, but revalidate will recheck with lock held */
1720+
if (data_race(unlikely(isec->initialized != LABEL_INITIALIZED)))
1721+
__inode_security_revalidate(inode, path->dentry, true);
17081722
return inode_has_perm(cred, inode, av, &ad);
17091723
}
17101724

@@ -3088,44 +3102,147 @@ static noinline int audit_inode_permission(struct inode *inode,
30883102
audited, denied, result, &ad);
30893103
}
30903104

3091-
static int selinux_inode_permission(struct inode *inode, int mask)
3105+
/**
3106+
* task_avdcache_reset - Reset the task's AVD cache
3107+
* @tsec: the task's security state
3108+
*
3109+
* Clear the task's AVD cache in @tsec and reset it to the current policy's
3110+
* and task's info.
*
* Every directory entry is zeroed, then the cache is stamped with the task's
* current SID and the current AVC policy sequence number. Those two stamps
* are the values task_avdcache_search() compares against to decide whether
* the cache is stale.
3111+
*/
3112+
static inline void task_avdcache_reset(struct task_security_struct *tsec)
3113+
{
3114+
/*
 * NOTE(review): zeroed entries carry isid 0; presumably no real inode SID
 * is 0, so an empty slot can never produce a hit - confirm.
 */
memset(&tsec->avdcache.dir, 0, sizeof(tsec->avdcache.dir));
3115+
tsec->avdcache.sid = tsec->sid;
3116+
tsec->avdcache.seqno = avc_policy_seqno();
3117+
/*
 * Park on the last slot: task_avdcache_update() advances dir_spot by one
 * (wrapping) before writing, so the first entry cached lands in slot 0.
 */
tsec->avdcache.dir_spot = TSEC_AVDC_DIR_SIZE - 1;
3118+
}
3119+
3120+
/**
3121+
* task_avdcache_search - Search the task's AVD cache
3122+
* @tsec: the task's security state
3123+
* @isec: the inode to search for in the cache
3124+
* @avdc: matching avd cache entry returned to the caller
3125+
*
3126+
* Search @tsec for an AVD cache entry that matches @isec and return it to the
3127+
* caller via @avdc. Returns 0 if a match is found, negative values otherwise.
*
* @avdc is written only on a hit. -ENOENT is returned when @isec is not a
* directory, when the cache was stale (in which case it is reset as a side
* effect), or when no entry's inode SID equals @isec's SID. A hit also moves
* the cache's dir_spot to the matching slot.
3128+
*/
3129+
static inline int task_avdcache_search(struct task_security_struct *tsec,
3130+
struct inode_security_struct *isec,
3131+
struct avdc_entry **avdc)
3132+
{
3133+
int orig, iter;
3134+
3135+
/* focused on path walk optimization, only cache directories */
3136+
if (isec->sclass != SECCLASS_DIR)
3137+
return -ENOENT;
3138+
/*
 * The cached decisions are only valid for the SID and policy sequence
 * number recorded at the last reset; if either has changed, drop
 * everything and report a miss.
 */
3139+
if (unlikely(tsec->sid != tsec->avdcache.sid ||
3140+
tsec->avdcache.seqno != avc_policy_seqno())) {
3141+
task_avdcache_reset(tsec);
3142+
return -ENOENT;
3143+
}
3144+
/*
 * Scan every slot exactly once, starting at dir_spot (the slot most
 * recently hit or filled) and stepping backwards.
 */
3145+
orig = iter = tsec->avdcache.dir_spot;
3146+
do {
3147+
if (tsec->avdcache.dir[iter].isid == isec->sid) {
3148+
/* cache hit: remember this slot so the next search starts here */
3149+
tsec->avdcache.dir_spot = iter;
3150+
*avdc = &tsec->avdcache.dir[iter];
3151+
return 0;
3152+
}
3153+
/* step back one slot; the mask wraps slot 0 around to the last slot */
iter = (iter - 1) & (TSEC_AVDC_DIR_SIZE - 1);
3154+
} while (iter != orig);
3155+
3156+
return -ENOENT;
3157+
}
3158+
3159+
/**
3160+
* task_avdcache_update - Update the task's AVD cache
3161+
* @tsec: the task's security state
3162+
* @isec: the inode associated with the cache entry
3163+
* @avd: the AVD info to cache
3164+
* @audited: the permission audit bitmask to cache
3165+
*
3166+
* Update the AVD cache in @tsec with the @avd and @audited info associated
3167+
* with @isec.
*
* Nothing is cached unless @isec is a directory. Otherwise the slot that
* follows the cache's current dir_spot (wrapping around) is overwritten and
* becomes the new dir_spot.
3168+
*/
3169+
static inline void task_avdcache_update(struct task_security_struct *tsec,
3170+
struct inode_security_struct *isec,
3171+
struct av_decision *avd,
3172+
u32 audited)
30923173
{
3174+
int spot;
3175+
3176+
/* focused on path walk optimization, only cache directories */
3177+
if (isec->sclass != SECCLASS_DIR)
3178+
return;
3179+
3180+
/* update cache: advance to the next slot (mask wraps) and replace it */
3181+
spot = (tsec->avdcache.dir_spot + 1) & (TSEC_AVDC_DIR_SIZE - 1);
3182+
tsec->avdcache.dir_spot = spot;
3183+
tsec->avdcache.dir[spot].isid = isec->sid;
3184+
tsec->avdcache.dir[spot].audited = audited;
3185+
tsec->avdcache.dir[spot].allowed = avd->allowed;
3186+
/* only the permissive bit of the decision's flags is kept */
tsec->avdcache.dir[spot].permissive = avd->flags & AVD_FLAGS_PERMISSIVE;
3187+
}
3188+
3189+
/**
3190+
* selinux_inode_permission - Check if the current task can access an inode
3191+
* @inode: the inode that is being accessed
3192+
* @requested: the accesses being requested
3193+
*
3194+
* Check if the current task is allowed to access @inode according to
3195+
* @requested. Returns 0 if allowed, negative values otherwise.
3196+
*/
3197+
static int selinux_inode_permission(struct inode *inode, int requested)
3198+
{
3199+
int mask;
30933200
u32 perms;
3094-
bool from_access;
3095-
bool no_block = mask & MAY_NOT_BLOCK;
3201+
struct task_security_struct *tsec;
30963202
struct inode_security_struct *isec;
3097-
u32 sid = current_sid();
3098-
struct av_decision avd;
3203+
struct avdc_entry *avdc;
30993204
int rc, rc2;
31003205
u32 audited, denied;
31013206

3102-
from_access = mask & MAY_ACCESS;
3103-
mask &= (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND);
3207+
mask = requested & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND);
31043208

31053209
/* No permission to check. Existence test. */
31063210
if (!mask)
31073211
return 0;
31083212

3109-
if (unlikely(IS_PRIVATE(inode)))
3110-
return 0;
3111-
3112-
perms = file_mask_to_av(inode->i_mode, mask);
3113-
3114-
isec = inode_security_rcu(inode, no_block);
3213+
isec = inode_security_rcu(inode, requested & MAY_NOT_BLOCK);
31153214
if (IS_ERR(isec))
31163215
return PTR_ERR(isec);
3216+
tsec = selinux_cred(current_cred());
3217+
perms = file_mask_to_av(inode->i_mode, mask);
3218+
3219+
rc = task_avdcache_search(tsec, isec, &avdc);
3220+
if (likely(!rc)) {
3221+
/* Cache hit. */
3222+
audited = perms & avdc->audited;
3223+
denied = perms & ~avdc->allowed;
3224+
if (unlikely(denied && enforcing_enabled() &&
3225+
!avdc->permissive))
3226+
rc = -EACCES;
3227+
} else {
3228+
struct av_decision avd;
3229+
3230+
/* Cache miss. */
3231+
rc = avc_has_perm_noaudit(tsec->sid, isec->sid, isec->sclass,
3232+
perms, 0, &avd);
3233+
audited = avc_audit_required(perms, &avd, rc,
3234+
(requested & MAY_ACCESS) ? FILE__AUDIT_ACCESS : 0,
3235+
&denied);
3236+
task_avdcache_update(tsec, isec, &avd, audited);
3237+
}
31173238

3118-
rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0,
3119-
&avd);
3120-
audited = avc_audit_required(perms, &avd, rc,
3121-
from_access ? FILE__AUDIT_ACCESS : 0,
3122-
&denied);
31233239
if (likely(!audited))
31243240
return rc;
31253241

31263242
rc2 = audit_inode_permission(inode, perms, audited, denied, rc);
31273243
if (rc2)
31283244
return rc2;
3245+
31293246
return rc;
31303247
}
31313248

security/selinux/include/objsec.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,27 @@
2929
#include "flask.h"
3030
#include "avc.h"
3131

32+
/*
 * One cached access decision in a task's AVD cache, keyed by the inode SID
 * it was computed for.
 */
struct avdc_entry {
33+
u32 isid; /* inode SID */
34+
u32 allowed; /* allowed permission bitmask */
35+
u32 audited; /* audited permission bitmask */
36+
bool permissive; /* AVC permissive flag */
37+
};
38+
3239
struct task_security_struct {
3340
u32 osid; /* SID prior to last execve */
3441
u32 sid; /* current SID */
3542
u32 exec_sid; /* exec SID */
3643
u32 create_sid; /* fscreate SID */
3744
u32 keycreate_sid; /* keycreate SID */
3845
u32 sockcreate_sid; /* sockcreate SID */
46+
/*
 * Number of dir cache slots. Must stay a power of two: slot indices are
 * wrapped with "& (TSEC_AVDC_DIR_SIZE - 1)".
 */
#define TSEC_AVDC_DIR_SIZE (1 << 2)
47+
struct {
48+
u32 sid; /* current SID for cached entries */
49+
u32 seqno; /* AVC sequence number */
50+
unsigned int dir_spot; /* dir cache index to check first */
51+
struct avdc_entry dir[TSEC_AVDC_DIR_SIZE]; /* dir entries */
52+
} avdcache;
3953
} __randomize_layout;
4054

4155
enum label_initialized {

0 commit comments

Comments
 (0)