Skip to content

Commit b9c0a5c

Browse files
author
Alexei Starovoitov
committed
Merge branch 'fix-invariant-violation-for-single-value-tnums'
Paul Chaignon says: ==================== Fix invariant violation for single-value tnums We're hitting an invariant violation in Cilium that sometimes leads to BPF programs being rejected and Cilium failing to start [1]. As far as I know this is the first case of invariant violation found in a real program (i.e., not by a fuzzer). The following extract from verifier logs shows what's happening: from 201 to 236: R1=0 R6=ctx() R7=1 R9=scalar(smin=umin=smin32=umin32=3584,smax=umax=smax32=umax32=3840,var_off=(0xe00; 0x100)) R10=fp0 236: R1=0 R6=ctx() R7=1 R9=scalar(smin=umin=smin32=umin32=3584,smax=umax=smax32=umax32=3840,var_off=(0xe00; 0x100)) R10=fp0 ; if (magic == MARK_MAGIC_HOST || magic == MARK_MAGIC_OVERLAY || magic == MARK_MAGIC_ENCRYPT) @ bpf_host.c:1337 236: (16) if w9 == 0xe00 goto pc+45 ; R9=scalar(smin=umin=smin32=umin32=3585,smax=umax=smax32=umax32=3840,var_off=(0xe00; 0x100)) 237: (16) if w9 == 0xf00 goto pc+1 verifier bug: REG INVARIANTS VIOLATION (false_reg1): range bounds violation u64=[0xe01, 0xe00] s64=[0xe01, 0xe00] u32=[0xe01, 0xe00] s32=[0xe01, 0xe00] var_off=(0xe00, 0x0) More details are given in the second patch, but in short, the verifier should be able to detect that the false branch of instruction 237 is never true. After instruction 236, the u64 range and the tnum overlap in a single value, 0xf00. The long-term solution to invariant violation is likely to rely on the refinement + invariant violation check to detect dead branches, as started by Eduard. To fix the current issue, we need something with less refactoring that we can backport to affected kernels. The solution implemented in the second patch is to improve the bounds refinement to avoid this case. It relies on a new tnum helper, tnum_step, first sent as an RFC in [2]. The last two patches extend and update the selftests. Link: cilium/cilium#44216 [1] Link: https://lore.kernel.org/bpf/20251107192328.2190680-2-harishankar.vishwanathan@gmail.com/ [2] Changes in v3: - Fix commit description error spotted by AI bot. - Simplify constants in first two tests (Eduard). - Rework comment on third test (Eduard). - Add two new negative test cases (Eduard). - Rebased. Changes in v2: - Add guard suggested by Hari in tnum_step, to avoid undefined behavior spotted by AI code review. - Add explanation diagrams in code as suggested by Eduard. - Rework conditions for readability as suggested by Eduard. - Updated reference to SMT formula. - Rebased. ==================== Link: https://patch.msgid.link/cover.1772225741.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2 parents 8c0d9e1 + 024cea2 commit b9c0a5c

File tree

5 files changed

+227
-1
lines changed

5 files changed

+227
-1
lines changed

include/linux/tnum.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,4 +131,7 @@ static inline bool tnum_subreg_is_const(struct tnum a)
131131
return !(tnum_subreg(a)).mask;
132132
}
133133

134+
/* Returns the smallest member of t larger than z */
135+
u64 tnum_step(struct tnum t, u64 z);
136+
134137
#endif /* _LINUX_TNUM_H */

kernel/bpf/tnum.c

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,59 @@ struct tnum tnum_bswap64(struct tnum a)
269269
{
270270
return TNUM(swab64(a.value), swab64(a.mask));
271271
}
272+
273+
/* Given tnum t, and a number z such that tmin <= z < tmax, where tmin
274+
* is the smallest member of the t (= t.value) and tmax is the largest
275+
* member of t (= t.value | t.mask), returns the smallest member of t
276+
* larger than z.
277+
*
278+
* For example,
279+
* t = x11100x0
280+
* z = 11110001 (241)
281+
* result = 11110010 (242)
282+
*
283+
* Note: if this function is called with z >= tmax, it just returns
284+
* early with tmax; if this function is called with z < tmin, the
285+
* algorithm already returns tmin.
286+
*/
287+
u64 tnum_step(struct tnum t, u64 z)
288+
{
289+
u64 tmax, j, p, q, r, s, v, u, w, res;
290+
u8 k;
291+
292+
tmax = t.value | t.mask;
293+
294+
/* if z >= largest member of t, return largest member of t */
295+
if (z >= tmax)
296+
return tmax;
297+
298+
/* if z < smallest member of t, return smallest member of t */
299+
if (z < t.value)
300+
return t.value;
301+
302+
/* keep t's known bits, and match all unknown bits to z */
303+
j = t.value | (z & t.mask);
304+
305+
if (j > z) {
306+
p = ~z & t.value & ~t.mask;
307+
k = fls64(p); /* k is the most-significant 0-to-1 flip */
308+
q = U64_MAX << k;
309+
r = q & z; /* positions > k matched to z */
310+
s = ~q & t.value; /* positions <= k matched to t.value */
311+
v = r | s;
312+
res = v;
313+
} else {
314+
p = z & ~t.value & ~t.mask;
315+
k = fls64(p); /* k is the most-significant 1-to-0 flip */
316+
q = U64_MAX << k;
317+
r = q & t.mask & z; /* unknown positions > k, matched to z */
318+
s = q & ~t.mask; /* known positions > k, set to 1 */
319+
v = r | s;
320+
/* add 1 to unknown positions > k to make value greater than z */
321+
u = v + (1ULL << k);
322+
/* extract bits in unknown positions > k from u, rest from t.value */
323+
w = (u & t.mask) | t.value;
324+
res = w;
325+
}
326+
return res;
327+
}

kernel/bpf/verifier.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2379,6 +2379,9 @@ static void __update_reg32_bounds(struct bpf_reg_state *reg)
23792379

23802380
static void __update_reg64_bounds(struct bpf_reg_state *reg)
23812381
{
2382+
u64 tnum_next, tmax;
2383+
bool umin_in_tnum;
2384+
23822385
/* min signed is max(sign bit) | min(other bits) */
23832386
reg->smin_value = max_t(s64, reg->smin_value,
23842387
reg->var_off.value | (reg->var_off.mask & S64_MIN));
@@ -2388,6 +2391,33 @@ static void __update_reg64_bounds(struct bpf_reg_state *reg)
23882391
reg->umin_value = max(reg->umin_value, reg->var_off.value);
23892392
reg->umax_value = min(reg->umax_value,
23902393
reg->var_off.value | reg->var_off.mask);
2394+
2395+
/* Check if u64 and tnum overlap in a single value */
2396+
tnum_next = tnum_step(reg->var_off, reg->umin_value);
2397+
umin_in_tnum = (reg->umin_value & ~reg->var_off.mask) == reg->var_off.value;
2398+
tmax = reg->var_off.value | reg->var_off.mask;
2399+
if (umin_in_tnum && tnum_next > reg->umax_value) {
2400+
/* The u64 range and the tnum only overlap in umin.
2401+
* u64: ---[xxxxxx]-----
2402+
* tnum: --xx----------x-
2403+
*/
2404+
___mark_reg_known(reg, reg->umin_value);
2405+
} else if (!umin_in_tnum && tnum_next == tmax) {
2406+
/* The u64 range and the tnum only overlap in the maximum value
2407+
* represented by the tnum, called tmax.
2408+
* u64: ---[xxxxxx]-----
2409+
* tnum: xx-----x--------
2410+
*/
2411+
___mark_reg_known(reg, tmax);
2412+
} else if (!umin_in_tnum && tnum_next <= reg->umax_value &&
2413+
tnum_step(reg->var_off, tnum_next) > reg->umax_value) {
2414+
/* The u64 range and the tnum only overlap in between umin
2415+
* (excluded) and umax.
2416+
* u64: ---[xxxxxx]-----
2417+
* tnum: xx----x-------x-
2418+
*/
2419+
___mark_reg_known(reg, tnum_next);
2420+
}
23912421
}
23922422

23932423
static void __update_reg_bounds(struct bpf_reg_state *reg)

tools/testing/selftests/bpf/prog_tests/reg_bounds.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2091,7 +2091,7 @@ static struct subtest_case crafted_cases[] = {
20912091
{U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 0x7fffffff}},
20922092

20932093
{U64, U32, {0, 0x100000000}, {0, 0}},
2094-
{U64, U32, {0xfffffffe, 0x100000000}, {0x80000000, 0x80000000}},
2094+
{U64, U32, {0xfffffffe, 0x300000000}, {0x80000000, 0x80000000}},
20952095

20962096
{U64, S32, {0, 0xffffffff00000000ULL}, {0, 0}},
20972097
/* these are tricky cases where lower 32 bits allow to tighten 64

tools/testing/selftests/bpf/progs/verifier_bounds.c

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1863,4 +1863,141 @@ l1_%=: r0 = 1; \
18631863
: __clobber_all);
18641864
}
18651865

1866+
/* This test covers the bounds deduction when the u64 range and the tnum
1867+
* overlap only at umax. After instruction 3, the ranges look as follows:
1868+
*
1869+
* 0 umin=0xe01 umax=0xf00 U64_MAX
1870+
* | [xxxxxxxxxxxxxx] |
1871+
* |----------------------------|------------------------------|
1872+
* | x x | tnum values
1873+
*
1874+
* The verifier can therefore deduce that the R0=0xf0=240.
1875+
*/
1876+
SEC("socket")
1877+
__description("bounds refinement with single-value tnum on umax")
1878+
__msg("3: (15) if r0 == 0xe0 {{.*}} R0=240")
1879+
__success __log_level(2)
1880+
__flag(BPF_F_TEST_REG_INVARIANTS)
1881+
__naked void bounds_refinement_tnum_umax(void *ctx)
1882+
{
1883+
asm volatile(" \
1884+
call %[bpf_get_prandom_u32]; \
1885+
r0 |= 0xe0; \
1886+
r0 &= 0xf0; \
1887+
if r0 == 0xe0 goto +2; \
1888+
if r0 == 0xf0 goto +1; \
1889+
r10 = 0; \
1890+
exit; \
1891+
" :
1892+
: __imm(bpf_get_prandom_u32)
1893+
: __clobber_all);
1894+
}
1895+
1896+
/* This test covers the bounds deduction when the u64 range and the tnum
1897+
* overlap only at umin. After instruction 3, the ranges look as follows:
1898+
*
1899+
* 0 umin=0xe00 umax=0xeff U64_MAX
1900+
* | [xxxxxxxxxxxxxx] |
1901+
* |----------------------------|------------------------------|
1902+
* | x x | tnum values
1903+
*
1904+
* The verifier can therefore deduce that the R0=0xe0=224.
1905+
*/
1906+
SEC("socket")
1907+
__description("bounds refinement with single-value tnum on umin")
1908+
__msg("3: (15) if r0 == 0xf0 {{.*}} R0=224")
1909+
__success __log_level(2)
1910+
__flag(BPF_F_TEST_REG_INVARIANTS)
1911+
__naked void bounds_refinement_tnum_umin(void *ctx)
1912+
{
1913+
asm volatile(" \
1914+
call %[bpf_get_prandom_u32]; \
1915+
r0 |= 0xe0; \
1916+
r0 &= 0xf0; \
1917+
if r0 == 0xf0 goto +2; \
1918+
if r0 == 0xe0 goto +1; \
1919+
r10 = 0; \
1920+
exit; \
1921+
" :
1922+
: __imm(bpf_get_prandom_u32)
1923+
: __clobber_all);
1924+
}
1925+
1926+
/* This test covers the bounds deduction when the only possible tnum value is
1927+
* in the middle of the u64 range. After instruction 3, the ranges look as
1928+
* follows:
1929+
*
1930+
* 0 umin=0x7cf umax=0x7df U64_MAX
1931+
* | [xxxxxxxxxxxx] |
1932+
* |----------------------------|------------------------------|
1933+
* | x x x x x | tnum values
1934+
* | +--- 0x7e0
1935+
* +--- 0x7d0
1936+
*
1937+
* Since the lower four bits are zero, the tnum and the u64 range only overlap
1938+
* in R0=0x7d0=2000. Instruction 5 is therefore dead code.
1939+
*/
1940+
SEC("socket")
1941+
__description("bounds refinement with single-value tnum in middle of range")
1942+
__msg("3: (a5) if r0 < 0x7cf {{.*}} R0=2000")
1943+
__success __log_level(2)
1944+
__naked void bounds_refinement_tnum_middle(void *ctx)
1945+
{
1946+
asm volatile(" \
1947+
call %[bpf_get_prandom_u32]; \
1948+
if r0 & 0x0f goto +4; \
1949+
if r0 > 0x7df goto +3; \
1950+
if r0 < 0x7cf goto +2; \
1951+
if r0 == 0x7d0 goto +1; \
1952+
r10 = 0; \
1953+
exit; \
1954+
" :
1955+
: __imm(bpf_get_prandom_u32)
1956+
: __clobber_all);
1957+
}
1958+
1959+
/* This test cover the negative case for the tnum/u64 overlap. Since
1960+
* they contain the same two values (i.e., {0, 1}), we can't deduce
1961+
* anything more.
1962+
*/
1963+
SEC("socket")
1964+
__description("bounds refinement: several overlaps between tnum and u64")
1965+
__msg("2: (25) if r0 > 0x1 {{.*}} R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=1,var_off=(0x0; 0x1))")
1966+
__failure __log_level(2)
1967+
__naked void bounds_refinement_several_overlaps(void *ctx)
1968+
{
1969+
asm volatile(" \
1970+
call %[bpf_get_prandom_u32]; \
1971+
if r0 < 0 goto +3; \
1972+
if r0 > 1 goto +2; \
1973+
if r0 == 1 goto +1; \
1974+
r10 = 0; \
1975+
exit; \
1976+
" :
1977+
: __imm(bpf_get_prandom_u32)
1978+
: __clobber_all);
1979+
}
1980+
1981+
/* This test cover the negative case for the tnum/u64 overlap. Since
1982+
* they overlap in the two values contained by the u64 range (i.e.,
1983+
* {0xf, 0x10}), we can't deduce anything more.
1984+
*/
1985+
SEC("socket")
1986+
__description("bounds refinement: multiple overlaps between tnum and u64")
1987+
__msg("2: (25) if r0 > 0x10 {{.*}} R0=scalar(smin=umin=smin32=umin32=15,smax=umax=smax32=umax32=16,var_off=(0x0; 0x1f))")
1988+
__failure __log_level(2)
1989+
__naked void bounds_refinement_multiple_overlaps(void *ctx)
1990+
{
1991+
asm volatile(" \
1992+
call %[bpf_get_prandom_u32]; \
1993+
if r0 < 0xf goto +3; \
1994+
if r0 > 0x10 goto +2; \
1995+
if r0 == 0x10 goto +1; \
1996+
r10 = 0; \
1997+
exit; \
1998+
" :
1999+
: __imm(bpf_get_prandom_u32)
2000+
: __clobber_all);
2001+
}
2002+
18662003
char _license[] SEC("license") = "GPL";

0 commit comments

Comments
 (0)