Skip to content

Commit c265ae7

Browse files
isilenceaxboe
authored andcommitted
io_uring: introduce io_uring querying
There are many parameters users might want to query about io_uring like available request types or the ring sizes. This patch introduces an interface for such slow path queries. It was written with several requirements in mind: - Can be used with or without an io_uring instance. Asking for supported setup flags before creating an instance as well as querying info about an already created ring are valid use cases. - Should be moderately fast. For example, users might use it to periodically retrieve ring attributes at runtime. As a consequence, it should be able to query multiple attributes in a single syscall. - Backward and forward compatible. - Should be reasonably easy to use. - Reduce the kernel code size for introducing new query types. It's implemented as a new registration opcode IORING_REGISTER_QUERY. The user passes one or more query structures linked together, each represented by struct io_uring_query_hdr. The header stores common control fields needed for processing and points to query type specific information. The header contains - The query type - The result field, which on return contains the error code for the query - Pointer to the query type specific information - The size of the query structure. The kernel will only populate up to the size, which helps with backward compatibility. The kernel can also reduce the size, so if the current kernel is older than the interface the user tries to use, it'll get only the supported bits. - next_entry field is used to chain multiple queries. Apart from common registration syscall failures, it can only immediately return an error code when the headers are incorrect or any other addresses are invalid. That usually means that the userspace doesn't use the API right and should be corrected. All query type specific errors are returned in the header's result field. As an example, the patch adds a single query type for now, i.e. IO_URING_QUERY_OPCODES, which tells what register / request / etc. 
opcodes are supported, but there are particular plans to extend it. Note: there is a request probing interface via IORING_REGISTER_PROBE, but it's a mess. It requires the user to create a ring first, it only works for requests, and requires dynamic allocations. Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 63805d0 commit c265ae7

File tree

6 files changed

+153
-1
lines changed

6 files changed

+153
-1
lines changed

include/uapi/linux/io_uring.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,9 @@ enum io_uring_register_op {
686686

687687
IORING_REGISTER_MEM_REGION = 34,
688688

689+
/* query various aspects of io_uring, see linux/io_uring/query.h */
690+
IORING_REGISTER_QUERY = 35,
691+
689692
/* this goes last */
690693
IORING_REGISTER_LAST,
691694

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
2+
/*
3+
* Header file for the io_uring query interface.
4+
*/
5+
#ifndef LINUX_IO_URING_QUERY_H
6+
#define LINUX_IO_URING_QUERY_H
7+
8+
#include <linux/types.h>
9+
10+
/*
 * Common header describing one query in a chain passed to
 * IORING_REGISTER_QUERY. On input, result and __resv must be zero and
 * size must be non-zero; otherwise the query fails with -EINVAL in result.
 */
struct io_uring_query_hdr {
11+
/* User address of the next header in the chain; 0 ends the chain */
__u64 next_entry;
12+
/* User address of the query type specific structure */
__u64 query_data;
13+
/* Query type, one of IO_URING_QUERY_* */
__u32 query_op;
14+
/*
 * In: size of the buffer at query_data.
 * Out: number of bytes the kernel filled in, never more than passed in.
 */
__u32 size;
15+
/* Out: 0 on success or a negative error code for this query */
__s32 result;
16+
/* Reserved, must be zero */
__u32 __resv[3];
17+
};
18+
19+
/* Query types accepted in io_uring_query_hdr::query_op */
enum {
20+
/* Reports supported opcodes and flags, see struct io_uring_query_opcode */
IO_URING_QUERY_OPCODES = 0,
21+
22+
/* Not a query type: values at or above this are rejected with -EOPNOTSUPP */
__IO_URING_QUERY_MAX,
23+
};
24+
25+
/* Doesn't require a ring */
26+
/* Data returned for the IO_URING_QUERY_OPCODES query type */
struct io_uring_query_opcode {
27+
/* The number of supported IORING_OP_* opcodes */
28+
__u32 nr_request_opcodes;
29+
/* The number of supported IORING_[UN]REGISTER_* opcodes */
30+
__u32 nr_register_opcodes;
31+
/* Bitmask of all supported IORING_FEAT_* flags */
32+
__u64 feature_flags;
33+
/* Bitmask of all supported IORING_SETUP_* flags */
34+
__u64 ring_setup_flags;
35+
/* Bitmask of all supported IORING_ENTER_* flags */
36+
__u64 enter_flags;
37+
/* Bitmask of all supported IOSQE_* flags */
38+
__u64 sqe_flags;
39+
};
40+
41+
#endif

io_uring/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
1313
sync.o msg_ring.o advise.o openclose.o \
1414
statx.o timeout.o cancel.o \
1515
waitid.o register.o truncate.o \
16-
memmap.o alloc_cache.o
16+
memmap.o alloc_cache.o query.o
1717
obj-$(CONFIG_IO_URING_ZCRX) += zcrx.o
1818
obj-$(CONFIG_IO_WQ) += io-wq.o
1919
obj-$(CONFIG_FUTEX) += futex.o

io_uring/query.c

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
#include "linux/io_uring/query.h"
4+
5+
#include "query.h"
6+
#include "io_uring.h"
7+
8+
#define IO_MAX_QUERY_SIZE (sizeof(struct io_uring_query_opcode))
9+
10+
static ssize_t io_query_ops(void *data)
11+
{
12+
struct io_uring_query_opcode *e = data;
13+
14+
BUILD_BUG_ON(sizeof(*e) > IO_MAX_QUERY_SIZE);
15+
16+
e->nr_request_opcodes = IORING_OP_LAST;
17+
e->nr_register_opcodes = IORING_REGISTER_LAST;
18+
e->feature_flags = IORING_FEAT_FLAGS;
19+
e->ring_setup_flags = IORING_SETUP_FLAGS;
20+
e->enter_flags = IORING_ENTER_FLAGS;
21+
e->sqe_flags = SQE_VALID_FLAGS;
22+
return sizeof(*e);
23+
}
24+
25+
/*
 * Process a single query header located at user address @uhdr.
 *
 * @data is a kernel scratch buffer of at least IO_MAX_QUERY_SIZE bytes used
 * to stage the query type specific structure. On success the user address of
 * the next header in the chain is stored in @next_entry.
 *
 * Returns 0 when the header was processed and written back to userspace,
 * including when the query itself failed (that error is reported in the
 * header's result field). Returns -EFAULT when a user copy fails.
 *
 * NOTE(review): @ctx is not referenced in this function body.
 */
static int io_handle_query_entry(struct io_ring_ctx *ctx,
26+
void *data, void __user *uhdr,
27+
u64 *next_entry)
28+
{
29+
struct io_uring_query_hdr hdr;
30+
size_t usize, res_size = 0;
31+
ssize_t ret = -EINVAL;
32+
void __user *udata;
33+
34+
if (copy_from_user(&hdr, uhdr, sizeof(hdr)))
35+
return -EFAULT;
36+
/* Remember the user's size; hdr.size is clamped to what fits in @data */
usize = hdr.size;
37+
hdr.size = min(hdr.size, IO_MAX_QUERY_SIZE);
38+
udata = u64_to_user_ptr(hdr.query_data);
39+
40+
/* Unknown query types are reported through hdr.result, not the return value */
if (hdr.query_op >= __IO_URING_QUERY_MAX) {
41+
ret = -EOPNOTSUPP;
42+
goto out;
43+
}
44+
/* Reserved fields and result must be zero and size non-zero, else -EINVAL */
if (!mem_is_zero(hdr.__resv, sizeof(hdr.__resv)) || hdr.result || !hdr.size)
45+
goto out;
46+
if (copy_from_user(data, udata, hdr.size))
47+
return -EFAULT;
48+
49+
/* Handlers return the number of bytes they filled in @data, or an error */
switch (hdr.query_op) {
50+
case IO_URING_QUERY_OPCODES:
51+
ret = io_query_ops(data);
52+
break;
53+
}
54+
55+
if (ret >= 0) {
56+
/* A handler claiming more than the buffer size is a kernel bug */
if (WARN_ON_ONCE(ret > IO_MAX_QUERY_SIZE))
57+
return -EFAULT;
58+
res_size = ret;
59+
ret = 0;
60+
}
61+
out:
62+
hdr.result = ret;
63+
/* Report back at most what the user asked for; 0 on the error paths */
hdr.size = min_t(size_t, usize, res_size);
64+
65+
if (copy_struct_to_user(udata, usize, data, hdr.size, NULL))
66+
return -EFAULT;
67+
if (copy_to_user(uhdr, &hdr, sizeof(hdr)))
68+
return -EFAULT;
69+
*next_entry = hdr.next_entry;
70+
return 0;
71+
}
72+
73+
/*
 * Upper bound on the number of chained query headers handled by one
 * io_query() call. The chain is built from user-supplied pointers, so
 * without a bound a header that links back to itself (or to an earlier
 * header) would keep the task looping in the kernel indefinitely.
 */
#define IO_MAX_QUERY_ENTRIES	1000

/*
 * io_query() - handle IORING_REGISTER_QUERY
 * @ctx:     ring the query is issued against, or NULL when no ring is used
 * @arg:     user address of the first struct io_uring_query_hdr, may be NULL
 * @nr_args: must be 0
 *
 * Walks the chain of query headers starting at @arg, following each
 * header's next_entry until it is 0. Per-query errors are reported in the
 * respective header's result field and do not stop the walk.
 *
 * Returns 0 once the whole chain was processed, -EINVAL if @nr_args is
 * non-zero, -ERANGE if the chain has more than IO_MAX_QUERY_ENTRIES
 * entries (which includes cyclic chains), or the error returned by
 * io_handle_query_entry() when a header could not be processed.
 */
int io_query(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
{
	char entry_buffer[IO_MAX_QUERY_SIZE];
	void __user *uhdr = arg;
	unsigned int nr_entries = 0;
	int ret;

	if (nr_args)
		return -EINVAL;

	memset(entry_buffer, 0, sizeof(entry_buffer));

	while (uhdr) {
		u64 next_hdr;

		/* Bound the walk so a cyclic chain cannot spin forever. */
		if (++nr_entries > IO_MAX_QUERY_ENTRIES)
			return -ERANGE;

		ret = io_handle_query_entry(ctx, entry_buffer, uhdr, &next_hdr);
		if (ret)
			return ret;
		uhdr = u64_to_user_ptr(next_hdr);
	}
	return 0;
}

io_uring/query.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
#ifndef IORING_QUERY_H
3+
#define IORING_QUERY_H
4+
5+
#include <linux/io_uring_types.h>
6+
7+
/*
 * Handle IORING_REGISTER_QUERY: process the chain of
 * struct io_uring_query_hdr entries starting at user address @arg.
 * @ctx may be NULL when no ring is involved; @nr_args must be 0.
 * Returns 0 on success or a negative error code.
 */
int io_query(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args);
8+
9+
#endif

io_uring/register.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "msg_ring.h"
3232
#include "memmap.h"
3333
#include "zcrx.h"
34+
#include "query.h"
3435

3536
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
3637
IORING_REGISTER_LAST + IORING_OP_LAST)
@@ -832,6 +833,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
832833
break;
833834
ret = io_register_mem_region(ctx, arg);
834835
break;
836+
case IORING_REGISTER_QUERY:
837+
ret = io_query(ctx, arg, nr_args);
838+
break;
835839
default:
836840
ret = -EINVAL;
837841
break;
@@ -901,6 +905,8 @@ static int io_uring_register_blind(unsigned int opcode, void __user *arg,
901905
switch (opcode) {
902906
case IORING_REGISTER_SEND_MSG_RING:
903907
return io_uring_register_send_msg_ring(arg, nr_args);
908+
case IORING_REGISTER_QUERY:
909+
return io_query(NULL, arg, nr_args);
904910
}
905911
return -EINVAL;
906912
}

0 commit comments

Comments
 (0)