Skip to content

Commit b8b7570

Browse files
Christoph Hellwig authored and Jens Axboe (axboe) committed
nvme-pci: fix dma unmapping when using PRPs and not using the IOVA mapping
The current version of the blk_rq_dma_map support in nvme-pci tries to reconstruct the DMA mappings from the on the wire descriptors if they are needed for unmapping. While this is not the case for the direct mapping fast path and the IOVA path, it is needed for the non-IOVA slow path, e.g. when using the interconnect is not dma coherent, when using swiotlb bounce buffering, or a IOMMU mapping that can't coalesce. While the reconstruction is easy and works fine for the SGL path, where the on the wire representation maps 1:1 to DMA mappings, the code to reconstruct the DMA mapping ranges from PRPs can't always work, as a given PRP layout can come from different DMA mappings, and the current code doesn't even always get that right. Give up on this approach and track the actual DMA mapping when actually needed again. Fixes: 7ce3c1d ("nvme-pci: convert the data mapping to blk_rq_dma_map") Reported-by: Ben Copeland <ben.copeland@linaro.org> Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Keith Busch <kbusch@kernel.org> Tested-by: Jens Axboe <axboe@kernel.dk> Link: https://lore.kernel.org/r/20250707125223.3022531-1-hch@lst.de Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 1cea518 commit b8b7570

File tree

1 file changed

+62
-52
lines changed

1 file changed

+62
-52
lines changed

drivers/nvme/host/pci.c

Lines changed: 62 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ struct nvme_dev {
173173
bool hmb;
174174
struct sg_table *hmb_sgt;
175175

176+
mempool_t *dmavec_mempool;
176177
mempool_t *iod_meta_mempool;
177178

178179
/* shadow doorbell buffer support: */
@@ -262,6 +263,11 @@ enum nvme_iod_flags {
262263
IOD_SINGLE_SEGMENT = 1U << 2,
263264
};
264265

266+
struct nvme_dma_vec {
267+
dma_addr_t addr;
268+
unsigned int len;
269+
};
270+
265271
/*
266272
* The nvme_iod describes the data in an I/O.
267273
*/
@@ -274,6 +280,8 @@ struct nvme_iod {
274280
unsigned int total_len;
275281
struct dma_iova_state dma_state;
276282
void *descriptors[NVME_MAX_NR_DESCRIPTORS];
283+
struct nvme_dma_vec *dma_vecs;
284+
unsigned int nr_dma_vecs;
277285

278286
dma_addr_t meta_dma;
279287
struct sg_table meta_sgt;
@@ -674,44 +682,12 @@ static void nvme_free_prps(struct request *req)
674682
{
675683
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
676684
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
677-
struct device *dma_dev = nvmeq->dev->dev;
678-
enum dma_data_direction dir = rq_dma_dir(req);
679-
int length = iod->total_len;
680-
dma_addr_t dma_addr;
681-
int i, desc;
682-
__le64 *prp_list;
683-
u32 dma_len;
684-
685-
dma_addr = le64_to_cpu(iod->cmd.common.dptr.prp1);
686-
dma_len = min_t(u32, length,
687-
NVME_CTRL_PAGE_SIZE - (dma_addr & (NVME_CTRL_PAGE_SIZE - 1)));
688-
length -= dma_len;
689-
if (!length) {
690-
dma_unmap_page(dma_dev, dma_addr, dma_len, dir);
691-
return;
692-
}
693-
694-
if (length <= NVME_CTRL_PAGE_SIZE) {
695-
dma_unmap_page(dma_dev, dma_addr, dma_len, dir);
696-
dma_addr = le64_to_cpu(iod->cmd.common.dptr.prp2);
697-
dma_unmap_page(dma_dev, dma_addr, length, dir);
698-
return;
699-
}
700-
701-
i = 0;
702-
desc = 0;
703-
prp_list = iod->descriptors[desc];
704-
do {
705-
dma_unmap_page(dma_dev, dma_addr, dma_len, dir);
706-
if (i == NVME_CTRL_PAGE_SIZE >> 3) {
707-
prp_list = iod->descriptors[++desc];
708-
i = 0;
709-
}
685+
unsigned int i;
710686

711-
dma_addr = le64_to_cpu(prp_list[i++]);
712-
dma_len = min(length, NVME_CTRL_PAGE_SIZE);
713-
length -= dma_len;
714-
} while (length);
687+
for (i = 0; i < iod->nr_dma_vecs; i++)
688+
dma_unmap_page(nvmeq->dev->dev, iod->dma_vecs[i].addr,
689+
iod->dma_vecs[i].len, rq_dma_dir(req));
690+
mempool_free(iod->dma_vecs, nvmeq->dev->dmavec_mempool);
715691
}
716692

717693
static void nvme_free_sgls(struct request *req)
@@ -760,6 +736,23 @@ static void nvme_unmap_data(struct request *req)
760736
nvme_free_descriptors(req);
761737
}
762738

739+
static bool nvme_pci_prp_iter_next(struct request *req, struct device *dma_dev,
740+
struct blk_dma_iter *iter)
741+
{
742+
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
743+
744+
if (iter->len)
745+
return true;
746+
if (!blk_rq_dma_map_iter_next(req, dma_dev, &iod->dma_state, iter))
747+
return false;
748+
if (dma_need_unmap(dma_dev)) {
749+
iod->dma_vecs[iod->nr_dma_vecs].addr = iter->addr;
750+
iod->dma_vecs[iod->nr_dma_vecs].len = iter->len;
751+
iod->nr_dma_vecs++;
752+
}
753+
return true;
754+
}
755+
763756
static blk_status_t nvme_pci_setup_data_prp(struct request *req,
764757
struct blk_dma_iter *iter)
765758
{
@@ -770,6 +763,16 @@ static blk_status_t nvme_pci_setup_data_prp(struct request *req,
770763
unsigned int prp_len, i;
771764
__le64 *prp_list;
772765

766+
if (dma_need_unmap(nvmeq->dev->dev)) {
767+
iod->dma_vecs = mempool_alloc(nvmeq->dev->dmavec_mempool,
768+
GFP_ATOMIC);
769+
if (!iod->dma_vecs)
770+
return BLK_STS_RESOURCE;
771+
iod->dma_vecs[0].addr = iter->addr;
772+
iod->dma_vecs[0].len = iter->len;
773+
iod->nr_dma_vecs = 1;
774+
}
775+
773776
/*
774777
* PRP1 always points to the start of the DMA transfers.
775778
*
@@ -786,13 +789,10 @@ static blk_status_t nvme_pci_setup_data_prp(struct request *req,
786789
if (!length)
787790
goto done;
788791

789-
if (!iter->len) {
790-
if (!blk_rq_dma_map_iter_next(req, nvmeq->dev->dev,
791-
&iod->dma_state, iter)) {
792-
if (WARN_ON_ONCE(!iter->status))
793-
goto bad_sgl;
794-
goto done;
795-
}
792+
if (!nvme_pci_prp_iter_next(req, nvmeq->dev->dev, iter)) {
793+
if (WARN_ON_ONCE(!iter->status))
794+
goto bad_sgl;
795+
goto done;
796796
}
797797

798798
/*
@@ -831,13 +831,10 @@ static blk_status_t nvme_pci_setup_data_prp(struct request *req,
831831
if (!length)
832832
break;
833833

834-
if (iter->len == 0) {
835-
if (!blk_rq_dma_map_iter_next(req, nvmeq->dev->dev,
836-
&iod->dma_state, iter)) {
837-
if (WARN_ON_ONCE(!iter->status))
838-
goto bad_sgl;
839-
goto done;
840-
}
834+
if (!nvme_pci_prp_iter_next(req, nvmeq->dev->dev, iter)) {
835+
if (WARN_ON_ONCE(!iter->status))
836+
goto bad_sgl;
837+
goto done;
841838
}
842839

843840
/*
@@ -3025,14 +3022,25 @@ static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
30253022
static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
30263023
{
30273024
size_t meta_size = sizeof(struct scatterlist) * (NVME_MAX_META_SEGS + 1);
3025+
size_t alloc_size = sizeof(struct nvme_dma_vec) * NVME_MAX_SEGS;
3026+
3027+
dev->dmavec_mempool = mempool_create_node(1,
3028+
mempool_kmalloc, mempool_kfree,
3029+
(void *)alloc_size, GFP_KERNEL,
3030+
dev_to_node(dev->dev));
3031+
if (!dev->dmavec_mempool)
3032+
return -ENOMEM;
30283033

30293034
dev->iod_meta_mempool = mempool_create_node(1,
30303035
mempool_kmalloc, mempool_kfree,
30313036
(void *)meta_size, GFP_KERNEL,
30323037
dev_to_node(dev->dev));
30333038
if (!dev->iod_meta_mempool)
3034-
return -ENOMEM;
3039+
goto free;
30353040
return 0;
3041+
free:
3042+
mempool_destroy(dev->dmavec_mempool);
3043+
return -ENOMEM;
30363044
}
30373045

30383046
static void nvme_free_tagset(struct nvme_dev *dev)
@@ -3477,6 +3485,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
34773485
nvme_dbbuf_dma_free(dev);
34783486
nvme_free_queues(dev, 0);
34793487
out_release_iod_mempool:
3488+
mempool_destroy(dev->dmavec_mempool);
34803489
mempool_destroy(dev->iod_meta_mempool);
34813490
out_dev_unmap:
34823491
nvme_dev_unmap(dev);
@@ -3540,6 +3549,7 @@ static void nvme_remove(struct pci_dev *pdev)
35403549
nvme_dev_remove_admin(dev);
35413550
nvme_dbbuf_dma_free(dev);
35423551
nvme_free_queues(dev, 0);
3552+
mempool_destroy(dev->dmavec_mempool);
35433553
mempool_destroy(dev->iod_meta_mempool);
35443554
nvme_release_descriptor_pools(dev);
35453555
nvme_dev_unmap(dev);

0 commit comments

Comments (0)