From 8419770f37f0543bc5c2390651c46892dc520018 Mon Sep 17 00:00:00 2001 From: Romuald JEANNE Date: Tue, 16 Mar 2021 08:58:56 +0100 Subject: [PATCH 05/22] ARM 5.10.10-stm32mp1-r1 DMA --- drivers/dma/dmaengine.c | 34 ++ drivers/dma/stm32-dma.c | 1011 ++++++++++++++++++++++++++++++++----- drivers/dma/stm32-mdma.c | 188 +++++-- include/linux/dmaengine.h | 11 + 4 files changed, 1070 insertions(+), 174 deletions(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 962cbb5e5f7f..1381f15eb6f2 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -873,6 +873,33 @@ struct dma_chan *dma_request_chan(struct device *dev, const char *name) } EXPORT_SYMBOL_GPL(dma_request_chan); +/** + * dma_request_chan_linked - try to allocate an exclusive slave channel + * @dev: pointer to client device structure + * @name: slave channel name + * + * Returns pointer to appropriate DMA channel on success or an error pointer. + * Create device link between DMA channel provider and client device consumer. + */ +struct dma_chan *dma_request_chan_linked(struct device *dev, const char *name) +{ + struct dma_chan *chan = dma_request_chan(dev, name); + struct device *provider_dev = chan->device->dev; + struct device_link *link; + + if (!IS_ERR_OR_NULL(chan)) { + link = device_link_add(dev, provider_dev, DL_FLAG_STATELESS); + if (!link) { + dev_err(provider_dev, "failed to add dev link with %s\n", dev_name(dev)); + dma_release_channel(chan); + return ERR_PTR(-EINVAL); + } + } + + return chan; +} +EXPORT_SYMBOL_GPL(dma_request_chan_linked); + /** * dma_request_chan_by_mask - allocate a channel satisfying certain capabilities * @mask: capabilities that the channel must satisfy @@ -926,6 +953,13 @@ void dma_release_channel(struct dma_chan *chan) } EXPORT_SYMBOL_GPL(dma_release_channel); +void dma_release_chan_linked(struct device *dev, struct dma_chan *chan) +{ + device_link_remove(dev, chan->device->dev); + dma_release_channel(chan); +} +EXPORT_SYMBOL_GPL(dma_release_chan_linked); + /** * dmaengine_get - register interest in dma_channels */ diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index d0055d2f0b9a..1d89e0807ef0 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -14,12 +14,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -60,6 +62,7 @@ #define STM32_DMA_SCR_PSIZE_GET(n) ((n & STM32_DMA_SCR_PSIZE_MASK) >> 11) #define STM32_DMA_SCR_DIR_MASK GENMASK(7, 6) #define STM32_DMA_SCR_DIR(n) ((n & 0x3) << 6) +#define STM32_DMA_SCR_TRBUFF BIT(20) /* Bufferable transfer for USART/UART */ #define STM32_DMA_SCR_CT BIT(19) /* Target in double buffer */ #define STM32_DMA_SCR_DBM BIT(18) /* Double Buffer Mode */ #define STM32_DMA_SCR_PINCOS BIT(15) /* Peripheral inc offset size */ @@ -120,6 +123,7 @@ #define STM32_DMA_FIFO_THRESHOLD_NONE 0x04 #define STM32_DMA_MAX_DATA_ITEMS 0xffff +#define STM32_DMA_SRAM_GRANULARITY PAGE_SIZE /* * Valid transfer starts from @0 to @0xFFFE leading to unaligned scatter * gather at boundary. 
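For reference, a minimal consumer-side sketch of the new linked-channel helpers added to dmaengine.c above. The device pointer, the channel name "rx" and the function names client_dma_init/client_dma_exit are placeholders for illustration and are not part of the patch.

#include <linux/dmaengine.h>
#include <linux/err.h>

/* Hypothetical consumer: request an exclusive channel and let the core
 * add a device link so the DMA controller is treated as a supplier of
 * this client device. */
static int client_dma_init(struct device *dev, struct dma_chan **chan)
{
	*chan = dma_request_chan_linked(dev, "rx");	/* "rx" is a placeholder name */
	if (IS_ERR(*chan))
		return PTR_ERR(*chan);

	return 0;
}

static void client_dma_exit(struct device *dev, struct dma_chan *chan)
{
	/* Removes the device link added at request time, then frees the channel */
	dma_release_chan_linked(dev, chan);
}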
Thus it's safer to round down this value on FIFO @@ -140,6 +144,15 @@ #define STM32_DMA_DIRECT_MODE_MASK BIT(2) #define STM32_DMA_DIRECT_MODE_GET(n) (((n) & STM32_DMA_DIRECT_MODE_MASK) \ >> 2) +#define STM32_DMA_ALT_ACK_MODE_MASK BIT(4) +#define STM32_DMA_ALT_ACK_MODE_GET(n) (((n) & STM32_DMA_ALT_ACK_MODE_MASK) \ + >> 4) +#define STM32_DMA_MDMA_CHAIN_FTR_MASK BIT(31) +#define STM32_DMA_MDMA_CHAIN_FTR_GET(n) (((n) & STM32_DMA_MDMA_CHAIN_FTR_MASK) \ + >> 31) +#define STM32_DMA_MDMA_SRAM_SIZE_MASK GENMASK(30, 29) +#define STM32_DMA_MDMA_SRAM_SIZE_GET(n) (((n) & STM32_DMA_MDMA_SRAM_SIZE_MASK) \ + >> 29) enum stm32_dma_width { STM32_DMA_BYTE, @@ -181,15 +194,32 @@ struct stm32_dma_chan_reg { u32 dma_sfcr; }; +struct stm32_dma_mdma_desc { + struct sg_table sgt; + struct dma_async_tx_descriptor *desc; +}; + +struct stm32_dma_mdma { + struct dma_chan *chan; + enum dma_transfer_direction dir; + dma_addr_t sram_buf; + u32 sram_period; + u32 num_sgs; +}; + struct stm32_dma_sg_req { - u32 len; + struct scatterlist stm32_sgl_req; struct stm32_dma_chan_reg chan_reg; + struct stm32_dma_mdma_desc m_desc; }; struct stm32_dma_desc { struct virt_dma_desc vdesc; bool cyclic; u32 num_sgs; + dma_addr_t dma_buf; + void *dma_buf_cpu; + u32 dma_buf_size; struct stm32_dma_sg_req sg_req[]; }; @@ -206,6 +236,10 @@ struct stm32_dma_chan { u32 threshold; u32 mem_burst; u32 mem_width; + struct stm32_dma_mdma mchan; + u32 use_mdma; + u32 sram_size; + u32 residue_after_drain; }; struct stm32_dma_device { @@ -214,6 +248,7 @@ struct stm32_dma_device { struct clk *clk; bool mem2mem; struct stm32_dma_chan chan[STM32_DMA_MAX_CHANNELS]; + struct gen_pool *sram_pool; }; static struct stm32_dma_device *stm32_dma_get_dev(struct stm32_dma_chan *chan) @@ -264,6 +299,7 @@ static int stm32_dma_get_width(struct stm32_dma_chan *chan, } static enum dma_slave_buswidth stm32_dma_get_max_width(u32 buf_len, + u64 buf_addr, u32 threshold) { enum dma_slave_buswidth max_width; @@ -277,6 +313,9 @@ static enum dma_slave_buswidth stm32_dma_get_max_width(u32 buf_len, max_width > DMA_SLAVE_BUSWIDTH_1_BYTE) max_width = max_width >> 1; + if (do_div(buf_addr, max_width)) + max_width = DMA_SLAVE_BUSWIDTH_1_BYTE; + return max_width; } @@ -484,12 +523,20 @@ static void stm32_dma_stop(struct stm32_dma_chan *chan) static int stm32_dma_terminate_all(struct dma_chan *c) { struct stm32_dma_chan *chan = to_stm32_dma_chan(c); + struct stm32_dma_mdma *mchan = &chan->mchan; unsigned long flags; LIST_HEAD(head); - spin_lock_irqsave(&chan->vchan.lock, flags); + if (chan->use_mdma) { + spin_lock_irqsave_nested(&chan->vchan.lock, flags, + SINGLE_DEPTH_NESTING); + dmaengine_terminate_async(mchan->chan); + } else { + spin_lock_irqsave(&chan->vchan.lock, flags); + } if (chan->desc) { + dma_cookie_complete(&chan->desc->vdesc.tx); vchan_terminate_vdesc(&chan->desc->vdesc); if (chan->busy) stm32_dma_stop(chan); @@ -503,9 +550,103 @@ static int stm32_dma_terminate_all(struct dma_chan *c) return 0; } +static u32 stm32_dma_get_remaining_bytes(struct stm32_dma_chan *chan) +{ + u32 dma_scr, width, ndtr; + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + + dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id)); + width = STM32_DMA_SCR_PSIZE_GET(dma_scr); + ndtr = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id)); + + return ndtr << width; +} + +static int stm32_dma_mdma_drain(struct stm32_dma_chan *chan) +{ + struct stm32_dma_mdma *mchan = &chan->mchan; + struct stm32_dma_sg_req *sg_req; + struct dma_device *ddev = mchan->chan->device; + struct 
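As a hedged illustration of the new feature bits introduced above (bit 31 requests DMA/MDMA chaining, bits 30:29 encode the SRAM buffer size), the sketch below composes a "features" word the way stm32_dma_set_config() later decodes it. It assumes the file-local masks defined above are visible; the helper name and parameters are hypothetical, and sram_order is limited to 0..3.

#include <linux/types.h>

/* Illustrative only: build the features cell consumed by
 * stm32_dma_set_config(). The SRAM buffer ends up being
 * (1 << sram_order) * STM32_DMA_SRAM_GRANULARITY bytes. */
static u32 stm32_dma_features(u32 fifo_threshold, bool mdma_chain, u32 sram_order)
{
	u32 features = fifo_threshold & STM32_DMA_THRESHOLD_FTR_MASK;

	if (mdma_chain)
		features |= STM32_DMA_MDMA_CHAIN_FTR_MASK | (sram_order << 29);

	return features;
}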
dma_async_tx_descriptor *desc = NULL; + enum dma_status status; + dma_addr_t src_buf, dst_buf; + u32 mdma_residue, mdma_wrote, dma_to_write, len; + struct dma_tx_state state; + int ret; + unsigned long flags; + + /* DMA/MDMA chain: drain remaining data in SRAM */ + + /* Get the residue on MDMA side */ + status = dmaengine_tx_status(mchan->chan, mchan->chan->cookie, &state); + if (status == DMA_COMPLETE) + return status; + + mdma_residue = state.residue; + sg_req = &chan->desc->sg_req[chan->next_sg - 1]; + len = sg_dma_len(&sg_req->stm32_sgl_req); + + /* + * Total = mdma blocks * sram_period + rest (< sram_period) + * so mdma blocks * sram_period = len - mdma residue - rest + */ + mdma_wrote = len - mdma_residue - (len % mchan->sram_period); + + /* Remaining data stuck in SRAM */ + dma_to_write = mchan->sram_period - stm32_dma_get_remaining_bytes(chan); + if (dma_to_write > 0) { + spin_lock_irqsave_nested(&chan->vchan.lock, flags, + SINGLE_DEPTH_NESTING); + + /* Terminate current MDMA to initiate a new one */ + dmaengine_terminate_async(mchan->chan); + + /* Stop DMA current operation */ + stm32_dma_disable_chan(chan); + + spin_unlock_irqrestore(&chan->vchan.lock, flags); + + /* Double buffer management */ + src_buf = mchan->sram_buf + + ((mdma_wrote / mchan->sram_period) & 0x1) * + mchan->sram_period; + dst_buf = sg_dma_address(&sg_req->stm32_sgl_req) + mdma_wrote; + + desc = ddev->device_prep_dma_memcpy(mchan->chan, + dst_buf, src_buf, + dma_to_write, + DMA_PREP_INTERRUPT); + if (!desc) + return -EINVAL; + + ret = dma_submit_error(dmaengine_submit(desc)); + if (ret < 0) + return ret; + + status = dma_wait_for_async_tx(desc); + if (status != DMA_COMPLETE) { + dev_err(chan2dev(chan), + "%s dma_wait_for_async_tx error\n", __func__); + dmaengine_terminate_async(mchan->chan); + return -EBUSY; + } + + /* We need to store residue for tx_status() */ + chan->residue_after_drain = len - (mdma_wrote + dma_to_write); + } + + return 0; +} + static void stm32_dma_synchronize(struct dma_chan *c) { struct stm32_dma_chan *chan = to_stm32_dma_chan(c); + struct stm32_dma_mdma *mchan = &chan->mchan; + + if (chan->desc && chan->use_mdma && mchan->dir == DMA_DEV_TO_MEM) + if (stm32_dma_mdma_drain(chan)) + dev_err(chan2dev(chan), "%s: can't drain DMA\n", + __func__); vchan_synchronize(&chan->vchan); } @@ -528,65 +669,213 @@ static void stm32_dma_dump_reg(struct stm32_dma_chan *chan) dev_dbg(chan2dev(chan), "SFCR: 0x%08x\n", sfcr); } -static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan); +static int stm32_dma_dummy_memcpy_xfer(struct stm32_dma_chan *chan) +{ + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + struct dma_device *ddev = &dmadev->ddev; + struct stm32_dma_chan_reg reg; + u8 src_buf, dst_buf; + dma_addr_t dma_src_buf, dma_dst_buf; + u32 ndtr, status; + int len, ret; + + ret = 0; + src_buf = 0; + len = 1; + + dma_src_buf = dma_map_single(ddev->dev, &src_buf, len, DMA_TO_DEVICE); + ret = dma_mapping_error(ddev->dev, dma_src_buf); + if (ret < 0) { + dev_err(chan2dev(chan), "Source buffer map failed\n"); + return ret; + } + + dma_dst_buf = dma_map_single(ddev->dev, &dst_buf, len, DMA_FROM_DEVICE); + ret = dma_mapping_error(ddev->dev, dma_dst_buf); + if (ret < 0) { + dev_err(chan2dev(chan), "Destination buffer map failed\n"); + dma_unmap_single(ddev->dev, dma_src_buf, len, DMA_TO_DEVICE); + return ret; + } -static void stm32_dma_start_transfer(struct stm32_dma_chan *chan) + reg.dma_scr = STM32_DMA_SCR_DIR(STM32_DMA_MEM_TO_MEM) | + STM32_DMA_SCR_PBURST(STM32_DMA_BURST_SINGLE) | + 
STM32_DMA_SCR_MBURST(STM32_DMA_BURST_SINGLE) | + STM32_DMA_SCR_MINC | + STM32_DMA_SCR_PINC | + STM32_DMA_SCR_TEIE; + reg.dma_spar = dma_src_buf; + reg.dma_sm0ar = dma_dst_buf; + reg.dma_sfcr = STM32_DMA_SFCR_MASK | + STM32_DMA_SFCR_FTH(STM32_DMA_FIFO_THRESHOLD_FULL); + reg.dma_sm1ar = dma_dst_buf; + reg.dma_sndtr = 1; + + stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg.dma_scr); + stm32_dma_write(dmadev, STM32_DMA_SPAR(chan->id), reg.dma_spar); + stm32_dma_write(dmadev, STM32_DMA_SM0AR(chan->id), reg.dma_sm0ar); + stm32_dma_write(dmadev, STM32_DMA_SFCR(chan->id), reg.dma_sfcr); + stm32_dma_write(dmadev, STM32_DMA_SM1AR(chan->id), reg.dma_sm1ar); + stm32_dma_write(dmadev, STM32_DMA_SNDTR(chan->id), reg.dma_sndtr); + + /* Clear interrupt status if it is there */ + status = stm32_dma_irq_status(chan); + if (status) + stm32_dma_irq_clear(chan, status); + + stm32_dma_dump_reg(chan); + + chan->busy = true; + /* Start DMA */ + reg.dma_scr |= STM32_DMA_SCR_EN; + stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg.dma_scr); + + ret = readl_relaxed_poll_timeout_atomic(dmadev->base + + STM32_DMA_SNDTR(chan->id), + ndtr, !ndtr, 10, 1000); + if (ret) { + dev_err(chan2dev(chan), "%s: timeout!\n", __func__); + ret = -EBUSY; + } + + chan->busy = false; + + ret = stm32_dma_disable_chan(chan); + status = stm32_dma_irq_status(chan); + if (status) + stm32_dma_irq_clear(chan, status); + + dma_unmap_single(ddev->dev, dma_src_buf, len, DMA_TO_DEVICE); + dma_unmap_single(ddev->dev, dma_dst_buf, len, DMA_FROM_DEVICE); + + return ret; +} + +static int stm32_dma_mdma_flush_remaining(struct stm32_dma_chan *chan) { struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); - struct virt_dma_desc *vdesc; + struct stm32_dma_mdma *mchan = &chan->mchan; struct stm32_dma_sg_req *sg_req; - struct stm32_dma_chan_reg *reg; - u32 status; + struct dma_device *ddev = mchan->chan->device; + struct dma_async_tx_descriptor *desc = NULL; + enum dma_status status; + dma_addr_t src_buf, dst_buf; + u32 residue, remain, len, dma_scr; int ret; - ret = stm32_dma_disable_chan(chan); - if (ret < 0) - return; + residue = stm32_dma_get_remaining_bytes(chan); + if (!residue) + return 0; - if (!chan->desc) { - vdesc = vchan_next_desc(&chan->vchan); - if (!vdesc) - return; + dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id)); + if (!(dma_scr & STM32_DMA_SCR_EN)) + return -EPERM; + + sg_req = &chan->desc->sg_req[chan->next_sg - 1]; + len = sg_dma_len(&sg_req->stm32_sgl_req); + remain = len % mchan->sram_period; + + if (len > mchan->sram_period && ((len % mchan->sram_period) != 0)) { + unsigned long dma_sync_wait_timeout = + jiffies + msecs_to_jiffies(5000); + + while (residue > 0 && + residue > (mchan->sram_period - remain)) { + if (time_after_eq(jiffies, dma_sync_wait_timeout)) { + dev_err(chan2dev(chan), + "%s timeout pending last %d bytes\n", + __func__, residue); + return -EBUSY; + } + cpu_relax(); + residue = stm32_dma_get_remaining_bytes(chan); + } + stm32_dma_disable_chan(chan); - list_del(&vdesc->node); + src_buf = mchan->sram_buf + ((len / mchan->sram_period) & 0x1) + * mchan->sram_period; + dst_buf = sg_dma_address(&sg_req->stm32_sgl_req) + len - + (len % mchan->sram_period); - chan->desc = to_stm32_dma_desc(vdesc); - chan->next_sg = 0; + desc = ddev->device_prep_dma_memcpy(mchan->chan, + dst_buf, src_buf, + len % mchan->sram_period, + DMA_PREP_INTERRUPT); + + if (!desc) + return -EINVAL; + + ret = dma_submit_error(dmaengine_submit(desc)); + if (ret < 0) + return ret; + + status = dma_wait_for_async_tx(desc); + if (status 
!= DMA_COMPLETE) { + dmaengine_terminate_async(mchan->chan); + return -EBUSY; + } } - if (chan->next_sg == chan->desc->num_sgs) - chan->next_sg = 0; + return 0; +} - sg_req = &chan->desc->sg_req[chan->next_sg]; - reg = &sg_req->chan_reg; +static void stm32_dma_start_transfer(struct stm32_dma_chan *chan); - reg->dma_scr &= ~STM32_DMA_SCR_EN; - stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg->dma_scr); - stm32_dma_write(dmadev, STM32_DMA_SPAR(chan->id), reg->dma_spar); - stm32_dma_write(dmadev, STM32_DMA_SM0AR(chan->id), reg->dma_sm0ar); - stm32_dma_write(dmadev, STM32_DMA_SFCR(chan->id), reg->dma_sfcr); - stm32_dma_write(dmadev, STM32_DMA_SM1AR(chan->id), reg->dma_sm1ar); - stm32_dma_write(dmadev, STM32_DMA_SNDTR(chan->id), reg->dma_sndtr); +static void stm32_mdma_chan_complete(void *param, + const struct dmaengine_result *result) +{ + struct stm32_dma_chan *chan = param; + int ret; - chan->next_sg++; + chan->busy = false; + if (result->result == DMA_TRANS_NOERROR) { + ret = stm32_dma_mdma_flush_remaining(chan); + if (ret) { + dev_err(chan2dev(chan), "Can't flush DMA: %d\n", ret); + return; + } - /* Clear interrupt status if it is there */ - status = stm32_dma_irq_status(chan); - if (status) - stm32_dma_irq_clear(chan, status); + if (chan->next_sg == chan->desc->num_sgs) { + vchan_cookie_complete(&chan->desc->vdesc); + chan->desc = NULL; + } + stm32_dma_start_transfer(chan); + } else { + dev_err(chan2dev(chan), "MDMA transfer error: %d\n", + result->result); + } +} - if (chan->desc->cyclic) - stm32_dma_configure_next_sg(chan); +static int stm32_dma_mdma_start(struct stm32_dma_chan *chan, + struct stm32_dma_sg_req *sg_req) +{ + struct stm32_dma_mdma *mchan = &chan->mchan; + struct stm32_dma_mdma_desc *m_desc = &sg_req->m_desc; + int ret; - stm32_dma_dump_reg(chan); + ret = dma_submit_error(dmaengine_submit(m_desc->desc)); + if (ret < 0) { + dev_err(chan2dev(chan), "MDMA submit failed\n"); + goto error; + } - /* Start DMA */ - reg->dma_scr |= STM32_DMA_SCR_EN; - stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg->dma_scr); + dma_async_issue_pending(mchan->chan); - chan->busy = true; + /* + * In case of M2D transfer, we have to generate dummy DMA transfer to + * copy 1st sg data into SRAM + */ + if (mchan->dir == DMA_MEM_TO_DEV) { + ret = stm32_dma_dummy_memcpy_xfer(chan); + if (ret < 0) { + dmaengine_terminate_async(mchan->chan); + goto error; + } + } - dev_dbg(chan2dev(chan), "vchan %pK: started\n", &chan->vchan); + return 0; +error: + return ret; } static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan) @@ -618,22 +907,134 @@ static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan) } } -static void stm32_dma_handle_chan_done(struct stm32_dma_chan *chan) +static void stm32_dma_start_transfer(struct stm32_dma_chan *chan) { - if (chan->desc) { - if (chan->desc->cyclic) { - vchan_cyclic_callback(&chan->desc->vdesc); - chan->next_sg++; - stm32_dma_configure_next_sg(chan); + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + struct virt_dma_desc *vdesc; + struct stm32_dma_sg_req *sg_req; + struct stm32_dma_chan_reg *reg; + u32 status; + int ret; + + ret = stm32_dma_disable_chan(chan); + if (ret < 0) + return; + + if (!chan->desc) { + vdesc = vchan_next_desc(&chan->vchan); + if (!vdesc) + return; + + list_del(&vdesc->node); + + chan->desc = to_stm32_dma_desc(vdesc); + chan->next_sg = 0; + } else { + vdesc = &chan->desc->vdesc; + } + + if (chan->next_sg == chan->desc->num_sgs) + chan->next_sg = 0; + + sg_req = &chan->desc->sg_req[chan->next_sg]; + reg = 
&sg_req->chan_reg; + + /* Clear interrupt status if it is there */ + status = stm32_dma_irq_status(chan); + if (status) + stm32_dma_irq_clear(chan, status); + + if (chan->use_mdma) { + if (chan->next_sg == 0) { + struct stm32_dma_mdma_desc *m_desc; + + m_desc = &sg_req->m_desc; + if (chan->desc->cyclic) { + /* + * If one callback is set, it will be called by + * MDMA driver. + */ + if (vdesc->tx.callback) { + m_desc->desc->callback = + vdesc->tx.callback; + m_desc->desc->callback_param = + vdesc->tx.callback_param; + vdesc->tx.callback = NULL; + vdesc->tx.callback_param = NULL; + } + } + } + + if (chan->mchan.dir == DMA_MEM_TO_DEV) { + ret = stm32_dma_dummy_memcpy_xfer(chan); + if (ret < 0) { + dmaengine_terminate_async(chan->mchan.chan); + chan->desc = NULL; + return; + } } else { - chan->busy = false; - if (chan->next_sg == chan->desc->num_sgs) { - vchan_cookie_complete(&chan->desc->vdesc); + reg->dma_scr &= ~STM32_DMA_SCR_TCIE; + } + + if (!chan->desc->cyclic) { + /* MDMA already started */ + if (chan->mchan.dir != DMA_MEM_TO_DEV && + sg_dma_len(&sg_req->stm32_sgl_req) > + chan->mchan.sram_period) + reg->dma_scr |= STM32_DMA_SCR_DBM; + ret = stm32_dma_mdma_start(chan, sg_req); + if (ret < 0) { chan->desc = NULL; + return; } - stm32_dma_start_transfer(chan); } } + + chan->next_sg++; + + reg->dma_scr &= ~STM32_DMA_SCR_EN; + stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg->dma_scr); + stm32_dma_write(dmadev, STM32_DMA_SPAR(chan->id), reg->dma_spar); + stm32_dma_write(dmadev, STM32_DMA_SM0AR(chan->id), reg->dma_sm0ar); + stm32_dma_write(dmadev, STM32_DMA_SFCR(chan->id), reg->dma_sfcr); + stm32_dma_write(dmadev, STM32_DMA_SM1AR(chan->id), reg->dma_sm1ar); + stm32_dma_write(dmadev, STM32_DMA_SNDTR(chan->id), reg->dma_sndtr); + + if (chan->desc->cyclic) + stm32_dma_configure_next_sg(chan); + + stm32_dma_dump_reg(chan); + + /* Start DMA */ + chan->busy = true; + reg->dma_scr |= STM32_DMA_SCR_EN; + stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg->dma_scr); + + dev_dbg(chan2dev(chan), "vchan %pK: started\n", &chan->vchan); +} + +static void stm32_dma_handle_chan_done(struct stm32_dma_chan *chan) +{ + if (!chan->desc) + return; + + if (chan->desc->cyclic) { + vchan_cyclic_callback(&chan->desc->vdesc); + if (chan->use_mdma) + return; + chan->next_sg++; + stm32_dma_configure_next_sg(chan); + } else { + chan->busy = false; + if (chan->use_mdma && chan->mchan.dir != DMA_MEM_TO_DEV) + return; + if (chan->next_sg == chan->desc->num_sgs) { + vchan_cookie_complete(&chan->desc->vdesc); + chan->desc = NULL; + } + + stm32_dma_start_transfer(chan); + } } static irqreturn_t stm32_dma_chan_irq(int irq, void *devid) @@ -648,21 +1049,12 @@ static irqreturn_t stm32_dma_chan_irq(int irq, void *devid) scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id)); sfcr = stm32_dma_read(dmadev, STM32_DMA_SFCR(chan->id)); - if (status & STM32_DMA_TCI) { - stm32_dma_irq_clear(chan, STM32_DMA_TCI); - if (scr & STM32_DMA_SCR_TCIE) - stm32_dma_handle_chan_done(chan); - status &= ~STM32_DMA_TCI; - } - if (status & STM32_DMA_HTI) { - stm32_dma_irq_clear(chan, STM32_DMA_HTI); - status &= ~STM32_DMA_HTI; - } if (status & STM32_DMA_FEI) { stm32_dma_irq_clear(chan, STM32_DMA_FEI); status &= ~STM32_DMA_FEI; if (sfcr & STM32_DMA_SFCR_FEIE) { - if (!(scr & STM32_DMA_SCR_EN)) + if (!(scr & STM32_DMA_SCR_EN) && + !(status & STM32_DMA_TCI)) dev_err(chan2dev(chan), "FIFO Error\n"); else dev_dbg(chan2dev(chan), "FIFO over/underrun\n"); @@ -674,6 +1066,19 @@ static irqreturn_t stm32_dma_chan_irq(int irq, void *devid) if (sfcr & 
STM32_DMA_SCR_DMEIE) dev_dbg(chan2dev(chan), "Direct mode overrun\n"); } + + if (status & STM32_DMA_TCI) { + stm32_dma_irq_clear(chan, STM32_DMA_TCI); + if (scr & STM32_DMA_SCR_TCIE) + stm32_dma_handle_chan_done(chan); + status &= ~STM32_DMA_TCI; + } + + if (status & STM32_DMA_HTI) { + stm32_dma_irq_clear(chan, STM32_DMA_HTI); + status &= ~STM32_DMA_HTI; + } + if (status) { stm32_dma_irq_clear(chan, status); dev_err(chan2dev(chan), "DMA error: status=0x%08x\n", status); @@ -691,19 +1096,25 @@ static void stm32_dma_issue_pending(struct dma_chan *c) struct stm32_dma_chan *chan = to_stm32_dma_chan(c); unsigned long flags; - spin_lock_irqsave(&chan->vchan.lock, flags); + if (chan->use_mdma) + spin_lock_irqsave_nested(&chan->vchan.lock, flags, + SINGLE_DEPTH_NESTING); + else + spin_lock_irqsave(&chan->vchan.lock, flags); + if (vchan_issue_pending(&chan->vchan) && !chan->desc && !chan->busy) { dev_dbg(chan2dev(chan), "vchan %pK: issued\n", &chan->vchan); stm32_dma_start_transfer(chan); } + spin_unlock_irqrestore(&chan->vchan.lock, flags); } static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan, enum dma_transfer_direction direction, enum dma_slave_buswidth *buswidth, - u32 buf_len) + u32 buf_len, u64 buf_addr) { enum dma_slave_buswidth src_addr_width, dst_addr_width; int src_bus_width, dst_bus_width; @@ -735,14 +1146,21 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan, return dst_burst_size; /* Set memory data size */ - src_addr_width = stm32_dma_get_max_width(buf_len, fifoth); + src_addr_width = stm32_dma_get_max_width(buf_len, buf_addr, + fifoth); chan->mem_width = src_addr_width; src_bus_width = stm32_dma_get_width(chan, src_addr_width); if (src_bus_width < 0) return src_bus_width; - /* Set memory burst size */ - src_maxburst = STM32_DMA_MAX_BURST; + /* + * Set memory burst size - burst not possible if address is not aligned on + * the address boundary equal to the size of the transfer + */ + if (do_div(buf_addr, buf_len)) + src_maxburst = 1; + else + src_maxburst = STM32_DMA_MAX_BURST; src_best_burst = stm32_dma_get_best_burst(buf_len, src_maxburst, fifoth, @@ -784,14 +1202,21 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan, return src_burst_size; /* Set memory data size */ - dst_addr_width = stm32_dma_get_max_width(buf_len, fifoth); + dst_addr_width = stm32_dma_get_max_width(buf_len, buf_addr, + fifoth); chan->mem_width = dst_addr_width; dst_bus_width = stm32_dma_get_width(chan, dst_addr_width); if (dst_bus_width < 0) return dst_bus_width; - /* Set memory burst size */ - dst_maxburst = STM32_DMA_MAX_BURST; + /* + * Set memory burst size - burst not possible if address is not aligned on + * the address boundary equal to the size of the transfer + */ + if (do_div(buf_addr, buf_len)) + dst_maxburst = 1; + else + dst_maxburst = STM32_DMA_MAX_BURST; dst_best_burst = stm32_dma_get_best_burst(buf_len, dst_maxburst, fifoth, @@ -838,6 +1263,162 @@ static void stm32_dma_clear_reg(struct stm32_dma_chan_reg *regs) memset(regs, 0, sizeof(struct stm32_dma_chan_reg)); } +static int stm32_dma_mdma_prep_slave_sg(struct stm32_dma_chan *chan, + struct scatterlist *sgl, u32 sg_len, + struct stm32_dma_desc *desc, + unsigned long flags) +{ + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + struct stm32_dma_mdma *mchan = &chan->mchan; + struct scatterlist *sg, *m_sg; + dma_addr_t dma_buf; + u32 len, num_sgs, sram_period; + int i, j, ret; + + desc->dma_buf_cpu = gen_pool_dma_alloc(dmadev->sram_pool, + chan->sram_size, + &desc->dma_buf); + if 
(!desc->dma_buf_cpu) + return -ENOMEM; + desc->dma_buf_size = chan->sram_size; + + sram_period = chan->sram_size / 2; + + for_each_sg(sgl, sg, sg_len, i) { + struct stm32_dma_mdma_desc *m_desc = &desc->sg_req[i].m_desc; + struct dma_slave_config config; + + len = sg_dma_len(sg); + desc->sg_req[i].stm32_sgl_req = *sg; + num_sgs = 1; + + if (mchan->dir == DMA_MEM_TO_DEV) { + if (len > chan->sram_size) { + dev_err(chan2dev(chan), + "max buf size = %d bytes\n", + chan->sram_size); + ret = -EINVAL; + goto free_alloc; + } + } else { + /* + * Build new sg for MDMA transfer + * Scatter DMA Req into several SDRAM transfer + */ + if (len > sram_period) + num_sgs = len / sram_period; + } + + ret = sg_alloc_table(&m_desc->sgt, num_sgs, GFP_ATOMIC); + if (ret) { + dev_err(chan2dev(chan), "MDMA sg table alloc failed\n"); + ret = -ENOMEM; + goto err; + } + + dma_buf = sg_dma_address(sg); + for_each_sg(m_desc->sgt.sgl, m_sg, num_sgs, j) { + size_t bytes = min_t(size_t, len, sram_period); + + sg_dma_address(m_sg) = dma_buf; + sg_dma_len(m_sg) = bytes; + dma_buf += bytes; + len -= bytes; + } + + /* Configure MDMA channel */ + memset(&config, 0, sizeof(config)); + if (mchan->dir == DMA_MEM_TO_DEV) + config.dst_addr = desc->dma_buf; + else + config.src_addr = desc->dma_buf; + + ret = dmaengine_slave_config(mchan->chan, &config); + if (ret < 0) + goto err; + + /* Prepare MDMA descriptor */ + m_desc->desc = dmaengine_prep_slave_sg(mchan->chan, + m_desc->sgt.sgl, + m_desc->sgt.nents, + mchan->dir, + DMA_PREP_INTERRUPT); + + if (!m_desc->desc) { + ret = -EINVAL; + goto err; + } + + if (flags & DMA_CTRL_REUSE) + dmaengine_desc_set_reuse(m_desc->desc); + + if (mchan->dir != DMA_MEM_TO_DEV) { + m_desc->desc->callback_result = + stm32_mdma_chan_complete; + m_desc->desc->callback_param = chan; + } + } + + chan->mchan.sram_buf = desc->dma_buf; + chan->mchan.sram_period = sram_period; + chan->mchan.num_sgs = num_sgs; + + return 0; + +err: + for (j = 0; j < i; j++) { + struct stm32_dma_mdma_desc *m_desc = &desc->sg_req[j].m_desc; + + m_desc->desc = NULL; + sg_free_table(&desc->sg_req[j].m_desc.sgt); + } +free_alloc: + gen_pool_free(dmadev->sram_pool, (unsigned long)desc->dma_buf_cpu, + desc->dma_buf_size); + return ret; +} + +static int stm32_dma_setup_sg_requests(struct stm32_dma_chan *chan, + struct scatterlist *sgl, + unsigned int sg_len, + enum dma_transfer_direction direction, + struct stm32_dma_desc *desc) +{ + struct scatterlist *sg; + u32 nb_data_items; + int i, ret; + enum dma_slave_buswidth buswidth; + + for_each_sg(sgl, sg, sg_len, i) { + ret = stm32_dma_set_xfer_param(chan, direction, &buswidth, + sg_dma_len(sg), + (u64)sg_dma_address(sg)); + if (ret < 0) + return ret; + + nb_data_items = sg_dma_len(sg) / buswidth; + if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) { + dev_err(chan2dev(chan), "nb items not supported\n"); + return -EINVAL; + } + + stm32_dma_clear_reg(&desc->sg_req[i].chan_reg); + desc->sg_req[i].chan_reg.dma_scr = chan->chan_reg.dma_scr; + desc->sg_req[i].chan_reg.dma_sfcr = chan->chan_reg.dma_sfcr; + desc->sg_req[i].chan_reg.dma_spar = chan->chan_reg.dma_spar; + desc->sg_req[i].chan_reg.dma_sm0ar = sg_dma_address(sg); + desc->sg_req[i].chan_reg.dma_sm1ar = sg_dma_address(sg); + if (chan->use_mdma) + desc->sg_req[i].chan_reg.dma_sm1ar += + chan->mchan.sram_period; + desc->sg_req[i].chan_reg.dma_sndtr = nb_data_items; + } + + desc->num_sgs = sg_len; + + return 0; +} + static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg( struct dma_chan *c, struct scatterlist *sgl, u32 sg_len, 
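A standalone sketch of the splitting policy implemented in stm32_dma_mdma_prep_slave_sg() above for DEV_TO_MEM transfers: the MDMA scatterlist only carries whole sram_period segments, and any tail bytes are copied later by stm32_dma_mdma_flush_remaining() once the DMA side has produced them. The helper name and the example figures are hypothetical.

#include <linux/types.h>

/* Illustrative only: e.g. len = 10000 and sram_period = 4096 yield
 * 2 MDMA segments plus an 1808-byte tail flushed after completion. */
static unsigned int mdma_split(size_t len, size_t sram_period, size_t *tail)
{
	unsigned int num_sgs = (len > sram_period) ? len / sram_period : 1;

	*tail = (len > sram_period) ? len - (size_t)num_sgs * sram_period : 0;
	return num_sgs;
}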
enum dma_transfer_direction direction, @@ -845,9 +1426,6 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg( { struct stm32_dma_chan *chan = to_stm32_dma_chan(c); struct stm32_dma_desc *desc; - struct scatterlist *sg; - enum dma_slave_buswidth buswidth; - u32 nb_data_items; int i, ret; if (!chan->config_init) { @@ -870,48 +1448,140 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg( else chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL; - for_each_sg(sgl, sg, sg_len, i) { - ret = stm32_dma_set_xfer_param(chan, direction, &buswidth, - sg_dma_len(sg)); - if (ret < 0) - goto err; + if (chan->use_mdma) { + struct sg_table new_sgt; + struct scatterlist *s, *_sgl; - desc->sg_req[i].len = sg_dma_len(sg); - - nb_data_items = desc->sg_req[i].len / buswidth; - if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) { - dev_err(chan2dev(chan), "nb items not supported\n"); - goto err; + chan->mchan.dir = direction; + ret = stm32_dma_mdma_prep_slave_sg(chan, sgl, sg_len, desc, + flags); + if (ret < 0) + return NULL; + + ret = sg_alloc_table(&new_sgt, sg_len, GFP_ATOMIC); + if (ret) + dev_err(chan2dev(chan), "DMA sg table alloc failed\n"); + + for_each_sg(new_sgt.sgl, s, sg_len, i) { + _sgl = sgl; + sg_dma_len(s) = + min(sg_dma_len(_sgl), chan->mchan.sram_period); + s->dma_address = chan->mchan.sram_buf; + _sgl = sg_next(_sgl); } - stm32_dma_clear_reg(&desc->sg_req[i].chan_reg); - desc->sg_req[i].chan_reg.dma_scr = chan->chan_reg.dma_scr; - desc->sg_req[i].chan_reg.dma_sfcr = chan->chan_reg.dma_sfcr; - desc->sg_req[i].chan_reg.dma_spar = chan->chan_reg.dma_spar; - desc->sg_req[i].chan_reg.dma_sm0ar = sg_dma_address(sg); - desc->sg_req[i].chan_reg.dma_sm1ar = sg_dma_address(sg); - desc->sg_req[i].chan_reg.dma_sndtr = nb_data_items; + ret = stm32_dma_setup_sg_requests(chan, new_sgt.sgl, sg_len, + direction, desc); + sg_free_table(&new_sgt); + if (ret < 0) + goto err; + } else { + /* Prepare a normal DMA transfer */ + ret = stm32_dma_setup_sg_requests(chan, sgl, sg_len, direction, + desc); + if (ret < 0) + goto err; } - desc->num_sgs = sg_len; desc->cyclic = false; return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); - err: + if (chan->use_mdma) { + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + + for (i = 0; i < sg_len; i++) + sg_free_table(&desc->sg_req[i].m_desc.sgt); + + gen_pool_free(dmadev->sram_pool, + (unsigned long)desc->dma_buf_cpu, + desc->dma_buf_size); + } kfree(desc); return NULL; } +static int stm32_dma_mdma_prep_dma_cyclic(struct stm32_dma_chan *chan, + dma_addr_t buf_addr, size_t buf_len, + size_t period_len, + struct stm32_dma_desc *desc) +{ + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + struct stm32_dma_mdma *mchan = &chan->mchan; + struct stm32_dma_mdma_desc *m_desc = &desc->sg_req[0].m_desc; + struct dma_slave_config config; + int ret; + + chan->sram_size = ALIGN(period_len, STM32_DMA_SRAM_GRANULARITY); + desc->dma_buf_cpu = gen_pool_dma_alloc(dmadev->sram_pool, + 2 * chan->sram_size, + &desc->dma_buf); + if (!desc->dma_buf_cpu) + return -ENOMEM; + desc->dma_buf_size = 2 * chan->sram_size; + + memset(&config, 0, sizeof(config)); + + /* Configure MDMA channel */ + if (chan->mchan.dir == DMA_MEM_TO_DEV) + config.dst_addr = desc->dma_buf; + else + config.src_addr = desc->dma_buf; + ret = dmaengine_slave_config(mchan->chan, &config); + if (ret < 0) + goto err; + + /* Prepare MDMA descriptor */ + m_desc->desc = dmaengine_prep_dma_cyclic(mchan->chan, buf_addr, buf_len, + period_len, chan->mchan.dir, + DMA_PREP_INTERRUPT); + + if 
(!m_desc->desc) { + ret = -EINVAL; + goto err; + } + + ret = dma_submit_error(dmaengine_submit(m_desc->desc)); + if (ret < 0) { + dev_err(chan2dev(chan), "MDMA submit failed\n"); + goto err; + } + + dma_async_issue_pending(mchan->chan); + + /* + * In case of M2D transfer, we have to generate dummy DMA transfer to + * copy 1 period of data into SRAM + */ + if (chan->mchan.dir == DMA_MEM_TO_DEV) { + ret = stm32_dma_dummy_memcpy_xfer(chan); + if (ret < 0) { + dev_err(chan2dev(chan), + "stm32_dma_dummy_memcpy_xfer failed\n"); + dmaengine_terminate_async(mchan->chan); + goto err; + } + } + + return 0; +err: + gen_pool_free(dmadev->sram_pool, + (unsigned long)desc->dma_buf_cpu, + desc->dma_buf_size); + return ret; +} + static struct dma_async_tx_descriptor *stm32_dma_prep_dma_cyclic( struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, unsigned long flags) { struct stm32_dma_chan *chan = to_stm32_dma_chan(c); + struct stm32_dma_chan_reg *chan_reg = &chan->chan_reg; struct stm32_dma_desc *desc; enum dma_slave_buswidth buswidth; u32 num_periods, nb_data_items; + dma_addr_t dma_buf = 0; int i, ret; if (!buf_len || !period_len) { @@ -940,7 +1610,7 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_cyclic( return NULL; } - ret = stm32_dma_set_xfer_param(chan, direction, &buswidth, period_len); + ret = stm32_dma_set_xfer_param(chan, direction, &buswidth, period_len, (u64)buf_addr); if (ret < 0) return NULL; @@ -959,28 +1629,49 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_cyclic( /* Clear periph ctrl if client set it */ chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL; - num_periods = buf_len / period_len; + if (chan->use_mdma) + num_periods = 1; + else + num_periods = buf_len / period_len; desc = kzalloc(struct_size(desc, sg_req, num_periods), GFP_NOWAIT); if (!desc) return NULL; - for (i = 0; i < num_periods; i++) { - desc->sg_req[i].len = period_len; + desc->num_sgs = num_periods; + desc->cyclic = true; + if (chan->use_mdma) { + chan->mchan.dir = direction; + + ret = stm32_dma_mdma_prep_dma_cyclic(chan, buf_addr, buf_len, + period_len, desc); + if (ret < 0) + return NULL; + dma_buf = desc->dma_buf; + } else { + dma_buf = buf_addr; + } + + for (i = 0; i < num_periods; i++) { + sg_dma_len(&desc->sg_req[i].stm32_sgl_req) = period_len; + sg_dma_address(&desc->sg_req[i].stm32_sgl_req) = dma_buf; stm32_dma_clear_reg(&desc->sg_req[i].chan_reg); - desc->sg_req[i].chan_reg.dma_scr = chan->chan_reg.dma_scr; - desc->sg_req[i].chan_reg.dma_sfcr = chan->chan_reg.dma_sfcr; - desc->sg_req[i].chan_reg.dma_spar = chan->chan_reg.dma_spar; - desc->sg_req[i].chan_reg.dma_sm0ar = buf_addr; - desc->sg_req[i].chan_reg.dma_sm1ar = buf_addr; + desc->sg_req[i].chan_reg.dma_scr = chan_reg->dma_scr; + desc->sg_req[i].chan_reg.dma_sfcr = chan_reg->dma_sfcr; + desc->sg_req[i].chan_reg.dma_spar = chan_reg->dma_spar; + if (chan->use_mdma) { + desc->sg_req[i].chan_reg.dma_sm0ar = desc->dma_buf; + desc->sg_req[i].chan_reg.dma_sm1ar = desc->dma_buf + + chan->sram_size; + } else { + desc->sg_req[i].chan_reg.dma_sm0ar = dma_buf; + desc->sg_req[i].chan_reg.dma_sm1ar = dma_buf; + dma_buf += period_len; + } desc->sg_req[i].chan_reg.dma_sndtr = nb_data_items; - buf_addr += period_len; } - desc->num_sgs = num_periods; - desc->cyclic = true; - return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); } @@ -1021,13 +1712,13 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy( STM32_DMA_SCR_PINC | STM32_DMA_SCR_TCIE | STM32_DMA_SCR_TEIE; - 
desc->sg_req[i].chan_reg.dma_sfcr |= STM32_DMA_SFCR_MASK; + desc->sg_req[i].chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_MASK; desc->sg_req[i].chan_reg.dma_sfcr |= STM32_DMA_SFCR_FTH(threshold); desc->sg_req[i].chan_reg.dma_spar = src + offset; desc->sg_req[i].chan_reg.dma_sm0ar = dest + offset; desc->sg_req[i].chan_reg.dma_sndtr = xfer_count; - desc->sg_req[i].len = xfer_count; + sg_dma_len(&desc->sg_req[i].stm32_sgl_req) = xfer_count; } desc->num_sgs = num_sgs; @@ -1036,18 +1727,6 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy( return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); } -static u32 stm32_dma_get_remaining_bytes(struct stm32_dma_chan *chan) -{ - u32 dma_scr, width, ndtr; - struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); - - dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id)); - width = STM32_DMA_SCR_PSIZE_GET(dma_scr); - ndtr = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id)); - - return ndtr << width; -} - /** * stm32_dma_is_current_sg - check that expected sg_req is currently transferred * @chan: dma channel @@ -1094,6 +1773,10 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan, struct stm32_dma_sg_req *sg_req = &chan->desc->sg_req[chan->next_sg]; int i; + /* Drain case */ + if (chan->residue_after_drain) + return chan->residue_after_drain; + /* * Calculate the residue means compute the descriptors * information: @@ -1125,7 +1808,7 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan, n_sg++; if (n_sg == chan->desc->num_sgs) n_sg = 0; - residue = sg_req->len; + residue = sg_dma_len(&sg_req->stm32_sgl_req); } /* @@ -1137,7 +1820,7 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan, */ if (!chan->desc->cyclic || n_sg != 0) for (i = n_sg; i < desc->num_sgs; i++) - residue += desc->sg_req[i].len; + residue += sg_dma_len(&desc->sg_req[i].stm32_sgl_req); if (!chan->mem_burst) return residue; @@ -1155,11 +1838,23 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c, struct dma_tx_state *state) { struct stm32_dma_chan *chan = to_stm32_dma_chan(c); + struct stm32_dma_mdma *mchan = &chan->mchan; struct virt_dma_desc *vdesc; enum dma_status status; unsigned long flags; u32 residue = 0; + /* + * When DMA/MDMA chain is used, we return the status of MDMA in cyclic + * mode and for D2M transfer in sg mode in order to return the correct + * residue if any + */ + if (chan->desc && chan->use_mdma && + (mchan->dir != DMA_MEM_TO_DEV || chan->desc->cyclic) && + !chan->residue_after_drain) + return dmaengine_tx_status(mchan->chan, mchan->chan->cookie, + state); + status = dma_cookie_status(c, cookie, state); if (status == DMA_COMPLETE || !state) return status; @@ -1216,27 +1911,53 @@ static void stm32_dma_free_chan_resources(struct dma_chan *c) pm_runtime_put(dmadev->ddev.dev); vchan_free_chan_resources(to_virt_chan(c)); + stm32_dma_clear_reg(&chan->chan_reg); + chan->threshold = 0; + chan->use_mdma = false; + chan->sram_size = 0; } static void stm32_dma_desc_free(struct virt_dma_desc *vdesc) { - kfree(container_of(vdesc, struct stm32_dma_desc, vdesc)); + struct stm32_dma_desc *desc = to_stm32_dma_desc(vdesc); + struct stm32_dma_chan *chan = to_stm32_dma_chan(vdesc->tx.chan); + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + int i; + + if (chan->use_mdma) { + struct stm32_dma_mdma_desc *m_desc; + + for (i = 0; i < desc->num_sgs; i++) { + m_desc = &desc->sg_req[i].m_desc; + if (dmaengine_desc_test_reuse(&vdesc->tx)) + dmaengine_desc_free(m_desc->desc); + m_desc->desc = NULL; + 
sg_free_table(&m_desc->sgt); + } + + gen_pool_free(dmadev->sram_pool, + (unsigned long)desc->dma_buf_cpu, + desc->dma_buf_size); + } + + kfree(desc); } static void stm32_dma_set_config(struct stm32_dma_chan *chan, struct stm32_dma_cfg *cfg) { stm32_dma_clear_reg(&chan->chan_reg); - chan->chan_reg.dma_scr = cfg->stream_config & STM32_DMA_SCR_CFG_MASK; chan->chan_reg.dma_scr |= STM32_DMA_SCR_REQ(cfg->request_line); - - /* Enable Interrupts */ chan->chan_reg.dma_scr |= STM32_DMA_SCR_TEIE | STM32_DMA_SCR_TCIE; - chan->threshold = STM32_DMA_THRESHOLD_FTR_GET(cfg->features); if (STM32_DMA_DIRECT_MODE_GET(cfg->features)) chan->threshold = STM32_DMA_FIFO_THRESHOLD_NONE; + if (STM32_DMA_ALT_ACK_MODE_GET(cfg->features)) + chan->chan_reg.dma_scr |= STM32_DMA_SCR_TRBUFF; + chan->use_mdma = STM32_DMA_MDMA_CHAIN_FTR_GET(cfg->features); + chan->sram_size = (1 << STM32_DMA_MDMA_SRAM_SIZE_GET(cfg->features)) * + STM32_DMA_SRAM_GRANULARITY; } static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec, @@ -1274,6 +1995,9 @@ static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec, stm32_dma_set_config(chan, &cfg); + if (!dmadev->sram_pool || !chan->mchan.chan) + chan->use_mdma = 0; + return c; } @@ -1286,11 +2010,13 @@ MODULE_DEVICE_TABLE(of, stm32_dma_of_match); static int stm32_dma_probe(struct platform_device *pdev) { struct stm32_dma_chan *chan; + struct stm32_dma_mdma *mchan; struct stm32_dma_device *dmadev; struct dma_device *dd; const struct of_device_id *match; struct resource *res; struct reset_control *rst; + char name[4]; int i, ret; match = of_match_device(stm32_dma_of_match, &pdev->dev); @@ -1334,6 +2060,13 @@ static int stm32_dma_probe(struct platform_device *pdev) reset_control_deassert(rst); } + dmadev->sram_pool = of_gen_pool_get(pdev->dev.of_node, "sram", 0); + if (!dmadev->sram_pool) + dev_info(&pdev->dev, "no dma pool: can't use MDMA: %d\n", ret); + else + dev_dbg(&pdev->dev, "SRAM pool: %zu KiB\n", + gen_pool_size(dmadev->sram_pool) / 1024); + dma_set_max_seg_size(&pdev->dev, STM32_DMA_ALIGNED_MAX_DATA_ITEMS); dma_cap_set(DMA_SLAVE, dd->cap_mask); @@ -1373,11 +2106,27 @@ static int stm32_dma_probe(struct platform_device *pdev) chan->id = i; chan->vchan.desc_free = stm32_dma_desc_free; vchan_init(&chan->vchan, dd); + + mchan = &chan->mchan; + if (dmadev->sram_pool) { + snprintf(name, sizeof(name), "ch%d", chan->id); + mchan->chan = dma_request_chan(dd->dev, name); + if (IS_ERR(mchan->chan)) { + ret = PTR_ERR(mchan->chan); + mchan->chan = NULL; + if (ret == -EPROBE_DEFER) + goto err_dma; + + dev_info(&pdev->dev, + "can't request MDMA chan for %s\n", + name); + } + } } ret = dma_async_device_register(dd); if (ret) - goto clk_free; + goto err_dma; for (i = 0; i < STM32_DMA_MAX_CHANNELS; i++) { chan = &dmadev->chan[i]; @@ -1418,6 +2167,10 @@ static int stm32_dma_probe(struct platform_device *pdev) err_unregister: dma_async_device_unregister(dd); +err_dma: + for (i = 0; i < STM32_DMA_MAX_CHANNELS; i++) + if (dmadev->chan[i].mchan.chan) + dma_release_channel(dmadev->chan[i].mchan.chan); clk_free: clk_disable_unprepare(dmadev->clk); @@ -1499,4 +2252,4 @@ static int __init stm32_dma_init(void) { return platform_driver_register(&stm32_dma_driver); } -subsys_initcall(stm32_dma_init); +device_initcall(stm32_dma_init); diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index 08cfbfab837b..a4b25944fba4 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -199,7 +199,9 @@ #define STM32_MDMA_MAX_CHANNELS 63 #define 
STM32_MDMA_MAX_REQUESTS 256 #define STM32_MDMA_MAX_BURST 128 -#define STM32_MDMA_VERY_HIGH_PRIORITY 0x11 +#define STM32_MDMA_VERY_HIGH_PRIORITY 0x3 + +#define STM32_DMA_SRAM_GRANULARITY PAGE_SIZE enum stm32_mdma_trigger_mode { STM32_MDMA_BUFFER, @@ -227,6 +229,7 @@ struct stm32_mdma_chan_config { u32 transfer_config; u32 mask_addr; u32 mask_data; + bool m2m_hw; }; struct stm32_mdma_hwdesc { @@ -252,6 +255,7 @@ struct stm32_mdma_desc { u32 ccr; bool cyclic; u32 count; + enum dma_transfer_direction dir; struct stm32_mdma_desc_node node[]; }; @@ -566,13 +570,25 @@ static int stm32_mdma_set_xfer_param(struct stm32_mdma_chan *chan, dst_addr = chan->dma_config.dst_addr; /* Set device data size */ + if (chan_config->m2m_hw) + dst_addr_width = + stm32_mdma_get_max_width(dst_addr, buf_len, + STM32_MDMA_MAX_BUF_LEN); + dst_bus_width = stm32_mdma_get_width(chan, dst_addr_width); if (dst_bus_width < 0) return dst_bus_width; ctcr &= ~STM32_MDMA_CTCR_DSIZE_MASK; ctcr |= STM32_MDMA_CTCR_DSIZE(dst_bus_width); + if (chan_config->m2m_hw) { + ctcr &= ~STM32_MDMA_CTCR_DINCOS_MASK; + ctcr |= STM32_MDMA_CTCR_DINCOS(dst_bus_width); + } /* Set device burst value */ + if (chan_config->m2m_hw) + dst_maxburst = STM32_MDMA_MAX_BUF_LEN / dst_addr_width; + dst_best_burst = stm32_mdma_get_best_burst(buf_len, tlen, dst_maxburst, dst_addr_width); @@ -615,13 +631,25 @@ static int stm32_mdma_set_xfer_param(struct stm32_mdma_chan *chan, src_addr = chan->dma_config.src_addr; /* Set device data size */ + if (chan_config->m2m_hw) + src_addr_width = + stm32_mdma_get_max_width(src_addr, buf_len, + STM32_MDMA_MAX_BUF_LEN); + src_bus_width = stm32_mdma_get_width(chan, src_addr_width); if (src_bus_width < 0) return src_bus_width; ctcr &= ~STM32_MDMA_CTCR_SSIZE_MASK; ctcr |= STM32_MDMA_CTCR_SSIZE(src_bus_width); + if (chan_config->m2m_hw) { + ctcr &= ~STM32_MDMA_CTCR_SINCOS_MASK; + ctcr |= STM32_MDMA_CTCR_SINCOS(src_bus_width); + } /* Set device burst value */ + if (chan_config->m2m_hw) + src_maxburst = STM32_MDMA_MAX_BUF_LEN / src_addr_width; + src_best_burst = stm32_mdma_get_best_burst(buf_len, tlen, src_maxburst, src_addr_width); @@ -729,6 +757,7 @@ static int stm32_mdma_setup_xfer(struct stm32_mdma_chan *chan, { struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); struct dma_slave_config *dma_config = &chan->dma_config; + struct stm32_mdma_chan_config *chan_config = &chan->chan_config; struct scatterlist *sg; dma_addr_t src_addr, dst_addr; u32 ccr, ctcr, ctbr; @@ -751,6 +780,8 @@ static int stm32_mdma_setup_xfer(struct stm32_mdma_chan *chan, } else { src_addr = dma_config->src_addr; dst_addr = sg_dma_address(sg); + if (chan_config->m2m_hw) + src_addr += ((i & 1) ? 
sg_dma_len(sg) : 0); ret = stm32_mdma_set_xfer_param(chan, direction, &ccr, &ctcr, &ctbr, dst_addr, sg_dma_len(sg)); @@ -769,8 +800,6 @@ static int stm32_mdma_setup_xfer(struct stm32_mdma_chan *chan, /* Enable interrupts */ ccr &= ~STM32_MDMA_CCR_IRQ_MASK; ccr |= STM32_MDMA_CCR_TEIE | STM32_MDMA_CCR_CTCIE; - if (sg_len > 1) - ccr |= STM32_MDMA_CCR_BTIE; desc->ccr = ccr; return 0; @@ -782,7 +811,9 @@ stm32_mdma_prep_slave_sg(struct dma_chan *c, struct scatterlist *sgl, unsigned long flags, void *context) { struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); + struct stm32_mdma_chan_config *chan_config = &chan->chan_config; struct stm32_mdma_desc *desc; + struct stm32_mdma_hwdesc *hwdesc; int i, ret; /* @@ -804,6 +835,20 @@ stm32_mdma_prep_slave_sg(struct dma_chan *c, struct scatterlist *sgl, if (ret < 0) goto xfer_setup_err; + /* + * In case of M2M HW transfer triggered by STM32 DMA, we do not have to + * clear the transfer complete flag by hardware in order to let the + * CPU rearm the DMA with the next sg element and update some data in + * dmaengine framework + */ + if (chan_config->m2m_hw && direction == DMA_MEM_TO_DEV) { + for (i = 0; i < sg_len; i++) { + hwdesc = desc->node[i].hwdesc; + hwdesc->cmar = 0; + hwdesc->cmdr = 0; + } + } + desc->cyclic = false; return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); @@ -825,9 +870,10 @@ stm32_mdma_prep_dma_cyclic(struct dma_chan *c, dma_addr_t buf_addr, struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); struct dma_slave_config *dma_config = &chan->dma_config; + struct stm32_mdma_chan_config *chan_config = &chan->chan_config; struct stm32_mdma_desc *desc; dma_addr_t src_addr, dst_addr; - u32 ccr, ctcr, ctbr, count; + u32 ccr, ctcr, ctbr, count, offset; int i, ret; /* @@ -881,12 +927,29 @@ stm32_mdma_prep_dma_cyclic(struct dma_chan *c, dma_addr_t buf_addr, desc->ccr = ccr; /* Configure hwdesc list */ + offset = ALIGN(period_len, STM32_DMA_SRAM_GRANULARITY); for (i = 0; i < count; i++) { if (direction == DMA_MEM_TO_DEV) { + /* + * When the DMA is configured in double buffer mode, + * the MDMA has to use 2 destination buffers to be + * compliant with this mode. + */ + if (chan_config->m2m_hw && count > 1 && i % 2) + dst_addr = dma_config->dst_addr + offset; + else + dst_addr = dma_config->dst_addr; src_addr = buf_addr + i * period_len; - dst_addr = dma_config->dst_addr; } else { - src_addr = dma_config->src_addr; + /* + * When the DMA is configured in double buffer mode, + * the MDMA has to use 2 destination buffers to be + * compliant with this mode. 
+ */ + if (chan_config->m2m_hw && count > 1 && i % 2) + src_addr = dma_config->src_addr + offset; + else + src_addr = dma_config->src_addr; dst_addr = buf_addr + i * period_len; } @@ -896,6 +959,7 @@ stm32_mdma_prep_dma_cyclic(struct dma_chan *c, dma_addr_t buf_addr, } desc->cyclic = true; + desc->dir = direction; return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); @@ -1280,14 +1344,28 @@ static size_t stm32_mdma_desc_residue(struct stm32_mdma_chan *chan, { struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); struct stm32_mdma_hwdesc *hwdesc = desc->node[0].hwdesc; - u32 cbndtr, residue, modulo, burst_size; + u32 residue = 0; + u32 modulo, burst_size; + dma_addr_t next_clar; + u32 cbndtr; int i; - residue = 0; - for (i = curr_hwdesc + 1; i < desc->count; i++) { + /* + * Get the residue of pending descriptors + */ + /* Get the next hw descriptor to process from current transfer */ + next_clar = stm32_mdma_read(dmadev, STM32_MDMA_CLAR(chan->id)); + for (i = desc->count - 1; i >= 0; i--) { hwdesc = desc->node[i].hwdesc; + + if (hwdesc->clar == next_clar) + break;/* Current transfer found, stop cumulating */ + + /* Cumulate residue of unprocessed hw descriptors */ residue += STM32_MDMA_CBNDTR_BNDT(hwdesc->cbndtr); } + + /* Read & cumulate the residue of the current transfer */ cbndtr = stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id)); residue += cbndtr & STM32_MDMA_CBNDTR_BNDT_MASK; @@ -1307,24 +1385,39 @@ static enum dma_status stm32_mdma_tx_status(struct dma_chan *c, struct dma_tx_state *state) { struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); + struct stm32_mdma_chan_config *chan_config = &chan->chan_config; struct virt_dma_desc *vdesc; enum dma_status status; unsigned long flags; u32 residue = 0; status = dma_cookie_status(c, cookie, state); - if ((status == DMA_COMPLETE) || (!state)) + if (status == DMA_COMPLETE || !state) return status; spin_lock_irqsave(&chan->vchan.lock, flags); vdesc = vchan_find_desc(&chan->vchan, cookie); - if (chan->desc && cookie == chan->desc->vdesc.tx.cookie) - residue = stm32_mdma_desc_residue(chan, chan->desc, - chan->curr_hwdesc); - else if (vdesc) + if (chan->desc && cookie == chan->desc->vdesc.tx.cookie) { + /* + * In case of M2D transfer triggered by STM32 DMA, the MDMA has + * always one period in advance in cyclic mode. 
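A small sketch of the ping-pong address selection used in stm32_mdma_prep_dma_cyclic() above when the MDMA feeds an STM32 DMA channel running in double-buffer mode (m2m_hw): odd-numbered hardware descriptors target the second SRAM buffer, one aligned period above the first. The helper name is hypothetical; it assumes the STM32_DMA_SRAM_GRANULARITY macro defined earlier in this file.

#include <linux/kernel.h>
#include <linux/types.h>

/* Illustrative only: pick the SRAM target of hw descriptor i; base is
 * dma_config->dst_addr (MEM_TO_DEV) or dma_config->src_addr (DEV_TO_MEM). */
static dma_addr_t m2m_hw_sram_addr(dma_addr_t base, size_t period_len,
				   u32 i, u32 count)
{
	size_t offset = ALIGN(period_len, STM32_DMA_SRAM_GRANULARITY);

	/* even descriptors use buffer 0, odd ones use buffer 1 */
	return (count > 1 && (i % 2)) ? base + offset : base;
}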
So, we have to + * add 1 period of data to return the good residue to the + * client + */ + if (chan_config->m2m_hw && chan->desc->dir == DMA_MEM_TO_DEV && + chan->curr_hwdesc > 1) + residue = + stm32_mdma_desc_residue(chan, chan->desc, + chan->curr_hwdesc - 1); + else + residue = stm32_mdma_desc_residue(chan, chan->desc, + chan->curr_hwdesc); + } else if (vdesc) { residue = stm32_mdma_desc_residue(chan, to_stm32_mdma_desc(vdesc), 0); + } + dma_set_residue(state, residue); spin_unlock_irqrestore(&chan->vchan.lock, flags); @@ -1346,7 +1439,7 @@ static irqreturn_t stm32_mdma_irq_handler(int irq, void *devid) { struct stm32_mdma_device *dmadev = devid; struct stm32_mdma_chan *chan = devid; - u32 reg, id, ien, status, flag; + u32 reg, id, ccr, ien, status; /* Find out which channel generates the interrupt */ status = readl_relaxed(dmadev->base + STM32_MDMA_GISR0); @@ -1368,67 +1461,71 @@ static irqreturn_t stm32_mdma_irq_handler(int irq, void *devid) chan = &dmadev->chan[id]; if (!chan) { - dev_dbg(mdma2dev(dmadev), "MDMA channel not initialized\n"); - goto exit; + dev_warn(mdma2dev(dmadev), "MDMA channel not initialized\n"); + return IRQ_NONE; } /* Handle interrupt for the channel */ spin_lock(&chan->vchan.lock); - status = stm32_mdma_read(dmadev, STM32_MDMA_CISR(chan->id)); - ien = stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)); - ien &= STM32_MDMA_CCR_IRQ_MASK; - ien >>= 1; + status = stm32_mdma_read(dmadev, STM32_MDMA_CISR(id)); + /* Mask Channel ReQuest Active bit which can be set in case of MEM2MEM */ + status &= ~STM32_MDMA_CISR_CRQA; + ccr = stm32_mdma_read(dmadev, STM32_MDMA_CCR(id)); + ien = (ccr & STM32_MDMA_CCR_IRQ_MASK) >> 1; if (!(status & ien)) { spin_unlock(&chan->vchan.lock); - dev_dbg(chan2dev(chan), - "spurious it (status=0x%04x, ien=0x%04x)\n", - status, ien); + dev_warn(chan2dev(chan), + "spurious it (status=0x%04x, ien=0x%04x)\n", + status, ien); return IRQ_NONE; } - flag = __ffs(status & ien); - reg = STM32_MDMA_CIFCR(chan->id); + reg = STM32_MDMA_CIFCR(id); - switch (1 << flag) { - case STM32_MDMA_CISR_TEIF: - id = chan->id; - status = readl_relaxed(dmadev->base + STM32_MDMA_CESR(id)); - dev_err(chan2dev(chan), "Transfer Err: stat=0x%08x\n", status); + if (status & STM32_MDMA_CISR_TEIF) { + dev_err(chan2dev(chan), "Transfer Err: stat=0x%08x\n", + readl_relaxed(dmadev->base + STM32_MDMA_CESR(id))); stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CTEIF); - break; + status &= ~STM32_MDMA_CISR_TEIF; + } - case STM32_MDMA_CISR_CTCIF: + if (status & STM32_MDMA_CISR_CTCIF) { stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CCTCIF); + status &= ~STM32_MDMA_CISR_CTCIF; stm32_mdma_xfer_end(chan); - break; + } - case STM32_MDMA_CISR_BRTIF: + if (status & STM32_MDMA_CISR_BRTIF) { stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CBRTIF); - break; + status &= ~STM32_MDMA_CISR_BRTIF; + } - case STM32_MDMA_CISR_BTIF: + if (status & STM32_MDMA_CISR_BTIF) { stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CBTIF); + status &= ~STM32_MDMA_CISR_BTIF; chan->curr_hwdesc++; if (chan->desc && chan->desc->cyclic) { if (chan->curr_hwdesc == chan->desc->count) chan->curr_hwdesc = 0; vchan_cyclic_callback(&chan->desc->vdesc); } - break; + } - case STM32_MDMA_CISR_TCIF: + if (status & STM32_MDMA_CISR_TCIF) { stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CLTCIF); - break; + status &= ~STM32_MDMA_CISR_TCIF; + } - default: - dev_err(chan2dev(chan), "it %d unhandled (status=0x%04x)\n", - 1 << flag, status); + if (status) { + stm32_mdma_set_bits(dmadev, reg, status); + dev_err(chan2dev(chan), 
"DMA error: status=0x%08x\n", status); + if (!(ccr & STM32_MDMA_CCR_EN)) + dev_err(chan2dev(chan), "chan disabled by HW\n"); } spin_unlock(&chan->vchan.lock); -exit: return IRQ_HANDLED; } @@ -1488,7 +1585,7 @@ static struct dma_chan *stm32_mdma_of_xlate(struct of_phandle_args *dma_spec, struct dma_chan *c; struct stm32_mdma_chan_config config; - if (dma_spec->args_count < 5) { + if (dma_spec->args_count < 6) { dev_err(mdma2dev(dmadev), "Bad number of args\n"); return NULL; } @@ -1498,6 +1595,7 @@ static struct dma_chan *stm32_mdma_of_xlate(struct of_phandle_args *dma_spec, config.transfer_config = dma_spec->args[2]; config.mask_addr = dma_spec->args[3]; config.mask_data = dma_spec->args[4]; + config.m2m_hw = dma_spec->args[5]; if (config.request >= dmadev->nr_requests) { dev_err(mdma2dev(dmadev), "Bad request line\n"); diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index dd357a747780..42745f58412c 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1474,9 +1474,11 @@ struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, struct device_node *np); struct dma_chan *dma_request_chan(struct device *dev, const char *name); +struct dma_chan *dma_request_chan_linked(struct device *dev, const char *name); struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask); void dma_release_channel(struct dma_chan *chan); +void dma_release_chan_linked(struct device *dev, struct dma_chan *chan); int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps); #else static inline struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type) @@ -1506,6 +1508,11 @@ static inline struct dma_chan *dma_request_chan(struct device *dev, { return ERR_PTR(-ENODEV); } +static inline struct dma_chan *dma_request_chan_linked(struct device *dev, + const char *name) +{ + return ERR_PTR(-ENODEV); +} static inline struct dma_chan *dma_request_chan_by_mask( const dma_cap_mask_t *mask) { @@ -1514,6 +1521,10 @@ static inline struct dma_chan *dma_request_chan_by_mask( static inline void dma_release_channel(struct dma_chan *chan) { } +static inline void dma_release_chan_linked(struct device *dev, + struct dma_chan *chan) +{ +} static inline int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps) { -- 2.17.1