Prev: Hardware Accelerated MD RAID5: Introduction
Next: driver for mcs7830 (aka DeLOCK) USB ethernet adapter
From: Jeff Garzik on 11 Sep 2006 19:50 Dan Williams wrote: > @@ -759,8 +755,10 @@ #endif > device->common.device_memcpy_buf_to_buf = ioat_dma_memcpy_buf_to_buf; > device->common.device_memcpy_buf_to_pg = ioat_dma_memcpy_buf_to_pg; > device->common.device_memcpy_pg_to_pg = ioat_dma_memcpy_pg_to_pg; > - device->common.device_memcpy_complete = ioat_dma_is_complete; > - device->common.device_memcpy_issue_pending = ioat_dma_memcpy_issue_pending; > + device->common.device_operation_complete = ioat_dma_is_complete; > + device->common.device_xor_pgs_to_pg = dma_async_xor_pgs_to_pg_err; > + device->common.device_issue_pending = ioat_dma_memcpy_issue_pending; > + device->common.capabilities = DMA_MEMCPY; Are we really going to add a set of hooks for each DMA engine whizbang feature? That will get ugly when DMA engines support memcpy, xor, crc32, sha1, aes, and a dozen other transforms. > diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h > index c94d8f1..3599472 100644 > --- a/include/linux/dmaengine.h > +++ b/include/linux/dmaengine.h > @@ -20,7 +20,7 @@ > */ > #ifndef DMAENGINE_H > #define DMAENGINE_H > - > +#include <linux/config.h> > #ifdef CONFIG_DMA_ENGINE > > #include <linux/device.h> > @@ -65,6 +65,27 @@ enum dma_status { > }; > > /** > + * enum dma_capabilities - DMA operational capabilities > + * @DMA_MEMCPY: src to dest copy > + * @DMA_XOR: src*n to dest xor > + * @DMA_DUAL_XOR: src*n to dest_diag and dest_horiz xor > + * @DMA_PQ_XOR: src*n to dest_q and dest_p gf/xor > + * @DMA_MEMCPY_CRC32C: src to dest copy and crc-32c sum > + * @DMA_SHARE: multiple clients can use this channel > + */ > +enum dma_capabilities { > + DMA_MEMCPY = 0x1, > + DMA_XOR = 0x2, > + DMA_PQ_XOR = 0x4, > + DMA_DUAL_XOR = 0x8, > + DMA_PQ_UPDATE = 0x10, > + DMA_ZERO_SUM = 0x20, > + DMA_PQ_ZERO_SUM = 0x40, > + DMA_MEMSET = 0x80, > + DMA_MEMCPY_CRC32C = 0x100, Please use the more readable style that explicitly lists bits: DMA_MEMCPY = (1 << 0), DMA_XOR = (1 << 1), ... > +/** > * struct dma_chan_percpu - the per-CPU part of struct dma_chan > * @refcount: local_t used for open-coded "bigref" counting > * @memcpy_count: transaction counter > @@ -75,27 +96,32 @@ struct dma_chan_percpu { > local_t refcount; > /* stats */ > unsigned long memcpy_count; > + unsigned long xor_count; > unsigned long bytes_transferred; > + unsigned long bytes_xor; Clearly, each operation needs to be more compartmentalized. This just isn't scalable, when you consider all the possible transforms. Jeff - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Dan Williams on 11 Sep 2006 20:20 On 9/11/06, Jeff Garzik <jeff(a)garzik.org> wrote: > Dan Williams wrote: > > @@ -759,8 +755,10 @@ #endif > > device->common.device_memcpy_buf_to_buf = ioat_dma_memcpy_buf_to_buf; > > device->common.device_memcpy_buf_to_pg = ioat_dma_memcpy_buf_to_pg; > > device->common.device_memcpy_pg_to_pg = ioat_dma_memcpy_pg_to_pg; > > - device->common.device_memcpy_complete = ioat_dma_is_complete; > > - device->common.device_memcpy_issue_pending = ioat_dma_memcpy_issue_pending; > > + device->common.device_operation_complete = ioat_dma_is_complete; > > + device->common.device_xor_pgs_to_pg = dma_async_xor_pgs_to_pg_err; > > + device->common.device_issue_pending = ioat_dma_memcpy_issue_pending; > > + device->common.capabilities = DMA_MEMCPY; > > > Are we really going to add a set of hooks for each DMA engine whizbang > feature? What's the alternative? But, also see patch 9 "dmaengine: reduce backend address permutations" it relieves some of this pain. > > That will get ugly when DMA engines support memcpy, xor, crc32, sha1, > aes, and a dozen other transforms. > > > > diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h > > index c94d8f1..3599472 100644 > > --- a/include/linux/dmaengine.h > > +++ b/include/linux/dmaengine.h > > @@ -20,7 +20,7 @@ > > */ > > #ifndef DMAENGINE_H > > #define DMAENGINE_H > > - > > +#include <linux/config.h> > > #ifdef CONFIG_DMA_ENGINE > > > > #include <linux/device.h> > > @@ -65,6 +65,27 @@ enum dma_status { > > }; > > > > /** > > + * enum dma_capabilities - DMA operational capabilities > > + * @DMA_MEMCPY: src to dest copy > > + * @DMA_XOR: src*n to dest xor > > + * @DMA_DUAL_XOR: src*n to dest_diag and dest_horiz xor > > + * @DMA_PQ_XOR: src*n to dest_q and dest_p gf/xor > > + * @DMA_MEMCPY_CRC32C: src to dest copy and crc-32c sum > > + * @DMA_SHARE: multiple clients can use this channel > > + */ > > +enum dma_capabilities { > > + DMA_MEMCPY = 0x1, > > + DMA_XOR = 0x2, > > + DMA_PQ_XOR = 0x4, > > + DMA_DUAL_XOR = 0x8, > > + DMA_PQ_UPDATE = 0x10, > > + DMA_ZERO_SUM = 0x20, > > + DMA_PQ_ZERO_SUM = 0x40, > > + DMA_MEMSET = 0x80, > > + DMA_MEMCPY_CRC32C = 0x100, > > Please use the more readable style that explicitly lists bits: > > DMA_MEMCPY = (1 << 0), > DMA_XOR = (1 << 1), > ... I prefer this as well, although at one point I was told (not by you) the absolute number was preferred when I was making changes to drivers/scsi/sata_vsc.c. In any event I'll change it... > > > +/** > > * struct dma_chan_percpu - the per-CPU part of struct dma_chan > > * @refcount: local_t used for open-coded "bigref" counting > > * @memcpy_count: transaction counter > > @@ -75,27 +96,32 @@ struct dma_chan_percpu { > > local_t refcount; > > /* stats */ > > unsigned long memcpy_count; > > + unsigned long xor_count; > > unsigned long bytes_transferred; > > + unsigned long bytes_xor; > > Clearly, each operation needs to be more compartmentalized. > > This just isn't scalable, when you consider all the possible transforms. Ok, one set of counters per op is probably overkill what about lumping operations into groups and just tracking at the group level? i.e. memcpy, memset -> string_count, string_bytes_transferred crc, sha1, aes -> hash_count, hash_transferred xor, pq_xor -> sum_count, sum_transferred > > Jeff Dan - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Roland Dreier on 11 Sep 2006 21:00 Jeff> Are we really going to add a set of hooks for each DMA Jeff> engine whizbang feature? Dan> What's the alternative? But, also see patch 9 "dmaengine: Dan> reduce backend address permutations" it relieves some of this Dan> pain. I guess you can pass an opcode into a common "start operation" function. With all the memcpy / xor / crypto / etc. hardware out there already, we definitely have to get this interface right. - R. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Olof Johansson on 15 Sep 2006 12:50
On Mon, 11 Sep 2006 19:44:16 -0400 Jeff Garzik <jeff(a)garzik.org> wrote: > Dan Williams wrote: > > @@ -759,8 +755,10 @@ #endif > > device->common.device_memcpy_buf_to_buf = ioat_dma_memcpy_buf_to_buf; > > device->common.device_memcpy_buf_to_pg = ioat_dma_memcpy_buf_to_pg; > > device->common.device_memcpy_pg_to_pg = ioat_dma_memcpy_pg_to_pg; > > - device->common.device_memcpy_complete = ioat_dma_is_complete; > > - device->common.device_memcpy_issue_pending = ioat_dma_memcpy_issue_pending; > > + device->common.device_operation_complete = ioat_dma_is_complete; > > + device->common.device_xor_pgs_to_pg = dma_async_xor_pgs_to_pg_err; > > + device->common.device_issue_pending = ioat_dma_memcpy_issue_pending; > > + device->common.capabilities = DMA_MEMCPY; > > > Are we really going to add a set of hooks for each DMA engine whizbang > feature? > > That will get ugly when DMA engines support memcpy, xor, crc32, sha1, > aes, and a dozen other transforms. Yes, it will be unmaintainable. We need some sort of multiplexing with per-function registrations. Here's a first cut at it, just very quick. It could be improved further but it shows that we could exorcise most of the hardcoded things pretty easily. Dan, would this fit with your added XOR stuff as well? If so, would you mind rebasing on top of something like this (with your further cleanups going in before added function, please. :-) (Build tested only, since I lack Intel hardware). It would be nice if we could move the type specification to only be needed in the channel allocation. I don't know how well that fits the model for some of the hardware platforms though, since a single channel might be shared for different types of functions. Maybe we need a different level of abstraction there instead, i.e. divorce the hardware channel and software channel model and have several software channels map onto a hardware one. Clean up the DMA API a bit, allowing each engine to register an array of supported functions instead of allocating static names for each possible function. Signed-off-by: Olof Johansson <olof(a)lixom.net> diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 1527804..282ce85 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -80,7 +80,7 @@ static ssize_t show_memcpy_count(struct int i; for_each_possible_cpu(i) - count += per_cpu_ptr(chan->local, i)->memcpy_count; + count += per_cpu_ptr(chan->local, i)->count; return sprintf(buf, "%lu\n", count); } @@ -105,7 +105,7 @@ static ssize_t show_in_use(struct class_ } static struct class_device_attribute dma_class_attrs[] = { - __ATTR(memcpy_count, S_IRUGO, show_memcpy_count, NULL), + __ATTR(count, S_IRUGO, show_memcpy_count, NULL), __ATTR(bytes_transferred, S_IRUGO, show_bytes_transferred, NULL), __ATTR(in_use, S_IRUGO, show_in_use, NULL), __ATTR_NULL @@ -402,11 +402,11 @@ subsys_initcall(dma_bus_init); EXPORT_SYMBOL(dma_async_client_register); EXPORT_SYMBOL(dma_async_client_unregister); EXPORT_SYMBOL(dma_async_client_chan_request); -EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf); -EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg); -EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg); -EXPORT_SYMBOL(dma_async_memcpy_complete); -EXPORT_SYMBOL(dma_async_memcpy_issue_pending); +EXPORT_SYMBOL(dma_async_buf_to_buf); +EXPORT_SYMBOL(dma_async_buf_to_pg); +EXPORT_SYMBOL(dma_async_pg_to_pg); +EXPORT_SYMBOL(dma_async_complete); +EXPORT_SYMBOL(dma_async_issue_pending); EXPORT_SYMBOL(dma_async_device_register); EXPORT_SYMBOL(dma_async_device_unregister); EXPORT_SYMBOL(dma_chan_cleanup); diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c index dbd4d6c..6cbed42 100644 --- a/drivers/dma/ioatdma.c +++ b/drivers/dma/ioatdma.c @@ -40,6 +40,7 @@ #define to_ioat_device(dev) container_of(dev, struct ioat_device, common) #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) + /* internal functions */ static int __devinit ioat_probe(struct pci_dev *pdev, const struct pci_device_id *ent); static void __devexit ioat_remove(struct pci_dev *pdev); @@ -681,6 +682,14 @@ out: return err; } +struct dma_function ioat_memcpy_functions = { + .buf_to_buf = ioat_dma_memcpy_buf_to_buf, + .buf_to_pg = ioat_dma_memcpy_buf_to_pg, + .pg_to_pg = ioat_dma_memcpy_pg_to_pg, + .complete = ioat_dma_is_complete, + .issue_pending = ioat_dma_memcpy_issue_pending, +}; + static int __devinit ioat_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -756,11 +765,8 @@ static int __devinit ioat_probe(struct p device->common.device_alloc_chan_resources = ioat_dma_alloc_chan_resources; device->common.device_free_chan_resources = ioat_dma_free_chan_resources; - device->common.device_memcpy_buf_to_buf = ioat_dma_memcpy_buf_to_buf; - device->common.device_memcpy_buf_to_pg = ioat_dma_memcpy_buf_to_pg; - device->common.device_memcpy_pg_to_pg = ioat_dma_memcpy_pg_to_pg; - device->common.device_memcpy_complete = ioat_dma_is_complete; - device->common.device_memcpy_issue_pending = ioat_dma_memcpy_issue_pending; + device->common.funcs[DMAFUNC_MEMCPY] = &ioat_memcpy_functions; + printk(KERN_INFO "Intel(R) I/OAT DMA Engine found, %d channels\n", device->common.chancnt); diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c index d637555..8a2f642 100644 --- a/drivers/dma/iovlock.c +++ b/drivers/dma/iovlock.c @@ -151,11 +151,8 @@ static dma_cookie_t dma_memcpy_to_kernel while (len > 0) { if (iov->iov_len) { int copy = min_t(unsigned int, iov->iov_len, len); - dma_cookie = dma_async_memcpy_buf_to_buf( - chan, - iov->iov_base, - kdata, - copy); + dma_cookie = dma_async_buf_to_buf(DMAFUNC_MEMCPY, chan, + iov->iov_base, kdata, copy); kdata += copy; len -= copy; iov->iov_len -= copy; @@ -210,7 +207,7 @@ dma_cookie_t dma_memcpy_to_iovec(struct copy = min_t(int, PAGE_SIZE - iov_byte_offset, len); copy = min_t(int, copy, iov[iovec_idx].iov_len); - dma_cookie = dma_async_memcpy_buf_to_pg(chan, + dma_cookie = dma_async_buf_to_pg(DMAFUNC_MEMCPY, chan, page_list->pages[page_idx], iov_byte_offset, kdata, @@ -274,7 +271,7 @@ dma_cookie_t dma_memcpy_pg_to_iovec(stru copy = min_t(int, PAGE_SIZE - iov_byte_offset, len); copy = min_t(int, copy, iov[iovec_idx |