From f41391fa937aa9ea2b36dde2b40dd5dbb3bcb4f1 Mon Sep 17 00:00:00 2001 From: Ionut Podgoreanu Date: Thu, 10 Aug 2023 13:10:24 +0300 Subject: [PATCH] axi_dmac: Add support for DMA Scatter-Gather This commit introduces a different interface to submit transfers, using DMA descriptors. The structure of the DMA descriptor is as follows: struct dma_desc { u32 flags; u32 id; u64 dest_addr; u64 src_addr; u64 next_sg_addr; u32 y_len; u32 x_len; u32 src_stride; u32 dst_stride; }; The 'flags' field currently offers two control bits: - bit 0: if set, this descriptor is the last one: the transfer will complete after it is processed, and the DMA core will go back to the idle state; if cleared, the next DMA descriptor pointed to by 'next_sg_addr' will be loaded. - bit 1: if set, an end-of-transfer interrupt will be raised after the memory segment pointed to by this descriptor has been transferred. The 'id' field holds an identifier for the descriptor. The 'dest_addr' and 'src_addr' fields contain the destination and source addresses to use for the transfer, respectively. The 'x_len' field contains the number of bytes to transfer, minus one. The 'y_len', 'src_stride' and 'dst_stride' fields are only useful for 2D transfers, and should be set to zero if 2D transfers are not required. To start a transfer, the address of the first DMA descriptor must be written to register 0x47c and the HWDESC bit of the CONTROL register must be set. The Scatter-Gather transfer is then queued in the same way as a simple transfer, by writing 1 to TRANSFER_SUBMIT. The Scatter-Gather interface has a dedicated AXI-MM bus configured for read transfers, with its own dedicated clock, which can be asynchronous to the other DMA clocks. The Scatter-Gather reset is generated by the reset manager to reset the logic after completing any pending transactions on the bus. When Scatter-Gather is enabled at runtime, the legacy cyclic functionality of the DMA is disabled. 
Signed-off-by: Ionut Podgoreanu --- library/axi_dmac/Makefile | 2 + library/axi_dmac/axi_dmac.v | 150 +++++++-- library/axi_dmac/axi_dmac_constr.ttcl | 75 ++++- library/axi_dmac/axi_dmac_hw.tcl | 74 +++- library/axi_dmac/axi_dmac_ip.tcl | 122 +++++-- library/axi_dmac/axi_dmac_pkg_sv.ttcl | 14 + library/axi_dmac/axi_dmac_regmap.v | 29 +- library/axi_dmac/axi_dmac_regmap_request.v | 39 ++- library/axi_dmac/axi_dmac_reset_manager.v | 60 +++- library/axi_dmac/axi_dmac_response_manager.v | 2 +- library/axi_dmac/axi_dmac_transfer.v | 197 ++++++++++- library/axi_dmac/bd/bd.tcl | 40 ++- library/axi_dmac/dmac_sg.v | 336 +++++++++++++++++++ library/axi_dmac/request_arb.v | 3 - 14 files changed, 1050 insertions(+), 93 deletions(-) create mode 100644 library/axi_dmac/dmac_sg.v diff --git a/library/axi_dmac/Makefile b/library/axi_dmac/Makefile index 81e2f39b1..d66b31eff 100644 --- a/library/axi_dmac/Makefile +++ b/library/axi_dmac/Makefile @@ -24,6 +24,7 @@ GENERIC_DEPS += dest_axi_mm.v GENERIC_DEPS += dest_axi_stream.v GENERIC_DEPS += dest_fifo_inf.v GENERIC_DEPS += dmac_2d_transfer.v +GENERIC_DEPS += dmac_sg.v GENERIC_DEPS += inc_id.vh GENERIC_DEPS += request_arb.v GENERIC_DEPS += request_generator.v @@ -52,6 +53,7 @@ INTEL_DEPS += ../util_axis_fifo/util_axis_fifo.v INTEL_DEPS += ../util_axis_fifo/util_axis_fifo_address_generator.v INTEL_DEPS += ../util_cdc/sync_bits.v INTEL_DEPS += ../util_cdc/sync_event.v +INTEL_DEPS += ../util_cdc/sync_gray.v INTEL_DEPS += axi_dmac_constr.sdc INTEL_DEPS += axi_dmac_hw.tcl diff --git a/library/axi_dmac/axi_dmac.v b/library/axi_dmac/axi_dmac.v index 88218e37e..f78316f25 100644 --- a/library/axi_dmac/axi_dmac.v +++ b/library/axi_dmac/axi_dmac.v @@ -40,17 +40,23 @@ module axi_dmac #( parameter ID = 0, parameter DMA_DATA_WIDTH_SRC = 64, parameter DMA_DATA_WIDTH_DEST = 64, + parameter DMA_DATA_WIDTH_SG = 64, parameter DMA_LENGTH_WIDTH = 24, parameter DMA_2D_TRANSFER = 0, + parameter DMA_SG_TRANSFER = 0, parameter ASYNC_CLK_REQ_SRC = 1, parameter 
ASYNC_CLK_SRC_DEST = 1, parameter ASYNC_CLK_DEST_REQ = 1, + parameter ASYNC_CLK_REQ_SG = 1, + parameter ASYNC_CLK_SRC_SG = 1, + parameter ASYNC_CLK_DEST_SG = 1, parameter AXI_SLICE_DEST = 0, parameter AXI_SLICE_SRC = 0, parameter SYNC_TRANSFER_START = 0, parameter CYCLIC = 1, parameter DMA_AXI_PROTOCOL_DEST = 0, parameter DMA_AXI_PROTOCOL_SRC = 0, + parameter DMA_AXI_PROTOCOL_SG = 0, parameter DMA_TYPE_DEST = 0, parameter DMA_TYPE_SRC = 2, parameter DMA_AXI_ADDR_WIDTH = 32, @@ -58,6 +64,7 @@ module axi_dmac #( parameter FIFO_SIZE = 8, // In bursts parameter AXI_ID_WIDTH_SRC = 1, parameter AXI_ID_WIDTH_DEST = 1, + parameter AXI_ID_WIDTH_SG = 1, parameter DMA_AXIS_ID_W = 8, parameter DMA_AXIS_DEST_W = 4, parameter DISABLE_DEBUG_REGISTERS = 0, @@ -187,6 +194,52 @@ module axi_dmac #( output [AXI_ID_WIDTH_SRC-1:0] m_src_axi_wid, input [AXI_ID_WIDTH_SRC-1:0] m_src_axi_bid, + // Master AXI interface + input m_sg_axi_aclk, + input m_sg_axi_aresetn, + + // Read address + input m_sg_axi_arready, + output m_sg_axi_arvalid, + output [DMA_AXI_ADDR_WIDTH-1:0] m_sg_axi_araddr, + output [7-(4*DMA_AXI_PROTOCOL_SG):0] m_sg_axi_arlen, + output [ 2:0] m_sg_axi_arsize, + output [ 1:0] m_sg_axi_arburst, + output [ 2:0] m_sg_axi_arprot, + output [ 3:0] m_sg_axi_arcache, + output [AXI_ID_WIDTH_SG-1:0] m_sg_axi_arid, + output [DMA_AXI_PROTOCOL_SG:0] m_sg_axi_arlock, + + // Read data and response + input [DMA_DATA_WIDTH_SG-1:0] m_sg_axi_rdata, + output m_sg_axi_rready, + input m_sg_axi_rvalid, + input [ 1:0] m_sg_axi_rresp, + input [AXI_ID_WIDTH_SG-1:0] m_sg_axi_rid, + input m_sg_axi_rlast, + + // Unused write interface + output m_sg_axi_awvalid, + output [DMA_AXI_ADDR_WIDTH-1:0] m_sg_axi_awaddr, + output [7-(4*DMA_AXI_PROTOCOL_SG):0] m_sg_axi_awlen, + output [ 2:0] m_sg_axi_awsize, + output [ 1:0] m_sg_axi_awburst, + output [ 3:0] m_sg_axi_awcache, + output [ 2:0] m_sg_axi_awprot, + input m_sg_axi_awready, + output m_sg_axi_wvalid, + output [DMA_DATA_WIDTH_SG-1:0] m_sg_axi_wdata, + output 
[(DMA_DATA_WIDTH_SG/8)-1:0] m_sg_axi_wstrb, + output m_sg_axi_wlast, + input m_sg_axi_wready, + input m_sg_axi_bvalid, + input [ 1:0] m_sg_axi_bresp, + output m_sg_axi_bready, + output [AXI_ID_WIDTH_SG-1:0] m_sg_axi_awid, + output [DMA_AXI_PROTOCOL_SG:0] m_sg_axi_awlock, + output [AXI_ID_WIDTH_SG-1:0] m_sg_axi_wid, + input [AXI_ID_WIDTH_SG-1:0] m_sg_axi_bid, + // Slave streaming AXI interface input s_axis_aclk, output s_axis_ready, @@ -257,6 +310,14 @@ module axi_dmac #( DMA_DATA_WIDTH_SRC > 32 ? 3 : DMA_DATA_WIDTH_SRC > 16 ? 2 : DMA_DATA_WIDTH_SRC > 8 ? 1 : 0; + localparam BYTES_PER_BEAT_WIDTH_SG = DMA_DATA_WIDTH_SG > 1024 ? 8 : + DMA_DATA_WIDTH_SG > 512 ? 7 : + DMA_DATA_WIDTH_SG > 256 ? 6 : + DMA_DATA_WIDTH_SG > 128 ? 5 : + DMA_DATA_WIDTH_SG > 64 ? 4 : + DMA_DATA_WIDTH_SG > 32 ? 3 : + DMA_DATA_WIDTH_SG > 16 ? 2 : + DMA_DATA_WIDTH_SG > 8 ? 1 : 0; localparam ID_WIDTH = (FIFO_SIZE) > 64 ? 8 : (FIFO_SIZE) > 32 ? 7 : (FIFO_SIZE) > 16 ? 6 : @@ -331,33 +392,8 @@ module axi_dmac #( wire [31:0] dbg_ids0; wire [31:0] dbg_ids1; - assign m_dest_axi_araddr = 'd0; - assign m_dest_axi_arlen = 'd0; - assign m_dest_axi_arsize = 'd0; - assign m_dest_axi_arburst = 'd0; - assign m_dest_axi_arcache = 'd0; - assign m_dest_axi_arprot = 'd0; - assign m_dest_axi_awid = 'h0; - assign m_dest_axi_awlock = 'h0; - assign m_dest_axi_wid = 'h0; - assign m_dest_axi_arid = 'h0; - assign m_dest_axi_arlock = 'h0; - assign m_src_axi_awaddr = 'd0; - assign m_src_axi_awlen = 'd0; - assign m_src_axi_awsize = 'd0; - assign m_src_axi_awburst = 'd0; - assign m_src_axi_awcache = 'd0; - assign m_src_axi_awprot = 'd0; - assign m_src_axi_wdata = 'd0; - assign m_src_axi_wstrb = 'd0; - assign m_src_axi_wlast = 'd0; - assign m_src_axi_awid = 'h0; - assign m_src_axi_awlock = 'h0; - assign m_src_axi_wid = 'h0; - assign m_src_axi_arid = 'h0; - assign m_src_axi_arlock = 'h0; - wire up_req_eot; + wire [31:0] up_req_sg_desc_id; wire [BYTES_PER_BURST_WIDTH-1:0] up_req_measured_burst_length; wire up_response_partial; 
wire up_response_valid; @@ -365,11 +401,13 @@ module axi_dmac #( wire ctrl_enable; wire ctrl_pause; + wire ctrl_hwdesc; wire up_dma_req_valid; wire up_dma_req_ready; wire [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_DEST] up_dma_req_dest_address; wire [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SRC] up_dma_req_src_address; + wire [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SG] up_dma_req_sg_address; wire [DMA_LENGTH_WIDTH-1:0] up_dma_req_x_length; wire [DMA_LENGTH_WIDTH-1:0] up_dma_req_y_length; wire [DMA_LENGTH_WIDTH-1:0] up_dma_req_dest_stride; @@ -396,6 +434,7 @@ module axi_dmac #( .DISABLE_DEBUG_REGISTERS(DISABLE_DEBUG_REGISTERS), .BYTES_PER_BEAT_WIDTH_DEST(BYTES_PER_BEAT_WIDTH_DEST), .BYTES_PER_BEAT_WIDTH_SRC(BYTES_PER_BEAT_WIDTH_SRC), + .BYTES_PER_BEAT_WIDTH_SG(BYTES_PER_BEAT_WIDTH_SG), .BYTES_PER_BURST_WIDTH(BYTES_PER_BURST_WIDTH), .DMA_TYPE_DEST(DMA_TYPE_DEST), .DMA_TYPE_SRC(DMA_TYPE_SRC), @@ -406,6 +445,7 @@ module axi_dmac #( .HAS_DEST_ADDR(HAS_DEST_ADDR), .HAS_SRC_ADDR(HAS_SRC_ADDR), .DMA_2D_TRANSFER(DMA_2D_TRANSFER), + .DMA_SG_TRANSFER(DMA_SG_TRANSFER), .SYNC_TRANSFER_START(SYNC_TRANSFER_START), .CACHE_COHERENT_DEST(CACHE_COHERENT_DEST) ) i_regmap ( @@ -438,12 +478,14 @@ module axi_dmac #( // Control interface .ctrl_enable(ctrl_enable), .ctrl_pause(ctrl_pause), + .ctrl_hwdesc(ctrl_hwdesc), // Request interface .request_valid(up_dma_req_valid), .request_ready(up_dma_req_ready), .request_dest_address(up_dma_req_dest_address), .request_src_address(up_dma_req_src_address), + .request_sg_address(up_dma_req_sg_address), .request_x_length(up_dma_req_x_length), .request_y_length(up_dma_req_y_length), .request_dest_stride(up_dma_req_dest_stride), @@ -453,6 +495,7 @@ module axi_dmac #( // DMA response interface .response_eot(up_req_eot), + .response_sg_desc_id(up_req_sg_desc_id), .response_measured_burst_length(up_req_measured_burst_length), .response_partial(up_response_partial), .response_valid(up_response_valid), @@ -468,18 +511,22 @@ module axi_dmac #( 
axi_dmac_transfer #( .DMA_DATA_WIDTH_SRC(DMA_DATA_WIDTH_SRC), .DMA_DATA_WIDTH_DEST(DMA_DATA_WIDTH_DEST), + .DMA_DATA_WIDTH_SG(DMA_DATA_WIDTH_SG), .DMA_LENGTH_WIDTH(DMA_LENGTH_WIDTH), .DMA_LENGTH_ALIGN(DMA_LENGTH_ALIGN), .BYTES_PER_BEAT_WIDTH_DEST(BYTES_PER_BEAT_WIDTH_DEST), .BYTES_PER_BEAT_WIDTH_SRC(BYTES_PER_BEAT_WIDTH_SRC), + .BYTES_PER_BEAT_WIDTH_SG(BYTES_PER_BEAT_WIDTH_SG), .BYTES_PER_BURST_WIDTH(BYTES_PER_BURST_WIDTH), .DMA_TYPE_DEST(DMA_TYPE_DEST), .DMA_TYPE_SRC(DMA_TYPE_SRC), .DMA_AXI_ADDR_WIDTH(DMA_AXI_ADDR_WIDTH), .DMA_2D_TRANSFER(DMA_2D_TRANSFER), + .DMA_SG_TRANSFER(DMA_SG_TRANSFER), .ASYNC_CLK_REQ_SRC(ASYNC_CLK_REQ_SRC), .ASYNC_CLK_SRC_DEST(ASYNC_CLK_SRC_DEST), .ASYNC_CLK_DEST_REQ(ASYNC_CLK_DEST_REQ), + .ASYNC_CLK_REQ_SG(ASYNC_CLK_REQ_SG), .AXI_SLICE_DEST(AXI_SLICE_DEST), .AXI_SLICE_SRC(AXI_SLICE_SRC), .MAX_BYTES_PER_BURST(REAL_MAX_BYTES_PER_BURST), @@ -487,6 +534,7 @@ module axi_dmac #( .ID_WIDTH(ID_WIDTH), .AXI_LENGTH_WIDTH_SRC(8-(4*DMA_AXI_PROTOCOL_SRC)), .AXI_LENGTH_WIDTH_DEST(8-(4*DMA_AXI_PROTOCOL_DEST)), + .AXI_LENGTH_WIDTH_SG(8-(4*DMA_AXI_PROTOCOL_SG)), .ENABLE_DIAGNOSTICS_IF(ENABLE_DIAGNOSTICS_IF), .ALLOW_ASYM_MEM(ALLOW_ASYM_MEM), .CACHE_COHERENT_DEST(CACHE_COHERENT_DEST) @@ -496,11 +544,13 @@ module axi_dmac #( .ctrl_enable(ctrl_enable), .ctrl_pause(ctrl_pause), + .ctrl_hwdesc(ctrl_hwdesc), .req_valid(up_dma_req_valid), .req_ready(up_dma_req_ready), .req_dest_address(up_dma_req_dest_address), .req_src_address(up_dma_req_src_address), + .req_sg_address(up_dma_req_sg_address), .req_x_length(up_dma_req_x_length), .req_y_length(up_dma_req_y_length), .req_dest_stride(up_dma_req_dest_stride), @@ -509,6 +559,7 @@ module axi_dmac #( .req_last(up_dma_req_last), .req_eot(up_req_eot), + .req_sg_desc_id(up_req_sg_desc_id), .req_measured_burst_length(up_req_measured_burst_length), .req_response_partial(up_response_partial), .req_response_valid(up_response_valid), @@ -518,6 +569,8 @@ module axi_dmac #( .m_dest_axi_aresetn(m_dest_axi_aresetn), 
.m_src_axi_aclk(m_src_axi_aclk), .m_src_axi_aresetn(m_src_axi_aresetn), + .m_sg_axi_aclk(m_sg_axi_aclk), + .m_sg_axi_aresetn(m_sg_axi_aresetn), .m_axi_awaddr(m_dest_axi_awaddr), .m_axi_awlen(m_dest_axi_awlen), @@ -553,6 +606,21 @@ module axi_dmac #( .m_axi_rlast(m_src_axi_rlast), .m_axi_rresp(m_src_axi_rresp), + .m_sg_axi_arready(m_sg_axi_arready), + .m_sg_axi_arvalid(m_sg_axi_arvalid), + .m_sg_axi_araddr(m_sg_axi_araddr), + .m_sg_axi_arlen(m_sg_axi_arlen), + .m_sg_axi_arsize(m_sg_axi_arsize), + .m_sg_axi_arburst(m_sg_axi_arburst), + .m_sg_axi_arprot(m_sg_axi_arprot), + .m_sg_axi_arcache(m_sg_axi_arcache), + + .m_sg_axi_rdata(m_sg_axi_rdata), + .m_sg_axi_rready(m_sg_axi_rready), + .m_sg_axi_rvalid(m_sg_axi_rvalid), + .m_sg_axi_rlast(m_sg_axi_rlast), + .m_sg_axi_rresp(m_sg_axi_rresp), + .s_axis_aclk(s_axis_aclk), .s_axis_ready(s_axis_ready), .s_axis_valid(s_axis_valid), @@ -603,21 +671,47 @@ module axi_dmac #( assign m_dest_axi_arburst = 'h0; assign m_dest_axi_arcache = 'h0; assign m_dest_axi_arprot = 'h0; + assign m_dest_axi_awid = 'h0; + assign m_dest_axi_awlock = 'h0; + assign m_dest_axi_wid = 'h0; + assign m_dest_axi_arid = 'h0; + assign m_dest_axi_arlock = 'h0; assign m_src_axi_awvalid = 1'b0; assign m_src_axi_wvalid = 1'b0; assign m_src_axi_bready = 1'b0; - assign m_src_axi_awvalid = 'h0; assign m_src_axi_awaddr = 'h0; assign m_src_axi_awlen = 'h0; assign m_src_axi_awsize = 'h0; assign m_src_axi_awburst = 'h0; assign m_src_axi_awcache = 'h0; assign m_src_axi_awprot = 'h0; - assign m_src_axi_wvalid = 'h0; assign m_src_axi_wdata = 'h0; assign m_src_axi_wstrb = 'h0; assign m_src_axi_wlast = 'h0; + assign m_src_axi_awid = 'h0; + assign m_src_axi_awlock = 'h0; + assign m_src_axi_wid = 'h0; + assign m_src_axi_arid = 'h0; + assign m_src_axi_arlock = 'h0; + + assign m_sg_axi_awvalid = 1'b0; + assign m_sg_axi_wvalid = 1'b0; + assign m_sg_axi_bready = 1'b0; + assign m_sg_axi_awaddr = 'h0; + assign m_sg_axi_awlen = 'h0; + assign m_sg_axi_awsize = 'h0; + assign 
m_sg_axi_awburst = 'h0; + assign m_sg_axi_awcache = 'h0; + assign m_sg_axi_awprot = 'h0; + assign m_sg_axi_wdata = 'h0; + assign m_sg_axi_wstrb = 'h0; + assign m_sg_axi_wlast = 'h0; + assign m_sg_axi_awid = 'h0; + assign m_sg_axi_awlock = 'h0; + assign m_sg_axi_wid = 'h0; + assign m_sg_axi_arid = 'h0; + assign m_sg_axi_arlock = 'h0; assign m_axis_keep = {DMA_DATA_WIDTH_DEST/8{1'b1}}; assign m_axis_strb = {DMA_DATA_WIDTH_DEST/8{1'b1}}; diff --git a/library/axi_dmac/axi_dmac_constr.ttcl b/library/axi_dmac/axi_dmac_constr.ttcl index 3af06ea2e..e90fe6ba2 100644 --- a/library/axi_dmac/axi_dmac_constr.ttcl +++ b/library/axi_dmac/axi_dmac_constr.ttcl @@ -11,6 +11,10 @@ <: set async_dest_req [getBooleanValue "ASYNC_CLK_DEST_REQ"] :> <: set async_req_src [getBooleanValue "ASYNC_CLK_REQ_SRC"] :> <: set async_src_dest [getBooleanValue "ASYNC_CLK_SRC_DEST"] :> +<: set async_req_sg [getBooleanValue "ASYNC_CLK_REQ_SG"] :> +<: set async_src_sg [getBooleanValue "ASYNC_CLK_SRC_SG"] :> +<: set async_dest_sg [getBooleanValue "ASYNC_CLK_DEST_SG"] :> +<: set sg_enabled [getBooleanValue "DMA_SG_TRANSFER"] :> <: set disable_debug_registers [getBooleanValue "DISABLE_DEBUG_REGISTERS"] :> set req_clk_ports_base {s_axi_aclk} @@ -31,11 +35,28 @@ set dest_clk_ports "$dest_clk_ports $src_clk_ports_base" set req_clk_ports "$req_clk_ports $dest_clk_ports_base" set dest_clk_ports "$dest_clk_ports $req_clk_ports_base" <: } :> +<: if {$sg_enabled} { :> +set sg_clk_ports_base {m_sg_axi_aclk} +set sg_clk_ports $sg_clk_ports_base +<: if {[expr {!$async_req_sg}]} { :> +set req_clk_ports "$req_clk_ports $sg_clk_ports_base" +set sg_clk_ports "$sg_clk_ports $req_clk_ports_base" +<: } :> +<: if {[expr {!$async_src_sg}]} { :> +set src_clk_ports "$src_clk_ports $sg_clk_ports_base" +set sg_clk_ports "$sg_clk_ports $src_clk_ports_base" +<: } :> +<: if {[expr {!$async_dest_sg}]} { :> +set dest_clk_ports "$dest_clk_ports $sg_clk_ports_base" +set sg_clk_ports "$sg_clk_ports $dest_clk_ports_base" +<: } :> +set 
sg_clk [get_clocks -of_objects [get_ports -quiet $sg_clk_ports]] +<: } :> set req_clk [get_clocks -of_objects [get_ports -quiet $req_clk_ports]] set src_clk [get_clocks -of_objects [get_ports -quiet $src_clk_ports]] set dest_clk [get_clocks -of_objects [get_ports -quiet $dest_clk_ports]] -<: if {$async_req_src || $async_src_dest || $async_dest_req} { :> +<: if {$async_req_src || $async_src_dest || $async_dest_req || ($async_req_sg && $sg_enabled)} { :> set_property ASYNC_REG TRUE \ [get_cells -quiet -hier *cdc_sync_stage1_reg*] \ [get_cells -quiet -hier *cdc_sync_stage2_reg*] @@ -140,6 +161,7 @@ set_max_delay -quiet -datapath_only \ -to [get_cells -quiet -hier *cdc_sync_stage1_reg* \ -filter {NAME =~ *i_dest_response_fifo/zerodeep.i_raddr_sync* && IS_SEQUENTIAL}] \ [get_property -min PERIOD $req_clk] + set_max_delay -quiet -datapath_only \ -from [get_cells -quiet -hier *cdc_sync_fifo_ram_reg* \ -filter {NAME =~ *i_dest_response_fifo* && IS_SEQUENTIAL}] \ @@ -209,13 +231,62 @@ set_max_delay -quiet -datapath_only \ -to $dest_clk \ [get_property -min PERIOD $dest_clk] - set_max_delay -quiet -datapath_only \ +set_max_delay -quiet -datapath_only \ -from $src_clk \ -through [get_cells -quiet -hier DP \ -filter {NAME =~ *i_request_arb/eot_mem_dest_reg*}] \ -to $dest_clk \ [get_property -min PERIOD $dest_clk] +<: } :> +<: if {$async_req_sg && $sg_enabled} { :> +set_false_path -quiet \ + -from $req_clk \ + -to [get_cells -quiet -hier *cdc_sync_stage1_reg* \ + -filter {NAME =~ *i_sync_sg_enable* && IS_SEQUENTIAL}] + +set_max_delay -quiet -datapath_only \ + -from $req_clk \ + -to [get_cells -quiet -hier *cdc_sync_stage1_reg* \ + -filter {NAME =~ *i_sg_addr_fifo/zerodeep.i_waddr_sync* && IS_SEQUENTIAL}] \ + [get_property -min PERIOD $req_clk] + +set_max_delay -quiet -datapath_only \ + -from $sg_clk \ + -to [get_cells -quiet -hier *cdc_sync_stage1_reg* \ + -filter {NAME =~ *i_sg_addr_fifo/zerodeep.i_raddr_sync* && IS_SEQUENTIAL}] \ + [get_property -min PERIOD $sg_clk] + 
+set_max_delay -quiet -datapath_only \ + -from [get_cells -quiet -hier *cdc_sync_fifo_ram_reg* \ + -filter {NAME =~ *i_sg_addr_fifo* && IS_SEQUENTIAL}] \ + -to $sg_clk \ + [get_property -min PERIOD $sg_clk] + +set_max_delay -quiet -datapath_only \ + -from $sg_clk \ + -to [get_cells -quiet -hier *cdc_sync_stage1_reg* \ + -filter {NAME =~ *i_sg_desc_fifo/zerodeep.i_waddr_sync* && IS_SEQUENTIAL}] \ + [get_property -min PERIOD $sg_clk] + +set_max_delay -quiet -datapath_only \ + -from $req_clk \ + -to [get_cells -quiet -hier *cdc_sync_stage1_reg* \ + -filter {NAME =~ *i_sg_desc_fifo/zerodeep.i_raddr_sync* && IS_SEQUENTIAL}] \ + [get_property -min PERIOD $req_clk] + +set_max_delay -quiet -datapath_only \ + -from [get_cells -quiet -hier *cdc_sync_fifo_ram_reg* \ + -filter {NAME =~ *i_sg_desc_fifo* && IS_SEQUENTIAL}] \ + -to $req_clk \ + [get_property -min PERIOD $req_clk] + +set_false_path \ + -to [get_pins -hierarchical * -filter {NAME=~*i_waddr_sync_gray/cdc_sync_stage1_reg[*]/D}] + +set_false_path \ + -to [get_pins -hierarchical * -filter {NAME=~*i_raddr_sync_gray/cdc_sync_stage1_reg[*]/D}] + <: } :> # Reset signals set_false_path -quiet \ diff --git a/library/axi_dmac/axi_dmac_hw.tcl b/library/axi_dmac/axi_dmac_hw.tcl index 56b509cee..b8bde93ad 100644 --- a/library/axi_dmac/axi_dmac_hw.tcl +++ b/library/axi_dmac/axi_dmac_hw.tcl @@ -20,6 +20,7 @@ set_module_property VALIDATION_CALLBACK axi_dmac_validate ad_ip_files axi_dmac [list \ $ad_hdl_dir/library/util_cdc/sync_bits.v \ $ad_hdl_dir/library/util_cdc/sync_event.v \ + $ad_hdl_dir/library/util_cdc/sync_gray.v \ $ad_hdl_dir/library/common/up_axi.v \ $ad_hdl_dir/library/util_axis_fifo/util_axis_fifo.v \ $ad_hdl_dir/library/util_axis_fifo/util_axis_fifo_address_generator.v \ @@ -41,6 +42,7 @@ ad_ip_files axi_dmac [list \ response_handler.v \ axi_register_slice.v \ dmac_2d_transfer.v \ + dmac_sg.v \ dest_axi_mm.v \ dest_axi_stream.v \ dest_fifo_inf.v \ @@ -127,6 +129,22 @@ foreach {suffix group} { \ # FIFO interface 
set_parameter_property DMA_TYPE_SRC DEFAULT_VALUE 2 +# Scatter-Gather interface +add_parameter DMA_AXI_PROTOCOL_SG INTEGER 1 +set_parameter_property DMA_AXI_PROTOCOL_SG DISPLAY_NAME "AXI Protocol" +set_parameter_property DMA_AXI_PROTOCOL_SG HDL_PARAMETER true +set_parameter_property DMA_AXI_PROTOCOL_SG ALLOWED_RANGES { "0:AXI4" "1:AXI3" } +set_parameter_property DMA_AXI_PROTOCOL_SG VISIBLE true +set_parameter_property DMA_AXI_PROTOCOL_SG GROUP $group + +add_parameter DMA_DATA_WIDTH_SG INTEGER 64 +set_parameter_property DMA_DATA_WIDTH_SG DISPLAY_NAME "Bus Width" +set_parameter_property DMA_DATA_WIDTH_SG UNITS Bits +set_parameter_property DMA_DATA_WIDTH_SG HDL_PARAMETER true +set_parameter_property DMA_DATA_WIDTH_SG ALLOWED_RANGES {64} +set_parameter_property DMA_DATA_WIDTH_SG VISIBLE true +set_parameter_property DMA_DATA_WIDTH_SG GROUP $group + set group "Features" add_parameter CYCLIC INTEGER 1 @@ -141,6 +159,12 @@ set_parameter_property DMA_2D_TRANSFER DISPLAY_HINT boolean set_parameter_property DMA_2D_TRANSFER HDL_PARAMETER true set_parameter_property DMA_2D_TRANSFER GROUP $group +add_parameter DMA_SG_TRANSFER INTEGER 0 +set_parameter_property DMA_SG_TRANSFER DISPLAY_NAME "SG Transfer Support" +set_parameter_property DMA_SG_TRANSFER DISPLAY_HINT boolean +set_parameter_property DMA_SG_TRANSFER HDL_PARAMETER true +set_parameter_property DMA_SG_TRANSFER GROUP $group + add_parameter SYNC_TRANSFER_START INTEGER 0 set_parameter_property SYNC_TRANSFER_START DISPLAY_NAME "Transfer Start Synchronization Support" set_parameter_property SYNC_TRANSFER_START DISPLAY_HINT boolean @@ -158,6 +182,9 @@ foreach {p name} { \ ASYNC_CLK_REQ_SRC "Request and Source" \ ASYNC_CLK_SRC_DEST "Source and Destination" \ ASYNC_CLK_DEST_REQ "Destination and Request" \ + ASYNC_CLK_REQ_SG "Request and Scatter-Gather" \ + ASYNC_CLK_SRC_SG "Source and Scatter-Gather" \ + ASYNC_CLK_DEST_SG "Destination and Scatter-Gather" \ } { add_parameter ${p}_MANUAL INTEGER 1 @@ -202,6 +229,12 @@ foreach domain 
[list {*}$src_clks {*}$dest_clks] { set_parameter_property $p GROUP $group } +add_parameter CLK_DOMAIN_SG INTEGER +set_parameter_property CLK_DOMAIN_SG HDL_PARAMETER false +set_parameter_property CLK_DOMAIN_SG SYSTEM_INFO {CLOCK_DOMAIN m_sg_axi_clock} +set_parameter_property CLK_DOMAIN_SG VISIBLE false +set_parameter_property CLK_DOMAIN_SG GROUP $group + # axi4 slave ad_ip_intf_s_axi s_axi_aclk s_axi_aresetn 11 @@ -231,6 +264,7 @@ proc axi_dmac_validate {} { set req_domain [get_parameter_value CLK_DOMAIN_REQ] set src_domain [get_parameter_value [lindex $src_clks $type_src 0]] set dest_domain [get_parameter_value [lindex $dest_clks $type_dest 0]] + set sg_domain [get_parameter_value CLK_DOMAIN_SG] if {$req_domain != 0 && $req_domain == $src_domain} { set_parameter_value ASYNC_CLK_REQ_SRC 0 @@ -249,13 +283,31 @@ proc axi_dmac_validate {} { } else { set_parameter_value ASYNC_CLK_DEST_REQ 1 } + + if {$sg_domain != 0 && $sg_domain == $req_domain} { + set_parameter_value ASYNC_CLK_REQ_SG 0 + } else { + set_parameter_value ASYNC_CLK_REQ_SG 1 + } + + if {$sg_domain != 0 && $sg_domain == $src_domain} { + set_parameter_value ASYNC_CLK_SRC_SG 0 + } else { + set_parameter_value ASYNC_CLK_SRC_SG 1 + } + + if {$sg_domain != 0 && $sg_domain == $dest_domain} { + set_parameter_value ASYNC_CLK_DEST_SG 0 + } else { + set_parameter_value ASYNC_CLK_DEST_SG 1 + } } else { - foreach p {ASYNC_CLK_REQ_SRC ASYNC_CLK_SRC_DEST ASYNC_CLK_DEST_REQ} { + foreach p {ASYNC_CLK_REQ_SRC ASYNC_CLK_SRC_DEST ASYNC_CLK_DEST_REQ ASYNC_CLK_REQ_SG ASYNC_CLK_SRC_SG ASYNC_CLK_DEST_SG} { set_parameter_value $p [get_parameter_value ${p}_MANUAL] } } - foreach p {ASYNC_CLK_REQ_SRC ASYNC_CLK_SRC_DEST ASYNC_CLK_DEST_REQ} { + foreach p {ASYNC_CLK_REQ_SRC ASYNC_CLK_SRC_DEST ASYNC_CLK_DEST_REQ ASYNC_CLK_REQ_SG ASYNC_CLK_SRC_SG ASYNC_CLK_DEST_SG} { set_parameter_property ${p}_MANUAL VISIBLE [expr $auto_clk ? 
false : true] set_parameter_property $p VISIBLE $auto_clk } @@ -297,6 +349,15 @@ add_interface m_src_axi_reset reset end set_interface_property m_src_axi_reset associatedClock m_src_axi_clock add_interface_port m_src_axi_reset m_src_axi_aresetn reset_n Input 1 +# axi4 scatter-gather + +add_interface m_sg_axi_clock clock end +add_interface_port m_sg_axi_clock m_sg_axi_aclk clk Input 1 + +add_interface m_sg_axi_reset reset end +set_interface_property m_sg_axi_reset associatedClock m_sg_axi_clock +add_interface_port m_sg_axi_reset m_sg_axi_aresetn reset_n Input 1 + # axis destination/source ad_interface clock m_axis_aclk input 1 clk @@ -409,7 +470,7 @@ proc axi_dmac_elaborate {} { set disabled_intfs {} # add axi3 or axi4 interface depending on user selection - foreach {suffix port} {SRC m_src_axi DEST m_dest_axi} { + foreach {suffix port} {SRC m_src_axi DEST m_dest_axi SG m_sg_axi} { if {[get_parameter_value DMA_AXI_PROTOCOL_${suffix}] == 0} { set axi_type axi4 } else { @@ -434,6 +495,13 @@ proc axi_dmac_elaborate {} { lappend disabled_intfs m_src_axi_clock m_src_axi_reset m_src_axi } + if {[get_parameter_value DMA_SG_TRANSFER] == 1} { + set_interface_property m_sg_axi readIssuingCapability $fifo_size + set_interface_property m_sg_axi combinedIssuingCapability $fifo_size + } else { + lappend disabled_intfs m_sg_axi_clock m_sg_axi_reset m_sg_axi + } + # axis destination/source if {[get_parameter_value DMA_TYPE_DEST] != 1} { diff --git a/library/axi_dmac/axi_dmac_ip.tcl b/library/axi_dmac/axi_dmac_ip.tcl index 115ad3b72..d3e1cf00b 100644 --- a/library/axi_dmac/axi_dmac_ip.tcl +++ b/library/axi_dmac/axi_dmac_ip.tcl @@ -30,6 +30,7 @@ adi_ip_files axi_dmac [list \ "response_handler.v" \ "axi_register_slice.v" \ "dmac_2d_transfer.v" \ + "dmac_sg.v" \ "dest_axi_mm.v" \ "dest_axi_stream.v" \ "dest_fifo_inf.v" \ @@ -91,6 +92,8 @@ adi_set_bus_dependency "m_src_axi" "m_src_axi" \ "(spirit:decode(id('MODELPARAM_VALUE.DMA_TYPE_SRC')) = 0)" adi_set_bus_dependency "m_dest_axi" 
"m_dest_axi" \ "(spirit:decode(id('MODELPARAM_VALUE.DMA_TYPE_DEST')) = 0)" +adi_set_bus_dependency "m_sg_axi" "m_sg_axi" \ + "(spirit:decode(id('MODELPARAM_VALUE.DMA_SG_TRANSFER')) = 1)" adi_set_bus_dependency "s_axis" "s_axis" \ "(spirit:decode(id('MODELPARAM_VALUE.DMA_TYPE_SRC')) = 1)" adi_set_bus_dependency "m_axis" "m_axis" \ @@ -118,7 +121,6 @@ set dummy_axi_ports [list \ "m_dest_axi_rdata" \ "m_src_axi_awvalid" \ "m_src_axi_awready" \ - "m_src_axi_awvalid" \ "m_src_axi_awaddr" \ "m_src_axi_awlen" \ "m_src_axi_awsize" \ @@ -127,13 +129,28 @@ set dummy_axi_ports [list \ "m_src_axi_awprot" \ "m_src_axi_wvalid" \ "m_src_axi_wready" \ - "m_src_axi_wvalid" \ "m_src_axi_wdata" \ "m_src_axi_wstrb" \ "m_src_axi_wlast" \ "m_src_axi_bready" \ "m_src_axi_bvalid" \ "m_src_axi_bresp" \ + "m_sg_axi_awvalid" \ + "m_sg_axi_awready" \ + "m_sg_axi_awaddr" \ + "m_sg_axi_awlen" \ + "m_sg_axi_awsize" \ + "m_sg_axi_awburst" \ + "m_sg_axi_awcache" \ + "m_sg_axi_awprot" \ + "m_sg_axi_wvalid" \ + "m_sg_axi_wready" \ + "m_sg_axi_wdata" \ + "m_sg_axi_wstrb" \ + "m_sg_axi_wlast" \ + "m_sg_axi_bready" \ + "m_sg_axi_bvalid" \ + "m_sg_axi_bresp" \ ] # These are in the design to keep the Intel tools happy which require @@ -153,7 +170,14 @@ lappend dummy_axi_ports \ "m_src_axi_awid" \ "m_src_axi_awlock" \ "m_src_axi_wid" \ - "m_src_axi_bid" + "m_src_axi_bid" \ + "m_sg_axi_arid" \ + "m_sg_axi_arlock" \ + "m_sg_axi_rid" \ + "m_sg_axi_awid" \ + "m_sg_axi_awlock" \ + "m_sg_axi_wid" \ + "m_sg_axi_bid" foreach p $dummy_axi_ports { @@ -166,6 +190,9 @@ set_property master_address_space_ref m_dest_axi \ set_property master_address_space_ref m_src_axi \ [ipx::get_bus_interfaces m_src_axi \ -of_objects [ipx::current_core]] +set_property master_address_space_ref m_sg_axi \ + [ipx::get_bus_interfaces m_sg_axi \ + -of_objects [ipx::current_core]] adi_add_bus "fifo_wr" "slave" \ "analog.com:interface:fifo_wr_rtl:1.0" \ @@ -198,7 +225,7 @@ adi_add_bus_clock "fifo_rd_clk" "fifo_rd" adi_set_bus_dependency 
"fifo_rd" "fifo_rd" \ "(spirit:decode(id('MODELPARAM_VALUE.DMA_TYPE_DEST')) = 2)" -foreach port {"m_dest_axi_aresetn" "m_src_axi_aresetn" \ +foreach port {"m_dest_axi_aresetn" "m_src_axi_aresetn" "m_sg_axi_aresetn" \ "s_axis_valid" "s_axis_data" "s_axis_last" "m_axis_ready" \ "fifo_wr_en" "fifo_wr_din" "fifo_rd_en"} { set_property DRIVER_VALUE "0" [ipx::get_ports $port] @@ -240,17 +267,21 @@ set_property -dict [list \ [ipx::get_user_parameters DMA_AXI_ADDR_WIDTH -of_objects $cc] foreach {k v} { \ - "ASYNC_CLK_REQ_SRC" "true" \ - "ASYNC_CLK_SRC_DEST" "true" \ - "ASYNC_CLK_DEST_REQ" "true" \ - "CYCLIC" "false" \ - "DMA_2D_TRANSFER" "false" \ - "SYNC_TRANSFER_START" "false" \ - "AXI_SLICE_SRC" "false" \ - "AXI_SLICE_DEST" "false" \ - "DISABLE_DEBUG_REGISTERS" "false" \ - "ENABLE_DIAGNOSTICS_IF" "false" \ - "CACHE_COHERENT_DEST" "false" \ + "ASYNC_CLK_REQ_SRC" "true" \ + "ASYNC_CLK_SRC_DEST" "true" \ + "ASYNC_CLK_DEST_REQ" "true" \ + "ASYNC_CLK_REQ_SG" "true" \ + "ASYNC_CLK_SRC_SG" "true" \ + "ASYNC_CLK_DEST_SG" "true" \ + "CYCLIC" "false" \ + "DMA_2D_TRANSFER" "false" \ + "DMA_SG_TRANSFER" "false" \ + "SYNC_TRANSFER_START" "false" \ + "AXI_SLICE_SRC" "false" \ + "AXI_SLICE_DEST" "false" \ + "DISABLE_DEBUG_REGISTERS" "false" \ + "ENABLE_DIAGNOSTICS_IF" "false" \ + "CACHE_COHERENT_DEST" "false" \ } { \ set_property -dict [list \ "value_format" "bool" \ @@ -264,11 +295,6 @@ foreach {k v} { \ [ipx::get_hdl_parameters $k -of_objects $cc] } -set_property -dict [list \ - "enablement_tcl_expr" "\$DMA_TYPE_SRC != 0" \ -] \ -[ipx::get_user_parameters SYNC_TRANSFER_START -of_objects $cc] - foreach dir {"SRC" "DEST"} { set_property -dict [list \ "value_validation_type" "list" \ @@ -302,6 +328,8 @@ set src_group [ipgui::add_group -name {Source} -component $cc -parent $g \ -display_name {Source}] set dest_group [ipgui::add_group -name {Destination} -component $cc -parent $g \ -display_name {Destination}] +set sg_group [ipgui::add_group -name {Scatter-Gather} -component $cc -parent 
$g \ + -display_name {Scatter-Gather}] foreach {dir group} [list "SRC" $src_group "DEST" $dest_group] { set p [ipgui::get_guiparamspec -name "DMA_TYPE_${dir}" -component $cc] @@ -337,6 +365,9 @@ ipgui::move_param -component $cc -order 4 $p -parent $src_group set_property -dict [list \ "display_name" "Transfer Start Synchronization Support" \ ] $p +set_property -dict [list \ + "enablement_tcl_expr" "\$DMA_TYPE_SRC != 0" \ +] [ipx::get_user_parameters SYNC_TRANSFER_START -of_objects $cc] set p [ipgui::get_guiparamspec -name "CACHE_COHERENT_DEST" -component $cc] ipgui::move_param -component $cc -order 4 $p -parent $dest_group @@ -350,6 +381,32 @@ set_property -dict [list \ "enablement_value" "false" \ ] [ipx::get_user_parameters CACHE_COHERENT_DEST -of_objects $cc] +set p [ipgui::get_guiparamspec -name "DMA_AXI_PROTOCOL_SG" -component $cc] +ipgui::move_param -component $cc -order 0 $p -parent $sg_group +set_property -dict [list \ + "display_name" "AXI Protocol" \ +] $p +set_property -dict [list \ + "enablement_tcl_expr" "\$DMA_SG_TRANSFER == true" \ +] [ipx::get_user_parameters DMA_AXI_PROTOCOL_SG -of_objects $cc] +set_property -dict [list \ + "value_validation_type" "pairs" \ + "value_validation_pairs" {"AXI3" "1" "AXI4" "0"} \ +] [ipx::get_user_parameters DMA_AXI_PROTOCOL_SG -of_objects $cc] + +set p [ipgui::get_guiparamspec -name "DMA_DATA_WIDTH_SG" -component $cc] +ipgui::move_param -component $cc -order 1 $p -parent $sg_group +set_property -dict [list \ + "display_name" "Bus Width" \ +] $p +set_property -dict [list \ + "enablement_tcl_expr" "\$DMA_SG_TRANSFER == true" \ +] [ipx::get_user_parameters DMA_DATA_WIDTH_SG -of_objects $cc] +set_property -dict [list \ + "value_validation_type" "list" \ + "value_validation_list" "64" \ +] [ipx::get_user_parameters DMA_DATA_WIDTH_SG -of_objects $cc] + set general_group [ipgui::add_group -name "General Configuration" -component $cc \ -parent $page0 -display_name "General Configuration"] @@ -399,6 +456,12 @@ set_property 
-dict [list \ "display_name" "2D Transfer Support" \ ] $p +set p [ipgui::get_guiparamspec -name "DMA_SG_TRANSFER" -component $cc] +ipgui::move_param -component $cc -order 2 $p -parent $feature_group +set_property -dict [list \ + "display_name" "SG Transfer Support" \ +] $p + set clk_group [ipgui::add_group -name {Clock Domain Configuration} -component $cc \ -parent $page0 -display_name {Clock Domain Configuration}] @@ -420,6 +483,24 @@ set_property -dict [list \ "display_name" "Destination and Request Clock Asynchronous" \ ] $p +set p [ipgui::get_guiparamspec -name "ASYNC_CLK_REQ_SG" -component $cc] +ipgui::move_param -component $cc -order 3 $p -parent $clk_group +set_property -dict [list \ + "display_name" "Request and Scatter-Gather Clock Asynchronous" \ +] $p + +set p [ipgui::get_guiparamspec -name "ASYNC_CLK_SRC_SG" -component $cc] +ipgui::move_param -component $cc -order 4 $p -parent $clk_group +set_property -dict [list \ + "display_name" "Source and Scatter-Gather Clock Asynchronous" \ +] $p + +set p [ipgui::get_guiparamspec -name "ASYNC_CLK_DEST_SG" -component $cc] +ipgui::move_param -component $cc -order 5 $p -parent $clk_group +set_property -dict [list \ + "display_name" "Destination and Scatter-Gather Clock Asynchronous" \ +] $p + set dbg_group [ipgui::add_group -name {Debug} -component $cc \ -parent $page0 -display_name {Debug}] @@ -437,6 +518,7 @@ set_property -dict [list \ ipgui::remove_param -component $cc [ipgui::get_guiparamspec -name "AXI_ID_WIDTH_SRC" -component $cc] ipgui::remove_param -component $cc [ipgui::get_guiparamspec -name "AXI_ID_WIDTH_DEST" -component $cc] +ipgui::remove_param -component $cc [ipgui::get_guiparamspec -name "AXI_ID_WIDTH_SG" -component $cc] ipgui::remove_param -component $cc [ipgui::get_guiparamspec -name "ALLOW_ASYM_MEM" -component $cc] ipgui::remove_param -component $cc [ipgui::get_guiparamspec -name "DMA_AXIS_ID_W" -component $cc] ipgui::remove_param -component $cc [ipgui::get_guiparamspec -name "DMA_AXIS_DEST_W" 
-component $cc] diff --git a/library/axi_dmac/axi_dmac_pkg_sv.ttcl b/library/axi_dmac/axi_dmac_pkg_sv.ttcl index 42eaff5e2..2b09bcd35 100644 --- a/library/axi_dmac/axi_dmac_pkg_sv.ttcl +++ b/library/axi_dmac/axi_dmac_pkg_sv.ttcl @@ -11,17 +11,23 @@ <: set id [get_property MODELPARAM_VALUE.ID] :> <: set dma_data_width_src [get_property MODELPARAM_VALUE.DMA_DATA_WIDTH_SRC] :> <: set dma_data_width_dest [get_property MODELPARAM_VALUE.DMA_DATA_WIDTH_DEST] :> +<: set dma_data_width_sg [get_property MODELPARAM_VALUE.DMA_DATA_WIDTH_SG] :> <: set dma_length_width [get_property MODELPARAM_VALUE.DMA_LENGTH_WIDTH] :> <: set dma_2d_transfer [get_property MODELPARAM_VALUE.DMA_2D_TRANSFER] :> +<: set dma_sg_transfer [get_property MODELPARAM_VALUE.DMA_SG_TRANSFER] :> <: set async_clk_req_src [get_property MODELPARAM_VALUE.ASYNC_CLK_REQ_SRC] :> <: set async_clk_src_dest [get_property MODELPARAM_VALUE.ASYNC_CLK_SRC_DEST] :> <: set async_clk_dest_req [get_property MODELPARAM_VALUE.ASYNC_CLK_DEST_REQ] :> +<: set async_clk_req_sg [get_property MODELPARAM_VALUE.ASYNC_CLK_REQ_SG] :> +<: set async_clk_src_sg [get_property MODELPARAM_VALUE.ASYNC_CLK_SRC_SG] :> +<: set async_clk_dest_sg [get_property MODELPARAM_VALUE.ASYNC_CLK_DEST_SG] :> <: set axi_slice_dest [get_property MODELPARAM_VALUE.AXI_SLICE_DEST] :> <: set axi_slice_src [get_property MODELPARAM_VALUE.AXI_SLICE_SRC] :> <: set sync_transfer_start [get_property MODELPARAM_VALUE.SYNC_TRANSFER_START] :> <: set cyclic [get_property MODELPARAM_VALUE.CYCLIC] :> <: set dma_axi_protocol_dest [get_property MODELPARAM_VALUE.DMA_AXI_PROTOCOL_DEST] :> <: set dma_axi_protocol_src [get_property MODELPARAM_VALUE.DMA_AXI_PROTOCOL_SRC] :> +<: set dma_axi_protocol_sg [get_property MODELPARAM_VALUE.DMA_AXI_PROTOCOL_SG] :> <: set dma_type_dest [get_property MODELPARAM_VALUE.DMA_TYPE_DEST] :> <: set dma_type_src [get_property MODELPARAM_VALUE.DMA_TYPE_SRC] :> <: set dma_axi_addr_width [get_property MODELPARAM_VALUE.DMA_AXI_ADDR_WIDTH] :> @@ -29,6 +35,7 
@@ <: set fifo_size [get_property MODELPARAM_VALUE.FIFO_SIZE] :> <: set axi_id_width_src [get_property MODELPARAM_VALUE.AXI_ID_WIDTH_SRC] :> <: set axi_id_width_dest [get_property MODELPARAM_VALUE.AXI_ID_WIDTH_DEST] :> +<: set axi_id_width_sg [get_property MODELPARAM_VALUE.AXI_ID_WIDTH_SG] :> <: set disable_debug_registers [get_property MODELPARAM_VALUE.DISABLE_DEBUG_REGISTERS] :> <: proc b2i {b} { if {$b==true} {return 1} else {return 0}} :> @@ -45,17 +52,23 @@ package <=: ComponentName :>_pkg; parameter <=: ComponentName :>_ID = <=: $id :>; parameter <=: ComponentName :>_DMA_DATA_WIDTH_SRC = <=: $dma_data_width_src :>; parameter <=: ComponentName :>_DMA_DATA_WIDTH_DEST = <=: $dma_data_width_dest :>; + parameter <=: ComponentName :>_DMA_DATA_WIDTH_SG = <=: $dma_data_width_sg :>; parameter <=: ComponentName :>_DMA_LENGTH_WIDTH = <=: $dma_length_width :>; parameter <=: ComponentName :>_DMA_2D_TRANSFER = <=: b2i $dma_2d_transfer :>; + parameter <=: ComponentName :>_DMA_SG_TRANSFER = <=: b2i $dma_sg_transfer :>; parameter <=: ComponentName :>_ASYNC_CLK_REQ_SRC = <=: b2i $async_clk_req_src :>; parameter <=: ComponentName :>_ASYNC_CLK_SRC_DEST = <=: b2i $async_clk_src_dest :>; parameter <=: ComponentName :>_ASYNC_CLK_DEST_REQ = <=: b2i $async_clk_dest_req :>; + parameter <=: ComponentName :>_ASYNC_CLK_REQ_SG = <=: b2i $async_clk_req_sg :>; + parameter <=: ComponentName :>_ASYNC_CLK_SRC_SG = <=: b2i $async_clk_src_sg :>; + parameter <=: ComponentName :>_ASYNC_CLK_DEST_SG = <=: b2i $async_clk_dest_sg :>; parameter <=: ComponentName :>_AXI_SLICE_DEST = <=: b2i $axi_slice_dest :>; parameter <=: ComponentName :>_AXI_SLICE_SRC = <=: b2i $axi_slice_src :>; parameter <=: ComponentName :>_SYNC_TRANSFER_START = <=: b2i $sync_transfer_start :>; parameter <=: ComponentName :>_CYCLIC = <=: b2i $cyclic :>; parameter <=: ComponentName :>_DMA_AXI_PROTOCOL_DEST = <=: $dma_axi_protocol_dest :>; parameter <=: ComponentName :>_DMA_AXI_PROTOCOL_SRC = <=: $dma_axi_protocol_src :>; + 
parameter <=: ComponentName :>_DMA_AXI_PROTOCOL_SG = <=: $dma_axi_protocol_sg :>; parameter <=: ComponentName :>_DMA_TYPE_DEST = <=: $dma_type_dest :>; parameter <=: ComponentName :>_DMA_TYPE_SRC = <=: $dma_type_src :>; parameter <=: ComponentName :>_DMA_AXI_ADDR_WIDTH = <=: $dma_axi_addr_width :>; @@ -63,6 +76,7 @@ package <=: ComponentName :>_pkg; parameter <=: ComponentName :>_FIFO_SIZE = <=: $fifo_size :>; parameter <=: ComponentName :>_AXI_ID_WIDTH_SRC = <=: $axi_id_width_src :>; parameter <=: ComponentName :>_AXI_ID_WIDTH_DEST = <=: $axi_id_width_dest :>; + parameter <=: ComponentName :>_AXI_ID_WIDTH_SG = <=: $axi_id_width_sg :>; parameter <=: ComponentName :>_DISABLE_DEBUG_REGISTERS = <=: b2i $disable_debug_registers :>; ////////////////////////////////////////////////////////////////////////// diff --git a/library/axi_dmac/axi_dmac_regmap.v b/library/axi_dmac/axi_dmac_regmap.v index e26380dac..19c264c75 100644 --- a/library/axi_dmac/axi_dmac_regmap.v +++ b/library/axi_dmac/axi_dmac_regmap.v @@ -40,6 +40,7 @@ module axi_dmac_regmap #( parameter DISABLE_DEBUG_REGISTERS = 0, parameter BYTES_PER_BEAT_WIDTH_DEST = 1, parameter BYTES_PER_BEAT_WIDTH_SRC = 1, + parameter BYTES_PER_BEAT_WIDTH_SG = 1, parameter BYTES_PER_BURST_WIDTH = 7, parameter DMA_TYPE_DEST = 0, parameter DMA_TYPE_SRC = 2, @@ -50,6 +51,7 @@ module axi_dmac_regmap #( parameter HAS_DEST_ADDR = 1, parameter HAS_SRC_ADDR = 1, parameter DMA_2D_TRANSFER = 0, + parameter DMA_SG_TRANSFER = 0, parameter SYNC_TRANSFER_START = 0, parameter CACHE_COHERENT_DEST = 0 ) ( @@ -88,12 +90,14 @@ module axi_dmac_regmap #( // Control interface output reg ctrl_enable = 1'b0, output reg ctrl_pause = 1'b0, + output reg ctrl_hwdesc = 1'b0, // DMA request interface output request_valid, input request_ready, output [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_DEST] request_dest_address, output [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SRC] request_src_address, + output [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SG] 
request_sg_address, output [DMA_LENGTH_WIDTH-1:0] request_x_length, output [DMA_LENGTH_WIDTH-1:0] request_y_length, output [DMA_LENGTH_WIDTH-1:0] request_dest_stride, @@ -103,6 +107,7 @@ module axi_dmac_regmap #( // DMA response interface input response_eot, + input [31:0] response_sg_desc_id, input [BYTES_PER_BURST_WIDTH-1:0] response_measured_burst_length, input response_partial, input response_valid, @@ -116,7 +121,7 @@ module axi_dmac_regmap #( input [31:0] dbg_ids1 ); - localparam PCORE_VERSION = 'h00040461; + localparam PCORE_VERSION = 'h00040561; localparam HAS_ADDR_HIGH = DMA_AXI_ADDR_WIDTH > 32; localparam ADDR_LOW_MSB = HAS_ADDR_HIGH ? 31 : DMA_AXI_ADDR_WIDTH-1; @@ -174,6 +179,7 @@ module axi_dmac_regmap #( if (s_axi_aresetn == 1'b0) begin ctrl_enable <= 1'b0; ctrl_pause <= 1'b0; + ctrl_hwdesc <= 1'b0; up_irq_mask <= 2'b11; up_scratch <= 32'h00; up_wack <= 1'b0; @@ -182,9 +188,17 @@ module axi_dmac_regmap #( if (up_wreq == 1'b1) begin case (up_waddr) - 9'h002: up_scratch <= up_wdata; - 9'h020: up_irq_mask <= up_wdata[1:0]; - 9'h100: {ctrl_pause, ctrl_enable} <= up_wdata[1:0]; + 9'h002: begin + up_scratch <= up_wdata; + end + 9'h020: begin + up_irq_mask <= up_wdata[1:0]; + end + 9'h100: begin + ctrl_hwdesc <= up_wdata[2] & DMA_SG_TRANSFER; + ctrl_pause <= up_wdata[1]; + ctrl_enable <= up_wdata[0]; + end endcase end end @@ -213,7 +227,7 @@ module axi_dmac_regmap #( 9'h020: up_rdata <= up_irq_mask; 9'h021: up_rdata <= up_irq_pending; 9'h022: up_rdata <= up_irq_source; - 9'h100: up_rdata <= {ctrl_pause, ctrl_enable}; + 9'h100: up_rdata <= {ctrl_hwdesc, ctrl_pause, ctrl_enable}; 9'h10d: up_rdata <= DISABLE_DEBUG_REGISTERS ? 32'h00 : dbg_dest_addr[ADDR_LOW_MSB:0]; 9'h10e: up_rdata <= DISABLE_DEBUG_REGISTERS ? 32'h00 : dbg_src_addr[ADDR_LOW_MSB:0]; 9'h10f: up_rdata <= DISABLE_DEBUG_REGISTERS ? 
32'h00 : dbg_status; @@ -230,6 +244,7 @@ module axi_dmac_regmap #( .DISABLE_DEBUG_REGISTERS(DISABLE_DEBUG_REGISTERS), .BYTES_PER_BEAT_WIDTH_DEST(BYTES_PER_BEAT_WIDTH_DEST), .BYTES_PER_BEAT_WIDTH_SRC(BYTES_PER_BEAT_WIDTH_SRC), + .BYTES_PER_BEAT_WIDTH_SG(BYTES_PER_BEAT_WIDTH_SG), .BYTES_PER_BURST_WIDTH(BYTES_PER_BURST_WIDTH), .DMA_AXI_ADDR_WIDTH(DMA_AXI_ADDR_WIDTH), .DMA_LENGTH_WIDTH(DMA_LENGTH_WIDTH), @@ -238,6 +253,7 @@ module axi_dmac_regmap #( .HAS_DEST_ADDR(HAS_DEST_ADDR), .HAS_SRC_ADDR(HAS_SRC_ADDR), .DMA_2D_TRANSFER(DMA_2D_TRANSFER), + .DMA_SG_TRANSFER(DMA_SG_TRANSFER), .SYNC_TRANSFER_START(SYNC_TRANSFER_START) ) i_regmap_request ( .clk(s_axi_aclk), @@ -254,11 +270,13 @@ module axi_dmac_regmap #( .up_rdata(up_rdata_request), .ctrl_enable(ctrl_enable), + .ctrl_hwdesc(ctrl_hwdesc), .request_valid(request_valid), .request_ready(request_ready), .request_dest_address(request_dest_address), .request_src_address(request_src_address), + .request_sg_address(request_sg_address), .request_x_length(request_x_length), .request_y_length(request_y_length), .request_dest_stride(request_dest_stride), @@ -267,6 +285,7 @@ module axi_dmac_regmap #( .request_last(request_last), .response_eot(response_eot), + .response_sg_desc_id(response_sg_desc_id), .response_measured_burst_length(response_measured_burst_length), .response_partial(response_partial), .response_valid(response_valid), diff --git a/library/axi_dmac/axi_dmac_regmap_request.v b/library/axi_dmac/axi_dmac_regmap_request.v index adb240a2f..6f1c491ab 100644 --- a/library/axi_dmac/axi_dmac_regmap_request.v +++ b/library/axi_dmac/axi_dmac_regmap_request.v @@ -39,6 +39,7 @@ module axi_dmac_regmap_request #( parameter DISABLE_DEBUG_REGISTERS = 0, parameter BYTES_PER_BEAT_WIDTH_DEST = 1, parameter BYTES_PER_BEAT_WIDTH_SRC = 1, + parameter BYTES_PER_BEAT_WIDTH_SG = 1, parameter BYTES_PER_BURST_WIDTH = 7, parameter DMA_AXI_ADDR_WIDTH = 32, parameter DMA_LENGTH_WIDTH = 24, @@ -47,6 +48,7 @@ module axi_dmac_regmap_request #( 
parameter HAS_DEST_ADDR = 1, parameter HAS_SRC_ADDR = 1, parameter DMA_2D_TRANSFER = 0, + parameter DMA_SG_TRANSFER = 0, parameter SYNC_TRANSFER_START = 0 ) ( input clk, @@ -66,12 +68,14 @@ module axi_dmac_regmap_request #( // Control interface input ctrl_enable, + input ctrl_hwdesc, // DMA request interface output request_valid, input request_ready, output [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_DEST] request_dest_address, output [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SRC] request_src_address, + output [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SG] request_sg_address, output [DMA_LENGTH_WIDTH-1:0] request_x_length, output [DMA_LENGTH_WIDTH-1:0] request_y_length, output [DMA_LENGTH_WIDTH-1:0] request_dest_stride, @@ -81,13 +85,14 @@ module axi_dmac_regmap_request #( // DMA response interface input response_eot, + input [31:0] response_sg_desc_id, input [BYTES_PER_BURST_WIDTH-1:0] response_measured_burst_length, input response_partial, input response_valid, output reg response_ready = 1'b1 ); - localparam MEASURED_LENGTH_WIDTH = (DMA_2D_TRANSFER == 1) ? 32 : DMA_LENGTH_WIDTH; + localparam MEASURED_LENGTH_WIDTH = DMA_2D_TRANSFER ? 32 : DMA_LENGTH_WIDTH; localparam HAS_ADDR_HIGH = DMA_AXI_ADDR_WIDTH > 32; localparam ADDR_LOW_MSB = HAS_ADDR_HIGH ? 31 : DMA_AXI_ADDR_WIDTH-1; localparam ADDR_HIGH_MSB = HAS_ADDR_HIGH ? DMA_AXI_ADDR_WIDTH-32-1 : 0; @@ -128,8 +133,8 @@ module axi_dmac_regmap_request #( always @(posedge clk) begin if (reset == 1'b1) begin - up_dma_src_address <= 'h00; up_dma_dest_address <= 'h00; + up_dma_src_address <= 'h00; up_dma_x_length[DMA_LENGTH_WIDTH-1:DMA_LENGTH_ALIGN] <= 'h00; up_dma_req_valid <= 1'b0; up_dma_cyclic <= DMA_CYCLIC ? 
1'b1 : 1'b0; @@ -186,8 +191,11 @@ module axi_dmac_regmap_request #( 9'h112: up_rdata <= up_measured_transfer_length; 9'h113: up_rdata <= up_tlf_data[MEASURED_LENGTH_WIDTH-1 : 0]; // Length 9'h114: up_rdata <= up_tlf_data[MEASURED_LENGTH_WIDTH+: 2]; // ID + 9'h115: up_rdata <= response_sg_desc_id; + 9'h11f: up_rdata <= {request_sg_address[ADDR_LOW_MSB:BYTES_PER_BEAT_WIDTH_SG],{BYTES_PER_BEAT_WIDTH_SG{1'b0}}}; 9'h124: up_rdata <= (HAS_ADDR_HIGH && HAS_DEST_ADDR) ? up_dma_dest_address[DMA_AXI_ADDR_WIDTH-1:32] : 32'h00; 9'h125: up_rdata <= (HAS_ADDR_HIGH && HAS_SRC_ADDR) ? up_dma_src_address[DMA_AXI_ADDR_WIDTH-1:32] : 32'h00; + 9'h12f: up_rdata <= HAS_ADDR_HIGH ? request_sg_address[DMA_AXI_ADDR_WIDTH-1:32] : 32'h00; default: up_rdata <= 32'h00; endcase end @@ -221,9 +229,32 @@ module axi_dmac_regmap_request #( end endgenerate + generate + if (DMA_SG_TRANSFER == 1) begin + reg [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SG] up_dma_sg_address = 'h00; + + always @(posedge clk) begin + if (reset == 1'b1) begin + up_dma_sg_address <= 'h00; + end else if (up_wreq == 1'b1) begin + case (up_waddr) + 9'h11f: up_dma_sg_address[ADDR_LOW_MSB:BYTES_PER_BEAT_WIDTH_SG] <= up_wdata[ADDR_LOW_MSB:BYTES_PER_BEAT_WIDTH_SG]; + 9'h12f: + if (HAS_ADDR_HIGH) begin + up_dma_sg_address[DMA_AXI_ADDR_WIDTH-1:32] <= up_wdata[ADDR_HIGH_MSB:0]; + end + endcase + end + end + assign request_sg_address = up_dma_sg_address; + end else begin + assign request_sg_address = 'h00; + end + endgenerate + // In cyclic mode the same transfer is submitted over and over again - assign up_sot = up_dma_cyclic ? 1'b0 : up_dma_req_valid & up_dma_req_ready; - assign up_eot = up_dma_cyclic ? 1'b0 : response_eot & response_valid & response_ready; + assign up_sot = (up_dma_cyclic && !ctrl_hwdesc) ? 1'b0 : up_dma_req_valid & up_dma_req_ready; + assign up_eot = (up_dma_cyclic && !ctrl_hwdesc) ? 
1'b0 : response_eot & response_valid & response_ready; assign request_valid = up_dma_req_valid; assign up_dma_req_ready = request_ready; diff --git a/library/axi_dmac/axi_dmac_reset_manager.v b/library/axi_dmac/axi_dmac_reset_manager.v index ac3ca6872..71dd0b734 100644 --- a/library/axi_dmac/axi_dmac_reset_manager.v +++ b/library/axi_dmac/axi_dmac_reset_manager.v @@ -38,13 +38,16 @@ module axi_dmac_reset_manager #( parameter ASYNC_CLK_REQ_SRC = 1, parameter ASYNC_CLK_SRC_DEST = 1, - parameter ASYNC_CLK_DEST_REQ = 1 + parameter ASYNC_CLK_DEST_REQ = 1, + parameter ASYNC_CLK_REQ_SG = 1, + parameter DMA_SG_TRANSFER = 0 ) ( input clk, input resetn, input ctrl_enable, input ctrl_pause, + input ctrl_hwdesc, output req_resetn, output req_enable, @@ -62,6 +65,12 @@ module axi_dmac_reset_manager #( output src_enable, input src_enabled, + input sg_clk, + input sg_ext_resetn, + output sg_resetn, + output sg_enable, + input sg_enabled, + output [11:0] dbg_status ); @@ -87,16 +96,22 @@ module axi_dmac_reset_manager #( wire enabled_dest; wire enabled_src; + wire enabled_sg; wire enabled_all; wire disabled_all; + generate if (DMA_SG_TRANSFER == 1) begin + assign enabled_all = req_enabled & enabled_src & enabled_dest & (enabled_sg | ~ctrl_hwdesc); + assign disabled_all = ~(req_enabled | enabled_src | enabled_dest | (enabled_sg & ctrl_hwdesc)); + end else begin assign enabled_all = req_enabled & enabled_src & enabled_dest; assign disabled_all = ~(req_enabled | enabled_src | enabled_dest); + end endgenerate assign req_enable = do_enable; - assign dbg_status = {needs_reset,req_resetn,src_resetn,dest_resetn,1'b0,req_enabled,enabled_src,enabled_dest,1'b0,state}; + assign dbg_status = {needs_reset,req_resetn,src_resetn,dest_resetn,sg_resetn,req_enabled,enabled_src,enabled_dest,enabled_sg,state}; always @(posedge clk) begin if (state == STATE_DO_RESET) begin @@ -201,23 +216,25 @@ module axi_dmac_reset_manager #( * successive domains have the same clock they'll share their reset signal. 
*/ - wire [3:0] reset_async_chain; - wire [3:0] reset_sync_chain; - wire [2:0] reset_chain_clks = {clk, src_clk, dest_clk}; - + localparam NUM_RESET_LINKS = DMA_SG_TRANSFER ? 4 : 3; localparam GEN_ASYNC_RESET = { + ASYNC_CLK_REQ_SG ? 1'b1 : 1'b0, ASYNC_CLK_REQ_SRC ? 1'b1 : 1'b0, ASYNC_CLK_SRC_DEST ? 1'b1 : 1'b0, 1'b1 }; + wire [NUM_RESET_LINKS:0] reset_async_chain; + wire [NUM_RESET_LINKS:0] reset_sync_chain; + wire [3:0] reset_chain_clks = {sg_clk, clk, src_clk, dest_clk}; + assign reset_async_chain[0] = 1'b0; - assign reset_sync_chain[0] = reset_async_chain[3]; + assign reset_sync_chain[0] = reset_async_chain[NUM_RESET_LINKS]; generate genvar i; - for (i = 0; i < 3; i = i + 1) begin: reset_gen + for (i = 0; i < NUM_RESET_LINKS; i = i + 1) begin: reset_gen if (GEN_ASYNC_RESET[i] == 1'b1) begin @@ -260,6 +277,11 @@ module axi_dmac_reset_manager #( assign dest_resetn = ~reset_sync_chain[1]; assign src_resetn = ~reset_sync_chain[2]; assign req_resetn = ~reset_sync_chain[3]; + generate if (DMA_SG_TRANSFER == 1) begin + assign sg_resetn = ~reset_sync_chain[4]; + end else begin + assign sg_resetn = 1'b0; + end endgenerate sync_bits #( .NUM_OF_BITS (1), @@ -297,4 +319,28 @@ module axi_dmac_reset_manager #( .in_bits (src_enabled), .out_bits (enabled_src)); + generate if (DMA_SG_TRANSFER == 1) begin + sync_bits #( + .NUM_OF_BITS (1), + .ASYNC_CLK (ASYNC_CLK_REQ_SG) + ) i_sync_control_sg ( + .out_clk (sg_clk), + .out_resetn (1'b1), + .in_bits (do_enable), + .out_bits (sg_enable)); + + sync_bits #( + .NUM_OF_BITS (1), + .ASYNC_CLK (ASYNC_CLK_REQ_SG) + ) i_sync_status_sg ( + .out_clk (clk), + .out_resetn (1'b1), + .in_bits (sg_enabled), + .out_bits (enabled_sg)); + + end else begin + assign sg_enable = 1'b0; + assign enabled_sg = 1'b0; + end endgenerate + endmodule diff --git a/library/axi_dmac/axi_dmac_response_manager.v b/library/axi_dmac/axi_dmac_response_manager.v index 333d9212b..c51ac780d 100644 --- a/library/axi_dmac/axi_dmac_response_manager.v +++ b/library/axi_dmac/axi_dmac_response_manager.v @@ -155,7 +155,7 @@ module
axi_dmac_response_manager #( end end - assign response_eot = (state == STATE_WRITE_RESPR) ? req_eot : 1'b1; + assign response_eot = (state == STATE_WRITE_RESPR) ? req_eot : 1'b0; assign response_partial = (state == STATE_WRITE_RESPR) ? req_response_partial : 1'b0; always @(posedge req_clk) diff --git a/library/axi_dmac/axi_dmac_transfer.v b/library/axi_dmac/axi_dmac_transfer.v index 352858c1a..b3c55beaa 100644 --- a/library/axi_dmac/axi_dmac_transfer.v +++ b/library/axi_dmac/axi_dmac_transfer.v @@ -38,17 +38,21 @@ module axi_dmac_transfer #( parameter DMA_DATA_WIDTH_SRC = 64, parameter DMA_DATA_WIDTH_DEST = 64, + parameter DMA_DATA_WIDTH_SG = 64, parameter DMA_LENGTH_WIDTH = 24, parameter DMA_LENGTH_ALIGN = 3, parameter BYTES_PER_BEAT_WIDTH_DEST = $clog2(DMA_DATA_WIDTH_DEST/8), parameter BYTES_PER_BEAT_WIDTH_SRC = $clog2(DMA_DATA_WIDTH_SRC/8), + parameter BYTES_PER_BEAT_WIDTH_SG = $clog2(DMA_DATA_WIDTH_SG/8), parameter DMA_TYPE_DEST = 0, parameter DMA_TYPE_SRC = 2, parameter DMA_AXI_ADDR_WIDTH = 32, - parameter DMA_2D_TRANSFER = 1, + parameter DMA_2D_TRANSFER = 0, + parameter DMA_SG_TRANSFER = 0, parameter ASYNC_CLK_REQ_SRC = 1, parameter ASYNC_CLK_SRC_DEST = 1, parameter ASYNC_CLK_DEST_REQ = 1, + parameter ASYNC_CLK_REQ_SG = 1, parameter AXI_SLICE_DEST = 0, parameter AXI_SLICE_SRC = 0, parameter MAX_BYTES_PER_BURST = 128, @@ -57,6 +61,7 @@ module axi_dmac_transfer #( parameter ID_WIDTH = $clog2(FIFO_SIZE*2), parameter AXI_LENGTH_WIDTH_SRC = 8, parameter AXI_LENGTH_WIDTH_DEST = 8, + parameter AXI_LENGTH_WIDTH_SG = 8, parameter ENABLE_DIAGNOSTICS_IF = 0, parameter ALLOW_ASYM_MEM = 0, parameter CACHE_COHERENT_DEST = 0 @@ -66,12 +71,14 @@ module axi_dmac_transfer #( input ctrl_enable, input ctrl_pause, + input ctrl_hwdesc, input req_valid, output req_ready, input [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_DEST] req_dest_address, input [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SRC] req_src_address, + input [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SG] req_sg_address, 
input [DMA_LENGTH_WIDTH-1:0] req_x_length, input [DMA_LENGTH_WIDTH-1:0] req_y_length, input [DMA_LENGTH_WIDTH-1:0] req_dest_stride, @@ -80,6 +87,7 @@ module axi_dmac_transfer #( input req_last, output req_eot, + output [31:0] req_sg_desc_id, output [BYTES_PER_BURST_WIDTH-1:0] req_measured_burst_length, output req_response_partial, output req_response_valid, @@ -90,6 +98,8 @@ module axi_dmac_transfer #( input m_dest_axi_aresetn, input m_src_axi_aclk, input m_src_axi_aresetn, + input m_sg_axi_aclk, + input m_sg_axi_aresetn, // Write address output [DMA_AXI_ADDR_WIDTH-1:0] m_axi_awaddr, @@ -130,6 +140,23 @@ module axi_dmac_transfer #( input m_axi_rvalid, input [1:0] m_axi_rresp, + // Read address + input m_sg_axi_arready, + output m_sg_axi_arvalid, + output [DMA_AXI_ADDR_WIDTH-1:0] m_sg_axi_araddr, + output [AXI_LENGTH_WIDTH_SG-1:0] m_sg_axi_arlen, + output [2:0] m_sg_axi_arsize, + output [1:0] m_sg_axi_arburst, + output [2:0] m_sg_axi_arprot, + output [3:0] m_sg_axi_arcache, + + // Read data and response + input [DMA_DATA_WIDTH_SG-1:0] m_sg_axi_rdata, + input m_sg_axi_rlast, + output m_sg_axi_rready, + input m_sg_axi_rvalid, + input [1:0] m_sg_axi_rresp, + // Slave streaming AXI interface input s_axis_aclk, output s_axis_ready, @@ -190,6 +217,19 @@ module axi_dmac_transfer #( wire dma_req_sync_transfer_start; wire dma_req_last; + wire [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_DEST] dma_sg_out_dest_address; + wire [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SRC] dma_sg_out_src_address; + wire [DMA_LENGTH_WIDTH-1:0] dma_sg_x_length; + wire [DMA_LENGTH_WIDTH-1:0] dma_sg_y_length; + wire [DMA_LENGTH_WIDTH-1:0] dma_sg_src_stride; + wire [DMA_LENGTH_WIDTH-1:0] dma_sg_dst_stride; + wire [31:0] dma_sg_hwdesc_id; + wire dma_sg_hwdesc_eot; + wire dma_sg_in_req_valid; + wire dma_sg_in_req_ready; + wire dma_sg_out_req_valid; + wire dma_sg_out_req_ready; + wire req_clk = ctrl_clk; wire req_resetn; @@ -207,21 +247,31 @@ module axi_dmac_transfer #( wire src_enable; wire 
src_enabled; + wire sg_clk; + wire sg_ext_resetn; + wire sg_resetn; + wire sg_enable; + wire sg_enabled; + wire req_valid_gated; wire req_ready_gated; wire abort_req; + wire dma_eot; axi_dmac_reset_manager #( .ASYNC_CLK_REQ_SRC (ASYNC_CLK_REQ_SRC), .ASYNC_CLK_SRC_DEST (ASYNC_CLK_SRC_DEST), - .ASYNC_CLK_DEST_REQ (ASYNC_CLK_DEST_REQ) + .ASYNC_CLK_DEST_REQ (ASYNC_CLK_DEST_REQ), + .ASYNC_CLK_REQ_SG (ASYNC_CLK_REQ_SG), + .DMA_SG_TRANSFER (DMA_SG_TRANSFER) ) i_reset_manager ( .clk (ctrl_clk), .resetn (ctrl_resetn), .ctrl_enable (ctrl_enable), .ctrl_pause (ctrl_pause), + .ctrl_hwdesc (ctrl_hwdesc), .req_resetn (req_resetn), .req_enable (req_enable), @@ -239,6 +289,12 @@ module axi_dmac_transfer #( .src_enable (src_enable), .src_enabled (src_enabled), + .sg_clk (sg_clk), + .sg_ext_resetn (sg_ext_resetn), + .sg_resetn (sg_resetn), + .sg_enable (sg_enable), + .sg_enabled (sg_enabled), + .dbg_status (dbg_status)); /* @@ -250,8 +306,114 @@ module axi_dmac_transfer #( assign req_valid_gated = req_enable & req_valid; assign req_ready = req_enable & req_ready_gated; + assign req_eot = ctrl_hwdesc ? (dma_eot & dma_sg_hwdesc_eot) : dma_eot; + assign req_sg_desc_id = ctrl_hwdesc ? dma_sg_hwdesc_id : 'h00; + assign dma_sg_in_req_valid = ctrl_hwdesc ? 
req_valid_gated : 1'b0; + + /* SG Interface */ + generate if (DMA_SG_TRANSFER == 1) begin + + dmac_sg #( + .DMA_AXI_ADDR_WIDTH(DMA_AXI_ADDR_WIDTH), + .DMA_DATA_WIDTH(DMA_DATA_WIDTH_SG), + .DMA_LENGTH_WIDTH(DMA_LENGTH_WIDTH), + .AXI_LENGTH_WIDTH(AXI_LENGTH_WIDTH_SG), + .BYTES_PER_BEAT_WIDTH_DEST(BYTES_PER_BEAT_WIDTH_DEST), + .BYTES_PER_BEAT_WIDTH_SRC(BYTES_PER_BEAT_WIDTH_SRC), + .BYTES_PER_BEAT_WIDTH_SG(BYTES_PER_BEAT_WIDTH_SG), + .ASYNC_CLK_REQ_SG(ASYNC_CLK_REQ_SG) + ) i_dmac_sg ( + .req_clk(req_clk), + .req_resetn(req_resetn), + .req_enable(req_enable), + + .sg_clk(sg_clk), + .sg_ext_resetn(sg_ext_resetn), + .sg_resetn(sg_resetn), + .sg_enable(sg_enable), + .sg_enabled(sg_enabled), + + .req_in_valid(dma_sg_in_req_valid), + .req_in_ready(dma_sg_in_req_ready), + + .req_out_valid(dma_sg_out_req_valid), + .req_out_ready(dma_sg_out_req_ready), + + .req_desc_address(req_sg_address), + + .out_dest_address(dma_sg_out_dest_address), + .out_src_address(dma_sg_out_src_address), + + .out_x_length(dma_sg_x_length), + .out_y_length(dma_sg_y_length), + .out_dest_stride(dma_sg_dst_stride), + .out_src_stride(dma_sg_src_stride), + .resp_out_id(dma_sg_hwdesc_id), + .resp_out_eot(dma_sg_hwdesc_eot), + .resp_in_valid(dma_eot), + + .m_axi_aclk(m_sg_axi_aclk), + .m_axi_aresetn(m_sg_axi_aresetn), + + .m_axi_arready(m_sg_axi_arready), + .m_axi_arvalid(m_sg_axi_arvalid), + .m_axi_araddr(m_sg_axi_araddr), + .m_axi_arlen(m_sg_axi_arlen), + .m_axi_arsize(m_sg_axi_arsize), + .m_axi_arburst(m_sg_axi_arburst), + .m_axi_arprot(m_sg_axi_arprot), + .m_axi_arcache(m_sg_axi_arcache), + + .m_axi_rdata(m_sg_axi_rdata), + .m_axi_rlast(m_sg_axi_rlast), + .m_axi_rready(m_sg_axi_rready), + .m_axi_rvalid(m_sg_axi_rvalid), + .m_axi_rresp(m_sg_axi_rresp)); + + end else begin + + assign sg_clk = 1'b0; + assign sg_ext_resetn = 1'b0; + assign sg_enabled = 1'b0; + assign dma_sg_in_req_ready = 1'b0; + assign dma_sg_out_req_valid = 1'b0; + assign dma_sg_hwdesc_eot = 1'b0; + assign dma_sg_out_dest_address = 'h00; + 
assign dma_sg_out_src_address = 'h00; + assign dma_sg_x_length = 'h00; + assign dma_sg_y_length = 'h00; + assign dma_sg_dst_stride = 'h00; + assign dma_sg_src_stride = 'h00; + assign dma_sg_hwdesc_id = 'h00; + + end endgenerate + + /* 2D Interface */ generate if (DMA_2D_TRANSFER == 1) begin + wire [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_DEST] dma_2d_dest_address; + wire [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SRC] dma_2d_src_address; + wire [DMA_LENGTH_WIDTH-1:0] dma_2d_x_length; + wire [DMA_LENGTH_WIDTH-1:0] dma_2d_y_length; + wire [DMA_LENGTH_WIDTH-1:0] dma_2d_src_stride; + wire [DMA_LENGTH_WIDTH-1:0] dma_2d_dst_stride; + wire dma_2d_eot; + + wire dma_2d_req_valid; + wire dma_2d_req_ready; + + assign dma_2d_dest_address = ctrl_hwdesc ? dma_sg_out_dest_address : req_dest_address; + assign dma_2d_src_address = ctrl_hwdesc ? dma_sg_out_src_address : req_src_address; + assign dma_2d_x_length = ctrl_hwdesc ? dma_sg_x_length : req_x_length; + assign dma_2d_y_length = ctrl_hwdesc ? dma_sg_y_length : req_y_length; + assign dma_2d_src_stride = ctrl_hwdesc ? dma_sg_src_stride : req_src_stride; + assign dma_2d_dst_stride = ctrl_hwdesc ? dma_sg_dst_stride : req_dest_stride; + + assign dma_2d_req_valid = ctrl_hwdesc ? dma_sg_out_req_valid : req_valid_gated; + assign req_ready_gated = ctrl_hwdesc ? 
dma_sg_in_req_ready : dma_2d_req_ready; + assign dma_eot = dma_2d_eot; + assign dma_sg_out_req_ready = dma_2d_req_ready; + dmac_2d_transfer #( .DMA_AXI_ADDR_WIDTH(DMA_AXI_ADDR_WIDTH), .DMA_LENGTH_WIDTH (DMA_LENGTH_WIDTH), @@ -262,20 +424,20 @@ module axi_dmac_transfer #( .req_aclk (req_clk), .req_aresetn (req_resetn), - .req_eot (req_eot), + .req_eot (dma_2d_eot), .req_measured_burst_length (req_measured_burst_length), .req_response_partial (req_response_partial), .req_response_valid (req_response_valid), .req_response_ready (req_response_ready), - .req_valid (req_valid_gated), - .req_ready (req_ready_gated), - .req_dest_address (req_dest_address), - .req_src_address (req_src_address), - .req_x_length (req_x_length), - .req_y_length (req_y_length), - .req_dest_stride (req_dest_stride), - .req_src_stride (req_src_stride), + .req_valid (dma_2d_req_valid), + .req_ready (dma_2d_req_ready), + .req_dest_address (dma_2d_dest_address), + .req_src_address (dma_2d_src_address), + .req_x_length (dma_2d_x_length), + .req_y_length (dma_2d_y_length), + .req_dest_stride (dma_2d_dst_stride), + .req_src_stride (dma_2d_src_stride), .req_sync_transfer_start (req_sync_transfer_start), .req_last (req_last), @@ -296,17 +458,18 @@ module axi_dmac_transfer #( end else begin /* Request */ - assign dma_req_valid = req_valid_gated; - assign req_ready_gated = dma_req_ready; + assign dma_req_valid = ctrl_hwdesc ? dma_sg_out_req_valid : req_valid_gated; + assign req_ready_gated = ctrl_hwdesc ? dma_sg_in_req_ready : dma_req_ready; + assign dma_eot = dma_req_eot; + assign dma_sg_out_req_ready = dma_req_ready; - assign dma_req_dest_address = req_dest_address; - assign dma_req_src_address = req_src_address; - assign dma_req_length = req_x_length; + assign dma_req_dest_address = ctrl_hwdesc ? dma_sg_out_dest_address : req_dest_address; + assign dma_req_src_address = ctrl_hwdesc ? dma_sg_out_src_address : req_src_address; + assign dma_req_length = ctrl_hwdesc ? 
dma_sg_x_length : req_x_length; assign dma_req_sync_transfer_start = req_sync_transfer_start; assign dma_req_last = req_last; /* Response */ - assign req_eot = dma_req_eot; assign req_measured_burst_length = dma_req_measured_burst_length; assign req_response_partial = dma_response_partial; assign req_response_valid = dma_response_valid; diff --git a/library/axi_dmac/bd/bd.tcl b/library/axi_dmac/bd/bd.tcl index 52f15798a..85a51d82d 100644 --- a/library/axi_dmac/bd/bd.tcl +++ b/library/axi_dmac/bd/bd.tcl @@ -7,7 +7,7 @@ proc init {cellpath otherInfo} { set ip [get_bd_cells $cellpath] bd::mark_propagate_override $ip \ - "ASYNC_CLK_REQ_SRC ASYNC_CLK_SRC_DEST ASYNC_CLK_DEST_REQ" + "ASYNC_CLK_REQ_SRC ASYNC_CLK_SRC_DEST ASYNC_CLK_DEST_REQ ASYNC_CLK_REQ_SG ASYNC_CLK_SRC_SG ASYNC_CLK_DEST_SG" bd::mark_propagate_override $ip \ "DMA_AXI_ADDR_WIDTH" @@ -22,14 +22,19 @@ proc init {cellpath otherInfo} { } foreach dir {SRC DEST} { - # This is a bit of a hack, but we can't change the protocol if the type - # is not AXI MM + # Change the protocol by first enabling the parameter - setting the type to AXI MM set old [get_property "CONFIG.DMA_TYPE_${dir}" $ip] set_property "CONFIG.DMA_TYPE_${dir}" "0" $ip set_property "CONFIG.DMA_AXI_PROTOCOL_${dir}" $axi_protocol $ip set_property "CONFIG.DMA_TYPE_${dir}" $old $ip } + # Change the protocol by first enabling the parameter - enabling the SG transfers + set old [get_property "CONFIG.DMA_SG_TRANSFER" $ip] + set_property "CONFIG.DMA_SG_TRANSFER" "true" $ip + set_property "CONFIG.DMA_AXI_PROTOCOL_SG" $axi_protocol $ip + set_property "CONFIG.DMA_SG_TRANSFER" $old $ip + # Versions earlier than 2017.3 infer sub-optimal asymmetric memory # See https://www.xilinx.com/support/answers/69179.html regexp {^[0-9]+\.[0-9]+} [version -short] short_version @@ -80,6 +85,27 @@ proc post_config_ip {cellpath otherinfo} { set_property CONFIG.NUM_READ_OUTSTANDING 0 $intf } } + + # SG interface configuration + set sg_enabled [get_property 
CONFIG.DMA_SG_TRANSFER $ip] + if {$sg_enabled == "true"} { + set axi_protocol [get_property "CONFIG.DMA_AXI_PROTOCOL_SG" $ip] + + if {$axi_protocol == 0} { + set axi_protocol_str "AXI4" + set max_beats_per_burst 256 + } else { + set axi_protocol_str "AXI3" + set max_beats_per_burst 16 + } + + set intf [get_bd_intf_pins [format "%s/m_sg_axi" $cellpath]] + set_property CONFIG.PROTOCOL $axi_protocol_str $intf + set_property CONFIG.MAX_BURST_LENGTH $max_beats_per_burst $intf + + set_property CONFIG.NUM_WRITE_OUTSTANDING 0 $intf + set_property CONFIG.NUM_READ_OUTSTANDING 0 $intf + } } proc axi_dmac_detect_async_clk { cellpath ip param_name clk_a clk_b } { @@ -118,6 +144,7 @@ proc propagate {cellpath otherinfo} { set ip [get_bd_cells $cellpath] set src_type [get_property CONFIG.DMA_TYPE_SRC $ip] set dest_type [get_property CONFIG.DMA_TYPE_DEST $ip] + set sg_enabled [get_property CONFIG.DMA_SG_TRANSFER $ip] set req_clk [get_bd_pins "$ip/s_axi_aclk"] @@ -140,6 +167,13 @@ proc propagate {cellpath otherinfo} { axi_dmac_detect_async_clk $cellpath $ip "ASYNC_CLK_REQ_SRC" $req_clk $src_clk axi_dmac_detect_async_clk $cellpath $ip "ASYNC_CLK_SRC_DEST" $src_clk $dest_clk axi_dmac_detect_async_clk $cellpath $ip "ASYNC_CLK_DEST_REQ" $dest_clk $req_clk + + if {$sg_enabled == "true"} { + set sg_clk [get_bd_pins "$ip/m_sg_axi_aclk"] + axi_dmac_detect_async_clk $cellpath $ip "ASYNC_CLK_REQ_SG" $req_clk $sg_clk + axi_dmac_detect_async_clk $cellpath $ip "ASYNC_CLK_SRC_SG" $src_clk $sg_clk + axi_dmac_detect_async_clk $cellpath $ip "ASYNC_CLK_DEST_SG" $dest_clk $sg_clk + } } proc post_propagate {cellpath otherinfo} { diff --git a/library/axi_dmac/dmac_sg.v b/library/axi_dmac/dmac_sg.v new file mode 100644 index 000000000..03c676f9e --- /dev/null +++ b/library/axi_dmac/dmac_sg.v @@ -0,0 +1,336 @@ +// *************************************************************************** +// *************************************************************************** +// Copyright (C) 2023 Analog Devices, 
Inc. All rights reserved. +// +// In this HDL repository, there are many different and unique modules, consisting +// of various HDL (Verilog or VHDL) components. The individual modules are +// developed independently, and may be accompanied by separate and unique license +// terms. +// +// The user should read each of these license terms, and understand the +// freedoms and responsibilities that he or she has by using this source/core. +// +// This core is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. +// +// Redistribution and use of source or resulting binaries, with or without modification +// of this file, are permitted under one of the following two license terms: +// +// 1. The GNU General Public License version 2 as published by the +// Free Software Foundation, which can be found in the top level directory +// of this repository (LICENSE_GPL2), and also online at: +// +// +// OR +// +// 2. An ADI specific BSD license, which can be found in the top level directory +// of this repository (LICENSE_ADIBSD), and also on-line at: +// https://github.com/analogdevicesinc/hdl/blob/master/LICENSE_ADIBSD +// This will allow to generate bit files and not release the source code, +// as long as it attaches to an ADI device. 
+//
+// ***************************************************************************
+// ***************************************************************************
+
+`timescale 1ns/100ps
+
+// Scatter-gather descriptor fetcher for the DMA controller.
+//
+// The address of the first descriptor is received in the request clock
+// domain, moved to the scatter-gather (AXI) clock domain and used to read one
+// descriptor per AXI burst. The decoded transfer parameters are handed back
+// to the request clock domain, while the descriptor ID and the
+// end-of-transfer interrupt flag are queued and released one entry per
+// pulse of resp_in_valid.
+//
+// Descriptor layout in memory (48 bytes, read as six 64-bit beats):
+//   beat 0: [31:0] flags      [63:32] id
+//   beat 1: dest_addr
+//   beat 2: src_addr
+//   beat 3: next_sg_addr
+//   beat 4: [31:0] y_len      [63:32] x_len
+//   beat 5: [31:0] src_stride [63:32] dst_stride
+//
+// NOTE(review): the field slices below are hard-coded for 64-bit beats, so
+// DMA_DATA_WIDTH is presumably required to be 64 - confirm at the
+// instantiation site.
+// NOTE(review): m_axi_rresp is not evaluated anywhere in this module; read
+// error responses are silently ignored.
+module dmac_sg #(
+  parameter DMA_AXI_ADDR_WIDTH = 32,
+  parameter DMA_DATA_WIDTH = 64,
+  parameter DMA_LENGTH_WIDTH = 24,
+  parameter AXI_LENGTH_WIDTH = 8,
+  parameter BYTES_PER_BEAT_WIDTH_DEST = 3,
+  parameter BYTES_PER_BEAT_WIDTH_SRC = 3,
+  parameter BYTES_PER_BEAT_WIDTH_SG = 3,
+  parameter ASYNC_CLK_REQ_SG = 1
+) (
+  input req_clk,
+  input req_resetn,
+  input req_enable,
+
+  output sg_clk,
+  input sg_resetn,
+  output sg_ext_resetn,
+  input sg_enable,
+  output sg_enabled,
+
+  input req_in_valid,
+  output req_in_ready,
+
+  output req_out_valid,
+  input req_out_ready,
+
+  output resp_out_eot,
+  input resp_in_valid,
+
+  input [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SG] req_desc_address,
+
+  output [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_DEST] out_dest_address,
+  output [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SRC] out_src_address,
+  output [DMA_LENGTH_WIDTH-1:0] out_x_length,
+  output [DMA_LENGTH_WIDTH-1:0] out_y_length,
+  output [DMA_LENGTH_WIDTH-1:0] out_dest_stride,
+  output [DMA_LENGTH_WIDTH-1:0] out_src_stride,
+  output [31:0] resp_out_id,
+
+  // Master AXI interface
+  input m_axi_aclk,
+  input m_axi_aresetn,
+
+  // Read address
+  input m_axi_arready,
+  output m_axi_arvalid,
+  output [DMA_AXI_ADDR_WIDTH-1:0] m_axi_araddr,
+  output [AXI_LENGTH_WIDTH-1:0] m_axi_arlen,
+  output [ 2:0] m_axi_arsize,
+  output [ 1:0] m_axi_arburst,
+  output [ 2:0] m_axi_arprot,
+  output [ 3:0] m_axi_arcache,
+
+  // Read data and response
+  input [DMA_DATA_WIDTH-1:0] m_axi_rdata,
+  input m_axi_rlast,
+  output m_axi_rready,
+  input m_axi_rvalid,
+  input [ 1:0] m_axi_rresp
+);
+
+  // Descriptor fetch FSM states
+  localparam STATE_IDLE = 0;
+  localparam STATE_SEND_ADDR = 1;
+  localparam STATE_RECV_DESC = 2;
+  localparam STATE_DESC_READY = 3;
+
+  // Bit masks for the 'flags' field of the descriptor
+  localparam MASK_LAST_HWDESC = 1 << 0;
+  localparam MASK_EOT_IRQ = 1 << 1;
+
+  // Number of 64-bit beats that make up one descriptor
+  localparam HWDESC_BEATS = 6;
+
+  localparam DMA_ADDRESS_WIDTH_DEST = DMA_AXI_ADDR_WIDTH - BYTES_PER_BEAT_WIDTH_DEST;
+  localparam DMA_ADDRESS_WIDTH_SRC = DMA_AXI_ADDR_WIDTH - BYTES_PER_BEAT_WIDTH_SRC;
+  localparam DMA_ADDRESS_WIDTH_SG = DMA_AXI_ADDR_WIDTH - BYTES_PER_BEAT_WIDTH_SG;
+  localparam DMA_DESCRIPTOR_WIDTH = DMA_ADDRESS_WIDTH_DEST + DMA_ADDRESS_WIDTH_SRC + 4*DMA_LENGTH_WIDTH;
+
+  wire [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SG] first_desc_address;
+  reg [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_DEST] dest_addr;
+  reg [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SRC] src_addr;
+  reg [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SG] next_desc_addr;
+  reg [DMA_LENGTH_WIDTH-1:0] x_length;
+  reg [DMA_LENGTH_WIDTH-1:0] y_length;
+  reg [DMA_LENGTH_WIDTH-1:0] dest_stride;
+  reg [DMA_LENGTH_WIDTH-1:0] src_stride;
+
+  reg [1:0] hwdesc_state;
+  reg [2:0] hwdesc_counter;
+  reg [1:0] hwdesc_flags;
+  reg [31:0] hwdesc_id;
+
+  wire sg_in_valid;
+  wire sg_in_ready;
+  wire sg_out_valid;
+  wire sg_out_ready;
+  wire fetch_valid;
+  wire fetch_ready;
+  wire fifo_in_valid;
+  wire fifo_in_ready;
+  wire fifo_out_valid;
+  wire fifo_out_ready;
+  wire [32:0] fifo_in_data;
+  wire [32:0] fifo_out_data;
+  wire rd_beat;
+
+  // The scatter-gather logic runs entirely in the AXI master clock domain.
+  assign sg_clk = m_axi_aclk;
+  assign sg_ext_resetn = m_axi_aresetn;
+
+  // Reported as enabled while the enable is requested or the FSM is not idle.
+  assign sg_enabled = sg_enable | ~sg_in_ready;
+
+  // Handshake/strobe signals are plain decodes of the FSM state.
+  // NOTE(review): sg_in_ready does not depend on sg_enable, while the FSM only
+  // leaves STATE_IDLE when sg_enable is set. An address that arrives while
+  // sg_enable is low is therefore consumed without being fetched - presumably
+  // intentional, confirm against the reset manager.
+  assign sg_in_ready = hwdesc_state == STATE_IDLE;
+  assign fetch_valid = hwdesc_state == STATE_DESC_READY;
+  assign m_axi_arvalid = hwdesc_state == STATE_SEND_ADDR;
+  assign m_axi_rready = hwdesc_state == STATE_RECV_DESC;
+
+  // A read data beat is transferred only when both RVALID and RREADY are high.
+  assign rd_beat = m_axi_rvalid & m_axi_rready;
+
+  // One incrementing burst of HWDESC_BEATS beats, 8 bytes each, per descriptor.
+  assign m_axi_arsize = 3'h3;
+  assign m_axi_arburst = 2'h1;
+  assign m_axi_arprot = 3'h0;
+  assign m_axi_arcache = 4'h3;
+  assign m_axi_arlen = HWDESC_BEATS - 1;
+  assign m_axi_araddr = {next_desc_addr, {BYTES_PER_BEAT_WIDTH_SG{1'b0}}};
+
+  // Moves the address of the first descriptor from the request clock domain
+  // to the scatter-gather clock domain.
+  util_axis_fifo #(
+    .DATA_WIDTH(DMA_ADDRESS_WIDTH_SG),
+    .ADDRESS_WIDTH(0),
+    .ASYNC_CLK(ASYNC_CLK_REQ_SG)
+  ) i_sg_addr_fifo (
+    .s_axis_aclk(req_clk),
+    .s_axis_aresetn(req_resetn),
+    .s_axis_valid(req_in_valid),
+    .s_axis_ready(req_in_ready),
+    .s_axis_full(),
+    .s_axis_data(req_desc_address),
+    .s_axis_room(),
+
+    .m_axis_aclk(sg_clk),
+    .m_axis_aresetn(sg_resetn),
+    .m_axis_valid(sg_in_valid),
+    .m_axis_ready(sg_in_ready),
+    .m_axis_data(first_desc_address),
+    .m_axis_level(),
+    .m_axis_empty());
+
+  // Beat counter: index of the descriptor word carried by the current read
+  // beat. Cleared once the descriptor is complete so that the next burst
+  // starts again at word 0.
+  always @(posedge sg_clk) begin
+    if (sg_resetn == 1'b0) begin
+      hwdesc_counter <= 'h0;
+    end else if (rd_beat) begin
+      hwdesc_counter <= hwdesc_counter + 1'b1;
+    end else if (hwdesc_state == STATE_DESC_READY) begin
+      hwdesc_counter <= 'h0;
+    end
+  end
+
+  // Read the descriptor data
+  always @(posedge sg_clk) begin
+    if (sg_resetn == 1'b0) begin
+      hwdesc_flags <= 'h00;
+      hwdesc_id <= 'h00;
+      dest_addr <= 'h00;
+      src_addr <= 'h00;
+      next_desc_addr <= 'h00;
+      y_length <= 'h00;
+      x_length <= 'h00;
+      src_stride <= 'h00;
+      dest_stride <= 'h00;
+    end else begin
+      // A new chain starts at the address submitted through the request FIFO.
+      if (sg_in_valid && sg_in_ready) begin
+        next_desc_addr <= first_desc_address;
+      end
+      // Capture one descriptor word per accepted read beat. The 32-bit
+      // length/stride fields are truncated to DMA_LENGTH_WIDTH by assignment.
+      if (rd_beat) begin
+        case (hwdesc_counter)
+          0: begin
+            hwdesc_id <= m_axi_rdata[63:32];
+            hwdesc_flags <= m_axi_rdata[1:0];
+          end
+          1: dest_addr <= m_axi_rdata[DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_DEST];
+          2: src_addr <= m_axi_rdata[DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SRC];
+          3: next_desc_addr <= m_axi_rdata[DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SG];
+          4: begin
+            x_length <= m_axi_rdata[63:32];
+            y_length <= m_axi_rdata[31:0];
+          end
+          5: begin
+            dest_stride <= m_axi_rdata[63:32];
+            src_stride <= m_axi_rdata[31:0];
+          end
+        endcase
+      end
+    end
+  end
+
+  // Descriptor FSM
+  //   IDLE       -> SEND_ADDR  : first descriptor address available and enabled
+  //   SEND_ADDR  -> RECV_DESC  : read address accepted
+  //   RECV_DESC  -> DESC_READY : last data beat of the burst received
+  //   DESC_READY -> IDLE       : disabled, or descriptor flagged as last
+  //   DESC_READY -> SEND_ADDR  : descriptor handed over, fetch the next one
+  always @(posedge sg_clk) begin
+    if (sg_resetn == 1'b0) begin
+      hwdesc_state <= STATE_IDLE;
+    end else begin
+      case (hwdesc_state)
+        STATE_IDLE: begin
+          if (sg_in_valid == 1'b1 && sg_enable == 1'b1) begin
+            hwdesc_state <= STATE_SEND_ADDR;
+          end
+        end
+
+        STATE_SEND_ADDR: begin
+          if (m_axi_arready) begin
+            hwdesc_state <= STATE_RECV_DESC;
+          end
+        end
+
+        STATE_RECV_DESC: begin
+          if (rd_beat == 1'b1 && m_axi_rlast == 1'b1) begin
+            hwdesc_state <= STATE_DESC_READY;
+          end
+        end
+
+        STATE_DESC_READY: begin
+          // Disabling takes priority over handing the descriptor over.
+          if (sg_enable == 1'b0) begin
+            hwdesc_state <= STATE_IDLE;
+          end else if (fetch_ready == 1'b1) begin
+            if (hwdesc_flags & MASK_LAST_HWDESC) begin
+              hwdesc_state <= STATE_IDLE;
+            end else begin
+              hwdesc_state <= STATE_SEND_ADDR;
+            end
+          end
+        end
+      endcase
+    end
+  end
+
+  // Hands the decoded transfer parameters over to the request clock domain.
+  util_axis_fifo #(
+    .DATA_WIDTH(DMA_DESCRIPTOR_WIDTH),
+    .ADDRESS_WIDTH(0),
+    .ASYNC_CLK(ASYNC_CLK_REQ_SG)
+  ) i_sg_desc_fifo (
+    .s_axis_aclk(sg_clk),
+    .s_axis_aresetn(sg_resetn),
+    .s_axis_valid(sg_out_valid),
+    .s_axis_ready(sg_out_ready),
+    .s_axis_full(),
+    .s_axis_data({
+      dest_addr,
+      src_addr,
+      x_length,
+      y_length,
+      dest_stride,
+      src_stride}),
+    .s_axis_room(),
+
+    .m_axis_aclk(req_clk),
+    .m_axis_aresetn(req_resetn),
+    .m_axis_valid(req_out_valid),
+    .m_axis_ready(req_out_ready),
+    .m_axis_data({
+      out_dest_address,
+      out_src_address,
+      out_x_length,
+      out_y_length,
+      out_dest_stride,
+      out_src_stride}),
+    .m_axis_level(),
+    .m_axis_empty());
+
+  // Offers a fetched descriptor to both the transfer-parameter FIFO and the
+  // ID/EOT FIFO.
+  // NOTE(review): presumably fetch_ready is only asserted once both sinks
+  // have accepted the descriptor - verify against splitter.v.
+  splitter #(
+    .NUM_M(2)
+  ) i_req_splitter (
+    .clk(sg_clk),
+    .resetn(sg_resetn),
+    .s_valid(fetch_valid),
+    .s_ready(fetch_ready),
+    .m_valid({
+      sg_out_valid,
+      fifo_in_valid}),
+    .m_ready({
+      sg_out_ready,
+      fifo_in_ready}));
+
+  // Bit 32 carries the end-of-transfer interrupt flag, bits 31:0 the ID.
+  assign fifo_in_data = {(hwdesc_flags & MASK_EOT_IRQ) ? 1'b1 : 1'b0, hwdesc_id};
+  assign fifo_out_ready = resp_in_valid;
+  assign resp_out_eot = fifo_out_data[32];
+  assign resp_out_id = fifo_out_data[31:0];
+
+  // Save the descriptor IDs and the eot descriptor flag in an async fifo
+  // Extract them one by one when the destination responds with an eot
+  util_axis_fifo #(
+    .DATA_WIDTH(33),
+    .ADDRESS_WIDTH(2),
+    .ASYNC_CLK(ASYNC_CLK_REQ_SG)
+  ) i_fifo (
+    .s_axis_aclk(sg_clk),
+    .s_axis_aresetn(sg_resetn),
+
+    .s_axis_valid(fifo_in_valid),
+    .s_axis_ready(fifo_in_ready),
+    .s_axis_data(fifo_in_data),
+
+    .m_axis_aclk(req_clk),
+    .m_axis_aresetn(req_resetn),
+
+    .m_axis_valid(fifo_out_valid),
+    .m_axis_ready(fifo_out_ready),
+    .m_axis_data(fifo_out_data));
+
+endmodule
diff --git a/library/axi_dmac/request_arb.v b/library/axi_dmac/request_arb.v
index 9b6ad6c0f..c733f4fef 100644
--- a/library/axi_dmac/request_arb.v
+++ b/library/axi_dmac/request_arb.v
@@ -208,9 +208,6 @@ module request_arb #(
   wire [ID_WIDTH-1:0] source_id;
   wire [ID_WIDTH-1:0] response_id;
 
-  wire enabled_src;
-  wire enabled_dest;
-
   wire req_gen_valid;
   wire req_gen_ready;
   wire src_dest_valid;