From 7986310fa0ace3a9af5cf456b23e92569e0b4294 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 8 Jun 2018 11:43:43 +0200 Subject: [PATCH] axi_dmac: burst_memory: Add support for using asymmetric memory FPGAs support different widths for the read and write port of the block SRAM cells. The DMAC can make use of this feature when the source and destination interfaces have different widths to up-size/down-size the data bus. Using memory cells with asymmetric port widths consumes the same number of SRAM cells, but allows bypassing the re-size blocks inside the DMAC that are otherwise used for up- and down-sizing. This reduces overall resource usage and can improve timing. If the ratio between the destination and source port is too large to be handled by SRAM alone, the SRAM block will be configured to do partial up- or down-sizing and a resize block will be inserted to take care of the remaining up-/down-sizing. E.g. if a 256-bit interface is connected to a 32-bit interface, the SRAM will be used to do an initial resizing of 256 bit to 64 bit and a resize block will be used to do the remaining resizing from 64 bit to 32 bit. Currently this feature is disabled for Intel FPGAs since Quartus does not properly infer a block RAM with different read and write port widths from the current ad_mem_asym module. Once that has been resolved, support for asymmetric memories can also be enabled in the DMAC. 
Signed-off-by: Lars-Peter Clausen --- library/axi_dmac/axi_dmac.v | 7 ++- library/axi_dmac/axi_dmac_burst_memory.v | 62 +++++++++++++++-------- library/axi_dmac/axi_dmac_hw.tcl | 2 +- library/axi_dmac/axi_dmac_ip.tcl | 4 +- library/axi_dmac/axi_dmac_transfer.v | 6 ++- library/axi_dmac/bd/bd.tcl | 6 +++ library/axi_dmac/request_arb.v | 8 +-- library/axi_dmac/tb/dma_read_shutdown_tb | 2 +- library/axi_dmac/tb/dma_read_tb | 2 +- library/axi_dmac/tb/dma_write_shutdown_tb | 2 +- library/axi_dmac/tb/dma_write_tb | 2 +- 11 files changed, 68 insertions(+), 35 deletions(-) diff --git a/library/axi_dmac/axi_dmac.v b/library/axi_dmac/axi_dmac.v index 99b639e61..bf7e64d51 100644 --- a/library/axi_dmac/axi_dmac.v +++ b/library/axi_dmac/axi_dmac.v @@ -59,7 +59,9 @@ module axi_dmac #( parameter AXI_ID_WIDTH_SRC = 1, parameter AXI_ID_WIDTH_DEST = 1, parameter DISABLE_DEBUG_REGISTERS = 0, - parameter ENABLE_DIAGNOSTICS_IF = 0)( + parameter ENABLE_DIAGNOSTICS_IF = 0, + parameter ALLOW_ASYM_MEM = 0 +) ( // Slave AXI interface input s_axi_aclk, input s_axi_aresetn, @@ -466,7 +468,8 @@ axi_dmac_transfer #( .ID_WIDTH(ID_WIDTH), .AXI_LENGTH_WIDTH_SRC(8-(4*DMA_AXI_PROTOCOL_SRC)), .AXI_LENGTH_WIDTH_DEST(8-(4*DMA_AXI_PROTOCOL_DEST)), - .ENABLE_DIAGNOSTICS_IF(ENABLE_DIAGNOSTICS_IF) + .ENABLE_DIAGNOSTICS_IF(ENABLE_DIAGNOSTICS_IF), + .ALLOW_ASYM_MEM(ALLOW_ASYM_MEM) ) i_transfer ( .ctrl_clk(s_axi_aclk), .ctrl_resetn(s_axi_aresetn), diff --git a/library/axi_dmac/axi_dmac_burst_memory.v b/library/axi_dmac/axi_dmac_burst_memory.v index beae1e303..fbf12a3ea 100644 --- a/library/axi_dmac/axi_dmac_burst_memory.v +++ b/library/axi_dmac/axi_dmac_burst_memory.v @@ -44,7 +44,8 @@ module axi_dmac_burst_memory #( parameter BYTES_PER_BEAT_WIDTH_SRC = $clog2(DATA_WIDTH_SRC/8), parameter BYTES_PER_BURST_WIDTH = $clog2(MAX_BYTES_PER_BURST), parameter DMA_LENGTH_ALIGN = 3, - parameter ENABLE_DIAGNOSTICS_IF = 0 + parameter ENABLE_DIAGNOSTICS_IF = 0, + parameter ALLOW_ASYM_MEM = 0 ) ( input src_clk, input 
src_reset, @@ -78,11 +79,13 @@ module axi_dmac_burst_memory #( output [7:0] dest_diag_level_bursts ); -localparam DATA_WIDTH = DATA_WIDTH_SRC > DATA_WIDTH_DEST ? +localparam DATA_WIDTH_MEM = DATA_WIDTH_SRC > DATA_WIDTH_DEST ? DATA_WIDTH_SRC : DATA_WIDTH_DEST; +localparam MEM_RATIO = DATA_WIDTH_SRC > DATA_WIDTH_DEST ? + DATA_WIDTH_SRC / DATA_WIDTH_DEST : DATA_WIDTH_DEST / DATA_WIDTH_SRC; /* A burst can have up to 256 beats */ -localparam BURST_LEN = MAX_BYTES_PER_BURST / (DATA_WIDTH / 8); +localparam BURST_LEN = MAX_BYTES_PER_BURST / (DATA_WIDTH_MEM / 8); localparam BURST_LEN_WIDTH = BURST_LEN > 128 ? 8 : BURST_LEN > 64 ? 7 : BURST_LEN > 32 ? 6 : @@ -91,11 +94,26 @@ localparam BURST_LEN_WIDTH = BURST_LEN > 128 ? 8 : BURST_LEN > 4 ? 3 : BURST_LEN > 2 ? 2 : 1; -localparam ADDRESS_WIDTH = BURST_LEN_WIDTH + ID_WIDTH - 1; - localparam AUX_FIFO_SIZE = 2**(ID_WIDTH-1); -localparam BYTES_PER_BEAT_WIDTH = BYTES_PER_BURST_WIDTH - BURST_LEN_WIDTH; +localparam MEM_RATIO_WIDTH = + (ALLOW_ASYM_MEM == 0 || MEM_RATIO == 1) ? 0 : + MEM_RATIO == 2 ? 1 : + MEM_RATIO == 4 ? 2 : 3; + +localparam BURST_LEN_WIDTH_SRC = BURST_LEN_WIDTH + + (DATA_WIDTH_SRC < DATA_WIDTH_MEM ? MEM_RATIO_WIDTH : 0); +localparam BURST_LEN_WIDTH_DEST = BURST_LEN_WIDTH + + (DATA_WIDTH_DEST < DATA_WIDTH_MEM ? MEM_RATIO_WIDTH : 0); +localparam DATA_WIDTH_MEM_SRC = DATA_WIDTH_MEM >> + (DATA_WIDTH_SRC < DATA_WIDTH_MEM ? MEM_RATIO_WIDTH : 0); +localparam DATA_WIDTH_MEM_DEST = DATA_WIDTH_MEM >> + (DATA_WIDTH_DEST < DATA_WIDTH_MEM ? MEM_RATIO_WIDTH : 0); + +localparam ADDRESS_WIDTH_SRC = BURST_LEN_WIDTH_SRC + ID_WIDTH - 1; +localparam ADDRESS_WIDTH_DEST = BURST_LEN_WIDTH_DEST + ID_WIDTH - 1; + +localparam BYTES_PER_BEAT_WIDTH_MEM_SRC = BYTES_PER_BURST_WIDTH - BURST_LEN_WIDTH_SRC; /* * The burst memory is separated into 2**(ID_WIDTH-1) segments. 
Each segment can @@ -124,14 +142,14 @@ localparam BYTES_PER_BEAT_WIDTH = BYTES_PER_BURST_WIDTH - BURST_LEN_WIDTH; reg [ID_WIDTH-1:0] src_id_next; reg [ID_WIDTH-1:0] src_id = 'h0; reg src_id_reduced_msb = 1'b0; -reg [BURST_LEN_WIDTH-1:0] src_beat_counter = 'h00; +reg [BURST_LEN_WIDTH_SRC-1:0] src_beat_counter = 'h00; reg [ID_WIDTH-1:0] dest_id_next = 'h0; reg dest_id_reduced_msb_next = 1'b0; reg dest_id_reduced_msb = 1'b0; reg [ID_WIDTH-1:0] dest_id = 'h0; -reg [BURST_LEN_WIDTH-1:0] dest_beat_counter = 'h00; -wire [BURST_LEN_WIDTH-1:0] dest_burst_len; +reg [BURST_LEN_WIDTH_DEST-1:0] dest_beat_counter = 'h00; +wire [BURST_LEN_WIDTH_DEST-1:0] dest_burst_len; reg dest_valid = 1'b0; reg dest_mem_data_valid = 1'b0; reg dest_mem_data_last = 1'b0; @@ -144,26 +162,26 @@ reg [BYTES_PER_BURST_WIDTH+1-1:0] dest_burst_len_data = {DMA_LENGTH_ALIGN{1'b1}} wire src_beat; wire src_last_beat; wire [ID_WIDTH-1:0] src_dest_id; -wire [ADDRESS_WIDTH-1:0] src_waddr; +wire [ADDRESS_WIDTH_SRC-1:0] src_waddr; wire [ID_WIDTH-2:0] src_id_reduced; wire src_mem_data_valid; wire src_mem_data_last; -wire [DATA_WIDTH-1:0] src_mem_data; -wire [BYTES_PER_BEAT_WIDTH-1:0] src_mem_data_valid_bytes; +wire [DATA_WIDTH_MEM_SRC-1:0] src_mem_data; +wire [BYTES_PER_BEAT_WIDTH_MEM_SRC-1:0] src_mem_data_valid_bytes; wire src_mem_data_partial_burst; wire dest_beat; wire dest_last_beat; wire dest_last; wire [ID_WIDTH-1:0] dest_src_id; -wire [ADDRESS_WIDTH-1:0] dest_raddr; +wire [ADDRESS_WIDTH_DEST-1:0] dest_raddr; wire [ID_WIDTH-2:0] dest_id_reduced_next; wire [ID_WIDTH-1:0] dest_id_next_inc; wire [ID_WIDTH-2:0] dest_id_reduced; wire dest_burst_valid; wire dest_burst_ready; wire dest_ready; -wire [DATA_WIDTH-1:0] dest_mem_data; +wire [DATA_WIDTH_MEM_DEST-1:0] dest_mem_data; wire dest_mem_data_ready; `include "inc_id.vh" @@ -317,13 +335,13 @@ always @(posedge dest_clk) begin dest_burst_info_write <= (dest_burst_valid == 1'b1 && dest_burst_ready == 1'b1); end -assign dest_burst_len = 
dest_burst_len_data[BYTES_PER_BURST_WIDTH-1 -: BURST_LEN_WIDTH]; +assign dest_burst_len = dest_burst_len_data[BYTES_PER_BURST_WIDTH-1 -: BURST_LEN_WIDTH_DEST]; axi_dmac_resize_src #( .DATA_WIDTH_SRC (DATA_WIDTH_SRC), .BYTES_PER_BEAT_WIDTH_SRC (BYTES_PER_BEAT_WIDTH_SRC), - .DATA_WIDTH_MEM (DATA_WIDTH), - .BYTES_PER_BEAT_WIDTH_MEM (BYTES_PER_BEAT_WIDTH) + .DATA_WIDTH_MEM (DATA_WIDTH_MEM_SRC), + .BYTES_PER_BEAT_WIDTH_MEM (BYTES_PER_BEAT_WIDTH_MEM_SRC) ) i_resize_src ( .clk (src_clk), .reset (src_reset), @@ -345,9 +363,11 @@ assign src_burst_len_data = {src_mem_data_partial_burst, src_beat_counter, src_mem_data_valid_bytes}; -ad_mem #( - .DATA_WIDTH (DATA_WIDTH), - .ADDRESS_WIDTH (ADDRESS_WIDTH) +ad_mem_asym #( + .A_ADDRESS_WIDTH (ADDRESS_WIDTH_SRC), + .A_DATA_WIDTH (DATA_WIDTH_MEM_SRC), + .B_ADDRESS_WIDTH (ADDRESS_WIDTH_DEST), + .B_DATA_WIDTH (DATA_WIDTH_MEM_DEST) ) i_mem ( .clka (src_clk), .wea (src_beat), @@ -362,7 +382,7 @@ ad_mem #( axi_dmac_resize_dest #( .DATA_WIDTH_DEST (DATA_WIDTH_DEST), - .DATA_WIDTH_MEM (DATA_WIDTH) + .DATA_WIDTH_MEM (DATA_WIDTH_MEM_DEST) ) i_resize_dest ( .clk (dest_clk), .reset (dest_reset), diff --git a/library/axi_dmac/axi_dmac_hw.tcl b/library/axi_dmac/axi_dmac_hw.tcl index 933a850d8..7e5cb2b01 100644 --- a/library/axi_dmac/axi_dmac_hw.tcl +++ b/library/axi_dmac/axi_dmac_hw.tcl @@ -20,7 +20,7 @@ ad_ip_files axi_dmac [list \ $ad_hdl_dir/library/common/up_axi.v \ $ad_hdl_dir/library/util_axis_fifo/util_axis_fifo.v \ $ad_hdl_dir/library/util_axis_fifo/address_sync.v \ - $ad_hdl_dir/library/common/ad_mem.v \ + $ad_hdl_dir/library/common/ad_mem_asym.v \ inc_id.vh \ resp.vh \ axi_dmac_burst_memory.v \ diff --git a/library/axi_dmac/axi_dmac_ip.tcl b/library/axi_dmac/axi_dmac_ip.tcl index 2e07e200b..9caad86e1 100644 --- a/library/axi_dmac/axi_dmac_ip.tcl +++ b/library/axi_dmac/axi_dmac_ip.tcl @@ -5,7 +5,7 @@ source $ad_hdl_dir/library/scripts/adi_ip.tcl adi_ip_create axi_dmac adi_ip_files axi_dmac [list \ - "$ad_hdl_dir/library/common/ad_mem.v" 
\ + "$ad_hdl_dir/library/common/ad_mem_asym.v" \ "$ad_hdl_dir/library/common/up_axi.v" \ "inc_id.vh" \ "resp.vh" \ @@ -391,7 +391,7 @@ set_property -dict [list \ ipgui::remove_param -component $cc [ipgui::get_guiparamspec -name "DMA_AXI_ADDR_WIDTH" -component $cc] ipgui::remove_param -component $cc [ipgui::get_guiparamspec -name "AXI_ID_WIDTH_SRC" -component $cc] ipgui::remove_param -component $cc [ipgui::get_guiparamspec -name "AXI_ID_WIDTH_DEST" -component $cc] - +ipgui::remove_param -component $cc [ipgui::get_guiparamspec -name "ALLOW_ASYM_MEM" -component $cc] ipx::create_xgui_files [ipx::current_core] ipx::save_core $cc diff --git a/library/axi_dmac/axi_dmac_transfer.v b/library/axi_dmac/axi_dmac_transfer.v index 3f0b3863e..1eac9175a 100644 --- a/library/axi_dmac/axi_dmac_transfer.v +++ b/library/axi_dmac/axi_dmac_transfer.v @@ -57,7 +57,8 @@ module axi_dmac_transfer #( parameter ID_WIDTH = $clog2(FIFO_SIZE*2), parameter AXI_LENGTH_WIDTH_SRC = 8, parameter AXI_LENGTH_WIDTH_DEST = 8, - parameter ENABLE_DIAGNOSTICS_IF = 0 + parameter ENABLE_DIAGNOSTICS_IF = 0, + parameter ALLOW_ASYM_MEM = 0 ) ( input ctrl_clk, input ctrl_resetn, @@ -335,7 +336,8 @@ dmac_request_arb #( .ID_WIDTH (ID_WIDTH), .AXI_LENGTH_WIDTH_DEST (AXI_LENGTH_WIDTH_DEST), .AXI_LENGTH_WIDTH_SRC (AXI_LENGTH_WIDTH_SRC), - .ENABLE_DIAGNOSTICS_IF(ENABLE_DIAGNOSTICS_IF) + .ENABLE_DIAGNOSTICS_IF(ENABLE_DIAGNOSTICS_IF), + .ALLOW_ASYM_MEM (ALLOW_ASYM_MEM) ) i_request_arb ( .req_clk (req_clk), .req_resetn (req_resetn), diff --git a/library/axi_dmac/bd/bd.tcl b/library/axi_dmac/bd/bd.tcl index 54dc8e762..d8405085b 100644 --- a/library/axi_dmac/bd/bd.tcl +++ b/library/axi_dmac/bd/bd.tcl @@ -25,6 +25,12 @@ proc init {cellpath otherInfo} { set_property "CONFIG.DMA_AXI_PROTOCOL_${dir}" $axi_protocol $ip set_property "CONFIG.DMA_TYPE_${dir}" $old $ip } + + # Versions earlier than 2017.3 infer sub-optimal asymmetric memory + # See https://www.xilinx.com/support/answers/69179.html + if {[expr [version -short] > 
2017.2]} { + set_property "CONFIG.ALLOW_ASYM_MEM" 1 $ip + } } proc post_config_ip {cellpath otherinfo} { diff --git a/library/axi_dmac/request_arb.v b/library/axi_dmac/request_arb.v index 3bd8dafe1..778a1d964 100644 --- a/library/axi_dmac/request_arb.v +++ b/library/axi_dmac/request_arb.v @@ -56,8 +56,9 @@ module dmac_request_arb #( parameter ID_WIDTH = $clog2(FIFO_SIZE*2), parameter AXI_LENGTH_WIDTH_SRC = 8, parameter AXI_LENGTH_WIDTH_DEST = 8, - parameter ENABLE_DIAGNOSTICS_IF = 0)( - + parameter ENABLE_DIAGNOSTICS_IF = 0, + parameter ALLOW_ASYM_MEM = 0 +)( input req_clk, input req_resetn, @@ -941,7 +942,8 @@ axi_dmac_burst_memory #( .BYTES_PER_BEAT_WIDTH_SRC(BYTES_PER_BEAT_WIDTH_SRC), .BYTES_PER_BURST_WIDTH(BYTES_PER_BURST_WIDTH), .DMA_LENGTH_ALIGN(DMA_LENGTH_ALIGN), - .ENABLE_DIAGNOSTICS_IF(ENABLE_DIAGNOSTICS_IF) + .ENABLE_DIAGNOSTICS_IF(ENABLE_DIAGNOSTICS_IF), + .ALLOW_ASYM_MEM(ALLOW_ASYM_MEM) ) i_store_and_forward ( .src_clk(src_clk), .src_reset(~src_resetn), diff --git a/library/axi_dmac/tb/dma_read_shutdown_tb b/library/axi_dmac/tb/dma_read_shutdown_tb index 6ba47d32d..fe7dda1b1 100755 --- a/library/axi_dmac/tb/dma_read_shutdown_tb +++ b/library/axi_dmac/tb/dma_read_shutdown_tb @@ -13,7 +13,7 @@ SOURCE+=" ../src_axi_mm.v ../address_generator.v ../response_generator.v" SOURCE+=" ../../util_axis_fifo/util_axis_fifo.v" SOURCE+=" ../../util_cdc/sync_bits.v" SOURCE+=" ../../util_cdc/sync_event.v" -SOURCE+=" ../../common/ad_mem.v" +SOURCE+=" ../../common/ad_mem_asym.v" cd `dirname $0` source run_tb.sh diff --git a/library/axi_dmac/tb/dma_read_tb b/library/axi_dmac/tb/dma_read_tb index 7378f36ed..971f317c7 100755 --- a/library/axi_dmac/tb/dma_read_tb +++ b/library/axi_dmac/tb/dma_read_tb @@ -12,7 +12,7 @@ SOURCE+=" ../src_axi_mm.v ../address_generator.v ../response_generator.v" SOURCE+=" ../../util_axis_fifo/util_axis_fifo.v" SOURCE+=" ../../util_cdc/sync_bits.v" SOURCE+=" ../../util_cdc/sync_event.v" -SOURCE+=" ../../common/ad_mem.v" +SOURCE+=" 
../../common/ad_mem_asym.v" cd `dirname $0` source run_tb.sh diff --git a/library/axi_dmac/tb/dma_write_shutdown_tb b/library/axi_dmac/tb/dma_write_shutdown_tb index 5fdb11f79..fa0f41acb 100755 --- a/library/axi_dmac/tb/dma_write_shutdown_tb +++ b/library/axi_dmac/tb/dma_write_shutdown_tb @@ -13,7 +13,7 @@ SOURCE+=" ../dest_axi_mm.v ../response_handler.v ../address_generator.v" SOURCE+=" ../../util_axis_fifo/util_axis_fifo.v" SOURCE+=" ../../util_cdc/sync_bits.v" SOURCE+=" ../../util_cdc/sync_event.v" -SOURCE+=" ../../common/ad_mem.v" +SOURCE+=" ../../common/ad_mem_asym.v" cd `dirname $0` source run_tb.sh diff --git a/library/axi_dmac/tb/dma_write_tb b/library/axi_dmac/tb/dma_write_tb index 8538ac083..2a4ab796f 100755 --- a/library/axi_dmac/tb/dma_write_tb +++ b/library/axi_dmac/tb/dma_write_tb @@ -12,7 +12,7 @@ SOURCE+=" ../dest_axi_mm.v ../response_handler.v ../address_generator.v" SOURCE+=" ../../util_axis_fifo/util_axis_fifo.v" SOURCE+=" ../../util_cdc/sync_bits.v" SOURCE+=" ../../util_cdc/sync_event.v" -SOURCE+=" ../../common/ad_mem.v" +SOURCE+=" ../../common/ad_mem_asym.v" cd `dirname $0` source run_tb.sh