pluto_hdl_adi/library/axi_dmac/dmac_sg.v

337 lines
10 KiB
Coq
Raw Normal View History

// axi_dmac: Add support for DMA Scatter-Gather
//
// This commit introduces a different interface to submit transfers, using DMA
// descriptors. The structure of the DMA descriptor is as follows:
//
//   struct dma_desc {
//     u32 flags,
//     u32 id,
//     u64 dest_addr,
//     u64 src_addr,
//     u64 next_sg_addr,
//     u32 y_len,
//     u32 x_len,
//     u32 src_stride,
//     u32 dst_stride,
//   };
//
// The 'flags' field currently offers two control bits:
//   - bit 0: if set, the transfer will complete after this last descriptor is
//     processed, and the DMA core will go back to idle state; if cleared, the
//     next DMA descriptor pointed to by 'next_sg_addr' will be loaded.
//   - bit 1: if set, an end-of-transfer interrupt will be raised after the
//     memory segment pointed to by this descriptor has been transferred.
//
// The 'id' field corresponds to an identifier of the descriptor.
//
// The 'dest_addr' and 'src_addr' contain the destination and source addresses
// to use for the transfer, respectively.
//
// The 'x_len' field contains the number of bytes to transfer, minus one.
//
// The 'y_len', 'src_stride' and 'dst_stride' fields are only useful for 2D
// transfers, and should be set to zero if 2D transfers are not required.
//
// To start a transfer, the address of the first DMA descriptor must be written
// to register 0x47c and the HWDESC bit of CONTROL register must be set. The
// Scatter-Gather transfer is queued similarly to the simple transfers, by
// writing 1 in TRANSFER_SUBMIT.
//
// The Scatter-Gather interface has a dedicated AXI-MM bus configured for read
// transfers, with its own dedicated clock, which can be asynchronous.
//
// The Scatter-Gather reset is generated by the reset manager to reset the
// logic after completing any pending transactions on the bus.
//
// When the Scatter-Gather is enabled during runtime, the legacy cyclic
// functionality of the DMA is disabled.
//
// Signed-off-by: Ionut Podgoreanu <ionut.podgoreanu@analog.com>
// 2023-08-10 10:10:24 +00:00
// ***************************************************************************
// ***************************************************************************
// Copyright (C) 2023 Analog Devices, Inc. All rights reserved.
//
// In this HDL repository, there are many different and unique modules, consisting
// of various HDL (Verilog or VHDL) components. The individual modules are
// developed independently, and may be accompanied by separate and unique license
// terms.
//
// The user should read each of these license terms, and understand the
// freedoms and responsibilities that he or she has by using this source/core.
//
// This core is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
// A PARTICULAR PURPOSE.
//
// Redistribution and use of source or resulting binaries, with or without modification
// of this file, are permitted under one of the following two license terms:
//
// 1. The GNU General Public License version 2 as published by the
// Free Software Foundation, which can be found in the top level directory
// of this repository (LICENSE_GPL2), and also online at:
// <https://www.gnu.org/licenses/old-licenses/gpl-2.0.html>
//
// OR
//
// 2. An ADI specific BSD license, which can be found in the top level directory
// of this repository (LICENSE_ADIBSD), and also on-line at:
// https://github.com/analogdevicesinc/hdl/blob/main/LICENSE_ADIBSD
//
// axi_dmac: Add support for DMA Scatter-Gather
//
// This commit introduces a different interface to submit transfers, using DMA
// descriptors. The structure of the DMA descriptor is as follows:
//
//   struct dma_desc {
//     u32 flags,
//     u32 id,
//     u64 dest_addr,
//     u64 src_addr,
//     u64 next_sg_addr,
//     u32 y_len,
//     u32 x_len,
//     u32 src_stride,
//     u32 dst_stride,
//   };
//
// The 'flags' field currently offers two control bits:
//   - bit 0: if set, the transfer will complete after this last descriptor is
//     processed, and the DMA core will go back to idle state; if cleared, the
//     next DMA descriptor pointed to by 'next_sg_addr' will be loaded.
//   - bit 1: if set, an end-of-transfer interrupt will be raised after the
//     memory segment pointed to by this descriptor has been transferred.
//
// The 'id' field corresponds to an identifier of the descriptor.
//
// The 'dest_addr' and 'src_addr' contain the destination and source addresses
// to use for the transfer, respectively.
//
// The 'x_len' field contains the number of bytes to transfer, minus one.
//
// The 'y_len', 'src_stride' and 'dst_stride' fields are only useful for 2D
// transfers, and should be set to zero if 2D transfers are not required.
//
// To start a transfer, the address of the first DMA descriptor must be written
// to register 0x47c and the HWDESC bit of CONTROL register must be set. The
// Scatter-Gather transfer is queued similarly to the simple transfers, by
// writing 1 in TRANSFER_SUBMIT.
//
// The Scatter-Gather interface has a dedicated AXI-MM bus configured for read
// transfers, with its own dedicated clock, which can be asynchronous.
//
// The Scatter-Gather reset is generated by the reset manager to reset the
// logic after completing any pending transactions on the bus.
//
// When the Scatter-Gather is enabled during runtime, the legacy cyclic
// functionality of the DMA is disabled.
//
// Signed-off-by: Ionut Podgoreanu <ionut.podgoreanu@analog.com>
// 2023-08-10 10:10:24 +00:00
//
// This will allow to generate bit files and not release the source code,
// as long as it attaches to an ADI device.
//
// ***************************************************************************
// ***************************************************************************
`timescale 1ns/100ps
// ---------------------------------------------------------------------------
// dmac_sg - scatter-gather descriptor fetcher.
//
// Operation, as implemented below:
//   1. A descriptor address arrives on the request side (req_in_*) and is
//      moved into the sg_clk domain through i_sg_addr_fifo.
//   2. The FSM issues one AXI read burst of DESC_NUM_BEATS 64-bit beats at
//      that address and unpacks the beats into the descriptor registers.
//   3. Once the whole descriptor has been received, it is offered at the same
//      time to i_sg_desc_fifo (transfer parameters, back to the req_clk
//      domain) and to i_fifo ({eot flag, id}, popped on resp_in_valid).
//   4. Unless bit 0 of the flags is set, the next descriptor is fetched from
//      the 'next descriptor address' field of the one just received.
//
// Descriptor layout as decoded from the read data (beat index : bits):
//   0 : [63:32] id            [1:0]  flags
//   1 : destination address   (bits DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_DEST)
//   2 : source address        (bits DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SRC)
//   3 : next descriptor addr  (bits DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SG)
//   4 : [63:32] x_length      [31:0] y_length
//   5 : [63:32] dest_stride   [31:0] src_stride
//
// The decoding uses fixed bit positions up to m_axi_rdata[63] and a fixed
// m_axi_arsize, so it relies on a 64-bit read data bus.
//
// NOTE(review): m_axi_rresp is not examined anywhere in this module; a read
// error response is treated like valid data.
// ---------------------------------------------------------------------------
module dmac_sg #(
  parameter DMA_AXI_ADDR_WIDTH = 32,
  parameter DMA_DATA_WIDTH = 64,
  parameter DMA_LENGTH_WIDTH = 24,
  parameter AXI_LENGTH_WIDTH = 8,
  parameter BYTES_PER_BEAT_WIDTH_DEST = 3,
  parameter BYTES_PER_BEAT_WIDTH_SRC = 3,
  parameter BYTES_PER_BEAT_WIDTH_SG = 3,
  parameter ASYNC_CLK_REQ_SG = 1
) (
  // Request-side clock domain
  input req_clk,
  input req_resetn,
  input req_enable,

  // Scatter-gather clock domain (driven from the AXI master clock/reset)
  output sg_clk,
  input sg_resetn,
  output sg_ext_resetn,

  input sg_enable,
  output sg_enabled,

  // First-descriptor address request (req_clk domain)
  input req_in_valid,
  output req_in_ready,

  // Decoded descriptor towards the transfer logic (req_clk domain)
  output req_out_valid,
  input req_out_ready,

  // Response side: pops one {eot, id} entry per resp_in_valid
  output resp_out_eot,
  input resp_in_valid,

  input [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SG] req_desc_address,

  output [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_DEST] out_dest_address,
  output [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SRC] out_src_address,
  output [DMA_LENGTH_WIDTH-1:0] out_x_length,
  output [DMA_LENGTH_WIDTH-1:0] out_y_length,
  output [DMA_LENGTH_WIDTH-1:0] out_dest_stride,
  output [DMA_LENGTH_WIDTH-1:0] out_src_stride,

  output [31:0] resp_out_id,

  // Master AXI interface
  input m_axi_aclk,
  input m_axi_aresetn,

  // Read address
  input m_axi_arready,
  output m_axi_arvalid,
  output [DMA_AXI_ADDR_WIDTH-1:0] m_axi_araddr,
  output [AXI_LENGTH_WIDTH-1:0] m_axi_arlen,
  output [ 2:0] m_axi_arsize,
  output [ 1:0] m_axi_arburst,
  output [ 2:0] m_axi_arprot,
  output [ 3:0] m_axi_arcache,

  // Read data and response
  input [DMA_DATA_WIDTH-1:0] m_axi_rdata,
  input m_axi_rlast,
  output m_axi_rready,
  input m_axi_rvalid,
  input [ 1:0] m_axi_rresp
);

  // Descriptor fetch FSM states
  localparam STATE_IDLE = 0;        // waiting for a descriptor address
  localparam STATE_SEND_ADDR = 1;   // ARVALID asserted, waiting for ARREADY
  localparam STATE_RECV_DESC = 2;   // RREADY asserted, receiving the burst
  localparam STATE_DESC_READY = 3;  // descriptor complete, handing it over

  // Descriptor 'flags' bit masks
  localparam MASK_LAST_HWDESC = 1 << 0;
  localparam MASK_EOT_IRQ = 1 << 1;

  // Position of each descriptor word inside the read burst
  localparam DESC_BEAT_FLAGS_ID = 0;
  localparam DESC_BEAT_DEST_ADDR = 1;
  localparam DESC_BEAT_SRC_ADDR = 2;
  localparam DESC_BEAT_NEXT_ADDR = 3;
  localparam DESC_BEAT_LENGTHS = 4;
  localparam DESC_BEAT_STRIDES = 5;
  localparam DESC_NUM_BEATS = 6;

  localparam DMA_ADDRESS_WIDTH_DEST = DMA_AXI_ADDR_WIDTH - BYTES_PER_BEAT_WIDTH_DEST;
  localparam DMA_ADDRESS_WIDTH_SRC = DMA_AXI_ADDR_WIDTH - BYTES_PER_BEAT_WIDTH_SRC;
  localparam DMA_ADDRESS_WIDTH_SG = DMA_AXI_ADDR_WIDTH - BYTES_PER_BEAT_WIDTH_SG;
  localparam DMA_DESCRIPTOR_WIDTH = DMA_ADDRESS_WIDTH_DEST + DMA_ADDRESS_WIDTH_SRC + 4*DMA_LENGTH_WIDTH;

  wire [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SG] first_desc_address;

  // Descriptor registers, filled beat by beat from m_axi_rdata
  reg [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_DEST] dest_addr;
  reg [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SRC] src_addr;
  reg [DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SG] next_desc_addr;
  reg [DMA_LENGTH_WIDTH-1:0] x_length;
  reg [DMA_LENGTH_WIDTH-1:0] y_length;
  reg [DMA_LENGTH_WIDTH-1:0] dest_stride;
  reg [DMA_LENGTH_WIDTH-1:0] src_stride;

  reg [1:0] hwdesc_state;
  reg [2:0] hwdesc_counter;  // index of the next beat within the burst
  reg [1:0] hwdesc_flags;
  reg [31:0] hwdesc_id;

  wire sg_in_valid;
  wire sg_in_ready;
  wire sg_out_valid;
  wire sg_out_ready;
  wire fetch_valid;
  wire fetch_ready;
  wire fifo_in_valid;
  wire fifo_in_ready;
  wire fifo_out_valid;
  wire fifo_out_ready;
  wire [32:0] fifo_in_data;
  wire [32:0] fifo_out_data;

  // High for exactly the cycles in which a read data beat is transferred.
  wire rd_beat;

  assign sg_clk = m_axi_aclk;
  assign sg_ext_resetn = m_axi_aresetn;

  // Reported as enabled while sg_enable is set or the FSM is not idle.
  assign sg_enabled = sg_enable | ~sg_in_ready;

  // Handshake signals are decoded directly from the FSM state.
  // NOTE(review): sg_in_ready does not depend on sg_enable, so an address
  // presented while idle with sg_enable low is popped from the FIFO and
  // latched in next_desc_addr, but no fetch is started for it. Confirm that
  // this is intended.
  assign sg_in_ready = hwdesc_state == STATE_IDLE;
  assign fetch_valid = hwdesc_state == STATE_DESC_READY;
  assign m_axi_arvalid = hwdesc_state == STATE_SEND_ADDR;
  assign m_axi_rready = hwdesc_state == STATE_RECV_DESC;

  // A beat is only transferred when RVALID and RREADY are both high. Using
  // RVALID alone would advance the beat counter and overwrite the descriptor
  // registers whenever RVALID is seen outside of STATE_RECV_DESC.
  assign rd_beat = m_axi_rvalid & m_axi_rready;

  // Fixed read burst attributes
  assign m_axi_arsize = 3'h3;                 // 2^3 = 8 bytes per beat
  assign m_axi_arburst = 2'h1;                // incrementing burst
  assign m_axi_arprot = 3'h0;
  assign m_axi_arcache = 4'h3;
  assign m_axi_arlen = DESC_NUM_BEATS - 1;    // ARLEN is the beat count minus one
  assign m_axi_araddr = {next_desc_addr, {BYTES_PER_BEAT_WIDTH_SG{1'b0}}};

  // Carries the first descriptor address from the req_clk to the sg_clk domain
  util_axis_fifo #(
    .DATA_WIDTH(DMA_ADDRESS_WIDTH_SG),
    .ADDRESS_WIDTH(0),
    .ASYNC_CLK(ASYNC_CLK_REQ_SG)
  ) i_sg_addr_fifo (
    .s_axis_aclk(req_clk),
    .s_axis_aresetn(req_resetn),
    .s_axis_valid(req_in_valid),
    .s_axis_ready(req_in_ready),
    .s_axis_full(),
    .s_axis_data(req_desc_address),
    .s_axis_room(),

    .m_axis_aclk(sg_clk),
    .m_axis_aresetn(sg_resetn),
    .m_axis_valid(sg_in_valid),
    .m_axis_ready(sg_in_ready),
    .m_axis_data(first_desc_address),
    .m_axis_level(),
    .m_axis_empty());

  // Beat counter: advances on every transferred beat and is cleared while the
  // completed descriptor is being handed over, ready for the next burst.
  always @(posedge sg_clk) begin
    if (sg_resetn == 1'b0) begin
      hwdesc_counter <= 'h0;
    end else if (rd_beat) begin
      hwdesc_counter <= hwdesc_counter + 1'b1;
    end else if (hwdesc_state == STATE_DESC_READY) begin
      hwdesc_counter <= 'h0;
    end
  end

  // Read the descriptor data
  always @(posedge sg_clk) begin
    if (sg_resetn == 1'b0) begin
      hwdesc_flags <= 'h00;
      hwdesc_id <= 'h00;
      dest_addr <= 'h00;
      src_addr <= 'h00;
      next_desc_addr <= 'h00;
      y_length <= 'h00;
      x_length <= 'h00;
      src_stride <= 'h00;
      dest_stride <= 'h00;
    end else begin
      // Start of a chain: take the address supplied by the request side.
      if (sg_in_valid && sg_in_ready) begin
        next_desc_addr <= first_desc_address;
      end

      if (rd_beat) begin
        case (hwdesc_counter)
          DESC_BEAT_FLAGS_ID: begin
            hwdesc_id <= m_axi_rdata[63:32];
            hwdesc_flags <= m_axi_rdata[1:0];
          end
          DESC_BEAT_DEST_ADDR: dest_addr <= m_axi_rdata[DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_DEST];
          DESC_BEAT_SRC_ADDR: src_addr <= m_axi_rdata[DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SRC];
          // Overwrites the address of the descriptor currently being read;
          // ARADDR is not used again until the next STATE_SEND_ADDR.
          DESC_BEAT_NEXT_ADDR: next_desc_addr <= m_axi_rdata[DMA_AXI_ADDR_WIDTH-1:BYTES_PER_BEAT_WIDTH_SG];
          // The 32-bit fields below are assigned to DMA_LENGTH_WIDTH-bit
          // registers; with the default width the upper bits are dropped.
          DESC_BEAT_LENGTHS: begin
            x_length <= m_axi_rdata[63:32];
            y_length <= m_axi_rdata[31:0];
          end
          DESC_BEAT_STRIDES: begin
            dest_stride <= m_axi_rdata[63:32];
            src_stride <= m_axi_rdata[31:0];
          end
          default: begin
            // Beats past the end of the descriptor carry nothing to store.
          end
        endcase
      end
    end
  end

  // Descriptor FSM
  always @(posedge sg_clk) begin
    if (sg_resetn == 1'b0) begin
      hwdesc_state <= STATE_IDLE;
    end else begin
      case (hwdesc_state)
        STATE_IDLE: begin
          // A fetch is only started while scatter-gather is enabled.
          if (sg_in_valid == 1'b1 && sg_enable == 1'b1) begin
            hwdesc_state <= STATE_SEND_ADDR;
          end
        end

        STATE_SEND_ADDR: begin
          if (m_axi_arready) begin
            hwdesc_state <= STATE_RECV_DESC;
          end
        end

        STATE_RECV_DESC: begin
          // The burst ends with the beat flagged by RLAST.
          if (rd_beat == 1'b1 && m_axi_rlast == 1'b1) begin
            hwdesc_state <= STATE_DESC_READY;
          end
        end

        STATE_DESC_READY: begin
          if (sg_enable == 1'b0) begin
            // Disabled while holding a descriptor: abandon the chain.
            hwdesc_state <= STATE_IDLE;
          end else if (fetch_ready == 1'b1) begin
            // Descriptor accepted downstream: stop on the last one,
            // otherwise follow next_desc_addr.
            if (hwdesc_flags & MASK_LAST_HWDESC) begin
              hwdesc_state <= STATE_IDLE;
            end else begin
              hwdesc_state <= STATE_SEND_ADDR;
            end
          end
        end
      endcase
    end
  end

  // Carries the decoded transfer parameters back to the req_clk domain
  util_axis_fifo #(
    .DATA_WIDTH(DMA_DESCRIPTOR_WIDTH),
    .ADDRESS_WIDTH(0),
    .ASYNC_CLK(ASYNC_CLK_REQ_SG)
  ) i_sg_desc_fifo (
    .s_axis_aclk(sg_clk),
    .s_axis_aresetn(sg_resetn),
    .s_axis_valid(sg_out_valid),
    .s_axis_ready(sg_out_ready),
    .s_axis_full(),
    .s_axis_data({
      dest_addr,
      src_addr,
      x_length,
      y_length,
      dest_stride,
      src_stride}),
    .s_axis_room(),

    .m_axis_aclk(req_clk),
    .m_axis_aresetn(req_resetn),
    .m_axis_valid(req_out_valid),
    .m_axis_ready(req_out_ready),
    .m_axis_data({
      out_dest_address,
      out_src_address,
      out_x_length,
      out_y_length,
      out_dest_stride,
      out_src_stride}),
    .m_axis_level(),
    .m_axis_empty());

  // Offers each completed descriptor to both the descriptor FIFO and the
  // id/eot FIFO.
  splitter #(
    .NUM_M(2)
  ) i_req_splitter (
    .clk(sg_clk),
    .resetn(sg_resetn),
    .s_valid(fetch_valid),
    .s_ready(fetch_ready),
    .m_valid({
      sg_out_valid,
      fifo_in_valid}),
    .m_ready({
      sg_out_ready,
      fifo_in_ready}));

  // Entry format: bit 32 = end-of-transfer IRQ flag, bits 31:0 = descriptor id
  assign fifo_in_data = {hwdesc_flags & MASK_EOT_IRQ ? 1'b1 : 1'b0, hwdesc_id};
  assign fifo_out_ready = resp_in_valid;
  assign resp_out_eot = fifo_out_data[32];
  assign resp_out_id = fifo_out_data[31:0];

  // Save the descriptor IDs and the eot descriptor flag in an async fifo
  // Extract them one by one when the destination responds with an eot
  util_axis_fifo #(
    .DATA_WIDTH(33),
    .ADDRESS_WIDTH(2),
    .ASYNC_CLK(ASYNC_CLK_REQ_SG)
  ) i_fifo (
    .s_axis_aclk(sg_clk),
    .s_axis_aresetn(sg_resetn),
    .s_axis_valid(fifo_in_valid),
    .s_axis_ready(fifo_in_ready),
    .s_axis_data(fifo_in_data),

    .m_axis_aclk(req_clk),
    .m_axis_aresetn(req_resetn),
    .m_axis_valid(fifo_out_valid),
    .m_axis_ready(fifo_out_ready),
    .m_axis_data(fifo_out_data));

endmodule