From 265781f29a843b5a8840f45d49584a35cfbe0a20 Mon Sep 17 00:00:00 2001 From: AndreiGrozav Date: Thu, 13 Sep 2018 16:26:51 +0300 Subject: [PATCH] axi_hdmi: Let the tools assign the csc resources Write code to pipeline the data path for better DSP utilization in the color space conversion. In the old method, the addition operations were performed outside the DSPs. --- library/axi_hdmi_tx/Makefile | 4 +- library/axi_hdmi_tx/axi_hdmi_tx_ip.tcl | 4 +- library/common/{ad_csc_1.v => ad_csc.v} | 114 ++++++++++-------- library/common/ad_csc_1_add.v | 147 ------------------------ library/common/ad_csc_1_mul.v | 97 ---------------- library/common/ad_csc_RGB2CrYCb.v | 51 ++++---- 6 files changed, 96 insertions(+), 321 deletions(-) rename library/common/{ad_csc_1.v => ad_csc.v} (52%) delete mode 100644 library/common/ad_csc_1_add.v delete mode 100644 library/common/ad_csc_1_mul.v diff --git a/library/axi_hdmi_tx/Makefile b/library/axi_hdmi_tx/Makefile index 609b75bd7..6ebc8dc4c 100644 --- a/library/axi_hdmi_tx/Makefile +++ b/library/axi_hdmi_tx/Makefile @@ -5,9 +5,7 @@ LIBRARY_NAME := axi_hdmi_tx -GENERIC_DEPS += ../common/ad_csc_1.v -GENERIC_DEPS += ../common/ad_csc_1_add.v -GENERIC_DEPS += ../common/ad_csc_1_mul.v +GENERIC_DEPS += ../common/ad_csc.v GENERIC_DEPS += ../common/ad_csc_RGB2CrYCb.v GENERIC_DEPS += ../common/ad_mem.v GENERIC_DEPS += ../common/ad_rst.v diff --git a/library/axi_hdmi_tx/axi_hdmi_tx_ip.tcl b/library/axi_hdmi_tx/axi_hdmi_tx_ip.tcl index 6b42f6e29..09c18e22b 100644 --- a/library/axi_hdmi_tx/axi_hdmi_tx_ip.tcl +++ b/library/axi_hdmi_tx/axi_hdmi_tx_ip.tcl @@ -7,9 +7,7 @@ adi_ip_create axi_hdmi_tx adi_ip_files axi_hdmi_tx [list \ "$ad_hdl_dir/library/common/ad_mem.v" \ "$ad_hdl_dir/library/common/ad_rst.v" \ - "$ad_hdl_dir/library/common/ad_csc_1_mul.v" \ - "$ad_hdl_dir/library/common/ad_csc_1_add.v" \ - "$ad_hdl_dir/library/common/ad_csc_1.v" \ + "$ad_hdl_dir/library/common/ad_csc.v" \ "$ad_hdl_dir/library/common/ad_csc_RGB2CrYCb.v" \ 
"$ad_hdl_dir/library/common/ad_ss_444to422.v" \ "$ad_hdl_dir/library/common/up_axi.v" \ diff --git a/library/common/ad_csc_1.v b/library/common/ad_csc.v similarity index 52% rename from library/common/ad_csc_1.v rename to library/common/ad_csc.v index 3cacf023b..a21e6c2af 100644 --- a/library/common/ad_csc_1.v +++ b/library/common/ad_csc.v @@ -36,78 +36,92 @@ `timescale 1ns/100ps -module ad_csc_1 #( +module ad_csc #( - parameter DELAY_DATA_WIDTH = 16) ( + parameter DELAY_DW = 16, + parameter COLOR_N = 1) ( // data - input clk, - input [DW:0] sync, - input [23:0] data, + input clk, + input [DELAY_DW-1:0] sync, + input [ 23:0] data, // constants - input [16:0] C1, - input [16:0] C2, - input [16:0] C3, - input [24:0] C4, + input signed [16:0] C1, + input signed [16:0] C2, + input signed [16:0] C3, + input signed [24:0] C4, // sync is delay matched - output [DW:0] csc_sync_1, - output [ 7:0] csc_data_1); + output reg [DELAY_DW-1:0] csc_sync, + output [ 7:0] csc_data); - localparam DW = DELAY_DATA_WIDTH - 1; + localparam Y = 1; + localparam Cb = 2; + localparam Cr = 3; // internal wires - wire [24:0] data_1_m_s; - wire [24:0] data_2_m_s; - wire [24:0] data_3_m_s; - wire [DW:0] sync_3_m_s; + reg [ 23:0] data_d1; + reg [ 23:0] data_d2; + reg [ 33:0] data_1; + reg [ 33:0] data_2; + reg [ 33:0] data_3; + reg [DELAY_DW:0] sync_1_m; + reg [DELAY_DW:0] sync_2_m; + reg [DELAY_DW:0] sync_3_m; + reg [ 33:0] s_data_1; + reg [ 33:0] s_data_2; + reg [ 33:0] s_data_3; - // c1*R - ad_csc_1_mul #(.DELAY_DATA_WIDTH(1)) i_mul_c1 ( - .clk (clk), - .data_a (C1), - .data_b (data[23:16]), - .data_p (data_1_m_s), - .ddata_in (1'd0), - .ddata_out ()); + wire signed [33:0] data_1_s; + wire signed [33:0] data_2_s; + wire signed [33:0] data_3_s; - // c2*G - ad_csc_1_mul #(.DELAY_DATA_WIDTH(1)) i_mul_c2 ( - .clk (clk), - .data_a (C2), - .data_b (data[15:8]), - .data_p (data_2_m_s), - .ddata_in (1'd0), - .ddata_out ()); + // Let the tools decide what logic to infer - // c3*B + always @(posedge clk) 
begin + data_d1 <= data; + data_d2 <= data_d1; + data_1 <= {9'd0, data[23:16]} * C1; // R + data_2 <= {9'd0, data_d1[15: 8]} * C2; // G + data_3 <= {9'd0, data_d2[ 7: 0]} * C3; // B + sync_1_m <= sync; + end - ad_csc_1_mul #(.DELAY_DATA_WIDTH(DELAY_DATA_WIDTH)) i_mul_c3 ( - .clk (clk), - .data_a (C3), - .data_b (data[7:0]), - .data_p (data_3_m_s), - .ddata_in (sync), - .ddata_out (sync_3_m_s)); + generate + if (COLOR_N == Y) begin + assign data_1_s = data_1; + assign data_2_s = data_2; + assign data_3_s = data_3; + end + if (COLOR_N == Cb) begin + assign data_1_s = ~data_1; + assign data_2_s = ~data_2; + assign data_3_s = data_3; + end + if (COLOR_N == Cr) begin + assign data_1_s = data_1; + assign data_2_s = ~data_2; + assign data_3_s = ~data_3; + end + endgenerate - // sum + c4 + always @(posedge clk) begin + s_data_1 <= data_1_s + C4; + s_data_2 <= s_data_1 + data_2_s; + s_data_3 <= s_data_2 + data_3_s; + sync_2_m <= sync_1_m; + sync_3_m <= sync_2_m; + csc_sync <= sync_3_m; + end - ad_csc_1_add #(.DELAY_DATA_WIDTH(DELAY_DATA_WIDTH)) i_add_c4 ( - .clk (clk), - .data_1 (data_1_m_s), - .data_2 (data_2_m_s), - .data_3 (data_3_m_s), - .data_4 (C4), - .data_p (csc_data_1), - .ddata_in (sync_3_m_s), - .ddata_out (csc_sync_1)); + assign csc_data = s_data_3[23:16]; endmodule diff --git a/library/common/ad_csc_1_add.v b/library/common/ad_csc_1_add.v deleted file mode 100644 index 3acb3970f..000000000 --- a/library/common/ad_csc_1_add.v +++ /dev/null @@ -1,147 +0,0 @@ -// *************************************************************************** -// *************************************************************************** -// Copyright 2014 - 2017 (c) Analog Devices, Inc. All rights reserved. -// -// In this HDL repository, there are many different and unique modules, consisting -// of various HDL (Verilog or VHDL) components. The individual modules are -// developed independently, and may be accompanied by separate and unique license -// terms. 
-// -// The user should read each of these license terms, and understand the -// freedoms and responsibilities that he or she has by using this source/core. -// -// This core is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. -// -// Redistribution and use of source or resulting binaries, with or without modification -// of this file, are permitted under one of the following two license terms: -// -// 1. The GNU General Public License version 2 as published by the -// Free Software Foundation, which can be found in the top level directory -// of this repository (LICENSE_GPL2), and also online at: -// -// -// OR -// -// 2. An ADI specific BSD license, which can be found in the top level directory -// of this repository (LICENSE_ADIBSD), and also on-line at: -// https://github.com/analogdevicesinc/hdl/blob/master/LICENSE_ADIBSD -// This will allow to generate bit files and not release the source code, -// as long as it attaches to an ADI device. -// -// *************************************************************************** -// *************************************************************************** -// Color Space Conversion, adder. This is a simple adder, but had to be -// pipe-lined for faster clock rates. 
The delay input is delay-matched to -// the sum pipe-line stages - -`timescale 1ps/1ps - -module ad_csc_1_add #( - - parameter DELAY_DATA_WIDTH = 16) ( - - // all signed - - input clk, - input [24:0] data_1, - input [24:0] data_2, - input [24:0] data_3, - input [24:0] data_4, - output reg [ 7:0] data_p, - - // delay match - - input [DW:0] ddata_in, - output reg [DW:0] ddata_out); - - localparam DW = DELAY_DATA_WIDTH - 1; - - // internal registers - - reg [DW:0] p1_ddata = 'd0; - reg [24:0] p1_data_1 = 'd0; - reg [24:0] p1_data_2 = 'd0; - reg [24:0] p1_data_3 = 'd0; - reg [24:0] p1_data_4 = 'd0; - reg [DW:0] p2_ddata = 'd0; - reg [24:0] p2_data_0 = 'd0; - reg [24:0] p2_data_1 = 'd0; - reg [DW:0] p3_ddata = 'd0; - reg [24:0] p3_data = 'd0; - - // internal signals - - wire [24:0] p1_data_1_p_s; - wire [24:0] p1_data_1_n_s; - wire [24:0] p1_data_1_s; - wire [24:0] p1_data_2_p_s; - wire [24:0] p1_data_2_n_s; - wire [24:0] p1_data_2_s; - wire [24:0] p1_data_3_p_s; - wire [24:0] p1_data_3_n_s; - wire [24:0] p1_data_3_s; - wire [24:0] p1_data_4_p_s; - wire [24:0] p1_data_4_n_s; - wire [24:0] p1_data_4_s; - - // pipe line stage 1, get the two's complement versions - - assign p1_data_1_p_s = {1'b0, data_1[23:0]}; - assign p1_data_1_n_s = ~p1_data_1_p_s + 1'b1; - assign p1_data_1_s = (data_1[24] == 1'b1) ? p1_data_1_n_s : p1_data_1_p_s; - - assign p1_data_2_p_s = {1'b0, data_2[23:0]}; - assign p1_data_2_n_s = ~p1_data_2_p_s + 1'b1; - assign p1_data_2_s = (data_2[24] == 1'b1) ? p1_data_2_n_s : p1_data_2_p_s; - - assign p1_data_3_p_s = {1'b0, data_3[23:0]}; - assign p1_data_3_n_s = ~p1_data_3_p_s + 1'b1; - assign p1_data_3_s = (data_3[24] == 1'b1) ? p1_data_3_n_s : p1_data_3_p_s; - - assign p1_data_4_p_s = {1'b0, data_4[23:0]}; - assign p1_data_4_n_s = ~p1_data_4_p_s + 1'b1; - assign p1_data_4_s = (data_4[24] == 1'b1) ? 
p1_data_4_n_s : p1_data_4_p_s; - - always @(posedge clk) begin - p1_ddata <= ddata_in; - p1_data_1 <= p1_data_1_s; - p1_data_2 <= p1_data_2_s; - p1_data_3 <= p1_data_3_s; - p1_data_4 <= p1_data_4_s; - end - - // pipe line stage 2, get the sum (intermediate, 4->2) - - always @(posedge clk) begin - p2_ddata <= p1_ddata; - p2_data_0 <= p1_data_1 + p1_data_2; - p2_data_1 <= p1_data_3 + p1_data_4; - end - - // pipe line stage 3, get the sum (final, 2->1) - - always @(posedge clk) begin - p3_ddata <= p2_ddata; - p3_data <= p2_data_0 + p2_data_1; - end - - // output registers, output is unsigned (0 if sum is < 0) and saturated. - // the inputs are expected to be 1.4.20 format (output is 8bits). - - always @(posedge clk) begin - ddata_out <= p3_ddata; - if (p3_data[24] == 1'b1) begin - data_p <= 8'h00; - end else if (p3_data[23:20] == 'd0) begin - data_p <= p3_data[19:12]; - end else begin - data_p <= 8'hff; - end - end - -endmodule - -// *************************************************************************** -// *************************************************************************** diff --git a/library/common/ad_csc_1_mul.v b/library/common/ad_csc_1_mul.v deleted file mode 100644 index 5847e1289..000000000 --- a/library/common/ad_csc_1_mul.v +++ /dev/null @@ -1,97 +0,0 @@ -// *************************************************************************** -// *************************************************************************** -// Copyright 2014 - 2017 (c) Analog Devices, Inc. All rights reserved. -// -// In this HDL repository, there are many different and unique modules, consisting -// of various HDL (Verilog or VHDL) components. The individual modules are -// developed independently, and may be accompanied by separate and unique license -// terms. -// -// The user should read each of these license terms, and understand the -// freedoms and responsibilities that he or she has by using this source/core. 
-// -// This core is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. -// -// Redistribution and use of source or resulting binaries, with or without modification -// of this file, are permitted under one of the following two license terms: -// -// 1. The GNU General Public License version 2 as published by the -// Free Software Foundation, which can be found in the top level directory -// of this repository (LICENSE_GPL2), and also online at: -// -// -// OR -// -// 2. An ADI specific BSD license, which can be found in the top level directory -// of this repository (LICENSE_ADIBSD), and also on-line at: -// https://github.com/analogdevicesinc/hdl/blob/master/LICENSE_ADIBSD -// This will allow to generate bit files and not release the source code, -// as long as it attaches to an ADI device. -// -// *************************************************************************** -// *************************************************************************** -// Color Space Conversion, multiplier. This is a simple partial product adder -// that generates the product of the two inputs. 
- -`timescale 1ps/1ps - -module ad_csc_1_mul #( - - parameter DELAY_DATA_WIDTH = 16) ( - - // data_a is signed - - input clk, - input [16:0] data_a, - input [ 7:0] data_b, - output [24:0] data_p, - - // delay match - - input [(DELAY_DATA_WIDTH-1):0] ddata_in, - output [(DELAY_DATA_WIDTH-1):0] ddata_out); - - // internal registers - - reg [(DELAY_DATA_WIDTH-1):0] p1_ddata = 'd0; - reg [(DELAY_DATA_WIDTH-1):0] p2_ddata = 'd0; - reg [(DELAY_DATA_WIDTH-1):0] p3_ddata = 'd0; - reg p1_sign = 'd0; - reg p2_sign = 'd0; - reg p3_sign = 'd0; - - // internal signals - - wire [33:0] p3_data_s; - - // a/b reg, m-reg, p-reg delay match - - always @(posedge clk) begin - p1_ddata <= ddata_in; - p2_ddata <= p1_ddata; - p3_ddata <= p2_ddata; - end - - always @(posedge clk) begin - p1_sign <= data_a[16]; - p2_sign <= p1_sign; - p3_sign <= p2_sign; - end - - assign ddata_out = p3_ddata; - assign data_p = {p3_sign, p3_data_s[23:0]}; - - ad_mul ad_mul_1 ( - .clk(clk), - .data_a({1'b0, data_a[15:0]}), - .data_b({9'b0, data_b}), - .data_p(p3_data_s), - .ddata_in(16'h0), - .ddata_out()); - -endmodule - -// *************************************************************************** -// *************************************************************************** diff --git a/library/common/ad_csc_RGB2CrYCb.v b/library/common/ad_csc_RGB2CrYCb.v index 4b7e90eec..8bb8efd7a 100644 --- a/library/common/ad_csc_RGB2CrYCb.v +++ b/library/common/ad_csc_RGB2CrYCb.v @@ -60,42 +60,51 @@ module ad_csc_RGB2CrYCb #( // Cr (red-diff) - ad_csc_1 #(.DELAY_DATA_WIDTH(DELAY_DATA_WIDTH)) i_csc_1_Cr ( + ad_csc #( + .DELAY_DW(DELAY_DATA_WIDTH), + .COLOR_N(3)) + j_csc_1_Cr ( .clk (clk), .sync (RGB_sync), .data (RGB_data), - .C1 (17'h00707), - .C2 (17'h105e2), - .C3 (17'h10124), - .C4 (25'h0080000), - .csc_sync_1 (CrYCb_sync), - .csc_data_1 (CrYCb_data[23:16])); + .C1 (17'h7070), + .C2 (17'h5e27), + .C3 (17'h1248), + .C4 (24'h800002), + .csc_sync (CrYCb_sync), + .csc_data (CrYCb_data[23:16])); // Y (luma) - ad_csc_1 
#(.DELAY_DATA_WIDTH(1)) i_csc_1_Y ( + ad_csc #( + .DELAY_DW(0), + .COLOR_N(1)) + j_csc_1_Y ( .clk (clk), .sync (1'd0), .data (RGB_data), - .C1 (17'h0041b), - .C2 (17'h00810), - .C3 (17'h00191), - .C4 (25'h0010000), - .csc_sync_1 (), - .csc_data_1 (CrYCb_data[15:8])); + .C1 (17'h041bd), + .C2 (17'h0810e), + .C3 (17'h01910), + .C4 (24'h100000), + .csc_sync (), + .csc_data (CrYCb_data[15:8])); // Cb (blue-diff) - ad_csc_1 #(.DELAY_DATA_WIDTH(1)) i_csc_1_Cb ( + ad_csc #( + .DELAY_DW(0), + .COLOR_N(2)) + j_csc_1_Cb ( .clk (clk), .sync (1'd0), .data (RGB_data), - .C1 (17'h1025f), - .C2 (17'h104a7), - .C3 (17'h00707), - .C4 (25'h0080000), - .csc_sync_1 (), - .csc_data_1 (CrYCb_data[7:0])); + .C1 (17'h25f1), + .C2 (17'h4a7e), + .C3 (17'h7070), + .C4 (24'h800002), + .csc_sync (), + .csc_data (CrYCb_data[7:0])); endmodule