From 932066367eca825926fbe71d543aea476fdc25a1 Mon Sep 17 00:00:00 2001 From: kodario Date: Sat, 5 Sep 2020 01:43:49 +0200 Subject: [PATCH] initial commit. --- .gitignore | 20 + .gitmodules | 4 + README.md | 89 + cmake/FindVivado.cmake | 31 + cmake/FindVivadoHLS.cmake | 33 + driver/Makefile | 13 + driver/fpga_drv.c | 2401 +++++++++++++++++ driver/fpga_drv.h | 513 ++++ hw/CMakeLists.txt | 136 + hw/constraints/u250/u250_base.xdc | 29 + hw/constraints/u250/u250_ddr.xdc | 628 +++++ hw/constraints/u250/u250_net.xdc | 83 + hw/constraints/u250/u250_pcie.xdc | 82 + hw/constraints/u280/u280_base.xdc | 29 + hw/constraints/u280/u280_ddr.xdc | 641 +++++ hw/constraints/u280/u280_net.xdc | 59 + hw/constraints/u280/u280_pcie.xdc | 82 + hw/constraints/vcu118/vcu_base.xdc | 14 + hw/constraints/vcu118/vcu_ddr.xdc | 619 +++++ hw/constraints/vcu118/vcu_net.xdc | 72 + hw/constraints/vcu118/vcu_pcie.xdc | 82 + hw/ext/network | 1 + hw/hdl/cdma/axi_dma_rd.sv | 269 ++ hw/hdl/cdma/axi_dma_wr.sv | 333 +++ hw/hdl/cdma/cdma.sv | 148 + hw/hdl/cdma/krnl_counter.sv | 105 + hw/hdl/cdma_unaglined/axi_dma.sv | 236 ++ hw/hdl/cdma_unaglined/axi_dma_rd.sv | 559 ++++ hw/hdl/cdma_unaglined/axi_dma_wr.sv | 666 +++++ hw/hdl/mmu/tlb_arbiter.sv | 173 ++ hw/hdl/mmu/tlb_arbiter_isr.sv | 131 + hw/hdl/mmu/tlb_assign_isr.sv | 40 + hw/hdl/mmu/tlb_credits_rd.sv | 186 ++ hw/hdl/mmu/tlb_credits_wr.sv | 99 + hw/hdl/mmu/tlb_fsm_rd.sv | 583 ++++ hw/hdl/mmu/tlb_fsm_wr.sv | 599 ++++ hw/hdl/mmu/tlb_idma_arb.sv | 76 + hw/hdl/mmu/tlb_parser.sv | 136 + hw/hdl/mmu/tlb_region_top.sv | 315 +++ hw/hdl/mmu/tlb_top.sv | 174 ++ hw/hdl/mux/axis_mux_cdma.sv | 94 + hw/hdl/mux/axis_mux_ddr_host.sv | 128 + hw/hdl/mux/axis_mux_ddr_user.sv | 112 + hw/hdl/mux/axis_mux_user_sink.sv | 173 ++ hw/hdl/mux/axis_mux_user_src.sv | 151 ++ hw/hdl/network/cmac_uplus_axis_wrapper.sv | 658 +++++ hw/hdl/network/network_clk_cross.sv | 382 +++ hw/hdl/network/network_meta_fv_arbiter.sv | 68 + hw/hdl/network/network_meta_tx_arbiter.sv | 85 + 
hw/hdl/network/network_module.sv | 175 ++ hw/hdl/network/network_mux_cmd_rd.sv | 266 ++ hw/hdl/network/network_mux_cmd_wr.sv | 232 ++ hw/hdl/network/network_req_parser.sv | 348 +++ hw/hdl/network/network_stack.sv | 836 ++++++ hw/hdl/network/network_top.sv | 255 ++ hw/hdl/network/roce_stack.sv | 122 + hw/hdl/network/rx_interface.v | 551 ++++ hw/hdl/network/tx_interface.v | 275 ++ hw/hdl/operators/addmul/addmul.sv | 67 + hw/hdl/operators/addmul/addmul_slave.sv | 226 ++ hw/hdl/operators/addmul/addmul_user_logic.sv | 58 + hw/hdl/operators/aes/aes_pipe_stage.vhd | 53 + hw/hdl/operators/aes/aes_pipe_stage_last.vhd | 55 + hw/hdl/operators/aes/aes_pipeline.vhd | 99 + hw/hdl/operators/aes/aes_round.vhd | 49 + hw/hdl/operators/aes/aes_round_last.vhd | 43 + hw/hdl/operators/aes/aes_slave.sv | 232 ++ hw/hdl/operators/aes/aes_top.vhd | 101 + hw/hdl/operators/aes/aes_user_logic.sv | 72 + hw/hdl/operators/aes/key_expansion.vhd | 54 + hw/hdl/operators/aes/key_pipe_reg.vhd | 44 + hw/hdl/operators/aes/key_pipe_stage.vhd | 43 + hw/hdl/operators/aes/key_pipeline.vhd | 64 + hw/hdl/operators/aes/mix_columns.vhd | 51 + hw/hdl/operators/aes/pipe_reg.vhd | 59 + hw/hdl/operators/aes/s_box_lut.vhd | 40 + hw/hdl/operators/aes/shift_rows.vhd | 54 + hw/hdl/operators/aes/utils.vhd | 31 + hw/hdl/operators/chacha/chacha_core.v | 704 +++++ hw/hdl/operators/chacha/chacha_qr.v | 116 + hw/hdl/operators/chacha/user_top.sv | 122 + hw/hdl/operators/dtengine/gbm/dt_gbm_slave.sv | 292 ++ .../operators/dtengine/gbm/dt_user_logic.sv | 236 ++ .../dtengine/gbm/inferCore/DTPackage.sv | 91 + .../dtengine/gbm/inferCore/DTProcessor.sv | 543 ++++ .../dtengine/gbm/inferCore/Data_Memory.sv | 73 + .../gbm/inferCore/FPAdder_2cycles_latency.sv | 389 +++ .../gbm/inferCore/FPAdder_3cycles_latency.v | 458 ++++ .../gbm/inferCore/FPAdder_4cycles_latency.v | 420 +++ .../gbm/inferCore/FPAddersReduceTree.sv | 156 ++ .../gbm/inferCore/FPAddersReduceTree_sync.sv | 286 ++ .../dtengine/gbm/inferCore/FPAggregator.sv | 174 ++ 
.../gbm/inferCore/LineRateConvertor.sv | 134 + .../dtengine/gbm/inferCore/RegBasedFIFO.sv | 122 + .../dtengine/gbm/inferCore/Tree_Memory.sv | 192 ++ .../operators/dtengine/gbm/inferCore/bram.sv | 55 + .../dtengine/gbm/inferCore/bram_1_in_2_out.sv | 136 + .../dtengine/gbm/inferCore/bus_aligner.sv | 183 ++ .../dtengine/gbm/inferCore/compute_unit.sv | 241 ++ .../operators/dtengine/gbm/inferCore/delay.sv | 75 + .../dtengine/gbm/inferCore/dual_port_mem.sv | 128 + .../dtengine/gbm/inferCore/pe_datapath.sv | 369 +++ .../gbm/inferCore/processing_element.sv | 442 +++ .../gbm/inferCore/processing_element_async.sv | 453 ++++ .../dtengine/gbm/inferCore/quick_fifo.sv | 149 + .../dtengine/gbm/inferCore/smart_shifter.sv | 83 + hw/hdl/operators/dtengine/xgboost/Core.sv | 528 ++++ .../operators/dtengine/xgboost/DTInference.sv | 193 ++ .../dtengine/xgboost/DTInferenceTop.sv | 229 ++ .../dtengine/xgboost/common/DTEngine_Types.sv | 49 + .../xgboost/common/FPAdder_2cycles_latency.v | 389 +++ .../operators/dtengine/xgboost/common/delay.v | 52 + .../operators/dtengine/xgboost/core/DTPU.sv | 772 ++++++ .../dtengine/xgboost/core/DTPUCluster.sv | 219 ++ .../xgboost/core/FPAddersReduceTree.sv | 187 ++ .../dtengine/xgboost/core/FPAggregator.v | 152 ++ .../dtengine/xgboost/core/Mem1in2out.v | 127 + .../dtengine/xgboost/core/PipelinedMUX.sv | 152 ++ .../dtengine/xgboost/core/Qdualport_mem.qip | 5 + .../dtengine/xgboost/core/Qdualport_mem.v | 220 ++ hw/hdl/operators/dtengine/xgboost/core/RLS.v | 73 + .../dtengine/xgboost/core/bram1in2out.v | 242 ++ .../dtengine/xgboost/core/bramin1out2.qip | 5 + .../dtengine/xgboost/core/bramin1out2.v | 242 ++ .../dtengine/xgboost/core/dualport_mem.v | 85 + .../dtengine/xgboost/engineParams.sv | 334 +++ hw/hdl/operators/hll/hll_slave.sv | 237 ++ hw/hdl/operators/hll/hll_user_logic.sv | 59 + hw/hdl/operators/kmeans/Formatter.sv | 153 ++ hw/hdl/operators/kmeans/adder_tree.sv | 106 + .../kmeans/adder_tree_low_resource.sv | 91 + hw/hdl/operators/kmeans/agg_div.sv | 
134 + hw/hdl/operators/kmeans/dist_processor.sv | 85 + hw/hdl/operators/kmeans/div_buffer.sv | 68 + hw/hdl/operators/kmeans/dual_port_ram.sv | 68 + hw/hdl/operators/kmeans/fetch_engine.sv | 219 ++ .../kmeans/fetch_engine_output_lane.sv | 163 ++ .../kmeans/formatter_c_lane_splitter.sv | 162 ++ .../operators/kmeans/k_means_accumulation.sv | 643 +++++ .../operators/kmeans/k_means_aggregation.sv | 274 ++ hw/hdl/operators/kmeans/k_means_division.sv | 161 ++ hw/hdl/operators/kmeans/k_means_layer.sv | 187 ++ hw/hdl/operators/kmeans/k_means_module.sv | 456 ++++ hw/hdl/operators/kmeans/k_means_operator.sv | 72 + hw/hdl/operators/kmeans/k_means_slave.sv | 273 ++ hw/hdl/operators/kmeans/k_means_user_logic.sv | 88 + hw/hdl/operators/kmeans/kmeansTypes.sv | 36 + .../kmeans/low_prec_k_means_operator.sv | 227 ++ hw/hdl/operators/kmeans/mac.sv | 108 + hw/hdl/operators/kmeans/mult_acc.sv | 89 + hw/hdl/operators/kmeans/quick_fifo.v | 153 ++ hw/hdl/operators/kmeans/rd_addr_gen.sv | 168 ++ .../operators/kmeans/runtimeParam_Manager.sv | 96 + hw/hdl/operators/kmeans/spl_sdp_mem.v | 68 + hw/hdl/operators/kmeans/spl_sdp_mem_t.v | 62 + hw/hdl/operators/kmeans/wr_engine.sv | 216 ++ hw/hdl/operators/lpn/fk_ctrl.vhd | 352 +++ hw/hdl/operators/lpn/fk_data_reg.vhd | 111 + hw/hdl/operators/lpn/fk_dma_rd.vhd | 162 ++ hw/hdl/operators/lpn/fk_dma_wr.vhd | 157 ++ hw/hdl/operators/lpn/fk_dp.vhd | 144 + hw/hdl/operators/lpn/fk_key_reg.vhd | 206 ++ hw/hdl/operators/lpn/fk_noise_reg.vhd | 168 ++ hw/hdl/operators/lpn/fk_slave.vhd | 133 + hw/hdl/operators/lpn/fk_top.vhd | 253 ++ hw/hdl/operators/lpn/fk_work_reg.vhd | 81 + hw/hdl/operators/lpn/utils.vhd | 31 + hw/hdl/operators/minmaxsum/minmaxsum.vhd | 131 + hw/hdl/operators/minmaxsum/minmaxsum_slave.sv | 236 ++ .../minmaxsum/minmaxsum_user_logic.sv | 68 + .../percentage/minmaxsum_user_logic.sv | 103 + hw/hdl/operators/percentage/onesCounter.v | 83 + hw/hdl/operators/percentage/percentage.sv | 187 ++ hw/hdl/operators/percentage/reduction_tree.v | 119 + 
hw/hdl/operators/regex/regex.sv | 108 + hw/hdl/operators/regex/regex_data.sv | 258 ++ hw/hdl/operators/regex/regex_ip.tcl | 5 + hw/hdl/operators/regex/regex_req.sv | 181 ++ hw/hdl/operators/regex/regex_top.v | 226 ++ hw/hdl/operators/regex/regex_util.tcl | 0 hw/hdl/operators/regex/rem_charmatch.v | 81 + hw/hdl/operators/regex/rem_decoder.v | 137 + hw/hdl/operators/regex/rem_halfrange.v | 98 + hw/hdl/operators/regex/rem_onestate.v | 94 + hw/hdl/operators/regex/rem_rangematch.v | 86 + hw/hdl/operators/regex/rem_top_ff.v | 351 +++ hw/hdl/operators/selection/selection.vhd | 153 ++ hw/hdl/operators/selection/selection_slave.sv | 239 ++ .../selection/selection_user_logic.sv | 67 + hw/hdl/operators/stride/stride.sv | 96 + hw/hdl/operators/stride/stride_data.sv | 182 ++ hw/hdl/operators/stride/stride_req.sv | 146 + .../testcount/minmaxsum_user_logic.sv | 64 + hw/hdl/operators/testcount/onesCounter.v | 82 + hw/hdl/operators/testcount/testcount.sv | 183 ++ hw/hdl/operators/testcount/testcount_slave.sv | 250 ++ hw/hdl/pkg/axi_intf.sv | 384 +++ hw/hdl/pkg/axi_macros.svh | 40 + hw/hdl/pkg/lynx_intf.sv | 378 +++ hw/hdl/pkg/lynx_macros.svh | 27 + hw/hdl/regs/axi_reg.sv | 86 + hw/hdl/regs/axi_reg_array.sv | 27 + hw/hdl/regs/axil_decoupler.sv | 165 ++ hw/hdl/regs/axil_reg.sv | 54 + hw/hdl/regs/axis_decoupler.sv | 50 + hw/hdl/regs/axis_reg.sv | 62 + hw/hdl/regs/axis_reg_array.sv | 28 + hw/hdl/regs/axis_reg_array_rtl.sv | 27 + hw/hdl/regs/axis_reg_rtl.sv | 100 + hw/hdl/regs/axisr_decoupler.sv | 55 + hw/hdl/regs/axisr_reg.sv | 68 + hw/hdl/regs/axisr_reg_array.sv | 28 + hw/hdl/regs/data_queue_credits_sink.sv | 63 + hw/hdl/regs/data_queue_credits_src.sv | 66 + hw/hdl/regs/dma_isr_req_queue.sv | 26 + hw/hdl/regs/dma_req_queue.sv | 25 + hw/hdl/regs/meta_decoupler.sv | 22 + hw/hdl/regs/meta_reg.sv | 50 + hw/hdl/regs/meta_reg_array.sv | 27 + hw/hdl/regs/req_decoupler.sv | 22 + hw/hdl/regs/req_queue.sv | 23 + hw/hdl/regs/req_reg.sv | 22 + hw/hdl/regs/req_reg_array.sv | 27 + 
hw/hdl/regs/req_reg_array_rtl.sv | 27 + hw/hdl/regs/req_reg_rtl.sv | 83 + hw/hdl/slave/cnfg_slave.sv | 691 +++++ hw/hdl/slave/cnfg_slave_avx.sv | 740 +++++ hw/hdl/slave/static_slave.sv | 526 ++++ hw/hdl/slave/tlb_slave.sv | 239 ++ hw/hdl/util/fifo.sv | 83 + hw/hdl/util/queue.sv | 33 + hw/hdl/util/queue_meta.sv | 33 + hw/hdl/util/queue_stream.sv | 39 + hw/hdl/util/tdp_ram_nc.sv | 35 + hw/scripts/bd/cr_static_u250.tcl | 768 ++++++ hw/scripts/bd/cr_static_u280.tcl | 672 +++++ hw/scripts/bd/cr_static_vcu118.tcl | 645 +++++ hw/scripts/compile.tcl.in | 125 + hw/scripts/dynamic.tcl.in | 133 + hw/scripts/ip_inst/base_infrastructure.tcl | 104 + hw/scripts/ip_inst/network_infrastructure.tcl | 184 ++ hw/scripts/ip_inst/network_stack.tcl | 221 ++ hw/scripts/ip_inst/network_ultraplus.tcl | 71 + hw/scripts/post.tcl.in | 106 + hw/scripts/shell.tcl.in | 217 ++ hw/scripts/util_scripts/cr_hdl.tcl | 47 + hw/scripts/util_scripts/ila.tcl | 39 + hw/scripts/util_scripts/load_jtag.tcl | 4 + hw/scripts/wr_hdl/wr_hdl_dynamic.tcl | 830 ++++++ hw/scripts/wr_hdl/wr_hdl_pkg.tcl | 157 ++ hw/scripts/wr_hdl/wr_hdl_top_u250.tcl | 692 +++++ hw/scripts/wr_hdl/wr_hdl_top_u280.tcl | 581 ++++ hw/scripts/wr_hdl/wr_hdl_top_vcu118.tcl | 562 ++++ hw/scripts/wr_hdl/wr_hdl_user.tcl | 400 +++ hw/sim/axi_intf_sim.sv | 446 +++ hw/sim/lynx_intf_sim.sv | 227 ++ hw/sim/lynx_pkg.sv | 113 + hw/sim/scripts_sim/tb.tcl | 168 ++ hw/sim/tb/axis_mux_ddr_sink_tb.sv | 115 + hw/sim/tb/axis_mux_ddr_sink_wide_tb.sv | 127 + hw/sim/tb/axis_mux_ddr_src_tb.sv | 117 + hw/sim/tb/axis_mux_ddr_src_wide_tb.sv | 127 + hw/sim/tb/axis_mux_ddr_user_tb.sv | 115 + hw/sim/tb/axis_mux_tb.sv | 174 ++ hw/sim/tb/tb_cdma_unaglined.sv | 96 + hw/sim/tb/tb_design_user_logic.sv | 64 + hw/sim/tb/tb_user.sv | 124 + hw/sim/tb/tlb_arbiter_tb.sv | 81 + hw/sim/tb/tlb_top.sv | 157 ++ sw/base/CMakeLists.txt | 22 + sw/base/include/fBench.hpp | 121 + sw/base/include/fDefs.hpp | 33 + sw/base/include/fDev.hpp | 223 ++ sw/base/include/tsc_x86.h | 89 + 
sw/base/src/fDev.cpp | 924 +++++++ sw/base/src/main.cpp | 140 + sw/gbm/CMakeLists.txt | 22 + sw/gbm/include/fDefs.hpp | 33 + sw/gbm/include/fDev.hpp | 230 ++ sw/gbm/src/fDev.cpp | 983 +++++++ sw/gbm/src/main.cpp | 283 ++ sw/gbm/verify/dtengine.cpp | 349 +++ sw/gbm/verify/infer | Bin 0 -> 17568 bytes sw/rdma/CMakeLists.txt | 22 + sw/rdma/include/fBench.hpp | 121 + sw/rdma/include/fDefs.hpp | 49 + sw/rdma/include/fDev.hpp | 225 ++ sw/rdma/include/fQ.hpp | 46 + sw/rdma/include/fView.hpp | 97 + sw/rdma/include/tsc_x86.h | 89 + sw/rdma/src/fDev.cpp | 932 +++++++ sw/rdma/src/fQ.cpp | 72 + sw/rdma/src/fView.cpp | 677 +++++ sw/rdma/src/main.cpp | 192 ++ sw/scheduling/CMakeLists.txt | 22 + sw/scheduling/include/classical_kmeans.h | 24 + sw/scheduling/include/fApp.hpp | 28 + sw/scheduling/include/fArbiter.hpp | 80 + sw/scheduling/include/fDefs.hpp | 28 + sw/scheduling/include/fDev.hpp | 279 ++ sw/scheduling/include/fHll.hpp | 66 + sw/scheduling/include/fJob.hpp | 94 + sw/scheduling/include/fKmeans.hpp | 197 ++ sw/scheduling/include/fOp0.hpp | 73 + sw/scheduling/include/fOp1.hpp | 73 + sw/scheduling/include/fOp2.hpp | 79 + sw/scheduling/include/fOp3.hpp | 78 + sw/scheduling/include/fScheduler.hpp | 102 + sw/scheduling/include/kmeans.hpp | 196 ++ sw/scheduling/include/mlweaving.h | 219 ++ sw/scheduling/include/utils.hpp | 35 + sw/scheduling/src/classical_kmeans.cpp | 489 ++++ sw/scheduling/src/fArbiter.cpp | 41 + sw/scheduling/src/fDev.cpp | 449 +++ sw/scheduling/src/fScheduler.cpp | 59 + sw/scheduling/src/main.cpp | 183 ++ sw/scheduling/src/utils.cpp | 164 ++ sw/stride/CMakeLists.txt | 22 + sw/stride/include/fBench.hpp | 121 + sw/stride/include/fDefs.hpp | 49 + sw/stride/include/fDev.hpp | 225 ++ sw/stride/include/fQ.hpp | 46 + sw/stride/include/fView.hpp | 97 + sw/stride/include/tsc_x86.h | 89 + sw/stride/src/fDev.cpp | 932 +++++++ sw/stride/src/fQ.cpp | 72 + sw/stride/src/fView.cpp | 677 +++++ sw/stride/src/main.cpp | 181 ++ util/hot_reset.sh | 33 + 329 files changed, 63572 
insertions(+) create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 README.md create mode 100644 cmake/FindVivado.cmake create mode 100644 cmake/FindVivadoHLS.cmake create mode 100644 driver/Makefile create mode 100644 driver/fpga_drv.c create mode 100644 driver/fpga_drv.h create mode 100644 hw/CMakeLists.txt create mode 100644 hw/constraints/u250/u250_base.xdc create mode 100644 hw/constraints/u250/u250_ddr.xdc create mode 100644 hw/constraints/u250/u250_net.xdc create mode 100644 hw/constraints/u250/u250_pcie.xdc create mode 100644 hw/constraints/u280/u280_base.xdc create mode 100644 hw/constraints/u280/u280_ddr.xdc create mode 100644 hw/constraints/u280/u280_net.xdc create mode 100644 hw/constraints/u280/u280_pcie.xdc create mode 100644 hw/constraints/vcu118/vcu_base.xdc create mode 100644 hw/constraints/vcu118/vcu_ddr.xdc create mode 100644 hw/constraints/vcu118/vcu_net.xdc create mode 100644 hw/constraints/vcu118/vcu_pcie.xdc create mode 160000 hw/ext/network create mode 100644 hw/hdl/cdma/axi_dma_rd.sv create mode 100644 hw/hdl/cdma/axi_dma_wr.sv create mode 100644 hw/hdl/cdma/cdma.sv create mode 100644 hw/hdl/cdma/krnl_counter.sv create mode 100644 hw/hdl/cdma_unaglined/axi_dma.sv create mode 100644 hw/hdl/cdma_unaglined/axi_dma_rd.sv create mode 100644 hw/hdl/cdma_unaglined/axi_dma_wr.sv create mode 100644 hw/hdl/mmu/tlb_arbiter.sv create mode 100644 hw/hdl/mmu/tlb_arbiter_isr.sv create mode 100644 hw/hdl/mmu/tlb_assign_isr.sv create mode 100644 hw/hdl/mmu/tlb_credits_rd.sv create mode 100644 hw/hdl/mmu/tlb_credits_wr.sv create mode 100644 hw/hdl/mmu/tlb_fsm_rd.sv create mode 100644 hw/hdl/mmu/tlb_fsm_wr.sv create mode 100644 hw/hdl/mmu/tlb_idma_arb.sv create mode 100644 hw/hdl/mmu/tlb_parser.sv create mode 100644 hw/hdl/mmu/tlb_region_top.sv create mode 100644 hw/hdl/mmu/tlb_top.sv create mode 100644 hw/hdl/mux/axis_mux_cdma.sv create mode 100644 hw/hdl/mux/axis_mux_ddr_host.sv create mode 100644 hw/hdl/mux/axis_mux_ddr_user.sv 
create mode 100644 hw/hdl/mux/axis_mux_user_sink.sv create mode 100644 hw/hdl/mux/axis_mux_user_src.sv create mode 100644 hw/hdl/network/cmac_uplus_axis_wrapper.sv create mode 100644 hw/hdl/network/network_clk_cross.sv create mode 100644 hw/hdl/network/network_meta_fv_arbiter.sv create mode 100644 hw/hdl/network/network_meta_tx_arbiter.sv create mode 100644 hw/hdl/network/network_module.sv create mode 100644 hw/hdl/network/network_mux_cmd_rd.sv create mode 100644 hw/hdl/network/network_mux_cmd_wr.sv create mode 100644 hw/hdl/network/network_req_parser.sv create mode 100644 hw/hdl/network/network_stack.sv create mode 100644 hw/hdl/network/network_top.sv create mode 100644 hw/hdl/network/roce_stack.sv create mode 100644 hw/hdl/network/rx_interface.v create mode 100644 hw/hdl/network/tx_interface.v create mode 100644 hw/hdl/operators/addmul/addmul.sv create mode 100644 hw/hdl/operators/addmul/addmul_slave.sv create mode 100644 hw/hdl/operators/addmul/addmul_user_logic.sv create mode 100644 hw/hdl/operators/aes/aes_pipe_stage.vhd create mode 100644 hw/hdl/operators/aes/aes_pipe_stage_last.vhd create mode 100644 hw/hdl/operators/aes/aes_pipeline.vhd create mode 100644 hw/hdl/operators/aes/aes_round.vhd create mode 100644 hw/hdl/operators/aes/aes_round_last.vhd create mode 100644 hw/hdl/operators/aes/aes_slave.sv create mode 100644 hw/hdl/operators/aes/aes_top.vhd create mode 100644 hw/hdl/operators/aes/aes_user_logic.sv create mode 100644 hw/hdl/operators/aes/key_expansion.vhd create mode 100644 hw/hdl/operators/aes/key_pipe_reg.vhd create mode 100644 hw/hdl/operators/aes/key_pipe_stage.vhd create mode 100644 hw/hdl/operators/aes/key_pipeline.vhd create mode 100644 hw/hdl/operators/aes/mix_columns.vhd create mode 100644 hw/hdl/operators/aes/pipe_reg.vhd create mode 100644 hw/hdl/operators/aes/s_box_lut.vhd create mode 100644 hw/hdl/operators/aes/shift_rows.vhd create mode 100644 hw/hdl/operators/aes/utils.vhd create mode 100644 hw/hdl/operators/chacha/chacha_core.v 
create mode 100644 hw/hdl/operators/chacha/chacha_qr.v create mode 100644 hw/hdl/operators/chacha/user_top.sv create mode 100644 hw/hdl/operators/dtengine/gbm/dt_gbm_slave.sv create mode 100644 hw/hdl/operators/dtengine/gbm/dt_user_logic.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/DTPackage.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/DTProcessor.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/Data_Memory.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/FPAdder_2cycles_latency.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/FPAdder_3cycles_latency.v create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/FPAdder_4cycles_latency.v create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/FPAddersReduceTree.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/FPAddersReduceTree_sync.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/FPAggregator.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/LineRateConvertor.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/RegBasedFIFO.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/Tree_Memory.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/bram.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/bram_1_in_2_out.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/bus_aligner.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/compute_unit.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/delay.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/dual_port_mem.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/pe_datapath.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/processing_element.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/processing_element_async.sv create mode 100644 hw/hdl/operators/dtengine/gbm/inferCore/quick_fifo.sv create mode 100644 
hw/hdl/operators/dtengine/gbm/inferCore/smart_shifter.sv create mode 100644 hw/hdl/operators/dtengine/xgboost/Core.sv create mode 100644 hw/hdl/operators/dtengine/xgboost/DTInference.sv create mode 100644 hw/hdl/operators/dtengine/xgboost/DTInferenceTop.sv create mode 100644 hw/hdl/operators/dtengine/xgboost/common/DTEngine_Types.sv create mode 100644 hw/hdl/operators/dtengine/xgboost/common/FPAdder_2cycles_latency.v create mode 100644 hw/hdl/operators/dtengine/xgboost/common/delay.v create mode 100644 hw/hdl/operators/dtengine/xgboost/core/DTPU.sv create mode 100644 hw/hdl/operators/dtengine/xgboost/core/DTPUCluster.sv create mode 100644 hw/hdl/operators/dtengine/xgboost/core/FPAddersReduceTree.sv create mode 100644 hw/hdl/operators/dtengine/xgboost/core/FPAggregator.v create mode 100644 hw/hdl/operators/dtengine/xgboost/core/Mem1in2out.v create mode 100644 hw/hdl/operators/dtengine/xgboost/core/PipelinedMUX.sv create mode 100644 hw/hdl/operators/dtengine/xgboost/core/Qdualport_mem.qip create mode 100644 hw/hdl/operators/dtengine/xgboost/core/Qdualport_mem.v create mode 100644 hw/hdl/operators/dtengine/xgboost/core/RLS.v create mode 100644 hw/hdl/operators/dtengine/xgboost/core/bram1in2out.v create mode 100644 hw/hdl/operators/dtengine/xgboost/core/bramin1out2.qip create mode 100644 hw/hdl/operators/dtengine/xgboost/core/bramin1out2.v create mode 100644 hw/hdl/operators/dtengine/xgboost/core/dualport_mem.v create mode 100644 hw/hdl/operators/dtengine/xgboost/engineParams.sv create mode 100644 hw/hdl/operators/hll/hll_slave.sv create mode 100644 hw/hdl/operators/hll/hll_user_logic.sv create mode 100755 hw/hdl/operators/kmeans/Formatter.sv create mode 100755 hw/hdl/operators/kmeans/adder_tree.sv create mode 100755 hw/hdl/operators/kmeans/adder_tree_low_resource.sv create mode 100755 hw/hdl/operators/kmeans/agg_div.sv create mode 100755 hw/hdl/operators/kmeans/dist_processor.sv create mode 100755 hw/hdl/operators/kmeans/div_buffer.sv create mode 100755 
hw/hdl/operators/kmeans/dual_port_ram.sv create mode 100755 hw/hdl/operators/kmeans/fetch_engine.sv create mode 100755 hw/hdl/operators/kmeans/fetch_engine_output_lane.sv create mode 100755 hw/hdl/operators/kmeans/formatter_c_lane_splitter.sv create mode 100755 hw/hdl/operators/kmeans/k_means_accumulation.sv create mode 100755 hw/hdl/operators/kmeans/k_means_aggregation.sv create mode 100755 hw/hdl/operators/kmeans/k_means_division.sv create mode 100755 hw/hdl/operators/kmeans/k_means_layer.sv create mode 100755 hw/hdl/operators/kmeans/k_means_module.sv create mode 100755 hw/hdl/operators/kmeans/k_means_operator.sv create mode 100755 hw/hdl/operators/kmeans/k_means_slave.sv create mode 100644 hw/hdl/operators/kmeans/k_means_user_logic.sv create mode 100755 hw/hdl/operators/kmeans/kmeansTypes.sv create mode 100755 hw/hdl/operators/kmeans/low_prec_k_means_operator.sv create mode 100755 hw/hdl/operators/kmeans/mac.sv create mode 100644 hw/hdl/operators/kmeans/mult_acc.sv create mode 100644 hw/hdl/operators/kmeans/quick_fifo.v create mode 100755 hw/hdl/operators/kmeans/rd_addr_gen.sv create mode 100755 hw/hdl/operators/kmeans/runtimeParam_Manager.sv create mode 100644 hw/hdl/operators/kmeans/spl_sdp_mem.v create mode 100644 hw/hdl/operators/kmeans/spl_sdp_mem_t.v create mode 100755 hw/hdl/operators/kmeans/wr_engine.sv create mode 100644 hw/hdl/operators/lpn/fk_ctrl.vhd create mode 100644 hw/hdl/operators/lpn/fk_data_reg.vhd create mode 100644 hw/hdl/operators/lpn/fk_dma_rd.vhd create mode 100644 hw/hdl/operators/lpn/fk_dma_wr.vhd create mode 100644 hw/hdl/operators/lpn/fk_dp.vhd create mode 100644 hw/hdl/operators/lpn/fk_key_reg.vhd create mode 100644 hw/hdl/operators/lpn/fk_noise_reg.vhd create mode 100644 hw/hdl/operators/lpn/fk_slave.vhd create mode 100644 hw/hdl/operators/lpn/fk_top.vhd create mode 100644 hw/hdl/operators/lpn/fk_work_reg.vhd create mode 100644 hw/hdl/operators/lpn/utils.vhd create mode 100644 hw/hdl/operators/minmaxsum/minmaxsum.vhd create mode 
100644 hw/hdl/operators/minmaxsum/minmaxsum_slave.sv create mode 100644 hw/hdl/operators/minmaxsum/minmaxsum_user_logic.sv create mode 100644 hw/hdl/operators/percentage/minmaxsum_user_logic.sv create mode 100644 hw/hdl/operators/percentage/onesCounter.v create mode 100644 hw/hdl/operators/percentage/percentage.sv create mode 100644 hw/hdl/operators/percentage/reduction_tree.v create mode 100644 hw/hdl/operators/regex/regex.sv create mode 100644 hw/hdl/operators/regex/regex_data.sv create mode 100644 hw/hdl/operators/regex/regex_ip.tcl create mode 100644 hw/hdl/operators/regex/regex_req.sv create mode 100644 hw/hdl/operators/regex/regex_top.v create mode 100644 hw/hdl/operators/regex/regex_util.tcl create mode 100644 hw/hdl/operators/regex/rem_charmatch.v create mode 100644 hw/hdl/operators/regex/rem_decoder.v create mode 100644 hw/hdl/operators/regex/rem_halfrange.v create mode 100644 hw/hdl/operators/regex/rem_onestate.v create mode 100644 hw/hdl/operators/regex/rem_rangematch.v create mode 100644 hw/hdl/operators/regex/rem_top_ff.v create mode 100644 hw/hdl/operators/selection/selection.vhd create mode 100644 hw/hdl/operators/selection/selection_slave.sv create mode 100644 hw/hdl/operators/selection/selection_user_logic.sv create mode 100644 hw/hdl/operators/stride/stride.sv create mode 100644 hw/hdl/operators/stride/stride_data.sv create mode 100644 hw/hdl/operators/stride/stride_req.sv create mode 100644 hw/hdl/operators/testcount/minmaxsum_user_logic.sv create mode 100644 hw/hdl/operators/testcount/onesCounter.v create mode 100644 hw/hdl/operators/testcount/testcount.sv create mode 100644 hw/hdl/operators/testcount/testcount_slave.sv create mode 100644 hw/hdl/pkg/axi_intf.sv create mode 100644 hw/hdl/pkg/axi_macros.svh create mode 100644 hw/hdl/pkg/lynx_intf.sv create mode 100644 hw/hdl/pkg/lynx_macros.svh create mode 100644 hw/hdl/regs/axi_reg.sv create mode 100644 hw/hdl/regs/axi_reg_array.sv create mode 100644 hw/hdl/regs/axil_decoupler.sv create mode 
100644 hw/hdl/regs/axil_reg.sv create mode 100644 hw/hdl/regs/axis_decoupler.sv create mode 100644 hw/hdl/regs/axis_reg.sv create mode 100644 hw/hdl/regs/axis_reg_array.sv create mode 100644 hw/hdl/regs/axis_reg_array_rtl.sv create mode 100644 hw/hdl/regs/axis_reg_rtl.sv create mode 100644 hw/hdl/regs/axisr_decoupler.sv create mode 100644 hw/hdl/regs/axisr_reg.sv create mode 100644 hw/hdl/regs/axisr_reg_array.sv create mode 100644 hw/hdl/regs/data_queue_credits_sink.sv create mode 100644 hw/hdl/regs/data_queue_credits_src.sv create mode 100644 hw/hdl/regs/dma_isr_req_queue.sv create mode 100644 hw/hdl/regs/dma_req_queue.sv create mode 100644 hw/hdl/regs/meta_decoupler.sv create mode 100644 hw/hdl/regs/meta_reg.sv create mode 100644 hw/hdl/regs/meta_reg_array.sv create mode 100644 hw/hdl/regs/req_decoupler.sv create mode 100644 hw/hdl/regs/req_queue.sv create mode 100644 hw/hdl/regs/req_reg.sv create mode 100644 hw/hdl/regs/req_reg_array.sv create mode 100644 hw/hdl/regs/req_reg_array_rtl.sv create mode 100644 hw/hdl/regs/req_reg_rtl.sv create mode 100644 hw/hdl/slave/cnfg_slave.sv create mode 100644 hw/hdl/slave/cnfg_slave_avx.sv create mode 100644 hw/hdl/slave/static_slave.sv create mode 100644 hw/hdl/slave/tlb_slave.sv create mode 100644 hw/hdl/util/fifo.sv create mode 100644 hw/hdl/util/queue.sv create mode 100644 hw/hdl/util/queue_meta.sv create mode 100644 hw/hdl/util/queue_stream.sv create mode 100644 hw/hdl/util/tdp_ram_nc.sv create mode 100644 hw/scripts/bd/cr_static_u250.tcl create mode 100644 hw/scripts/bd/cr_static_u280.tcl create mode 100644 hw/scripts/bd/cr_static_vcu118.tcl create mode 100644 hw/scripts/compile.tcl.in create mode 100644 hw/scripts/dynamic.tcl.in create mode 100644 hw/scripts/ip_inst/base_infrastructure.tcl create mode 100644 hw/scripts/ip_inst/network_infrastructure.tcl create mode 100644 hw/scripts/ip_inst/network_stack.tcl create mode 100644 hw/scripts/ip_inst/network_ultraplus.tcl create mode 100644 hw/scripts/post.tcl.in create 
mode 100644 hw/scripts/shell.tcl.in create mode 100644 hw/scripts/util_scripts/cr_hdl.tcl create mode 100644 hw/scripts/util_scripts/ila.tcl create mode 100644 hw/scripts/util_scripts/load_jtag.tcl create mode 100644 hw/scripts/wr_hdl/wr_hdl_dynamic.tcl create mode 100644 hw/scripts/wr_hdl/wr_hdl_pkg.tcl create mode 100644 hw/scripts/wr_hdl/wr_hdl_top_u250.tcl create mode 100644 hw/scripts/wr_hdl/wr_hdl_top_u280.tcl create mode 100644 hw/scripts/wr_hdl/wr_hdl_top_vcu118.tcl create mode 100644 hw/scripts/wr_hdl/wr_hdl_user.tcl create mode 100644 hw/sim/axi_intf_sim.sv create mode 100644 hw/sim/lynx_intf_sim.sv create mode 100644 hw/sim/lynx_pkg.sv create mode 100644 hw/sim/scripts_sim/tb.tcl create mode 100644 hw/sim/tb/axis_mux_ddr_sink_tb.sv create mode 100644 hw/sim/tb/axis_mux_ddr_sink_wide_tb.sv create mode 100644 hw/sim/tb/axis_mux_ddr_src_tb.sv create mode 100644 hw/sim/tb/axis_mux_ddr_src_wide_tb.sv create mode 100644 hw/sim/tb/axis_mux_ddr_user_tb.sv create mode 100644 hw/sim/tb/axis_mux_tb.sv create mode 100644 hw/sim/tb/tb_cdma_unaglined.sv create mode 100644 hw/sim/tb/tb_design_user_logic.sv create mode 100644 hw/sim/tb/tb_user.sv create mode 100644 hw/sim/tb/tlb_arbiter_tb.sv create mode 100644 hw/sim/tb/tlb_top.sv create mode 100644 sw/base/CMakeLists.txt create mode 100644 sw/base/include/fBench.hpp create mode 100644 sw/base/include/fDefs.hpp create mode 100644 sw/base/include/fDev.hpp create mode 100644 sw/base/include/tsc_x86.h create mode 100644 sw/base/src/fDev.cpp create mode 100644 sw/base/src/main.cpp create mode 100644 sw/gbm/CMakeLists.txt create mode 100644 sw/gbm/include/fDefs.hpp create mode 100644 sw/gbm/include/fDev.hpp create mode 100644 sw/gbm/src/fDev.cpp create mode 100644 sw/gbm/src/main.cpp create mode 100644 sw/gbm/verify/dtengine.cpp create mode 100755 sw/gbm/verify/infer create mode 100644 sw/rdma/CMakeLists.txt create mode 100644 sw/rdma/include/fBench.hpp create mode 100644 sw/rdma/include/fDefs.hpp create mode 100644 
sw/rdma/include/fDev.hpp create mode 100644 sw/rdma/include/fQ.hpp create mode 100644 sw/rdma/include/fView.hpp create mode 100644 sw/rdma/include/tsc_x86.h create mode 100644 sw/rdma/src/fDev.cpp create mode 100644 sw/rdma/src/fQ.cpp create mode 100644 sw/rdma/src/fView.cpp create mode 100644 sw/rdma/src/main.cpp create mode 100644 sw/scheduling/CMakeLists.txt create mode 100644 sw/scheduling/include/classical_kmeans.h create mode 100644 sw/scheduling/include/fApp.hpp create mode 100644 sw/scheduling/include/fArbiter.hpp create mode 100644 sw/scheduling/include/fDefs.hpp create mode 100644 sw/scheduling/include/fDev.hpp create mode 100644 sw/scheduling/include/fHll.hpp create mode 100644 sw/scheduling/include/fJob.hpp create mode 100644 sw/scheduling/include/fKmeans.hpp create mode 100644 sw/scheduling/include/fOp0.hpp create mode 100644 sw/scheduling/include/fOp1.hpp create mode 100644 sw/scheduling/include/fOp2.hpp create mode 100644 sw/scheduling/include/fOp3.hpp create mode 100644 sw/scheduling/include/fScheduler.hpp create mode 100644 sw/scheduling/include/kmeans.hpp create mode 100644 sw/scheduling/include/mlweaving.h create mode 100644 sw/scheduling/include/utils.hpp create mode 100644 sw/scheduling/src/classical_kmeans.cpp create mode 100644 sw/scheduling/src/fArbiter.cpp create mode 100644 sw/scheduling/src/fDev.cpp create mode 100644 sw/scheduling/src/fScheduler.cpp create mode 100644 sw/scheduling/src/main.cpp create mode 100644 sw/scheduling/src/utils.cpp create mode 100644 sw/stride/CMakeLists.txt create mode 100644 sw/stride/include/fBench.hpp create mode 100644 sw/stride/include/fDefs.hpp create mode 100644 sw/stride/include/fDev.hpp create mode 100644 sw/stride/include/fQ.hpp create mode 100644 sw/stride/include/fView.hpp create mode 100644 sw/stride/include/tsc_x86.h create mode 100644 sw/stride/src/fDev.cpp create mode 100644 sw/stride/src/fQ.cpp create mode 100644 sw/stride/src/fView.cpp create mode 100644 sw/stride/src/main.cpp create mode 
100644 util/hot_reset.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..1d65be45 --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +build*/ +bstreams/ +iprepo/ +tmp/ +hd_visual/ +ip_dir/ +xsim.dir/ +tb_user/ +ip/ +.Xil/ +.vscode/ +*.pb +*.log +*.tmp +*.jou +*.str +*.xml +*.config +*.zip + diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..75da1c66 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "hw/ext/network"] + path = hw/ext/network + url = https://github.com/fpgasystems/fpga-network-stack.git + branch = master diff --git a/README.md b/README.md new file mode 100644 index 00000000..c96d36e6 --- /dev/null +++ b/README.md @@ -0,0 +1,89 @@ +# Coyote +Reconfigurable Heterogeneous Architecture Framework aiming to provide operating system abstractions. + +## Prerequisites +Framework was tested with `Vivado 2019.2` and `Vivado 2020.1`. Following Xilinx platforms are supported: `vcu118`, `Alveo u250`, `Alveo u280`. Minimum version of CMake required is 3.0. + +## Dependencies +Initiate the network stack: + + $ git clone + $ git submodule update --init --recursive + +## Build `HW` + +Create a build directory: + + $ cd hw + $ mkdir build + $ cd build + +Enter a valid system configuration: + + $ cmake .. 
-DFDEV_NAME=u250 + +The following configuration options are provided: + +| Name | Values | Description | +|------------------------|--------------------------|------------------------------------------------------------------------------------| +| FDEV\_NAME | <**u250**, u280, vcu118> | Supported devices | +| N\_REGIONS | <**1**:16> | Number of independent regions | +| EN\_STRM | <0, **1**> | Enable direct host-fpga streaming | +| EN\_DDR | <**0**, 1> | Enable local FPGA memory stack | +| EN\_AVX | <0,**1**> | AVX support | +| EN\_BPSS | <0,**1**> | Bypass descriptors in user logic | +| N\_DDR\_CHAN | <0:4> | Number of DDR channels in striping mode | +| EN\_PR | <**0**, 1> | Enable dynamic reconfiguration of the regions | +| EN\_TCP | <**0**, 1> | Enable TCP/IP stack | +| EN\_RDMA | <**0**, 1> | Enable RDMA stack | +| EN\_FVV | <**0**, 1> | Enable Farview verbs | + +If the network stack is used, the IP dependencies can be installed with: + + make installip + +Create the shell and the project: + + make shell + +If PR is enabled, additional sets of configurations can be added by running the following command: + + make dynamic + +At this point user logic can be inserted. User logic wrappers can be found under the build project directory in the **hdl/config_X** where **X** represents the chosen PR configuration. If multiple PR configurations are present it is advisable to put the most complex configuration in the initial one (**config_0**). For best results explicit floorplanning should be done manually after synthesis. + +Once the user design is ready to be compiled, run the following command: + + make compile + +Once the compilation finishes, the initial bitstream with the static region can be loaded to the FPGA via JTAG. At any point during the compilation, the status can be checked by opening the project in Vivado. This can be done by running `start_gui` in the same terminal shell. 
All compiled bitstreams, including partial ones, can be found in the build directory under **bitstreams**. + +## Driver + +After the bitstream has been loaded, the driver can be compiled on the target host machine: + + cd driver + make + +Insert the driver into the kernel: + + insmod fpga_drv.ko + +Run the script **util/hot_reset.sh** to rescan the PCIe. If this fails the restart of the machine might be necessary after this step. + +## Build `SW` + +Any of the `sw` projects can be built with the following commands: + + cd sw/ + mkdir build + cd build + cmake .. + make main + +## Simulation + +User logic can be simulated by creating the testbench project: + + cd hw/sim/scripts/sim + vivado -mode tcl -source tb.tcl \ No newline at end of file diff --git a/cmake/FindVivado.cmake b/cmake/FindVivado.cmake new file mode 100644 index 00000000..1db955f6 --- /dev/null +++ b/cmake/FindVivado.cmake @@ -0,0 +1,31 @@ +# Author: Johannes de Fine Licht (johannes.definelicht@inf.ethz.ch) +# Created: October 2016 +# +# To specify the path to the Vivado installation, provide: +# -DVIVADO_ROOT_DIR= +# If successful, this script defines: +# VIVADO_FOUND +# VIVADO_BINARY + +cmake_minimum_required(VERSION 3.0) + +find_path(VIVADO_PATH + NAMES vivado + PATHS ${VIVADO_ROOT_DIR} ENV XILINX_VIVADO + PATH_SUFFIXES bin +) + +if(NOT EXISTS ${VIVADO_PATH}) + + message(WARNING "Vivado not found.") + +else() + + get_filename_component(VIVADO_ROOT_DIR ${VIVADO_PATH} DIRECTORY) + + set(VIVADO_FOUND TRUE) + set(VIVADO_BINARY ${VIVADO_ROOT_DIR}/bin/vivado) + + message(STATUS "Found Vivado at ${VIVADO_ROOT_DIR}.") + +endif() diff --git a/cmake/FindVivadoHLS.cmake b/cmake/FindVivadoHLS.cmake new file mode 100644 index 00000000..d6babe16 --- /dev/null +++ b/cmake/FindVivadoHLS.cmake @@ -0,0 +1,33 @@ +# Author: Johannes de Fine Licht (johannes.definelicht@inf.ethz.ch) +# Created: October 2016 +# +# To specify the path to the Vivado HLS installation, provide: +# -DVIVADO_HLS_ROOT_DIR= +# If successful, 
/**
 * Combine two 16-bit halves into a single 32-bit word.
 * Only the low 16 bits of each argument contribute to the result.
 * @param hi - upper half (bits 31..16 of the result)
 * @param lo - lower half (bits 15..0 of the result)
 */
static inline uint32_t build_u32(uint32_t hi, uint32_t lo)
{
    return ((hi & 0xFFFFUL) << 16) | (lo & 0xFFFFUL);
}

/**
 * Combine two 32-bit halves into a single 64-bit word.
 * Only the low 32 bits of each argument contribute to the result.
 * @param hi - upper half (bits 63..32 of the result)
 * @param lo - lower half (bits 31..0 of the result)
 */
static inline uint64_t build_u64(uint64_t hi, uint64_t lo)
{
    // the mask must cover all 32 bits (eight hex digits); a seven-digit
    // mask would silently clear bits 60..63 of the combined value
    return ((hi & 0xFFFFFFFFULL) << 32) | (lo & 0xFFFFFFFFULL);
}
engine_writeback_setup(struct pci_drvdata *d, struct xdma_engine *engine); +static struct xdma_engine *engine_create(struct pci_drvdata *d, int offs, int c2h, int channel); +static int probe_for_engine(struct pci_drvdata *d, int c2h, int channel); +static int probe_engines(struct pci_drvdata *d); + +// interrupts +static irqreturn_t fpga_tlb_miss_isr(int irq, void *dev_id); +static void user_interrupts_enable(struct pci_drvdata *d, uint32_t mask); +static void user_interrupts_disable(struct pci_drvdata *d, uint32_t mask); +static uint32_t read_interrupts(struct pci_drvdata *d); +static uint32_t build_vector_reg(uint32_t a, uint32_t b, uint32_t c, uint32_t d); +static void write_msix_vectors(struct pci_drvdata *d); +static int msix_irq_setup(struct pci_drvdata *d); +static int irq_setup(struct pci_drvdata *d, struct pci_dev *pdev); +static void irq_teardown(struct pci_drvdata *d); +static int msix_capable(struct pci_dev *pdev, int type); +static int pci_check_msix(struct pci_drvdata *d, struct pci_dev *pdev); + +// BARs +static int map_single_bar(struct pci_drvdata *d, struct pci_dev *pdev, int idx, int curr_idx); +static void unmap_bars(struct pci_drvdata *d, struct pci_dev *pdev); +static int map_bars(struct pci_drvdata *d, struct pci_dev *pdev); + +// regions +static int request_regions(struct pci_drvdata *d, struct pci_dev *pdev); + +// probe +static int pci_probe(struct pci_dev *pdev, const struct pci_device_id *id); +static void pci_remove(struct pci_dev *pdev); + +/* -- Definitions ------------------------------------------------------------------------ */ + +/** + * Read current engine status + */ + +static uint32_t engine_status_read(struct xdma_engine *engine) +{ + uint32_t val; + + BUG_ON(!engine); + + dbg_info("engine %s status:\n", engine->name); + val = ioread32(&engine->regs->status); + dbg_info("status = 0x%08x: %s%s%s%s%s%s%s%s%s\n", (uint32_t)val, + (val & XDMA_STAT_BUSY) ? "BUSY " : "IDLE ", + (val & XDMA_STAT_DESC_STOPPED) ? 
"DESC_STOPPED " : "", + (val & XDMA_STAT_DESC_COMPLETED) ? "DESC_COMPLETED " : "", + (val & XDMA_STAT_ALIGN_MISMATCH) ? "ALIGN_MISMATCH " : "", + (val & XDMA_STAT_MAGIC_STOPPED) ? "MAGIC_STOPPED " : "", + (val & XDMA_STAT_FETCH_STOPPED) ? "FETCH_STOPPED " : "", + (val & XDMA_STAT_READ_ERROR) ? "READ_ERROR " : "", + (val & XDMA_STAT_DESC_ERROR) ? "DESC_ERROR " : "", + (val & XDMA_STAT_IDLE_STOPPED) ? "IDLE_STOPPED " : ""); + + return val; +} + +/* +____________________ +\______ \______ \ + | ___/| _/ + | | | | \ + |____| |____|_ / + \/ + +*/ + +/** + * Reconfiguration + * @param vaddr - start of the bitstream + * @param len - length of the bitstream + */ +static int reconfigure(struct fpga_dev *d, uint64_t vaddr, uint64_t len) +{ + struct pr_ctrl *prc; + struct pr_pages *tmp_buff; + int i; + uint64_t fsz_m; + uint64_t fsz_r; + uint64_t pr_bsize = PR_BATCH_SIZE; + struct pci_drvdata *pd; + + BUG_ON(!d); + pd = d->pd; + + prc = d->prc; + BUG_ON(!prc); + + hash_for_each_possible(pr_buff_map, tmp_buff, entry, vaddr) { + if(tmp_buff->vaddr == vaddr && tmp_buff->reg_id == d->id) { + // Reconfiguration + fsz_m = len / pr_bsize; + fsz_r = len % pr_bsize; + dbg_info("bitstream full %lld, partial %lld\n", fsz_m, fsz_r); + + // full + for(i = 0; i < fsz_m; i++) { + dbg_info("page %d, phys %llx, len %llx\n", i, page_to_phys(prc->curr_buff.pages[i]), pr_bsize); + pd->fpga_stat_cnfg->pr_addr = page_to_phys(tmp_buff->pages[i]); + pd->fpga_stat_cnfg->pr_len = pr_bsize; + if(fsz_r == 0 && i == fsz_m-1) + pd->fpga_stat_cnfg->pr_ctrl = 0x7; + else + pd->fpga_stat_cnfg->pr_ctrl = 0x7; + + while((pd->fpga_stat_cnfg->pr_stat & 0x1) != 0x1) ndelay(100); + } + + // partial + if(fsz_r > 0) { + dbg_info("page %lld, phys %llx, len %llx\n", fsz_m, page_to_phys(prc->curr_buff.pages[fsz_m]), fsz_r); + pd->fpga_stat_cnfg->pr_addr = page_to_phys(tmp_buff->pages[fsz_m]); + pd->fpga_stat_cnfg->pr_len = fsz_r; + pd->fpga_stat_cnfg->pr_ctrl = 0x7; + while((pd->fpga_stat_cnfg->pr_stat & 0x1) != 0x1) 
ndelay(100); + } + } + } + + return 0; +} + +/** + * Allocate PR buffers + * @param n_pages - number of large pages requested + */ +static int alloc_pr_buffers(struct fpga_dev *d, unsigned long n_pages) +{ + int i; + struct pr_ctrl *prc; + + prc = d->prc; + + BUG_ON(!d); + BUG_ON(!prc); + + // obtain PR lock + spin_lock(&prc->lock); + + if(prc->curr_buff.n_pages){ + dbg_info("allocated PR buffers exist and are not mapped\n"); + return -1; + } + + if(n_pages > MAX_PR_BUFF_NUM) + prc->curr_buff.n_pages = MAX_PR_BUFF_NUM; + else + prc->curr_buff.n_pages = n_pages; + + prc->curr_buff.pages = kzalloc(n_pages * sizeof(*prc->curr_buff.pages), GFP_KERNEL); + if (prc->curr_buff.pages == NULL) { + return -ENOMEM; + } + + dbg_info("allocated %lu bytes for page pointer array for %ld PR buffers @0x%p.\n", + n_pages * sizeof(*prc->curr_buff.pages), n_pages, prc->curr_buff.pages); + + + for(i = 0; i < prc->curr_buff.n_pages; i++) { + prc->curr_buff.pages[i] = alloc_pages(GFP_ATOMIC, LARGE_PAGE_ORDER); + if(!prc->curr_buff.pages[i]) { + dbg_info("PR buffer %d could not be allocated\n", i); + goto fail_alloc; + } + + dbg_info("PR buffer allocated @ %llx \n", page_to_phys(prc->curr_buff.pages[i])); + } + + // release PR lock + spin_unlock(&prc->lock); + + return 0; +fail_alloc: + while(i) + __free_pages(prc->curr_buff.pages[--i], LARGE_PAGE_ORDER); + // release PR lock + spin_unlock(&prc->lock); + return -ENOMEM; +} + +/** + * Free PR buffers + */ +static int free_pr_buffers(struct fpga_dev *d, uint64_t vaddr) +{ + int i; + struct pr_pages *tmp_buff; + struct pr_ctrl *prc; + + prc = d->prc; + + BUG_ON(!d); + BUG_ON(!prc); + + // obtain PR lock + spin_lock(&prc->lock); + + hash_for_each_possible(pr_buff_map, tmp_buff, entry, vaddr) { + if(tmp_buff->vaddr == vaddr && tmp_buff->reg_id == d->id) { + + // free pages + for(i = 0; i < tmp_buff->n_pages; i++) { + if(tmp_buff->pages[i]) + __free_pages(tmp_buff->pages[i], LARGE_PAGE_ORDER); + } + + kfree(tmp_buff->pages); + + // Free from 
hash + hash_del(&tmp_buff->entry); + } + } + + // obtain PR lock + spin_unlock(&prc->lock); + + return 0; +} + + +/* + _____ +| ___|__ _ __ ___ +| |_ / _ \| '_ \/ __| +| _| (_) | |_) \__ \ +|_| \___/| .__/|___/ + |_| +*/ + +/** + * Allocate card buffers + * @param n_pages - number of pages requested + */ +static int card_alloc(struct fpga_dev *d, uint64_t *card_paddr, uint64_t n_pages, int type) +{ + int i; + struct pci_drvdata* pd; + + pd = d->pd; + + if(type) { + // lock + spin_lock(&pd->card_l_lock); + + if(pd->num_free_lchunks < n_pages) { + dbg_info("not enough free large card pages\n"); + return -ENOMEM; + } + + for(i = 0; i < n_pages; i++) { + card_paddr[i] = (pd->lalloc->id << LTLB_PAGE_BITS) + MEM_SEP; + dbg_info("user card buffer allocated @ %llx device %d\n", card_paddr[i], d->id); + pd->lalloc = pd->lalloc->next; + } + + // release lock + spin_unlock(&pd->card_l_lock); + } else { + // lock + spin_lock(&pd->card_s_lock); + + if(pd->num_free_schunks < n_pages) { + dbg_info("not enough free small card pages\n"); + return -ENOMEM; + } + + for(i = 0; i < n_pages; i++) { + card_paddr[i] = pd->salloc->id << STLB_PAGE_BITS; + dbg_info("user card buffer allocated @ %llx device %d\n", card_paddr[i], d->id); + pd->salloc = pd->salloc->next; + } + + // release lock + spin_unlock(&pd->card_s_lock); + } + + return 0; +} + +/** + * Free card buffers + * @param n_pages - number of pages + */ +void card_free(struct fpga_dev *d, uint64_t *card_paddr, uint64_t n_pages, int type) +{ + int i; + uint64_t tmp_id; + struct pci_drvdata* pd; + + pd = d->pd; + + if(type) { + // lock + spin_lock(&pd->card_l_lock); + + for(i = n_pages-1; i >= 0; i--) { + tmp_id = (card_paddr[i] - MEM_SEP) >> LTLB_PAGE_BITS; + pd->lchunks[tmp_id].next = pd->lalloc; + pd->lalloc = &pd->lchunks[tmp_id]; + } + + // release lock + spin_unlock(&pd->card_l_lock); + } else { + // lock + spin_lock(&pd->card_s_lock); + + for(i = n_pages-1; i >= 0; i--) { + tmp_id = card_paddr[i] >> STLB_PAGE_BITS; + 
pd->schunks[tmp_id].next = pd->salloc; + pd->salloc = &pd->schunks[tmp_id]; + } + + // release lock + spin_unlock(&pd->card_s_lock); + } +} + +/** + * Allocate user buffers + * @param n_pages - number of large pages requested + */ +static int alloc_user_buffers(struct fpga_dev *d, unsigned long n_pages) +{ + int i, ret_val = 0; + struct pci_drvdata* pd; + + pd = d->pd; + + if(d->curr_user_buff.n_pages){ + dbg_info("allocated user buffers exist and are not mapped\n"); + return -1; + } + + // check host + if(n_pages > MAX_BUFF_NUM) + d->curr_user_buff.n_pages = MAX_BUFF_NUM; + else + d->curr_user_buff.n_pages = n_pages; + + // check card + if(d->curr_user_buff.n_pages > pd->num_free_lchunks) + return -ENOMEM; + + // alloc host + d->curr_user_buff.hpages = kzalloc(n_pages * sizeof(*d->curr_user_buff.hpages), GFP_KERNEL); + if (d->curr_user_buff.hpages == NULL) { + return -ENOMEM; + } + dbg_info("allocated %lu bytes for page pointer array for %ld user host buffers @0x%p.\n", + n_pages * sizeof(*d->curr_user_buff.hpages), n_pages, d->curr_user_buff.hpages); + + + for(i = 0; i < d->curr_user_buff.n_pages; i++) { + d->curr_user_buff.hpages[i] = alloc_pages(GFP_ATOMIC, LARGE_PAGE_ORDER); + if(!d->curr_user_buff.hpages[i]) { + dbg_info("user host buffer %d could not be allocated\n", i); + goto fail_host_alloc; + } + + dbg_info("user host buffer allocated @ %llx device %d\n", page_to_phys(d->curr_user_buff.hpages[i]), d->id); + } + + // alloc card + d->curr_user_buff.cpages = kzalloc(d->curr_user_buff.n_pages * sizeof(uint64_t), GFP_KERNEL); + if(d->curr_user_buff.cpages == NULL) { + return -ENOMEM; + } + dbg_info("allocated %lu bytes for page pointer array for %ld user card buffers @0x%p.\n", + n_pages * sizeof(*d->curr_user_buff.cpages), n_pages, d->curr_user_buff.cpages); + + ret_val = card_alloc(d, d->curr_user_buff.cpages, d->curr_user_buff.n_pages, LARGE_CHUNK_ALLOC); + if(ret_val) { + dbg_info("user card buffer %d could not be allocated\n", i); + goto fail_card_alloc; 
+ } + + return 0; +fail_host_alloc: + while(i) + __free_pages(d->curr_user_buff.hpages[--i], LARGE_PAGE_ORDER); + + kfree(d->curr_user_buff.hpages); + + return -ENOMEM; + +fail_card_alloc: + // release host + for(i = 0; i < d->curr_user_buff.n_pages; i++) + __free_pages(d->curr_user_buff.hpages[i], LARGE_PAGE_ORDER); + + kfree(d->curr_user_buff.hpages); + kfree(d->curr_user_buff.cpages); + + return -ENOMEM; + +} + +/** + * Free host and card buffers + */ +static int free_user_buffers(struct fpga_dev *d, uint64_t vaddr) +{ + int i; + uint64_t vaddr_tmp; + struct user_pages *tmp_buff; + struct pci_drvdata *pd; + + pd = d->pd; + + hash_for_each_possible(user_lbuff_map[d->id], tmp_buff, entry, vaddr) { + + + if(tmp_buff->vaddr == vaddr) { + + vaddr_tmp = tmp_buff->vaddr; + + // free host pages + for(i = 0; i < tmp_buff->n_pages; i++) { + if(tmp_buff->hpages[i]) + __free_pages(tmp_buff->hpages[i], LARGE_PAGE_ORDER); + + // remove from TLB + ltlb_service_unmap(d, vaddr_tmp); + vaddr_tmp += LARGE_PAGE_SIZE; + } + kfree(tmp_buff->hpages); + + // free card pages + card_free(d, tmp_buff->cpages, tmp_buff->n_pages, LARGE_CHUNK_ALLOC); + kfree(tmp_buff->cpages); + + // Free from hash + hash_del(&tmp_buff->entry); + } + } + + return 0; +} + +/** + * OPEN: Acquire a region + */ +static int fpga_open(struct inode *inode, struct file *file) +{ + int minor = iminor(inode); + + struct fpga_dev *d = container_of(inode->i_cdev, struct fpga_dev, cdev); + BUG_ON(!d); + + // Check busy flag + if(!atomic_add_unless(&d->in_use, 1, 1)) { + dbg_info("fpga device %d busy\n", minor); + return -EBUSY; + } + + dbg_info("fpga device %d acquired\n", minor); + + // get current task handle + d->curr_task = current; + d->curr_mm = current->mm; + + // set private data + file->private_data = (void *)d; + + return 0; +} + +/** + * RELEASE: Release a region + */ +static int fpga_release(struct inode *inode, struct file *file) +{ + int minor = iminor(inode); + + struct fpga_dev *d = 
container_of(inode->i_cdev, struct fpga_dev, cdev); + BUG_ON(!d); + + // clear flag + atomic_set(&d->in_use, 0); + + // unamp all user pages + tlb_put_user_pages(d, 1); + + dbg_info("fpga device %d released\n", minor); + + return 0; +} + +/** + * MMAP: Control and buffers + */ +static int fpga_mmap(struct file *file, struct vm_area_struct *vma) +{ + int i; + unsigned long vaddr; + struct fpga_dev *d; + struct pr_ctrl *prc; + struct user_pages *new_user_buff; + struct pr_pages *new_pr_buff; + + d = (struct fpga_dev*)file->private_data; + + prc = d->prc; + + BUG_ON(!d); + BUG_ON(!prc); + + vaddr = vma->vm_start; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + // map user ctrl region + if(vma->vm_pgoff == MMAP_CTRL) { + dbg_info("fpga dev. %d, memory mapping user ctrl region at %llx of size %x\n", + d->id, d->fpga_phys_addr_ctrl + FPGA_CTRL_USER_OFFS, FPGA_CTRL_USER_SIZE); + if(remap_pfn_range(vma, vma->vm_start, (d->fpga_phys_addr_ctrl + FPGA_CTRL_USER_OFFS) >> PAGE_SHIFT, + FPGA_CTRL_USER_SIZE, vma->vm_page_prot)) { + return -EIO; + } + return 0; + } + + // map cnfg region + if(vma->vm_pgoff == MMAP_CNFG) { + dbg_info("fpga dev. %d, memory mapping config region at %llx of size %x\n", + d->id, d->fpga_phys_addr_ctrl + FPGA_CTRL_CNFG_OFFS, FPGA_CTRL_CNFG_SIZE); + if(remap_pfn_range(vma, vma->vm_start, (d->fpga_phys_addr_ctrl + FPGA_CTRL_CNFG_OFFS) >> PAGE_SHIFT, + FPGA_CTRL_CNFG_SIZE, vma->vm_page_prot)) { + return -EIO; + } + return 0; + } + + // map cnfg AVX region + if(vma->vm_pgoff == MMAP_CNFG_AVX) { + dbg_info("fpga dev. %d, memory mapping config AVX region at %llx of size %x\n", + d->id, d->fpga_phys_addr_ctrl_avx, FPGA_CTRL_CNFG_AVX_SIZE); + if(remap_pfn_range(vma, vma->vm_start, d->fpga_phys_addr_ctrl_avx >> PAGE_SHIFT, + FPGA_CTRL_CNFG_AVX_SIZE, vma->vm_page_prot)) { + return -EIO; + } + return 0; + } + + // map user buffers + if(vma->vm_pgoff == MMAP_BUFF) { + dbg_info("fpga dev. 
%d, memory mapping buffer\n", d->id); + + // aligned page virtual address + vaddr = ((vma->vm_start + LARGE_PAGE_SIZE - 1) >> LARGE_PAGE_SHIFT) << LARGE_PAGE_SHIFT; + + if(d->curr_user_buff.n_pages != 0) { + + new_user_buff = kzalloc(sizeof(struct user_pages), GFP_KERNEL); + BUG_ON(!new_user_buff); + + // Map entry + new_user_buff->vaddr = vaddr; + new_user_buff->n_pages = d->curr_user_buff.n_pages; + new_user_buff->hpages = d->curr_user_buff.hpages; + new_user_buff->cpages = d->curr_user_buff.cpages; + + hash_add(user_lbuff_map[d->id], &new_user_buff->entry, vaddr); + + for(i = 0; i < d->curr_user_buff.n_pages; i++) { + // map to user space + if(remap_pfn_range(vma, vaddr, page_to_pfn(d->curr_user_buff.hpages[i]), + LARGE_PAGE_SIZE, vma->vm_page_prot)) { + return -EIO; + } + // tlb entry + ltlb_service_map(d, vaddr, page_to_phys(d->curr_user_buff.hpages[i]), d->curr_user_buff.cpages[i]); + + // next page vaddr + vaddr += LARGE_PAGE_SIZE; + } + + // Current host buff empty + d->curr_user_buff.n_pages = 0; + + return 0; + } + } + + // map PR buffers + if(vma->vm_pgoff == MMAP_PR) { + dbg_info("fpga dev. 
%d, memory mapping PR buffer\n", d->id); + + // aligned page virtual address + vaddr = ((vma->vm_start + LARGE_PAGE_SIZE - 1) >> LARGE_PAGE_SHIFT) << LARGE_PAGE_SHIFT; + + if(prc->curr_buff.n_pages != 0) { + + // obtain PR lock + spin_lock(&prc->lock); + + new_pr_buff = kzalloc(sizeof(struct pr_pages), GFP_KERNEL); + BUG_ON(!new_pr_buff); + + // Map entry + new_pr_buff->vaddr = vaddr; + new_pr_buff->reg_id = d->id; + new_pr_buff->n_pages = prc->curr_buff.n_pages; + new_pr_buff->pages = prc->curr_buff.pages; + + hash_add(pr_buff_map, &new_pr_buff->entry, vaddr); + + for(i = 0; i < prc->curr_buff.n_pages; i++) { + // map to user space + if(remap_pfn_range(vma, vaddr, page_to_pfn(prc->curr_buff.pages[i]), + LARGE_PAGE_SIZE, vma->vm_page_prot)) { + return -EIO; + } + // next page vaddr + vaddr += LARGE_PAGE_SIZE; + } + + // Current host buff empty + prc->curr_buff.n_pages = 0; + + // release PR lock + spin_unlock(&prc->lock); + + return 0; + } + + } + + return -EINVAL; +} + +/** + * IOCTL + */ +static long fpga_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int ret_val, i; + uint64_t n_pages; + uint64_t vaddr; + uint64_t len; + uint64_t qp[3]; + uint64_t stat; + + struct fpga_dev *d = (struct fpga_dev*)file->private_data; + struct pci_drvdata* pd; + + pd = d->pd; + + BUG_ON(!d); + BUG_ON(!pd); + + switch(cmd) { + case IOCTL_ALLOC_HOST_USER_MEM: + ret_val = copy_from_user(&n_pages, (unsigned long*)arg, sizeof(unsigned long)); + if(ret_val != 0) { + dbg_info("user data could not be coppied, return %d\n", ret_val); + } else { + ret_val = alloc_user_buffers(d, n_pages); + dbg_info("buff_num %lld, arg %lx\n", d->curr_user_buff.n_pages, arg); + if(ret_val != 0) { + pr_info("user buffers could not be allocated\n"); + } + } + break; + + case IOCTL_FREE_HOST_USER_MEM: + ret_val = copy_from_user(&vaddr, (unsigned long*)arg, sizeof(unsigned long)); + if(ret_val != 0) { + dbg_info("user data could not be coppied, return %d\n", ret_val); + } else { + ret_val = 
free_user_buffers(d, vaddr); + dbg_info("user buffers freed\n"); + } + break; + + case IOCTL_ALLOC_HOST_PR_MEM: + ret_val = copy_from_user(&n_pages, (unsigned long*)arg, sizeof(unsigned long)); + if(ret_val != 0) { + dbg_info("user data could not be coppied, return %d\n", ret_val); + } else { + ret_val = alloc_pr_buffers(d, n_pages); + dbg_info("buff_num %lld, arg %lx\n", d->prc->curr_buff.n_pages, arg); + if(ret_val != 0) { + pr_info("PR buffers could not be allocated\n"); + } + } + break; + + case IOCTL_FREE_HOST_PR_MEM: + ret_val = copy_from_user(&vaddr, (unsigned long*)arg, sizeof(unsigned long)); + if(ret_val != 0) { + dbg_info("user data could not be coppied, return %d\n", ret_val); + } else { + ret_val = free_pr_buffers(d, vaddr); + dbg_info("PR buffers freed\n"); + } + break; + + case IOCTL_RECONFIG_LOAD: + ret_val = copy_from_user(&vaddr, (unsigned long*)arg, sizeof(unsigned long)); + if(ret_val != 0) { + dbg_info("user data could not be coppied, return %d\n", ret_val); + return -1; + } + ret_val = copy_from_user(&len, (unsigned long*)arg+1, sizeof(unsigned long)); + if(ret_val != 0) { + dbg_info("user data could not be coppied, return %d\n", ret_val); + return -1; + } + + dbg_info("trying to obtain reconfig lock\n"); + spin_lock(&d->prc->lock); + if(pd->en_avx) + d->fpga_cnfg_avx->datapath_set[0] = 0x1; + else + d->fpga_cnfg->datapath_set = 0x1; + + ret_val = reconfigure(d, vaddr, len); + if(ret_val != 0) { + dbg_info("reconfiguration failed, return %d\n", ret_val); + return -1; + } else { + dbg_info("reconfiguration successfull\n"); + } + + dbg_info("releasing reconfig lock, coupling the design\n"); + if(pd->en_avx) + d->fpga_cnfg_avx->datapath_clr[0] = 0x1; + else + d->fpga_cnfg->datapath_clr = 0x1; + + spin_unlock(&d->prc->lock); + + break; + + case IOCTL_MAP_USER: + ret_val = copy_from_user(&vaddr, (unsigned long*)arg, sizeof(unsigned long)); + if(ret_val != 0) { + dbg_info("user data could not be coppied, return %d\n", ret_val); + return -1; + } + 
ret_val = copy_from_user(&len, (unsigned long*)arg+1, sizeof(unsigned long)); + if(ret_val != 0) { + dbg_info("user data could not be coppied, return %d\n", ret_val); + return -1; + } + tlb_get_user_pages(d, vaddr, len); + break; + + case IOCTL_UNMAP_USER: + dbg_info("unmapping user pages\n"); + tlb_put_user_pages(d, 1); + break; + + case IOCTL_ARP_LOOKUP: + if(pd->en_rdma) { + dbg_info("arp lookup..."); + spin_lock(&pd->stat_lock); + + for(i = 0; i < N_TOTAL_NODES; i++) { + if(i == NODE_ID) continue; + pd->fpga_stat_cnfg->rdma_arp = BASE_IP_ADDR + i; + } + + spin_unlock(&pd->stat_lock); + } else { + dbg_info("RDMA not enabled"); + } + break; + + case IOCTL_WRITE_CTX: + if(pd->en_rdma) { + dbg_info("writing qp context..."); + spin_lock(&pd->stat_lock); + + ret_val = copy_from_user(&qp, (unsigned long*)arg, 3 * sizeof(unsigned long)); + for(i = 0; i < 3; i++) { + pd->fpga_stat_cnfg->qp_ctx[i] = qp[i]; + } + + spin_unlock(&pd->stat_lock); + } else { + dbg_info("RDMA not enabled"); + } + break; + + case IOCTL_WRITE_CONN: + if(pd->en_rdma) { + dbg_info("writing qp connection..."); + spin_lock(&pd->stat_lock); + + ret_val = copy_from_user(&qp, (unsigned long*)arg, 3 * sizeof(unsigned long)); + for(i = 0; i < 3; i++) { + pd->fpga_stat_cnfg->qp_conn[i] = qp[i]; + } + + spin_unlock(&pd->stat_lock); + } else { + dbg_info("RDMA not enabled"); + } + break; + + case IOCTL_RDMA_STAT: + if(pd->en_rdma) { + dbg_info("retreiving RDMA status..."); + spin_lock(&pd->stat_lock); + + for(i = 0; i < N_RDMA_STAT_REGS; i++) { + stat = pd->fpga_stat_cnfg->rdma_debug[i]; + ret_val = copy_to_user((unsigned long*)arg+i, &stat, sizeof(unsigned long)); + } + + spin_unlock(&pd->stat_lock); + } else { + dbg_info("RDMA not enabled"); + } + break; + + case IOCTL_READ_ENG_STATUS: + dbg_info("fpga dev %d engine report\n", d->id); + engine_status_read(d->engine_c2h); + engine_status_read(d->engine_h2c); + break; + default: + break; + } + + return 0; +} + +/* File operations */ +struct file_operations 
fpga_fops = { + .owner = THIS_MODULE, + .open = fpga_open, + .release = fpga_release, + .mmap = fpga_mmap, + .unlocked_ioctl = fpga_ioctl, +}; + +/*___________.____ + __________ +\__ ___/| | \______ \ + | | | | | | _/ + | | | |___| | \ + |____| |_______ \______ / + \/ \/ + +*/ + +/** + * Enter new mapping into the TLB (large pages) + * @param vaddr - virtual address + * @param paddr_host - physical address host + * @param paddr_card - physical address card + */ +static void ltlb_service_map(struct fpga_dev *d, uint64_t vaddr, uint64_t paddr_host, uint64_t paddr_card) +{ + int i = 0; + uint64_t key, tag; + uint64_t old_entry; + uint64_t new_entry_host = 0; + uint64_t new_entry_card = 0; + struct pci_drvdata *pd; + + pd = d->pd; + + dbg_info("new lTLB entry, vaddr %llx, paddr host %llx, paddr card %llx\n", vaddr, paddr_host, paddr_card); + + key = (vaddr >> LARGE_PAGE_SHIFT) & LTLB_HASH_MASK; + tag = vaddr >> (LARGE_PAGE_SHIFT + LTLB_ORDER); + + // new entry + new_entry_host |= TLB_VALID_MASK | (tag << LTLB_PADDR_SIZE) | ((paddr_host >> LARGE_PAGE_SHIFT) & LTLB_PADDR_MASK); + new_entry_card |= TLB_VALID_MASK | TLB_CARD_MASK | (tag << LTLB_PADDR_SIZE) | ((paddr_card >> LARGE_PAGE_SHIFT) & LTLB_PADDR_MASK); + + // host mapping + while(i < LTLB_ASSOC_ENTRIES) { + old_entry = d->fpga_lTlb[key + i*LTLB_OFFS]; + + if(!(old_entry & TLB_VALID_MASK)) { + d->fpga_lTlb[key + i*LTLB_OFFS] = new_entry_host; + break; + } + i++; + } + + // eviction + if(i == LTLB_ASSOC_ENTRIES) { + i = 0; + d->fpga_lTlb[key + i*LTLB_OFFS] = new_entry_host; + dbg_info("lTLB eviction at %llx\n", key); + } + + // card mapping + if(pd->en_ddr) + d->fpga_lTlb[key + ((i+1)%LTLB_ASSOC_ENTRIES)*LTLB_OFFS] = new_entry_card; +} + +/** + * Enter new mapping into the TLB (small pages) + * @param vaddr - virtual address + * @param paddr_host - physical address host + * @param paddr_card - physical address card + */ +static void stlb_service_map(struct fpga_dev *d, uint64_t vaddr, uint64_t paddr_host, uint64_t 
paddr_card) +{ + int i = 0; + uint64_t key, tag; + uint64_t old_entry; + uint64_t new_entry_host = 0; + uint64_t new_entry_card = 0; + struct pci_drvdata *pd; + + pd = d->pd; + + dbg_info("new sTLB entry, vaddr %llx, paddr host %llx, paddr card %llx\n", vaddr, paddr_host, paddr_card); + + key = (vaddr >> PAGE_SHIFT) & STLB_HASH_MASK; + tag = vaddr >> (PAGE_SHIFT + STLB_ORDER); + + // new entry host + new_entry_host |= TLB_VALID_MASK | (tag << STLB_PADDR_SIZE) | ((paddr_host >> PAGE_SHIFT) & STLB_PADDR_MASK); + new_entry_card |= TLB_VALID_MASK | TLB_CARD_MASK | (tag << STLB_PADDR_SIZE) | ((paddr_card >> PAGE_SHIFT) & STLB_PADDR_MASK); + + // host mapping + while(i < STLB_ASSOC_ENTRIES) { + old_entry = d->fpga_sTlb[key + i*STLB_OFFS]; + + if(!(old_entry & TLB_VALID_MASK)) { + d->fpga_sTlb[key + i*STLB_OFFS] = new_entry_host; + break; + } + i++; + } + + // eviction + if(i == STLB_ASSOC_ENTRIES) { + i = 0; + d->fpga_lTlb[key + i*STLB_OFFS] = new_entry_host; + dbg_info("sTLB eviction at %llx\n", key); + } + + // card mapping + if(pd->en_ddr) + d->fpga_sTlb[key + ((i+1)%STLB_ASSOC_ENTRIES)*STLB_OFFS] = new_entry_card; +} + +/** + * Remove mapping from the TLB (large pages) + * @param vaddr - virtual address + */ +static void ltlb_service_unmap(struct fpga_dev *d, uint64_t vaddr) +{ + int i = 0; + uint64_t key, tag, tag_old; + uint64_t old_entry; + + key = (vaddr >> LARGE_PAGE_SHIFT) & LTLB_HASH_MASK; + tag = vaddr >> (LARGE_PAGE_SHIFT + LTLB_ORDER); + + // check entries at key + while(i < LTLB_ASSOC_ENTRIES) { + old_entry = d->fpga_lTlb[key + i*LTLB_OFFS]; + + if(old_entry & TLB_VALID_MASK) { + tag_old = (old_entry >> LTLB_PADDR_SIZE) & LTLB_TAG_MASK; + // invalidate the entry + if(tag_old == tag) { + dbg_info("remove lTLB entry %llx, vaddr %llx\n", old_entry, vaddr); + d->fpga_lTlb[key + i*LTLB_OFFS] = 0x0; + } + } + + i++; + } +} + +/** + * Remove mapping from the TLB (small pages) + * @param vaddr - virtual address + */ +static void stlb_service_unmap(struct fpga_dev 
*d, uint64_t vaddr) +{ + int i = 0; + uint64_t key, tag, tag_old; + uint64_t old_entry; + + key = (vaddr >> PAGE_SHIFT) & STLB_HASH_MASK; + tag = vaddr >> (PAGE_SHIFT + STLB_ORDER); + + // check entries at key + while(i < STLB_ASSOC_ENTRIES) { + old_entry = d->fpga_sTlb[key + i*STLB_OFFS]; + + if(old_entry & TLB_VALID_MASK) { + tag_old = (old_entry >> STLB_PADDR_SIZE) & STLB_TAG_MASK; + // invalidate the entry + if(tag_old == tag) { + dbg_info("remove sTLB entry, vaddr %llx\n", vaddr); + d->fpga_sTlb[key + i*STLB_OFFS] = 0x0; + } + } + + i++; + } +} + +/** + * Release user pages + */ +static int tlb_put_user_pages(struct fpga_dev *d, int dirtied) +{ + int i, bkt; + struct user_pages *tmp_buff; + uint64_t vaddr; + + hash_for_each(user_sbuff_map[d->id], bkt, tmp_buff, entry) { + // unmap from TLB + vaddr = tmp_buff->vaddr; + for(i = 0; i < tmp_buff->n_pages; i++) { + stlb_service_unmap(d, vaddr); + vaddr += PAGE_SIZE; + } + + // release host pages + if(dirtied) + for(i = 0; i < tmp_buff->n_pages; i++) + SetPageDirty(tmp_buff->hpages[i]); + + for(i = 0; i < tmp_buff->n_pages; i++) + put_page(tmp_buff->hpages[i]); + + kfree(tmp_buff->hpages); + + // release card pages + card_free(d, tmp_buff->cpages, tmp_buff->n_pages, SMALL_CHUNK_ALLOC); + + // remove from map + hash_del(&tmp_buff->entry); + } + + return 0; +} + +/** + * Get user pages and fill TLB + */ +static int tlb_get_user_pages(struct fpga_dev *d, uint64_t start, size_t count) +{ + int ret_val = 0, i; + int n_pages; + uint64_t first; + uint64_t last; + struct user_pages *user_pg; + + user_pg = kzalloc(sizeof(struct user_pages), GFP_KERNEL); + BUG_ON(!user_pg); + + first = (start & PAGE_MASK) >> PAGE_SHIFT; + last = ((start + count - 1) & PAGE_MASK) >> PAGE_SHIFT; + + n_pages = last - first + 1; + + if(n_pages > MAX_N_MAP_PAGES) + n_pages = MAX_N_MAP_PAGES; + + user_pg->hpages = kcalloc(n_pages, sizeof(*user_pg->hpages), GFP_KERNEL); + if (user_pg->hpages == NULL) { + return -1; + } + dbg_info("allocated %lu 
bytes for page pointer array for %d pages @0x%p.\n", + n_pages * sizeof(*user_pg->hpages), n_pages, user_pg->hpages); + + if (start + count < start) + return -EINVAL; + if (count == 0) + return 0; + + dbg_info("pages=0x%p\n", user_pg->hpages); + dbg_info("first = %llx, last = %llx\n", first, last); + + for (i = 0; i < n_pages - 1; i++) { + user_pg->hpages[i] = NULL; + } + + ret_val = get_user_pages_remote(d->curr_task, d->curr_mm, (unsigned long)start, n_pages, 1, user_pg->hpages, NULL, NULL); + dbg_info("get_user_pages_remote(%llx, n_pages = %d)\n", start, n_pages); + + if(ret_val < n_pages) { + dbg_info("could not get all user pages, %d\n", ret_val); + goto fail_host_unmap; + } + + for(i = 0; i < n_pages; i++) + flush_dcache_page(user_pg->hpages[i]); + + for(i = 0; i < n_pages; i++) + dbg_info("page %d at %lx\n", i, page_to_pfn(user_pg->hpages[i])); + + // add mapped entry + user_pg->vaddr = start; + user_pg->n_pages = n_pages; + + // card alloc + user_pg->cpages = kzalloc(user_pg->n_pages * sizeof(uint64_t), GFP_KERNEL); + if(user_pg->cpages == NULL) { + dbg_info("card buffer %d could not be allocated\n", i); + return -ENOMEM; + } + + ret_val = card_alloc(d, user_pg->cpages, n_pages, SMALL_CHUNK_ALLOC); + if(ret_val) { + dbg_info("could not get all card pages, %d\n", ret_val); + goto fail_card_unmap; + } + + hash_add(user_sbuff_map[d->id], &user_pg->entry, start); + + // map to TLB + for(i = 0; i < n_pages; i++) { + stlb_service_map(d, start, page_to_phys(user_pg->hpages[i]), user_pg->cpages[i]); + start += PAGE_SIZE; + } + + return n_pages; + +fail_host_unmap: + // release host pages + for(i = 0; i < ret_val; i++) { + put_page(user_pg->hpages[i]); + } + + kfree(user_pg->hpages); + + return -ENOMEM; + +fail_card_unmap: + // release host pages + for(i = 0; i < user_pg->n_pages; i++) { + put_page(user_pg->hpages[i]); + } + + kfree(user_pg->hpages); + kfree(user_pg->cpages); + + return -ENOMEM; +} + +/* + ___ ____ ____ +|_ _/ ___|| _ \ + | |\___ \| |_) | + | | ___) 
| _ < +|___|____/|_| \_\ +*/ + +/** + * TLB page fault handling + */ +static irqreturn_t fpga_tlb_miss_isr(int irq, void *dev_id) +{ + unsigned long flags; + uint64_t vaddr; + uint32_t len; + struct fpga_dev *d; + struct pci_drvdata *pd; + int ret_val = 0; + + dbg_info("(irq=%d) page fault ISR\n", irq); + BUG_ON(!dev_id); + + d = (struct fpga_dev*) dev_id; + + pd = d->pd; + + // lock + spin_lock_irqsave(&(d->lock), flags); + + // read page fault + if(pd->en_avx) { + vaddr = d->fpga_cnfg_avx->vaddr_miss; + len = d->fpga_cnfg_avx->len_miss; + } else { + vaddr = d->fpga_cnfg->vaddr_miss; + len = d->fpga_cnfg->len_miss; + } + dbg_info("page fault, vaddr %llx, length %x\n", vaddr, len); + + // get user pages + ret_val = tlb_get_user_pages(d, vaddr, len); + + if(ret_val > 0) { + // restart the engine + if(pd->en_avx) + d->fpga_cnfg_avx->ctrl[0] = FPGA_CNFG_CTRL_IRQ_RESTART; + else + d->fpga_cnfg->ctrl = FPGA_CNFG_CTRL_IRQ_RESTART; + } else { + dbg_info("pages could not be obtained\n"); + } + + // unlock + spin_unlock_irqrestore(&(d->lock), flags); + + return IRQ_HANDLED; +} + +/* + ___ _ _ +|_ _|_ __ | |_ ___ _ __ _ __ _ _ _ __ | |_ ___ + | || '_ \| __/ _ \ '__| '__| | | | '_ \| __/ __| + | || | | | || __/ | | | | |_| | |_) | |_\__ \ +|___|_| |_|\__\___|_| |_| \__,_| .__/ \__|___/ + |_| +*/ + +static void user_interrupts_enable(struct pci_drvdata *d, uint32_t mask) +{ + struct interrupt_regs *reg = (struct interrupt_regs*) + (d->bar[BAR_XDMA_CONFIG] + XDMA_OFS_INT_CTRL); + + iowrite32(mask, ®->user_int_enable_w1s); +} + +static void user_interrupts_disable(struct pci_drvdata *d, uint32_t mask) +{ + struct interrupt_regs *reg = (struct interrupt_regs*) + (d->bar[BAR_XDMA_CONFIG] + XDMA_OFS_INT_CTRL); + + iowrite32(mask, ®->user_int_enable_w1c); +} + +/** + * Read interrupt status + */ +static uint32_t read_interrupts(struct pci_drvdata *d) { + struct interrupt_regs *reg = (struct interrupt_regs*) (d->bar[BAR_XDMA_CONFIG] + XDMA_OFS_INT_CTRL); + uint32_t lo, hi; + + // 
interrupt check + hi = ioread32(®->user_int_request); + printk(KERN_INFO "ioread32(0x%p) returned 0x%08x (user_int_request).\n", + ®->user_int_request, hi); + lo = ioread32(®->channel_int_request); + printk(KERN_INFO + "ioread32(0x%p) returned 0x%08x (channel_int_request)\n", + ®->channel_int_request, lo); + + // return interrupts: user in upper 16-bits, channel in lower 16-bits + return build_u32(hi, lo); +} + +static uint32_t build_vector_reg(uint32_t a, uint32_t b, uint32_t c, uint32_t d) +{ + uint32_t reg_val = 0; + + reg_val |= (a & 0x1f) << 0; + reg_val |= (b & 0x1f) << 8; + reg_val |= (c & 0x1f) << 16; + reg_val |= (d & 0x1f) << 24; + + return reg_val; +} + +/** + * Write MSI-X vectors + */ +static void write_msix_vectors(struct pci_drvdata *d) +{ + struct interrupt_regs *int_regs; + uint32_t reg_val = 0; + + BUG_ON(!d); + + int_regs = (struct interrupt_regs *) + (d->bar[0] + XDMA_OFS_INT_CTRL); + + // user MSI-X + reg_val = build_vector_reg(0, 1, 2, 3); + iowrite32(reg_val, &int_regs->user_msi_vector[0]); + + reg_val = build_vector_reg(4, 5, 6, 7); + iowrite32(reg_val, &int_regs->user_msi_vector[1]); + + reg_val = build_vector_reg(8, 9, 10, 11); + iowrite32(reg_val, &int_regs->user_msi_vector[2]); + + reg_val = build_vector_reg(12, 13, 14, 15); + iowrite32(reg_val, &int_regs->user_msi_vector[3]); + + // channel MSI-X + reg_val = build_vector_reg(16, 17, 18, 19); + iowrite32(reg_val, &int_regs->channel_msi_vector[0]); + + reg_val = build_vector_reg(20, 21, 22, 23); + iowrite32(reg_val, &int_regs->channel_msi_vector[1]); +} + +/** + * Remove user IRQs + */ +static void irq_teardown(struct pci_drvdata *d) { + int i; + + if(d->msix_enabled) { + for(i = 0; i < d->n_fpga_tot_reg; i++) { + pr_info("releasing IRQ%d\n", d->irq_entry[i].vector); + free_irq(d->irq_entry[i].vector, &d->fpga_dev[i]); + } + } else if(d->irq_line != -1) { + pr_info("releasing IRQ%d\n", d->irq_line); + free_irq(d->irq_line, d); + } +} + +/** + * Setup user MSI-X + */ +static int 
msix_irq_setup(struct pci_drvdata *d) +{ + int i; + int ret_val; + + BUG_ON(!d); + + write_msix_vectors(d); + + for (i = 0; i < d->n_fpga_tot_reg; i++) { + ret_val = request_irq(d->irq_entry[i].vector, fpga_tlb_miss_isr, 0, + DRV_NAME, &d->fpga_dev[i]); + + if (ret_val) { + pr_info("couldn't use IRQ#%d, ret=%d\n", d->irq_entry[i].vector, ret_val); + break; + } + + pr_info("using IRQ#%d with %d\n", d->irq_entry[i].vector, d->fpga_dev[i].id); + } + + // unwind + if (ret_val) { + while (--i >= 0) + free_irq(d->irq_entry[i].vector, &d->fpga_dev[i]); + } + + return ret_val; +} + +/** + * Setup user IRQs + */ +static int irq_setup(struct pci_drvdata *d, struct pci_dev *pdev) +{ + int ret_val = 0; + + if(d->msix_enabled) { + ret_val = msix_irq_setup(d); + } + + return ret_val; +} + +/** + * Check whether support for MSI-X exists + */ +static int msix_capable(struct pci_dev *pdev, int type) +{ + struct pci_bus *bus; + + BUG_ON(!pdev); + + if(pdev->no_msi) + return 0; + + for(bus = pdev->bus; bus; bus = bus->parent) + if(bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) + return 0; + + if(!pci_find_capability(pdev, type)) + return 0; + + return 1; +} + +/** + * Check whether MSI-X is present + */ +static int pci_check_msix(struct pci_drvdata *d, struct pci_dev *pdev) +{ + int ret_val = 0, i; + int req_nvec = MAX_NUM_ENGINES + MAX_USER_IRQS; + + BUG_ON(!d); + BUG_ON(!pdev); + + if(msix_capable(pdev, PCI_CAP_ID_MSIX)) { + pr_info("enabling MSI-X\n"); + + for(i = 0; i < req_nvec; i++) + d->irq_entry[i].entry = i; + + ret_val = pci_enable_msix_range(pdev, d->irq_entry, 0, req_nvec); + if(ret_val < 0) + pr_info("could not enable MSI-X mode, ret %d\n", ret_val); + else + pr_info("obtained %d MSI-X irqs\n", ret_val); + + d->msix_enabled = 1; + } else { + pr_info("MSI-X not present, forcing polling mode\n"); + ret_val = -1; + d->msix_enabled = 0; + } + + if(ret_val < 0) + return ret_val; + else + return 0; +} + +/* + _____ _ _ +| ____|_ __ __ _(_)_ __ ___ ___ ___| |_ _ _ _ __ +| _| | '_ \ / 
_` | | '_ \ / _ \ / __|/ _ \ __| | | | '_ \ +| |___| | | | (_| | | | | | __/ \__ \ __/ |_| |_| | |_) | +|_____|_| |_|\__, |_|_| |_|\___| |___/\___|\__|\__,_| .__/ + |___/ |_| +*/ + +static uint32_t get_engine_channel_id(struct engine_regs *regs) +{ + uint32_t val; + + BUG_ON(!regs); + + val = ioread32(®s->id); + return (val & 0x00000f00U) >> 8; +} + +static uint32_t get_engine_id(struct engine_regs *regs) +{ + uint32_t val; + + BUG_ON(!regs); + + val = ioread32(®s->id); + return (val & 0xffff0000U) >> 16; +} + +/** + * Free writeback memory + */ +static void engine_writeback_teardown(struct pci_drvdata *d, struct xdma_engine *engine) +{ + BUG_ON(!d); + BUG_ON(!engine); + + if (engine->poll_mode_addr_virt) { + pci_free_consistent(d->pci_dev, sizeof(struct xdma_poll_wb), + engine->poll_mode_addr_virt, engine->poll_mode_phys_addr); + pr_info("released memory for descriptor writeback\n"); + } +} + +/** + * Remove single engine + */ +static void engine_destroy(struct pci_drvdata *d, struct xdma_engine *engine) +{ + BUG_ON(!d); + BUG_ON(!engine); + + pr_info("shutting off engine %s%d\n", engine->name, engine->channel); + iowrite32(0x0, &engine->regs->ctrl); + engine->running = 0; + + engine_writeback_teardown(d, engine); + + kfree(engine); + + d->engines_num--; +} + +/** + * Remove all present engines + */ +static void remove_engines(struct pci_drvdata *d) +{ + int i; + struct xdma_engine *engine; + + BUG_ON(!d); + + for(i = 0; i < d->n_fpga_chan; i++) { + engine = d->fpga_dev[i*d->n_fpga_reg].engine_h2c; + if(engine) { + pr_info("remove %s%d\n", engine->name, engine->channel); + engine_destroy(d, engine); + } + + engine = d->fpga_dev[i*d->n_fpga_reg].engine_c2h; + if(engine) { + pr_info("remove %s%d\n", engine->name, engine->channel); + engine_destroy(d, engine); + } + } + + if(d->pr_flow) { + engine = d->prc.engine_h2c; + if(engine) { + pr_info("remove %s%d\n", engine->name, engine->channel); + engine_destroy(d, engine); + } + + engine = d->prc.engine_c2h; + if(engine) 
{ + pr_info("remove %s%d\n", engine->name, engine->channel); + engine_destroy(d, engine); + } + } +} + +/** + * Check engine alignments + */ +static void engine_alignments(struct xdma_engine *engine) +{ + uint32_t val; + uint32_t align_bytes; + uint32_t granularity_bytes; + uint32_t address_bits; + + val = ioread32(&engine->regs->alignments); + pr_info("engine %p name %s alignments=0x%08x\n", engine, + engine->name, (int)val); + + align_bytes = (val & 0x00ff0000U) >> 16; + granularity_bytes = (val & 0x0000ff00U) >> 8; + address_bits = (val & 0x000000ffU); + + if (val) { + engine->addr_align = align_bytes; + engine->len_granularity = granularity_bytes; + engine->addr_bits = address_bits; + } else { + // Default + engine->addr_align = 1; + engine->len_granularity = 1; + engine->addr_bits = 64; + } +} + +static int engine_writeback_setup(struct pci_drvdata *d, struct xdma_engine *engine) +{ + uint32_t w; + struct xdma_poll_wb *writeback; + + BUG_ON(!d); + BUG_ON(!engine); + + /* Set up address for polled mode writeback */ + pr_info("allocating memory for descriptor writeback for %s%d", + engine->name, engine->channel); + engine->poll_mode_addr_virt = pci_alloc_consistent(d->pci_dev, + sizeof(struct xdma_poll_wb), &engine->poll_mode_phys_addr); + if (!engine->poll_mode_addr_virt) { + pr_err("engine %p (%s) couldn't allocate writeback\n", engine, + engine->name); + return -1; + } + pr_info("allocated memory for descriptor writeback for %s%d", + engine->name, engine->channel); + + writeback = (struct xdma_poll_wb *)engine->poll_mode_addr_virt; + writeback->completed_desc_count = 0; + + pr_info("setting writeback location to 0x%llx for engine %p", + engine->poll_mode_phys_addr, engine); + w = cpu_to_le32(PCI_DMA_L(engine->poll_mode_phys_addr)); + iowrite32(w, &engine->regs->poll_mode_wb_lo); + w = cpu_to_le32(PCI_DMA_H(engine->poll_mode_phys_addr)); + iowrite32(w, &engine->regs->poll_mode_wb_hi); + + return 0; +} + +/** + * Create c2h or h2c vFPGA engine + * @param offs - 
engine config register offset + * @param c2h - engine direction + * @param channel - engine channel + * @return created engine structure + */ +static struct xdma_engine *engine_create(struct pci_drvdata *d, int offs, int c2h, int channel) +{ + struct xdma_engine *engine; + uint32_t reg_val = 0, ret_val = 0; + + // allocate memory for engine struct + engine = kzalloc(sizeof(struct xdma_engine), GFP_KERNEL); + if(!engine) + return NULL; + + // info + engine->channel = channel; + engine->name = c2h ? "c2h" : "h2c"; + + // associate devices + engine->pd = d; + + // direction + engine->c2h = c2h; + + // registers + engine->regs = (d->bar[BAR_XDMA_CONFIG] + offs); + engine->sgdma_regs = (d->bar[BAR_XDMA_CONFIG] + offs + SGDMA_OFFSET_FROM_CHANNEL); + + // Incremental mode + iowrite32(!XDMA_CTRL_NON_INCR_ADDR, &engine->regs->ctrl_w1c); + + // alignments + engine_alignments(engine); + + // writeback + ret_val = engine_writeback_setup(d, engine); + if (ret_val) { + pr_info("Descriptor writeback setup failed for %p, channel %d\n", engine, engine->channel); + return NULL; + } + + // start engine + reg_val |= XDMA_CTRL_POLL_MODE_WB; + reg_val |= XDMA_CTRL_IE_DESC_STOPPED; + reg_val |= XDMA_CTRL_IE_DESC_COMPLETED; + reg_val |= XDMA_CTRL_RUN_STOP; + engine->running = 0; + + iowrite32(reg_val, &engine->regs->ctrl); + reg_val = ioread32(&engine->regs->status); + dbg_info("ioread32(0x%p) = 0x%08x (dummy read flushes writes).\n", &engine->regs->status, reg_val); + + return engine; +} + +/** + * Probes a single c2h or h2c engine + * @param c2h - engine direction + * @param channel - engine channel + */ +static int probe_for_engine(struct pci_drvdata *d, int c2h, int channel) +{ + int offs, i; + struct engine_regs *regs; + uint32_t engine_id, engine_id_expected, channel_id; + struct xdma_engine *tmp_engine; + + offs = (c2h * C2H_CHAN_OFFS) + (channel * CHAN_RANGE); + regs = d->bar[BAR_XDMA_CONFIG] + offs; + + if(c2h) { // c2h + pr_info("probing for c2h engine %d at %p\n", channel, 
regs); + engine_id_expected = XDMA_ID_C2H; + } else { // h2c + pr_info("probing for h2c engine %d at %p\n", channel, regs); + engine_id_expected = XDMA_ID_H2C; + } + + engine_id = get_engine_id(regs); + channel_id = get_engine_channel_id(regs); + pr_info("engine ID = 0x%x, channel ID = %d\n", engine_id, channel_id); + + if(engine_id != engine_id_expected) { + pr_info("incorrect engine ID - skipping\n"); + return 0; + } + + if(channel_id != channel) { + pr_info("expected channel ID %d, read %d\n", channel, channel_id); + return 0; + } + + // init engine + if(channel == d->n_fpga_chan && d->pr_flow) { + if(c2h) { // c2h + pr_info("found PR c2h %d engine at %p\n", channel, regs); + d->prc.engine_c2h = engine_create(d, offs, c2h, channel); + if(!d->prc.engine_c2h) { + pr_err("error creating channel engine\n"); + return -1; + } + pr_info("engine channel %d assigned to PR", channel); + d->engines_num++; + } else { // h2c + pr_info("found PR h2c %d engine at %p\n", channel, regs); + d->prc.engine_h2c = engine_create(d, offs, c2h, channel); + if(!d->prc.engine_h2c) { + pr_err("error creating channel engine\n"); + return -1; + } + pr_info("engine channel %d assigned to PR", channel); + d->engines_num++; + } + } else { + if(c2h) { // c2h + pr_info("found vFPGA c2h %d engine at %p\n", channel, regs); + tmp_engine = engine_create(d, offs, c2h, channel); + if(!tmp_engine) { + pr_err("error creating channel engine\n"); + return -1; + } + for(i = 0; i < d->n_fpga_reg; i++) { + d->fpga_dev[channel*d->n_fpga_reg+i].engine_h2c = tmp_engine; + pr_info("engine channel %d assigned to vFPGA %d", channel, d->fpga_dev[channel*d->n_fpga_reg+i].id); + } + d->engines_num++; + } else { // h2c + pr_info("found vFPGA h2c %d engine at %p\n", channel, regs); + tmp_engine = engine_create(d, offs, c2h, channel); + if(!tmp_engine) { + pr_err("error creating channel engine\n"); + return -1; + } + for(i = 0; i < d->n_fpga_reg; i++) { + d->fpga_dev[channel*d->n_fpga_reg+i].engine_c2h = tmp_engine; + 
pr_info("engine channel %d assigned to vFPGA %d", channel, d->fpga_dev[channel*d->n_fpga_reg+i].id); + } + d->engines_num++; + } + } + + return 0; +} + +/** + * Probe c2h and h2c engines + */ +static int probe_engines(struct pci_drvdata *d) +{ + int ret_val = 0; + int channel; + + BUG_ON(!d); + + // probe for vFPGA h2c engines + for(channel = 0; channel < MAX_NUM_CHANNELS; channel++) { + ret_val = probe_for_engine(d, 0, channel); // h2c + if(ret_val) goto fail; + } + + // probe for vFPGA c2h engines + for(channel = 0; channel < MAX_NUM_CHANNELS; channel++) { + ret_val = probe_for_engine(d, 1, channel); // c2h + if(ret_val) goto fail; + } + + if(d->pr_flow) { + if(d->engines_num < 2 * (d->n_fpga_chan + 1)) { + pr_info("failed to detect all required c2h or h2c engines\n"); + return -ENODEV; + } + } else { + if(d->engines_num < 2 * d->n_fpga_chan) { + pr_info("failed to detect all required c2h or h2c engines\n"); + return -ENODEV; + } + } + + pr_info("found %d engines\n", d->engines_num); + + goto success; +fail: + pr_err("engine probing failed - unwinding\n"); + remove_engines(d); +success: + return ret_val; +} + +/* + ____ _ ____ _ +| __ ) / \ | _ \ _ __ ___ __ _ _ __ _ __ (_)_ __ __ _ +| _ \ / _ \ | |_) | | '_ ` _ \ / _` | '_ \| '_ \| | '_ \ / _` | +| |_) / ___ \| _ < | | | | | | (_| | |_) | |_) | | | | | (_| | +|____/_/ \_\_| \_\ |_| |_| |_|\__,_| .__/| .__/|_|_| |_|\__, | + |_| |_| |___/ +*/ + +/** + * Map a single BAR + * @param idx - BAR index + * @param curr_idx - current BAR mapping + */ +static int map_single_bar(struct pci_drvdata *d, struct pci_dev *pdev, int idx, int curr_idx) +{ + resource_size_t bar_start, bar_len, map_len; + + bar_start = pci_resource_start(pdev, idx); + bar_len = pci_resource_len(pdev, idx); + map_len = bar_len; + + d->bar[curr_idx] = NULL; + + if(!bar_len) { + pr_info("BAR #%d is not present\n", idx); + return 0; + } + + if(bar_len > INT_MAX) { + pr_info("BAR %d limited from %llu to %d bytes\n", idx, (u64)bar_len, INT_MAX); + map_len 
= (resource_size_t)INT_MAX; + } + + pr_info("mapping BAR %d, %llu bytes to be mapped", idx, (u64)map_len); + d->bar[curr_idx] = pci_iomap(pdev, idx, map_len); + + if(!d->bar[curr_idx]) { + dev_err(&pdev->dev, "could not map BAR %d\n", idx); + return -1; + } + + pr_info("BAR%d at 0x%llx mapped at 0x%p, length=%llu, (%llu)\n", + idx, (u64)bar_start, d->bar[curr_idx], (u64)map_len, (u64)bar_len); + + d->bar_phys_addr[curr_idx] = bar_start; + d->bar_len[curr_idx] = map_len; + + return (int)map_len; +} + +/** + * Unmap mapped bars + */ +static void unmap_bars(struct pci_drvdata *d, struct pci_dev *pdev) +{ + int i; + + for(i = 0; i < MAX_NUM_BARS; i++) { + if(d->bar[i]) { + pci_iounmap(pdev, d->bar[i]); + d->bar[i] = NULL; + pr_info("BAR%d unmapped\n", i); + } + } +} + +/** + * Mapping of the bars + */ +static int map_bars(struct pci_drvdata *d, struct pci_dev *pdev) +{ + int ret_val; + int i; + int curr_idx = 0; + + for (i = 0; i < MAX_NUM_BARS; ++i) + { + int bar_len = map_single_bar(d, pdev, i, curr_idx); + if (bar_len == 0) { + continue; + } else if(bar_len < 0) { + ret_val = -1; + goto fail; + } + curr_idx++; + } + goto success; +fail: + pr_err("mapping of the bars failed\n"); + unmap_bars(d, pdev); + return ret_val; +success: + return 0; +} + +/* + ____ _ +| _ \ ___ __ _(_) ___ _ __ ___ +| |_) / _ \/ _` | |/ _ \| '_ \/ __| +| _ < __/ (_| | | (_) | | | \__ \ +|_| \_\___|\__, |_|\___/|_| |_|___/ + |___/ +*/ + +static int request_regions(struct pci_drvdata *d, struct pci_dev *pdev) +{ + int ret_val; + + BUG_ON(!d); + BUG_ON(!pdev); + + pr_info("pci request regions\n"); + ret_val = pci_request_regions(pdev, DRV_NAME); + if (ret_val) { + pr_info("device in use, return %d\n", ret_val); + d->got_regions = 0; + d->regions_in_use = 1; + } else { + d->got_regions = 1; + d->regions_in_use = 0; + } + + return ret_val; +} + +/* + ____ _ +| _ \ _ __ ___ | |__ ___ +| |_) | '__/ _ \| '_ \ / _ \ +| __/| | | (_) | |_) | __/su +|_| |_| \___/|_.__/ \___| +*/ + +/** + * PCI device 
probe function + */ +static int pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + int ret_val = 0, i, j; + struct pci_drvdata *d = NULL; + + // dynamic major + dev_t dev = MKDEV(fpga_major, 0); + int devno; + + // entering probe + pr_info("probe (pdev = 0x%p, pci_id = 0x%p)\n", pdev, id); + + // allocate mem. for device instance + d = devm_kzalloc(&pdev->dev, sizeof(struct pci_drvdata), GFP_KERNEL); + if(!d) { + dev_err(&pdev->dev, "device memory region not obtained\n"); + goto err_alloc; + } + // set device private data + d->pci_dev = pdev; + dev_set_drvdata(&pdev->dev, d); + + // enable PCIe device + ret_val = pci_enable_device(pdev); + if(ret_val) { + dev_err(&pdev->dev, "pci device could not be enabled\n"); + goto err_enable; + } + pr_info("pci device node %p enabled\n", &pdev->dev); + + // enable bus master capability + pci_set_master(pdev); + pr_info("pci bus master capability enabled\n"); + + // check IRQ + ret_val = pci_check_msix(d, pdev); + if(ret_val < 0) { + dev_err(&pdev->dev, "pci IRQ error\n"); + goto err_irq_en; + } + + // request PCI regions + ret_val = request_regions(d, pdev); + if(ret_val) { + dev_err(&pdev->dev, "pci regions could not be obtained\n"); + goto err_regions; + } + pr_info("pci regions obtained\n"); + + // BAR mapping + ret_val = map_bars(d, pdev); + if(ret_val) { + dev_err(&pdev->dev, "mapping of the BARs failed\n"); + goto err_map; + } + + // DMA addressing + pr_info("sizeof(dma_addr_t) == %ld\n", sizeof(dma_addr_t)); + ret_val = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if(ret_val) { + dev_err(&pdev->dev, "failed to set 64b DMA mask\n"); + goto err_mask; + } + + // get static config + d->fpga_stat_cnfg = ioremap(d->bar_phys_addr[BAR_FPGA_CONFIG] + FPGA_STAT_CNFG_OFFS, FPGA_STAT_CNFG_SIZE); + + d->n_fpga_chan = d->fpga_stat_cnfg->n_chan; + d->n_fpga_reg = d->fpga_stat_cnfg->n_regions; + d->n_fpga_tot_reg = d->n_fpga_reg;//d->n_fpga_chan * d->n_fpga_reg; + pr_info("detected %d virtual FPGA 
regions, %d channels\n", d->n_fpga_tot_reg, d->n_fpga_chan); + + d->en_avx = d->fpga_stat_cnfg->ctrl_cnfg & EN_AVX_MASK; + d->en_bypass = (d->fpga_stat_cnfg->ctrl_cnfg & EN_BYPASS_MASK) >> 1; + pr_info("enabled AVX %d, enabled bypass %d\n", d->en_avx, d->en_bypass); + + d->on_board = d->fpga_stat_cnfg->on_board; + d->en_ddr = d->on_board & EN_DDR_MASK; + d->n_ddr_chan = (d->on_board & N_DDR_CHAN_MASK) >> 1; + pr_info("enabled DDR %d, number of channels %d\n", d->en_ddr, d->n_ddr_chan); + + d->pr_flow = d->fpga_stat_cnfg->pr; + pr_info("PR flow %d\n", d->pr_flow); + + d->en_rdma = d->fpga_stat_cnfg->rdma; + pr_info("RDMA config %08x\n", d->en_rdma); + + // Network board setup + if(d->en_rdma) { + d->fpga_stat_cnfg->rdma_ip = BASE_IP_ADDR + NODE_ID; + d->fpga_stat_cnfg->rdma_boardnum = NODE_ID; + } + + // Init chunks + d->num_free_lchunks = N_LARGE_CHUNKS; + d->num_free_schunks = N_SMALL_CHUNKS; + for(i = 0; i < N_LARGE_CHUNKS-1; i++) { + d->lchunks[i].id = i; + d->lchunks[i].next = &d->lchunks[i+1]; + } + for(i = 0; i < N_SMALL_CHUNKS-1; i++) { + d->schunks[i].id = i; + d->schunks[i].next = &d->schunks[i+1]; + } + d->lalloc = &d->lchunks[0]; + d->salloc = &d->schunks[0]; + + // initialize spinlocks + spin_lock_init(&d->card_l_lock); + spin_lock_init(&d->card_s_lock); + spin_lock_init(&d->prc.lock); + spin_lock_init(&d->stat_lock); + + // create FPGA devices + // register major + ret_val = alloc_chrdev_region(&dev, 0, d->n_fpga_tot_reg, DEV_NAME); + fpga_major = MAJOR(dev); + if(ret_val) { + dev_err(&pdev->dev, "failed to register virtual FPGA devices"); + goto err_char_alloc; + } + pr_info("virtual FPGA device regions allocated, major number %d\n", fpga_major); + + // create device class + fpga_class = class_create(THIS_MODULE, DEV_NAME); + + // virtual FPGA devices + d->fpga_dev = kmalloc(d->n_fpga_tot_reg * sizeof(struct fpga_dev), GFP_KERNEL); + if(!d->fpga_dev) { + ret_val = -ENOMEM; + dev_err(&pdev->dev, "could not allocate memory for fpga devices\n"); + goto 
err_char_mem; + } + memset(d->fpga_dev, 0, d->n_fpga_tot_reg * sizeof (struct fpga_dev)); + pr_info("allocated memory for fpga devices\n"); + + for(i = 0; i < d->n_fpga_tot_reg; i++) { + // ID + d->fpga_dev[i].id = i; + d->fpga_dev[i].chan_id = i / d->n_fpga_chan; + + // PCI device + d->fpga_dev[i].pd = d; + d->fpga_dev[i].prc = &d->prc; + + // physical + d->fpga_dev[i].fpga_phys_addr_ctrl = d->bar_phys_addr[BAR_FPGA_CONFIG] + FPGA_CTRL_OFFS + i*FPGA_CTRL_SIZE; + d->fpga_dev[i].fpga_phys_addr_ctrl_avx = d->bar_phys_addr[BAR_FPGA_CONFIG] + FPGA_CTRL_CNFG_AVX_OFFS + i*FPGA_CTRL_CNFG_AVX_SIZE; + + // MMU control region + d->fpga_dev[i].fpga_lTlb = ioremap(d->fpga_dev[i].fpga_phys_addr_ctrl + FPGA_CTRL_LTLB_OFFS, FPGA_CTRL_LTLB_SIZE); + d->fpga_dev[i].fpga_sTlb = ioremap(d->fpga_dev[i].fpga_phys_addr_ctrl + FPGA_CTRL_STLB_OFFS, FPGA_CTRL_STLB_SIZE); + + // FPGA engine control + d->fpga_dev[i].fpga_cnfg = ioremap(d->fpga_dev[i].fpga_phys_addr_ctrl + FPGA_CTRL_CNFG_OFFS, FPGA_CTRL_CNFG_SIZE); + + // FPGA engine control AVX + d->fpga_dev[i].fpga_cnfg_avx = ioremap(d->fpga_dev[i].fpga_phys_addr_ctrl_avx, FPGA_CTRL_CNFG_AVX_SIZE); + + // initialize device spinlock + spin_lock_init(&d->fpga_dev[i].lock); + + // create device + devno = MKDEV(fpga_major, i); + device_create(fpga_class, NULL, devno, NULL, DEV_NAME "%d", i); + pr_info("virtual FPGA device %d created\n", i); + + // add device + cdev_init(&d->fpga_dev[i].cdev, &fpga_fops); + d->fpga_dev[i].cdev.owner = THIS_MODULE; + d->fpga_dev[i].cdev.ops = &fpga_fops; + + // Init hash + hash_init(user_lbuff_map[i]); + hash_init(user_sbuff_map[i]); + + ret_val = cdev_add(&d->fpga_dev[i].cdev, devno, 1); + if(ret_val) { + dev_err(&pdev->dev, "could not create a virtual FPGA device %d\n", i); + goto err_char_reg; + } + } + pr_info("all virtual FPGA devices added\n"); + + // Init hash + hash_init(pr_buff_map); + + // probe DMA engines + ret_val = probe_engines(d); + if(ret_val) { + dev_err(&pdev->dev, "error whilst probing DMA 
engines\n"); + goto err_engines; + } + + // user IRQs + ret_val = irq_setup(d, pdev); + if(ret_val) { + dev_err(&pdev->dev, "IRQ setup error\n"); + goto err_irq; + } + + // enable interrupts + user_interrupts_enable(d, ~0); + + // flush writes + read_interrupts(d); + + if(ret_val == 0) + goto end; + +err_irq: + remove_engines(d); +err_engines: +err_char_reg: + for(j = 0; j < i; j++) { + device_destroy(fpga_class, MKDEV(fpga_major, j)); + cdev_del(&d->fpga_dev[j].cdev); + } + kfree(d->fpga_dev); + class_destroy(fpga_class); +err_char_mem: + unregister_chrdev_region(dev, d->n_fpga_tot_reg); +err_char_alloc: +err_mask: + unmap_bars(d, pdev); +err_map: + if(d->got_regions) pci_release_regions(pdev); +err_regions: + if(d->msix_enabled) { + pci_disable_msix(pdev); + pr_info("MSI-X disabled\n"); + } +err_irq_en: + if(!d->regions_in_use) pci_disable_device(pdev); +err_enable: + kfree(d); +err_alloc: +end: + pr_info("probe returning %d\n", ret_val); + return ret_val; +} + +/** + * Removal of the PCI device + */ +static void pci_remove(struct pci_dev *pdev) +{ + struct pci_drvdata *d; + int i; + + d = (struct pci_drvdata*) dev_get_drvdata(&pdev->dev); + + // disable FPGA interrupts + user_interrupts_disable(d, ~0); + pr_info("interrupts disabled\n"); + + // remove IRQ + irq_teardown(d); + pr_info("IRQ teardown\n"); + + // engine removal + remove_engines(d); + pr_info("engines removed\n"); + + // delete char devices + for (i = 0; i < d->n_fpga_tot_reg; i++) { + device_destroy(fpga_class, MKDEV(fpga_major, i)); + cdev_del(&d->fpga_dev[i].cdev); + } + pr_info("char devices deleted\n"); + + // free virtual FPGA memory + kfree(d->fpga_dev); + pr_info("virtual FPGA device memory freed\n"); + + // remove class + class_destroy(fpga_class); + pr_info("fpga class deleted\n"); + + // remove char devices + unregister_chrdev_region(MKDEV (fpga_major, 0), d->n_fpga_tot_reg); + pr_info("char devices unregistered\n"); + + // unmap BARs + unmap_bars(d, pdev); + pr_info("BARs unmapped\n"); + 
+ // release regions + if(d->got_regions) pci_release_regions(pdev); + pr_info("pci regions released\n"); + + // disable interrupts + if(d->msix_enabled) { + pci_disable_msix(pdev); + pr_info("MSI-X disabled\n"); + } + + // disable device + if(!d->regions_in_use) pci_disable_device(pdev); + pr_info("pci device disabled\n"); + + // free device data + devm_kfree(&pdev->dev, d); + pr_info("device memory freed\n"); +} + +/* + ____ _ +| _ \ _ __(_)_ _____ _ __ +| | | | '__| \ \ / / _ \ '__| +| |_| | | | |\ V / __/ | +|____/|_| |_| \_/ \___|_| +*/ + +static const struct pci_device_id pci_ids[] = { + { PCI_DEVICE(0x10ee, 0x9011), }, + { PCI_DEVICE(0x10ee, 0x9012), }, + { PCI_DEVICE(0x10ee, 0x9014), }, + { PCI_DEVICE(0x10ee, 0x9018), }, + { PCI_DEVICE(0x10ee, 0x901F), }, + { PCI_DEVICE(0x10ee, 0x9021), }, + { PCI_DEVICE(0x10ee, 0x9022), }, + { PCI_DEVICE(0x10ee, 0x9024), }, + { PCI_DEVICE(0x10ee, 0x9028), }, + { PCI_DEVICE(0x10ee, 0x902F), }, + { PCI_DEVICE(0x10ee, 0x9031), }, + { PCI_DEVICE(0x10ee, 0x9032), }, + { PCI_DEVICE(0x10ee, 0x9034), }, + { PCI_DEVICE(0x10ee, 0x9038), }, + { PCI_DEVICE(0x10ee, 0x903F), }, + { PCI_DEVICE(0x10ee, 0x8011), }, + { PCI_DEVICE(0x10ee, 0x8012), }, + { PCI_DEVICE(0x10ee, 0x8014), }, + { PCI_DEVICE(0x10ee, 0x8018), }, + { PCI_DEVICE(0x10ee, 0x8021), }, + { PCI_DEVICE(0x10ee, 0x8022), }, + { PCI_DEVICE(0x10ee, 0x8024), }, + { PCI_DEVICE(0x10ee, 0x8028), }, + { PCI_DEVICE(0x10ee, 0x8031), }, + { PCI_DEVICE(0x10ee, 0x8032), }, + { PCI_DEVICE(0x10ee, 0x8034), }, + { PCI_DEVICE(0x10ee, 0x8038), }, + { PCI_DEVICE(0x10ee, 0x7011), }, + { PCI_DEVICE(0x10ee, 0x7012), }, + { PCI_DEVICE(0x10ee, 0x7014), }, + { PCI_DEVICE(0x10ee, 0x7018), }, + { PCI_DEVICE(0x10ee, 0x7021), }, + { PCI_DEVICE(0x10ee, 0x7022), }, + { PCI_DEVICE(0x10ee, 0x7024), }, + { PCI_DEVICE(0x10ee, 0x7028), }, + { PCI_DEVICE(0x10ee, 0x7031), }, + { PCI_DEVICE(0x10ee, 0x7032), }, + { PCI_DEVICE(0x10ee, 0x7034), }, + { PCI_DEVICE(0x10ee, 0x7038), }, + {0,} +}; 
+MODULE_DEVICE_TABLE(pci, pci_ids); + +static struct pci_driver pci_driver = { + .name = DRV_NAME, + .id_table = pci_ids, + .probe = pci_probe, + .remove = pci_remove, +}; + +/* + ____ _ _ +| _ \ ___ __ _(_)___| |_ ___ _ __ +| |_) / _ \/ _` | / __| __/ _ \ '__| +| _ < __/ (_| | \__ \ || __/ | +|_| \_\___|\__, |_|___/\__\___|_| + |___/ +*/ + +static int __init pci_init(void) +{ + int ret_val; + pr_info("loading: LynX XDMA driver ...\n"); + + ret_val = pci_register_driver(&pci_driver); + if(ret_val) { + pr_err("LynX XDMA driver register returned %d\n", ret_val); + return ret_val; + } + + return 0; +} + +static void __exit pci_exit(void) +{ + pr_info("removal: LynX XDMA driver ...\n"); + pci_unregister_driver(&pci_driver); +} + +module_init(pci_init); +module_exit(pci_exit); + +/* --------------------------------------------------------------------------- */ +MODULE_DESCRIPTION("LynX XDMA driver."); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dario Korolija +#include +#include +#include +#include +#include + +/* Driver debug */ +#define LYNX_DEBUG 1 + +/* Network setup */ +#define BASE_IP_ADDR 0x0B01D4D1 +#define NODE_ID 0 +#define N_TOTAL_NODES 2 + +/* Debug print */ +#if (LYNX_DEBUG == 0) + #define dbg_info(...) +#else + #define dbg_info(fmt, ...) 
pr_info("%s():" fmt, \
+                         __func__, ##__VA_ARGS__)
+#endif
+
+/*
+ * Helpers that split a bus address into two 32-bit register values.
+ * The argument is parenthesised so that an expression argument such as
+ * PCI_DMA_L(base + offs) expands to ((base + offs) & mask) rather than
+ * (base + (offs & mask)).
+ * PCI_DMA_H shifts in two 16-bit steps so it stays well defined for a 32-bit
+ * address type, where a single shift by 32 would be undefined behaviour.
+ */
+/* obtain the 32 most significant (high) bits of a 32-bit or 64-bit address */
+#define PCI_DMA_H(addr) (((addr) >> 16) >> 16)
+/* obtain the 32 least significant (low) bits of a 32-bit or 64-bit address */
+#define PCI_DMA_L(addr) ((addr) & 0xffffffffUL)
+
+/* Driver info */
+#define DRV_NAME "lynx_driver"
+#define DEV_NAME "fpga"
+
+/**
+ * XDMA info
+ */
+#define MAX_NUM_BARS 3
+#define MAX_NUM_CHANNELS 4
+#define MAX_NUM_ENGINES (MAX_NUM_CHANNELS * 2)
+#define MAX_USER_IRQS 16
+#define C2H_CHAN_OFFS 0x1000
+#define H2C_CHAN_OFFS 0x0000
+#define CHAN_RANGE 0x100
+#define SGDMA_OFFSET_FROM_CHANNEL 0x4000
+
+/* Engine IDs */
+#define XDMA_ID_H2C 0x1fc0U
+#define XDMA_ID_C2H 0x1fc1U
+
+/* Engine regs */
+#define XDMA_ENG_IRQ_NUM (1)
+#define XDMA_OFS_INT_CTRL (0x2000UL)
+#define XDMA_OFS_CONFIG (0x3000UL)
+
+/* Bits of the SG DMA control register */
+#define XDMA_CTRL_RUN_STOP (1UL << 0)
+#define XDMA_CTRL_IE_DESC_STOPPED (1UL << 1)
+#define XDMA_CTRL_IE_DESC_COMPLETED (1UL << 2)
+#define XDMA_CTRL_IE_DESC_ALIGN_MISMATCH (1UL << 3)
+#define XDMA_CTRL_IE_MAGIC_STOPPED (1UL << 4)
+#define XDMA_CTRL_IE_IDLE_STOPPED (1UL << 6)
+#define XDMA_CTRL_IE_READ_ERROR (0x1FUL << 9)
+#define XDMA_CTRL_IE_DESC_ERROR (0x1FUL << 19)
+#define XDMA_CTRL_NON_INCR_ADDR (1UL << 25)
+#define XDMA_CTRL_POLL_MODE_WB (1UL << 26)
+
+/* Bits of the SG DMA status register */
+#define XDMA_STAT_BUSY (1UL << 0)
+#define XDMA_STAT_DESC_STOPPED (1UL << 1)
+#define XDMA_STAT_DESC_COMPLETED (1UL << 2)
+#define XDMA_STAT_ALIGN_MISMATCH (1UL << 3)
+#define XDMA_STAT_MAGIC_STOPPED (1UL << 4)
+#define XDMA_STAT_FETCH_STOPPED (1UL << 5)
+#define XDMA_STAT_IDLE_STOPPED (1UL << 6)
+#define XDMA_STAT_READ_ERROR (0x1FUL << 9)
+#define XDMA_STAT_DESC_ERROR (0x1FUL << 19)
+
+/* Bits of the performance control register */
+#define XDMA_PERF_RUN (1UL << 0)
+#define XDMA_PERF_CLEAR (1UL << 1)
+#define XDMA_PERF_AUTO (1UL << 2)
+
+/* Polling */
+#define 
WB_COUNT_MASK 0x00ffffffUL
+#define WB_ERR_MASK (1UL << 31)
+#define POLL_TIMEOUT_SECONDS 10
+#define NUM_POLLS_PER_SCHED 100
+
+/**
+ * Static layer
+ */
+#define BAR_XDMA_CONFIG 0
+#define BAR_FPGA_CONFIG 1
+
+/* FPGA static config */
+/* Size macros are fully parenthesised so they expand as a single value inside
+ * larger expressions (e.g. x / FPGA_STAT_CNFG_SIZE, x % FPGA_CTRL_SIZE). */
+#define FPGA_STAT_CNFG_OFFS 0x0
+#define FPGA_STAT_CNFG_SIZE (32 * 1024)
+#define EN_AVX_MASK 0x1
+#define EN_BYPASS_MASK 0x2
+#define EN_DDR_MASK 0x1
+#define N_DDR_CHAN_MASK 0x3e
+
+/**
+ * Dynamic layer
+ */
+
+/* FPGA control regions */
+/* The control window is split into four equal quarters: large TLB, small TLB,
+ * user and config, at the offsets given below. */
+#define FPGA_CTRL_SIZE (256 * 1024)
+#define FPGA_CTRL_OFFS 0x100000
+#define FPGA_CTRL_LTLB_SIZE (FPGA_CTRL_SIZE / 4)
+#define FPGA_CTRL_LTLB_OFFS 0x0
+#define FPGA_CTRL_STLB_SIZE (FPGA_CTRL_SIZE / 4)
+#define FPGA_CTRL_STLB_OFFS 0x10000
+#define FPGA_CTRL_USER_SIZE (FPGA_CTRL_SIZE / 4)
+#define FPGA_CTRL_USER_OFFS 0x20000
+#define FPGA_CTRL_CNFG_SIZE (FPGA_CTRL_SIZE / 4)
+#define FPGA_CTRL_CNFG_OFFS 0x30000
+
+#define FPGA_CTRL_CNFG_AVX_SIZE (256 * 1024)
+#define FPGA_CTRL_CNFG_AVX_OFFS 0x1000000
+
+/* FPGA dynamic control config */
+#define FPGA_CNFG_CTRL_IRQ_RESTART 0x100
+
+/* TODO: Maximum transfer size */
+#define XDMA_TRANSFER_MAX_BYTES (8 * 1024 * 1024)
+
+/* TLB */
+/* Bits 63 and 62 need a 64-bit constant: 1ULL keeps the shift defined even
+ * where unsigned long is only 32 bits wide. */
+#define TLB_VALID_MASK (1ULL << 63)
+#define TLB_CARD_MASK (1ULL << 62)
+#define TLB_VADDR_RANGE 48
+#define TLB_PADDR_RANGE 40
+#define TLB_VADDR_TAG_MASK 0x3ffffffUL
+
+/* Per-TLB geometry: ORDER is log2 of the number of keys, PAGE_BITS is log2 of
+ * the page size; the PADDR sizes and masks are derived from those. */
+#define LTLB_ORDER 6
+#define STLB_ORDER 10
+#define LTLB_MAX_KEYS (1UL << LTLB_ORDER)
+#define STLB_MAX_KEYS (1UL << STLB_ORDER)
+#define LTLB_OFFS LTLB_MAX_KEYS // TODO:
+#define STLB_OFFS STLB_MAX_KEYS // TODO:
+#define LTLB_HASH_MASK ((1UL << LTLB_ORDER) - 1)
+#define STLB_HASH_MASK ((1UL << STLB_ORDER) - 1)
+#define LTLB_ASSOC_ENTRIES 2
+#define STLB_ASSOC_ENTRIES 4
+#define LTLB_PAGE_BITS 21
+#define STLB_PAGE_BITS 12
+#define LTLB_PADDR_SIZE (TLB_PADDR_RANGE - LTLB_PAGE_BITS)
+#define STLB_PADDR_SIZE (TLB_PADDR_RANGE - STLB_PAGE_BITS)
+#define LTLB_PADDR_MASK ((1UL << LTLB_PADDR_SIZE) - 1)
+#define STLB_PADDR_MASK ((1UL << STLB_PADDR_SIZE) - 1)
+#define 
LTLB_TAG_SIZE (TLB_VADDR_RANGE - LTLB_ORDER - LTLB_PAGE_BITS)
+#define STLB_TAG_SIZE (TLB_VADDR_RANGE - STLB_ORDER - STLB_PAGE_BITS)
+#define LTLB_TAG_MASK ((1UL << LTLB_TAG_SIZE) - 1)
+#define STLB_TAG_MASK ((1UL << STLB_TAG_SIZE) - 1)
+
+/* FPGA config commands */
+
+/**
+ * Cdev
+ */
+
+/* Major number */
+#define FPGA_MAJOR 0 // dynamic
+
+/* Memory allocation */
+#define LARGE_PAGE_ORDER 9 // 2MB pages
+#define LARGE_PAGE_SHIFT (LARGE_PAGE_ORDER + PAGE_SHIFT)
+#define LARGE_PAGE_SIZE (PAGE_SIZE << LARGE_PAGE_ORDER)
+#define MAX_BUFF_NUM 1024 // Maximum number of huge pages on the host system
+#define MAX_PR_BUFF_NUM 1024 // Maximum number of huge pages on the host system
+/* Parenthesised so the product expands as one value inside larger
+ * expressions (e.g. x / NUM_LARGE_CARD_PAGES). */
+#define NUM_LARGE_CARD_PAGES (4 * 1024)
+
+#define MAX_N_MAP_PAGES 128
+
+/* MMAP */
+#define MMAP_CTRL 0x0
+#define MMAP_CNFG 0x1
+#define MMAP_CNFG_AVX 0x2
+#define MMAP_BUFF 0x200
+#define MMAP_PR 0x400
+
+/* IOCTL */
+/* Every request is declared with _IOR, magic 'D' and an unsigned long
+ * payload. The encoded request numbers are user-space ABI, so they are kept
+ * exactly as they are. */
+#define IOCTL_ALLOC_HOST_USER_MEM _IOR('D', 1, unsigned long)
+#define IOCTL_FREE_HOST_USER_MEM _IOR('D', 2, unsigned long)
+#define IOCTL_ALLOC_HOST_PR_MEM _IOR('D', 3, unsigned long)
+#define IOCTL_FREE_HOST_PR_MEM _IOR('D', 4, unsigned long)
+#define IOCTL_MAP_USER _IOR('D', 5, unsigned long)
+#define IOCTL_UNMAP_USER _IOR('D', 6, unsigned long)
+#define IOCTL_RECONFIG_LOAD _IOR('D', 7, unsigned long)
+#define IOCTL_ARP_LOOKUP _IOR('D', 8, unsigned long)
+#define IOCTL_WRITE_CTX _IOR('D', 9, unsigned long)
+#define IOCTL_WRITE_CONN _IOR('D', 10, unsigned long)
+#define IOCTL_RDMA_STAT _IOR('D', 11, unsigned long)
+#define IOCTL_READ_ENG_STATUS _IOR('D', 12, unsigned long)
+
+/* Hash */
+#define PR_HASH_TABLE_ORDER 8
+#define PR_BATCH_SIZE (2 * 1024 * 1024)
+
+#define USER_HASH_TABLE_ORDER 8
+
+/* Max card pages */
+#define N_LARGE_CHUNKS 1024
+#define N_SMALL_CHUNKS 1024
+#define MAX_N_REGIONS 16
+#define SMALL_CHUNK_ALLOC 0
+#define LARGE_CHUNK_ALLOC 1
+#define MEM_SEP 0x40000000
+
+/* RDMA */
+#define N_RDMA_STAT_REGS 24
+
+/**
+ * Reg maps
+ */
+
+/* DMA engine reg map 
*/
+/*
+ * All members are 32 bit wide and the struct is packed, so the byte offsets
+ * noted on the first member of each group follow directly from the layout.
+ * NOTE(review): the _w1s / _w1c members are presumably write-1-to-set and
+ * write-1-to-clear aliases of the preceding register - confirm against the
+ * XDMA register documentation.
+ */
+struct engine_regs {
+    uint32_t id;                        /* 0x00 */
+    uint32_t ctrl;
+    uint32_t ctrl_w1s;
+    uint32_t ctrl_w1c;
+    uint32_t rsrvd_1[12];               /* 0x10 - 0x3f */
+
+    uint32_t status;                    /* 0x40 */
+    uint32_t status_rc;
+    uint32_t completed_desc_count;
+    uint32_t alignments;
+    uint32_t rsrvd_2[14]; // padding    (0x50 - 0x87)
+
+    uint32_t poll_mode_wb_lo;           /* 0x88 */
+    uint32_t poll_mode_wb_hi;
+    uint32_t interrupt_enable_mask;
+    uint32_t interrupt_enable_mask_w1s;
+    uint32_t interrupt_enable_mask_w1c;
+    uint32_t rsrvd_3[9]; // padding     (0x9c - 0xbf)
+
+    uint32_t perf_ctrl;                 /* 0xc0 */
+    uint32_t perf_cyc_lo;
+    uint32_t perf_cyc_hi;
+    uint32_t perf_dat_lo;
+    uint32_t perf_dat_hi;
+    uint32_t perf_pnd_lo;
+    uint32_t perf_pnd_hi;
+} __packed;
+
+/* Interrupt reg map */
+/* Packed 32-bit registers; group offsets are noted on the first member. */
+struct interrupt_regs {
+    uint32_t id;                        /* 0x00 */
+    uint32_t user_int_enable;
+    uint32_t user_int_enable_w1s;
+    uint32_t user_int_enable_w1c;
+    uint32_t channel_int_enable;        /* 0x10 */
+    uint32_t channel_int_enable_w1s;
+    uint32_t channel_int_enable_w1c;
+    uint32_t reserved_1[9]; // padding  (0x1c - 0x3f)
+
+    uint32_t user_int_request;          /* 0x40 */
+    uint32_t channel_int_request;
+    uint32_t user_int_pending;
+    uint32_t channel_int_pending;
+    uint32_t reserved_2[12]; // padding (0x50 - 0x7f)
+
+    uint32_t user_msi_vector[8];        /* 0x80 */
+    uint32_t channel_msi_vector[8];     /* 0xa0 */
+} __packed;
+
+
+/* Polled mode descriptors struct */
+/* 32 bytes in total: one counter word followed by seven reserved words. */
+struct xdma_poll_wb {
+    uint32_t completed_desc_count;
+    uint32_t reserved_1[7];
+} __packed;
+
+/* FPGA static config reg map */
+/* Packed 64-bit registers; group offsets are noted on the first member. */
+struct fpga_stat_cnfg_regs {
+    uint64_t probe;                     /* 0x00 */
+    uint64_t n_chan;
+    uint64_t n_regions;
+    uint64_t ctrl_cnfg;
+    uint64_t on_board;
+    uint64_t pr;
+    uint64_t rdma;
+    uint64_t reserved_0[3];             /* 0x38 - 0x4f */
+    uint64_t pr_ctrl;                   /* 0x50 */
+    uint64_t pr_stat;
+    uint64_t pr_addr;
+    uint64_t pr_len;
+    uint64_t reserved_1[6];             /* 0x70 - 0x9f */
+    uint64_t rdma_ip;                   /* 0xa0 */
+    uint64_t rdma_boardnum;
+    uint64_t rdma_arp;
+    uint64_t qp_ctx[3];                 /* 0xb8 */
+    uint64_t qp_conn[3];                /* 0xd0 */
+    uint64_t reserved_2[1];             /* 0xe8 */
+    uint64_t rdma_debug[24];            /* 0xf0 - 0x1af */
+} __packed;
+
+/* FPGA dynamic config reg map */
+struct fpga_cnfg_regs {
+    uint64_t ctrl;
+    uint64_t vaddr_rd;
+    uint64_t len_rd;
+    uint64_t vaddr_wr;
+    uint64_t len_wr;
+    uint64_t 
vaddr_miss;
+    uint64_t len_miss;
+    uint64_t datapath_set;
+    uint64_t datapath_clr;
+    uint64_t tmr_stop;
+    uint64_t tmr_rd;
+    uint64_t tmr_wr;
+    uint64_t stat_cmd_used_rd;
+    uint64_t stat_cmd_used_wr;
+    uint64_t stat_dma_rd;
+    uint64_t stat_dma_wr;
+    uint64_t stat_sent_rd;
+    uint64_t stat_sent_wr;
+    uint64_t stat_pfaults;
+    // RDMA regs not used in the driver
+} __packed;
+
+/* FPGA dynamic config reg map */
+/* AVX variant of the dynamic config map: most registers are arrays of four
+ * 64-bit words. */
+struct fpga_cnfg_regs_avx {
+    uint64_t ctrl[4];
+    uint64_t vaddr_miss;
+    uint64_t len_miss;
+    uint64_t pf[2];
+    uint64_t datapath_set[4];
+    uint64_t datapath_clr[4];
+    uint64_t tmr_stop[4];
+    uint64_t tmr[4];
+    uint64_t stat[4];
+    // RDMA regs not used in the driver
+} __packed;
+
+/**
+ * Structs
+ */
+
+/* Engine descriptors */
+/* Packed 32-bit registers: the descriptor pointer group starts at byte offset
+ * 0x80, after the identifier word and 31 words of padding. */
+struct xdma_sgdma_regs {
+    uint32_t identifier;
+    uint32_t reserved_1[31]; /* padding */
+
+    /* bus address to first descriptor in Root Complex Memory */
+    uint32_t first_desc_lo;
+    uint32_t first_desc_hi;
+    /* number of adjacent descriptors at first_desc */
+    uint32_t first_desc_adjacent;
+    uint32_t credits;
+} __packed;
+
+
+/* Engine struct */
+/* Software state for one XDMA engine (one direction of one channel). */
+struct xdma_engine {
+    int channel; // engine channel
+    char *name; // engine name
+    struct pci_drvdata *pd; // PCI device
+
+    struct engine_regs *regs; // HW regs, control and status
+    /* Typed with the SGDMA register map defined above. The previous type name,
+     * struct engine_sgdma_regs, is never defined, which left this pointer
+     * incomplete and impossible to dereference. */
+    struct xdma_sgdma_regs *sgdma_regs; // SGDMA reg BAR offset
+
+    // Config
+    int running; // engine state
+    int c2h; // c2h(write) or h2c(read)
+    uint32_t status;
+
+    int addr_align; // source/dest alignment in bytes
+    int len_granularity; // transfer length multiple
+    int addr_bits; // HW datapath address width
+
+    /* Members associated with polled mode support */
+    uint8_t *poll_mode_addr_virt; /* virt addr for descriptor writeback */
+    uint64_t poll_mode_phys_addr; /* bus addr for descriptor writeback */
+};
+
+/* Mapped user pages */
+/* One hash-table entry describing a mapped user buffer: its virtual address,
+ * page count and the host (hpages) and card (cpages) page arrays. */
+struct user_pages {
+    struct hlist_node entry;
+    uint64_t vaddr;
+    uint64_t n_pages;
+    struct page **hpages;
+    uint64_t *cpages;
+};
+/* User tables */
+struct hlist_head 
user_lbuff_map[MAX_N_REGIONS][1 << (USER_HASH_TABLE_ORDER)];
+struct hlist_head user_sbuff_map[MAX_N_REGIONS][1 << (USER_HASH_TABLE_ORDER)];
+/* NOTE(review): the hash tables in this header are definitions, not extern
+ * declarations; including the header from more than one translation unit
+ * would define them more than once. */
+
+/* Mapped large PR pages */
+struct pr_pages {
+    struct hlist_node entry;
+    int reg_id;
+    uint64_t vaddr;
+    uint64_t n_pages;
+    struct page **pages;
+};
+/* PR table */
+struct hlist_head pr_buff_map[1 << (PR_HASH_TABLE_ORDER)];
+
+/* Card chunks */
+/* Singly linked nodes (id + next pointer) for small and large card chunks. */
+struct small_chunk {
+    uint32_t id;
+    struct small_chunk *next;
+};
+
+struct large_chunk {
+    uint32_t id;
+    struct large_chunk *next;
+};
+
+
+/* Virtual FPGA device */
+struct fpga_dev {
+    int id; // identifier
+    int chan_id; // channel id
+    struct cdev cdev; // char device
+    struct pci_drvdata *pd; // PCI device
+    struct pr_ctrl *prc;
+
+    // Current task
+    struct task_struct *curr_task;
+    struct mm_struct *curr_mm;
+
+    // Control region
+    uint64_t fpga_phys_addr_ctrl;
+    uint64_t fpga_phys_addr_ctrl_avx;
+
+    uint64_t *fpga_lTlb; // large page TLB
+    uint64_t *fpga_sTlb; // small page TLB
+    struct fpga_cnfg_regs *fpga_cnfg; // config
+    struct fpga_cnfg_regs_avx *fpga_cnfg_avx; // config AVX
+
+    // Engines
+    struct xdma_engine *engine_h2c; // h2c engine
+    struct xdma_engine *engine_c2h; // c2h engine
+
+    // In use
+    atomic_t in_use; // busy flag
+
+    // Lock
+    spinlock_t lock; // protects concurrent accesses
+
+    // Allocated buffers
+    struct user_pages curr_user_buff;
+};
+
+/* PR controller */
+struct pr_ctrl {
+    struct pci_drvdata *pd; // PCI device
+    spinlock_t lock;
+
+    // Engines
+    struct xdma_engine *engine_h2c; // h2c engine
+    struct xdma_engine *engine_c2h; // c2h engine
+
+    // Allocated buffers
+    struct pr_pages curr_buff;
+};
+
+/* PCI driver data */
+struct pci_drvdata {
+    struct pci_dev *pci_dev;
+
+    // BARs
+    int regions_in_use;
+    int got_regions;
+    /* __iomem qualifies the pointed-to MMIO memory, so it belongs before the
+     * '*' (kernel convention: void __iomem *). It expands to nothing outside
+     * sparse builds, so the generated code is unchanged. */
+    void __iomem *bar[MAX_NUM_BARS];
+    unsigned long bar_phys_addr[MAX_NUM_BARS];
+    unsigned long bar_len[MAX_NUM_BARS];
+
+    // Engines
+    int engines_num;
+
+    // FPGA static config
+    int n_fpga_chan;
+    int n_fpga_reg;
+    int 
n_fpga_tot_reg;
+    int en_avx;
+    int en_bypass;
+    int on_board;
+    int pr_flow;
+    int en_ddr;
+    int n_ddr_chan;
+    int en_rdma;
+    struct fpga_stat_cnfg_regs *fpga_stat_cnfg;
+    struct fpga_dev *fpga_dev;
+    spinlock_t stat_lock;
+
+    // PR control
+    struct pr_ctrl prc;
+
+    // IRQ
+    int irq_count;
+    int irq_line;
+    int msix_enabled;
+    struct msix_entry irq_entry[32];
+
+    // Card memory
+    // Large chunks: fixed pool of N_LARGE_CHUNKS nodes, a free counter and a list pointer
+    spinlock_t card_l_lock;
+    struct large_chunk lchunks[N_LARGE_CHUNKS];
+    int num_free_lchunks;
+    struct large_chunk *lalloc;
+
+    // Small chunks: same layout, sized by N_SMALL_CHUNKS
+    spinlock_t card_s_lock;
+    struct small_chunk schunks[N_SMALL_CHUNKS];
+    int num_free_schunks;
+    struct small_chunk *salloc;
+};
+
+#endif
\ No newline at end of file
diff --git a/hw/CMakeLists.txt b/hw/CMakeLists.txt
new file mode 100644
index 00000000..953e5f4d
--- /dev/null
+++ b/hw/CMakeLists.txt
@@ -0,0 +1,136 @@
+cmake_minimum_required(VERSION 3.0)
+project(lynx)
+
+#
+# Vivado
+#
+
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/../cmake)
+
+set(IPREPO_DIR ${CMAKE_BINARY_DIR}/iprepo)
+file(MAKE_DIRECTORY ${IPREPO_DIR})
+
+#
+# Target options
+#
+
+# FDEV_NAME has no default in this file: any value other than the three boards
+# below (including leaving it unset) aborts configuration.
+if(FDEV_NAME STREQUAL "vcu118")
+    set(FPGA_PART xcvu9p-flga2104-2L-e CACHE STRING "FPGA device.")
+elseif(FDEV_NAME STREQUAL "u250")
+    set(FPGA_PART xcu250-figd2104-2L-e CACHE STRING "FPGA device.")
+elseif(FDEV_NAME STREQUAL "u280")
+    set(FPGA_PART xcu280-fsvh2892-2L-e CACHE STRING "FPGA device.")
+else()
+    message(FATAL_ERROR "Target device not supported.")
+endif()
+
+#
+# Domain options
+#
+
+# Number of user regions
+set(N_REGIONS 1 CACHE STRING "Number of regions per channel.")
+
+# User logic descriptors
+set(EN_BPSS 0 CACHE STRING "User logic bypass descriptors.")
+# AVX support
+set(EN_AVX 1 CACHE STRING "AVX environment.")
+
+# Host streams
+set(EN_STRM 1 CACHE STRING "Enable streams.")
+
+# DDR streams
+set(EN_DDR 0 CACHE STRING "Enable local DDR.")
+# DDR channels
+# NOTE(review): the help text says "Channel 0." but the validation further down
+# compares N_DDR_CHAN against a maximum channel count - presumably this is the
+# number of DDR channels; confirm and reword the help text.
+set(N_DDR_CHAN 1 CACHE STRING "Channel 0.")
+
+# Partial reconfiguration
+set(EN_PR 0 CACHE STRING "Enable PR flow.")
+
+# 
Enable Network stack
+set(EN_TCP 0 CACHE STRING "Enable TCP.")
+set(EN_RDMA 0 CACHE STRING "Enable RDMA.")
+set(EN_FVV 0 CACHE STRING "Enable Farview.") # NOTE(review): never read below; EN_FV is derived from EN_RDMA instead
+
+# QSFP port
+set(QSFP_PORT 1 CACHE STRING "Network traffic route.")
+
+#
+# Validation checks
+#
+
+if(N_REGIONS GREATER 16)
+    message(FATAL_ERROR "Max 16 regions supported.")
+endif()
+
+if(NOT EN_STRM AND NOT EN_DDR)
+    message(FATAL_ERROR "Neither of the transfer options selected.")
+endif()
+
+if(FDEV_NAME STREQUAL "vcu118" OR FDEV_NAME STREQUAL "u280")
+    if(N_DDR_CHAN GREATER 2)
+        message(FATAL_ERROR "Maximum of 2 channels supported.")
+    endif()
+endif()
+
+if(FDEV_NAME STREQUAL "u250")
+    if(N_DDR_CHAN GREATER 4)
+        message(FATAL_ERROR "Maximum of 4 channels supported.")
+    endif()
+endif()
+
+# Derived options. set(... CACHE ...) without FORCE never overwrites an existing
+# cache entry, so a value derived on the first configure would stay stale when
+# EN_RDMA / EN_TCP are changed later; FORCE keeps the derived entries in sync.
+if(EN_RDMA)
+    set(EN_FV 1 CACHE BOOL "Enable Farview" FORCE)
+else()
+    set(EN_FV 0 CACHE BOOL "Enable Farview" FORCE)
+endif()
+
+if(EN_FV AND NOT EN_BPSS)
+    set(EN_BPSS 1) # EN_FV forces the bypass descriptors on
+endif()
+if(EN_FV AND QSFP_PORT GREATER 1) # checked for every EN_FV build, not only when EN_BPSS was off
+    message(FATAL_ERROR "QSFP port doesn't exist.")
+endif()
+
+# FPGA-network-stack
+if(EN_FV EQUAL 1)
+    set(ROCE_STACK_EN 1 CACHE BOOL "Enable RDMA stack." FORCE)
+else()
+    set(ROCE_STACK_EN 0 CACHE BOOL "Enable RDMA stack." FORCE)
+endif()
+
+if(EN_TCP EQUAL 1)
+    set(TCP_STACK_EN 1 CACHE BOOL "Enable TCP stack." FORCE)
+else()
+    set(TCP_STACK_EN 0 CACHE BOOL "Enable TCP stack." FORCE)
+endif()
+
+set(UDP_STACK_EN 0 CACHE BOOL "Enable UDP/IP stack")
+
+#
+# Network stack
+#
+
+add_subdirectory(ext/network)
+
+#
+# Find Vivado
+#
+
+find_package(Vivado REQUIRED)
+if (NOT VIVADO_FOUND)
+    message(FATAL_ERROR "Vivado not found.")
+endif()
+
+#
+# Configure files
+#
+
+configure_file(${CMAKE_SOURCE_DIR}/scripts/shell.tcl.in ${CMAKE_BINARY_DIR}/shell.tcl)
+configure_file(${CMAKE_SOURCE_DIR}/scripts/post.tcl.in ${CMAKE_BINARY_DIR}/post.tcl)
+configure_file(${CMAKE_SOURCE_DIR}/scripts/dynamic.tcl.in ${CMAKE_BINARY_DIR}/dynamic.tcl)
+configure_file(${CMAKE_SOURCE_DIR}/scripts/compile.tcl.in ${CMAKE_BINARY_DIR}/compile.tcl)
+
+add_custom_target(shell COMMAND 
${VIVADO_BINARY} -mode tcl -source ${CMAKE_BINARY_DIR}/shell.tcl) +add_custom_target(compile COMMAND ${VIVADO_BINARY} -mode tcl -source ${CMAKE_BINARY_DIR}/compile.tcl) +add_custom_target(dynamic COMMAND ${VIVADO_BINARY} -mode tcl -source ${CMAKE_BINARY_DIR}/dynamic.tcl) diff --git a/hw/constraints/u250/u250_base.xdc b/hw/constraints/u250/u250_base.xdc new file mode 100644 index 00000000..b74d2525 --- /dev/null +++ b/hw/constraints/u250/u250_base.xdc @@ -0,0 +1,29 @@ +# Board constraints +set_operating_conditions -design_power_budget 160 +# Bitstream Generation for QSPI +#set_property CONFIG_VOLTAGE 1.8 [current_design] + +#set_property BITSTREAM.CONFIG.EXTMASTERCCLK_EN disable [current_design] +#set_property BITSTREAM.CONFIG.CONFIGRATE 63.8 [current_design] +##set_property BITSTREAM.CONFIG.CONFIGRATE 85.0 [current_design] ;# Customer can try but may not be reliable over all conditions. +#set_property BITSTREAM.CONFIG.SPI_BUSWIDTH 4 [current_design] +#set_property BITSTREAM.GENERAL.COMPRESS TRUE [current_design] +#set_property BITSTREAM.CONFIG.SPI_FALL_EDGE YES [current_design] +#set_property BITSTREAM.CONFIG.SPI_OPCODE 8'h6B [current_design] +#set_property BITSTREAM.CONFIG.SPI_32BIT_ADDR Yes [current_design] +#set_property BITSTREAM.CONFIG.UNUSEDPIN Pullup [current_design] +set_property BITSTREAM.GENERAL.COMPRESS TRUE [current_design] + +# Clocks and reset +set_property PACKAGE_PIN AL20 [get_ports resetn_0_nb] ; +set_property IOSTANDARD LVCMOS12 [get_ports resetn_0_nb] ; + +# Reset false path +set_false_path -from [get_ports resetn_0_nb] + +# User general purpose (156.25 MHz) +#set_property PACKAGE_PIN AV19 [get_ports user_si570_clk_n] ; +#set_property IOSTANDARD LVDS [get_ports user_si570_clk_n] ; +#set_property PACKAGE_PIN AU19 [get_ports user_si570_clk_p] ; +#set_property IOSTANDARD LVDS [get_ports user_si570_clk_p] ; + diff --git a/hw/constraints/u250/u250_ddr.xdc b/hw/constraints/u250/u250_ddr.xdc new file mode 100644 index 00000000..6856fa7b --- /dev/null 
+++ b/hw/constraints/u250/u250_ddr.xdc @@ -0,0 +1,628 @@ +# DDR clocks +set_property PACKAGE_PIN AY38 [get_ports c0_sys_clk_n] ; +set_property IOSTANDARD DIFF_POD12_DCI [get_ports c0_sys_clk_n] ; +set_property PACKAGE_PIN AY37 [get_ports c0_sys_clk_p] ; +set_property IOSTANDARD DIFF_POD12_DCI [get_ports c0_sys_clk_p] ; + +set_property PACKAGE_PIN AW19 [get_ports c1_sys_clk_n] ; +set_property IOSTANDARD LVDS [get_ports c1_sys_clk_n] ; +set_property PACKAGE_PIN AW20 [get_ports c1_sys_clk_p] ; +set_property IOSTANDARD LVDS [get_ports c1_sys_clk_p] ; + +set_property PACKAGE_PIN E32 [get_ports c2_sys_clk_n] ; +set_property IOSTANDARD DIFF_POD12_DCI [get_ports c2_sys_clk_n] ; +set_property PACKAGE_PIN F32 [get_ports c2_sys_clk_p] ; +set_property IOSTANDARD DIFF_POD12_DCI [get_ports c2_sys_clk_p] ; + +set_property PACKAGE_PIN H16 [get_ports c3_sys_clk_n] ; +set_property IOSTANDARD DIFF_POD12_DCI [get_ports c3_sys_clk_n] ; +set_property PACKAGE_PIN J16 [get_ports c3_sys_clk_p] ; +set_property IOSTANDARD DIFF_POD12_DCI [get_ports c3_sys_clk_p] ; + +#### +### DDR4 c0 +#### + +set_property -dict {PACKAGE_PIN AR36 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[16] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ADR16" - IO_L23N_T3U_N9_42 +set_property -dict {PACKAGE_PIN AP36 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[15] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ADR15" - IO_L23P_T3U_N8_42 +#set_property -dict {PACKAGE_PIN AN34 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_odt[1] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ODT1" - IO_L22N_T3U_N7_DBC_AD0N_42 +#set_property -dict {PACKAGE_PIN AM34 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_cs_n[3] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_CS_B3" - IO_L22P_T3U_N6_DBC_AD0P_42 +set_property -dict {PACKAGE_PIN AR33 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_cs_n[0] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_CS_B0" - IO_T3U_N12_42 +set_property -dict {PACKAGE_PIN AN36 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[13] ]; # Bank 42 VCCO - VCC1V2 Net 
"DDR4_C0_ADR13" - IO_L24N_T3U_N11_42 +#set_property -dict {PACKAGE_PIN AN35 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[17] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ADR17" - IO_L24P_T3U_N10_42 +set_property -dict {PACKAGE_PIN AP35 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[14] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ADR14" - IO_L21N_T3L_N5_AD8N_42 +set_property -dict {PACKAGE_PIN AP34 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_odt[0] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ODT0" - IO_L21P_T3L_N4_AD8P_42 +#set_property -dict {PACKAGE_PIN AP33 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_cs_n[1] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_CS_B1" - IO_L20N_T3L_N3_AD1N_42 +#set_property -dict {PACKAGE_PIN AN33 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_cs_n[2] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_CS_B2" - IO_L20P_T3L_N2_AD1P_42 +set_property -dict {PACKAGE_PIN AT35 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_ba[0] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_BA0" - IO_L19N_T3L_N1_DBC_AD9N_42 +set_property -dict {PACKAGE_PIN AR35 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[10] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ADR10" - IO_L19P_T3L_N0_DBC_AD9P_42 +set_property -dict {PACKAGE_PIN AW38 IOSTANDARD DIFF_SSTL12_DCI} [get_ports c0_ddr4_ck_c[0] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_CK_C0" - IO_L17N_T2U_N9_AD10N_42 +set_property -dict {PACKAGE_PIN AV38 IOSTANDARD DIFF_SSTL12_DCI} [get_ports c0_ddr4_ck_t[0] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_CK_T0" - IO_L17P_T2U_N8_AD10P_42 +#set_property -dict {PACKAGE_PIN AU35 IOSTANDARD DIFF_SSTL12_DCI} [get_ports c0_ddr4_ck_c[1] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_CK_C1" - IO_L16N_T2U_N7_QBC_AD3N_42 +#set_property -dict {PACKAGE_PIN AU34 IOSTANDARD DIFF_SSTL12_DCI} [get_ports c0_ddr4_ck_t[1] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_CK_T1" - IO_L16P_T2U_N6_QBC_AD3P_42 +set_property -dict {PACKAGE_PIN AT34 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_ba[1] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_BA1" - IO_T2U_N12_42 +set_property 
-dict {PACKAGE_PIN AU36 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_par ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_PAR" - IO_L18N_T2U_N11_AD2N_42 +set_property -dict {PACKAGE_PIN AT36 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[0] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ADR0" - IO_L18P_T2U_N10_AD2P_42 +set_property -dict {PACKAGE_PIN AV37 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[2] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ADR2" - IO_L15N_T2L_N5_AD11N_42 +set_property -dict {PACKAGE_PIN AV36 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[1] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ADR1" - IO_L15P_T2L_N4_AD11P_42 +set_property -dict {PACKAGE_PIN AW36 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[4] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ADR4" - IO_L14N_T2L_N3_GC_42 +set_property -dict {PACKAGE_PIN AW35 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[3] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ADR3" - IO_L14P_T2L_N2_GC_42 +set_property -dict {PACKAGE_PIN BA38 IOSTANDARD LVCMOS12 } [get_ports c0_ddr4_alert_n ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ALERT_B" - IO_L11N_T1U_N9_GC_42 +set_property -dict {PACKAGE_PIN BA37 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[8] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ADR8" - IO_L11P_T1U_N8_GC_42 +set_property -dict {PACKAGE_PIN BA40 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[7] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ADR7" - IO_L10N_T1U_N7_QBC_AD4N_42 +set_property -dict {PACKAGE_PIN BA39 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[11] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ADR11" - IO_L10P_T1U_N6_QBC_AD4P_42 +set_property -dict {PACKAGE_PIN BB37 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[9] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ADR9" - IO_T1U_N12_42 +set_property -dict {PACKAGE_PIN AY36 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[5] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ADR5" - IO_L12N_T1U_N11_GC_42 +set_property -dict {PACKAGE_PIN AY35 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[6] ]; # Bank 42 VCCO - 
VCC1V2 Net "DDR4_C0_ADR6" - IO_L12P_T1U_N10_GC_42 +#set_property -dict {PACKAGE_PIN BC40 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_cke[1] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_CKE1" - IO_L9N_T1L_N5_AD12N_42 +set_property -dict {PACKAGE_PIN BC39 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_bg[1] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_BG1" - IO_L9P_T1L_N4_AD12P_42 +set_property -dict {PACKAGE_PIN BB40 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_adr[12] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ADR12" - IO_L8N_T1L_N3_AD5N_42 +set_property -dict {PACKAGE_PIN BB39 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_act_n ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_ACT_B" - IO_L8P_T1L_N2_AD5P_42 +set_property -dict {PACKAGE_PIN BC38 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_cke[0] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_CKE0" - IO_L7N_T1L_N1_QBC_AD13N_42 +set_property -dict {PACKAGE_PIN BC37 IOSTANDARD SSTL12_DCI } [get_ports c0_ddr4_bg[0] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_BG0" - IO_L7P_T1L_N0_QBC_AD13P_42 +set_property -dict {PACKAGE_PIN BF43 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[66] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_DQ66" - IO_L5N_T0U_N9_AD14N_42 +set_property -dict {PACKAGE_PIN BF42 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[67] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_DQ67" - IO_L5P_T0U_N8_AD14P_42 +set_property -dict {PACKAGE_PIN BF38 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[16]]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_DQS_C8" - IO_L4N_T0U_N7_DBC_AD7N_42 +set_property -dict {PACKAGE_PIN BE38 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[16]]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_DQS_T8" - IO_L4P_T0U_N6_DBC_AD7P_42 +set_property -dict {PACKAGE_PIN BD40 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[64] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_DQ64" - IO_L6N_T0U_N11_AD6N_42 +set_property -dict {PACKAGE_PIN BD39 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[65] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_DQ65" - IO_L6P_T0U_N10_AD6P_42 +set_property 
-dict {PACKAGE_PIN BF41 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[71] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_DQ71" - IO_L3N_T0L_N5_AD15N_42 +set_property -dict {PACKAGE_PIN BE40 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[70] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_DQ70" - IO_L3P_T0L_N4_AD15P_42 +set_property -dict {PACKAGE_PIN BF37 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[68] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_DQ68" - IO_L2N_T0L_N3_42 +set_property -dict {PACKAGE_PIN BE37 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[69] ]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_DQ69" - IO_L2P_T0L_N2_42 +set_property -dict {PACKAGE_PIN BF40 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[17]]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_DQS_C17" - IO_L1N_T0L_N1_DBC_42 +set_property -dict {PACKAGE_PIN BF39 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[17]]; # Bank 42 VCCO - VCC1V2 Net "DDR4_C0_DQS_T17" - IO_L1P_T0L_N0_DBC_42 +set_property -dict {PACKAGE_PIN AU32 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[34] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ34" - IO_L23N_T3U_N9_41 +set_property -dict {PACKAGE_PIN AT32 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[35] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ35" - IO_L23P_T3U_N8_41 +set_property -dict {PACKAGE_PIN AM32 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[8] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQS_C4" - IO_L22N_T3U_N7_DBC_AD0N_41 +set_property -dict {PACKAGE_PIN AM31 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[8] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQS_T4" - IO_L22P_T3U_N6_DBC_AD0P_41 +#set_property -dict {PACKAGE_PIN AT33 IOSTANDARD LVCMOS12 } [get_ports c0_ddr4_event_n ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_EVENT_B" - IO_T3U_N12_41 +set_property -dict {PACKAGE_PIN AM30 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[33] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ33" - IO_L24N_T3U_N11_41 +set_property -dict {PACKAGE_PIN AL30 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[32] ]; # Bank 41 VCCO - 
VCC1V2 Net "DDR4_C0_DQ32" - IO_L24P_T3U_N10_41 +set_property -dict {PACKAGE_PIN AR32 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[38] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ38" - IO_L21N_T3L_N5_AD8N_41 +set_property -dict {PACKAGE_PIN AR31 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[39] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ39" - IO_L21P_T3L_N4_AD8P_41 +set_property -dict {PACKAGE_PIN AN32 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[37] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ37" - IO_L20N_T3L_N3_AD1N_41 +set_property -dict {PACKAGE_PIN AN31 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[36] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ36" - IO_L20P_T3L_N2_AD1P_41 +set_property -dict {PACKAGE_PIN AP31 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[9] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQS_C13" - IO_L19N_T3L_N1_DBC_AD9N_41 +set_property -dict {PACKAGE_PIN AP30 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[9] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQS_T13" - IO_L19P_T3L_N0_DBC_AD9P_41 +set_property -dict {PACKAGE_PIN AV32 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[25] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ25" - IO_L17N_T2U_N9_AD10N_41 +set_property -dict {PACKAGE_PIN AV31 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[24] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ24" - IO_L17P_T2U_N8_AD10P_41 +set_property -dict {PACKAGE_PIN AW33 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[6] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQS_C3" - IO_L16N_T2U_N7_QBC_AD3N_41 +set_property -dict {PACKAGE_PIN AV33 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[6] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQS_T3" - IO_L16P_T2U_N6_QBC_AD3P_41 +set_property -dict {PACKAGE_PIN AU31 IOSTANDARD LVCMOS12 } [get_ports c0_ddr4_reset_n ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_RESET_N" - IO_T2U_N12_41 +set_property -dict {PACKAGE_PIN AW34 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[27] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ27" - IO_L18N_T2U_N11_AD2N_41 
+set_property -dict {PACKAGE_PIN AV34 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[26] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ26" - IO_L18P_T2U_N10_AD2P_41 +set_property -dict {PACKAGE_PIN AY31 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[29] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ29" - IO_L15N_T2L_N5_AD11N_41 +set_property -dict {PACKAGE_PIN AW31 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[28] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ28" - IO_L15P_T2L_N4_AD11P_41 +set_property -dict {PACKAGE_PIN BA35 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[30] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ30" - IO_L14N_T2L_N3_GC_41 +set_property -dict {PACKAGE_PIN BA34 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[31] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ31" - IO_L14P_T2L_N2_GC_41 +set_property -dict {PACKAGE_PIN BA33 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[7] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQS_C12" - IO_L13N_T2L_N1_GC_QBC_41 +set_property -dict {PACKAGE_PIN BA32 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[7] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQS_T12" - IO_L13P_T2L_N0_GC_QBC_41 +set_property -dict {PACKAGE_PIN BB32 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[17] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ17" - IO_L11N_T1U_N9_GC_41 +set_property -dict {PACKAGE_PIN BB31 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[16] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ16" - IO_L11P_T1U_N8_GC_41 +set_property -dict {PACKAGE_PIN BB36 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[4] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQS_C2" - IO_L10N_T1U_N7_QBC_AD4N_41 +set_property -dict {PACKAGE_PIN BB35 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[4] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQS_T2" - IO_L10P_T1U_N6_QBC_AD4P_41 +set_property -dict {PACKAGE_PIN AY33 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[19] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ19" - IO_L12N_T1U_N11_GC_41 +set_property -dict {PACKAGE_PIN AY32 IOSTANDARD POD12_DCI 
} [get_ports c0_ddr4_dq[18] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ18" - IO_L12P_T1U_N10_GC_41 +set_property -dict {PACKAGE_PIN BC33 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[21] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ21" - IO_L9N_T1L_N5_AD12N_41 +set_property -dict {PACKAGE_PIN BC32 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[20] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ20" - IO_L9P_T1L_N4_AD12P_41 +set_property -dict {PACKAGE_PIN BC34 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[23] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ23" - IO_L8N_T1L_N3_AD5N_41 +set_property -dict {PACKAGE_PIN BB34 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[22] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ22" - IO_L8P_T1L_N2_AD5P_41 +set_property -dict {PACKAGE_PIN BD31 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[5] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQS_C11" - IO_L7N_T1L_N1_QBC_AD13N_41 +set_property -dict {PACKAGE_PIN BC31 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[5] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQS_T11" - IO_L7P_T1L_N0_QBC_AD13P_41 +set_property -dict {PACKAGE_PIN BE33 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[58] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ58" - IO_L5N_T0U_N9_AD14N_41 +set_property -dict {PACKAGE_PIN BD33 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[57] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ57" - IO_L5P_T0U_N8_AD14P_41 +set_property -dict {PACKAGE_PIN BE36 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[14]]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQS_C7" - IO_L4N_T0U_N7_DBC_AD7N_41 +set_property -dict {PACKAGE_PIN BE35 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[14]]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQS_T7" - IO_L4P_T0U_N6_DBC_AD7P_41 +set_property -dict {PACKAGE_PIN BD35 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[59] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ59" - IO_L6N_T0U_N11_AD6N_41 +set_property -dict {PACKAGE_PIN BD34 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[56] ]; # Bank 41 VCCO - VCC1V2 
Net "DDR4_C0_DQ56" - IO_L6P_T0U_N10_AD6P_41 +set_property -dict {PACKAGE_PIN BF33 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[61] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ61" - IO_L3N_T0L_N5_AD15N_41 +set_property -dict {PACKAGE_PIN BF32 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[60] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ60" - IO_L3P_T0L_N4_AD15P_41 +set_property -dict {PACKAGE_PIN BF35 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[63] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ63" - IO_L2N_T0L_N3_41 +set_property -dict {PACKAGE_PIN BF34 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[62] ]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQ62" - IO_L2P_T0L_N2_41 +set_property -dict {PACKAGE_PIN BE32 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[15]]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQS_C16" - IO_L1N_T0L_N1_DBC_41 +set_property -dict {PACKAGE_PIN BE31 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[15]]; # Bank 41 VCCO - VCC1V2 Net "DDR4_C0_DQS_T16" - IO_L1P_T0L_N0_DBC_41 +set_property -dict {PACKAGE_PIN AP29 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[40] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ40" - IO_L23N_T3U_N9_40 +set_property -dict {PACKAGE_PIN AP28 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[41] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ41" - IO_L23P_T3U_N8_40 +set_property -dict {PACKAGE_PIN AL29 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[10]]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQS_C5" - IO_L22N_T3U_N7_DBC_AD0N_40 +set_property -dict {PACKAGE_PIN AL28 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[10]]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQS_T5" - IO_L22P_T3U_N6_DBC_AD0P_40 +set_property -dict {PACKAGE_PIN AN27 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[42] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ42" - IO_L24N_T3U_N11_40 +set_property -dict {PACKAGE_PIN AM27 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[43] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ43" - IO_L24P_T3U_N10_40 +set_property -dict {PACKAGE_PIN AR28 
IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[47] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ47" - IO_L21N_T3L_N5_AD8N_40 +set_property -dict {PACKAGE_PIN AR27 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[46] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ46" - IO_L21P_T3L_N4_AD8P_40 +set_property -dict {PACKAGE_PIN AN29 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[44] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ44" - IO_L20N_T3L_N3_AD1N_40 +set_property -dict {PACKAGE_PIN AM29 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[45] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ45" - IO_L20P_T3L_N2_AD1P_40 +set_property -dict {PACKAGE_PIN AT30 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[11]]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQS_C14" - IO_L19N_T3L_N1_DBC_AD9N_40 +set_property -dict {PACKAGE_PIN AR30 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[11]]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQS_T14" - IO_L19P_T3L_N0_DBC_AD9P_40 +set_property -dict {PACKAGE_PIN AV27 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[49] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ49" - IO_L17N_T2U_N9_AD10N_40 +set_property -dict {PACKAGE_PIN AU27 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[50] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ50" - IO_L17P_T2U_N8_AD10P_40 +set_property -dict {PACKAGE_PIN AU30 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[12]]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQS_C6" - IO_L16N_T2U_N7_QBC_AD3N_40 +set_property -dict {PACKAGE_PIN AU29 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[12]]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQS_T6" - IO_L16P_T2U_N6_QBC_AD3P_40 +set_property -dict {PACKAGE_PIN AT28 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[48] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ48" - IO_L18N_T2U_N11_AD2N_40 +set_property -dict {PACKAGE_PIN AT27 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[51] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ51" - IO_L18P_T2U_N10_AD2P_40 +set_property -dict {PACKAGE_PIN AV29 IOSTANDARD POD12_DCI } [get_ports 
c0_ddr4_dq[52] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ52" - IO_L15N_T2L_N5_AD11N_40 +set_property -dict {PACKAGE_PIN AV28 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[55] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ55" - IO_L15P_T2L_N4_AD11P_40 +set_property -dict {PACKAGE_PIN AY30 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[53] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ53" - IO_L14N_T2L_N3_GC_40 +set_property -dict {PACKAGE_PIN AW30 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[54] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ54" - IO_L14P_T2L_N2_GC_40 +set_property -dict {PACKAGE_PIN AY28 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[13]]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQS_C15" - IO_L13N_T2L_N1_GC_QBC_40 +set_property -dict {PACKAGE_PIN AY27 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[13]]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQS_T15" - IO_L13P_T2L_N0_GC_QBC_40 +set_property -dict {PACKAGE_PIN BA28 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[2] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ2" - IO_L11N_T1U_N9_GC_40 +set_property -dict {PACKAGE_PIN BA27 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[3] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ3" - IO_L11P_T1U_N8_GC_40 +set_property -dict {PACKAGE_PIN BB30 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[0] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQS_C0" - IO_L10N_T1U_N7_QBC_AD4N_40 +set_property -dict {PACKAGE_PIN BA30 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[0] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQS_T0" - IO_L10P_T1U_N6_QBC_AD4P_40 +set_property -dict {PACKAGE_PIN AW29 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[1] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ1" - IO_L12N_T1U_N11_GC_40 +set_property -dict {PACKAGE_PIN AW28 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[0] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ0" - IO_L12P_T1U_N10_GC_40 +set_property -dict {PACKAGE_PIN BC27 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[6] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ6" - 
IO_L9N_T1L_N5_AD12N_40 +set_property -dict {PACKAGE_PIN BB27 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[7] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ7" - IO_L9P_T1L_N4_AD12P_40 +set_property -dict {PACKAGE_PIN BB29 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[4] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ4" - IO_L8N_T1L_N3_AD5N_40 +set_property -dict {PACKAGE_PIN BA29 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[5] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ5" - IO_L8P_T1L_N2_AD5P_40 +set_property -dict {PACKAGE_PIN BC26 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[1] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQS_C9" - IO_L7N_T1L_N1_QBC_AD13N_40 +set_property -dict {PACKAGE_PIN BB26 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[1] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQS_T9" - IO_L7P_T1L_N0_QBC_AD13P_40 +set_property -dict {PACKAGE_PIN BF28 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[9] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ9" - IO_L5N_T0U_N9_AD14N_40 +set_property -dict {PACKAGE_PIN BE28 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[8] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ8" - IO_L5P_T0U_N8_AD14P_40 +set_property -dict {PACKAGE_PIN BD29 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[2] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQS_C1" - IO_L4N_T0U_N7_DBC_AD7N_40 +set_property -dict {PACKAGE_PIN BD28 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[2] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQS_T1" - IO_L4P_T0U_N6_DBC_AD7P_40 +set_property -dict {PACKAGE_PIN BE30 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[10] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ10" - IO_L6N_T0U_N11_AD6N_40 +set_property -dict {PACKAGE_PIN BD30 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[11] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ11" - IO_L6P_T0U_N10_AD6P_40 +set_property -dict {PACKAGE_PIN BF27 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[12] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ12" - IO_L3N_T0L_N5_AD15N_40 +set_property -dict {PACKAGE_PIN BE27 
IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[13] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ13" - IO_L3P_T0L_N4_AD15P_40 +set_property -dict {PACKAGE_PIN BF30 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[14] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ14" - IO_L2N_T0L_N3_40 +set_property -dict {PACKAGE_PIN BF29 IOSTANDARD POD12_DCI } [get_ports c0_ddr4_dq[15] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQ15" - IO_L2P_T0L_N2_40 +set_property -dict {PACKAGE_PIN BE26 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_c[3] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQS_C10" - IO_L1N_T0L_N1_DBC_40 +set_property -dict {PACKAGE_PIN BD26 IOSTANDARD DIFF_POD12_DCI } [get_ports c0_ddr4_dqs_t[3] ]; # Bank 40 VCCO - VCC1V2 Net "DDR4_C0_DQS_T10" - IO_L1P_T0L_N0_DBC_40 + +#### +### DDR4 c1 +#### + +set_property -dict {PACKAGE_PIN AN13 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[24] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ24" - IO_L23N_T3U_N9_67 +set_property -dict {PACKAGE_PIN AM13 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[26] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ26" - IO_L23P_T3U_N8_67 +set_property -dict {PACKAGE_PIN AT13 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[6] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQS_C3" - IO_L22N_T3U_N7_DBC_AD0N_67 +set_property -dict {PACKAGE_PIN AT14 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[6] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQS_T3" - IO_L22P_T3U_N6_DBC_AD0P_67 +set_property -dict {PACKAGE_PIN AR13 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[25] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ25" - IO_L24N_T3U_N11_67 +set_property -dict {PACKAGE_PIN AP13 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[27] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ27" - IO_L24P_T3U_N10_67 +set_property -dict {PACKAGE_PIN AM14 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[28] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ28" - IO_L21N_T3L_N5_AD8N_67 +set_property -dict {PACKAGE_PIN AL14 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[30] ]; # Bank 67 
VCCO - VCC1V2 Net "DDR4_C1_DQ30" - IO_L21P_T3L_N4_AD8P_67 +set_property -dict {PACKAGE_PIN AT15 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[31] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ31" - IO_L20N_T3L_N3_AD1N_67 +set_property -dict {PACKAGE_PIN AR15 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[29] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ29" - IO_L20P_T3L_N2_AD1P_67 +set_property -dict {PACKAGE_PIN AP14 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[7] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQS_C12" - IO_L19N_T3L_N1_DBC_AD9N_67 +set_property -dict {PACKAGE_PIN AN14 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[7] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQS_T12" - IO_L19P_T3L_N0_DBC_AD9P_67 +set_property -dict {PACKAGE_PIN AV13 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[9] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ9" - IO_L17N_T2U_N9_AD10N_67 +set_property -dict {PACKAGE_PIN AU13 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[8] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ8" - IO_L17P_T2U_N8_AD10P_67 +set_property -dict {PACKAGE_PIN AY15 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[2] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQS_C1" - IO_L16N_T2U_N7_QBC_AD3N_67 +set_property -dict {PACKAGE_PIN AW15 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[2] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQS_T1" - IO_L16P_T2U_N6_QBC_AD3P_67 +set_property -dict {PACKAGE_PIN AW13 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[10] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ10" - IO_L18N_T2U_N11_AD2N_67 +set_property -dict {PACKAGE_PIN AW14 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[11] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ11" - IO_L18P_T2U_N10_AD2P_67 +set_property -dict {PACKAGE_PIN AV14 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[14] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ14" - IO_L15N_T2L_N5_AD11N_67 +set_property -dict {PACKAGE_PIN AU14 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[12] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ12" - 
IO_L15P_T2L_N4_AD11P_67 +set_property -dict {PACKAGE_PIN BA11 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[15] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ15" - IO_L14N_T2L_N3_GC_67 +set_property -dict {PACKAGE_PIN AY11 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[13] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ13" - IO_L14P_T2L_N2_GC_67 +set_property -dict {PACKAGE_PIN AY12 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[3] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQS_C10" - IO_L13N_T2L_N1_GC_QBC_67 +set_property -dict {PACKAGE_PIN AY13 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[3] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQS_T10" - IO_L13P_T2L_N0_GC_QBC_67 +set_property -dict {PACKAGE_PIN BA13 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[18] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ18" - IO_L11N_T1U_N9_GC_67 +set_property -dict {PACKAGE_PIN BA14 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[19] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ19" - IO_L11P_T1U_N8_GC_67 +set_property -dict {PACKAGE_PIN BB10 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[4] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQS_C2" - IO_L10N_T1U_N7_QBC_AD4N_67 +set_property -dict {PACKAGE_PIN BB11 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[4] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQS_T2" - IO_L10P_T1U_N6_QBC_AD4P_67 +set_property -dict {PACKAGE_PIN BB12 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[17] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ17" - IO_L12N_T1U_N11_GC_67 +set_property -dict {PACKAGE_PIN BA12 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[16] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ16" - IO_L12P_T1U_N10_GC_67 +set_property -dict {PACKAGE_PIN BA7 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[22] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ22" - IO_L9N_T1L_N5_AD12N_67 +set_property -dict {PACKAGE_PIN BA8 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[23] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ23" - IO_L9P_T1L_N4_AD12P_67 +set_property -dict {PACKAGE_PIN BC9 
IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[20] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ20" - IO_L8N_T1L_N3_AD5N_67 +set_property -dict {PACKAGE_PIN BB9 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[21] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ21" - IO_L8P_T1L_N2_AD5P_67 +set_property -dict {PACKAGE_PIN BA9 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[5] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQS_C11" - IO_L7N_T1L_N1_QBC_AD13N_67 +set_property -dict {PACKAGE_PIN BA10 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[5] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQS_T11" - IO_L7P_T1L_N0_QBC_AD13P_67 +set_property -dict {PACKAGE_PIN BD7 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[1] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ1" - IO_L5N_T0U_N9_AD14N_67 +set_property -dict {PACKAGE_PIN BC7 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[2] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ2" - IO_L5P_T0U_N8_AD14P_67 +set_property -dict {PACKAGE_PIN BF9 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[0] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQS_C0" - IO_L4N_T0U_N7_DBC_AD7N_67 +set_property -dict {PACKAGE_PIN BF10 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[0] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQS_T0" - IO_L4P_T0U_N6_DBC_AD7P_67 +set_property -dict {PACKAGE_PIN BD8 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[3] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ3" - IO_L6N_T0U_N11_AD6N_67 +set_property -dict {PACKAGE_PIN BD9 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[0] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ0" - IO_L6P_T0U_N10_AD6P_67 +set_property -dict {PACKAGE_PIN BF7 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[7] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ7" - IO_L3N_T0L_N5_AD15N_67 +set_property -dict {PACKAGE_PIN BE7 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[6] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ6" - IO_L3P_T0L_N4_AD15P_67 +set_property -dict {PACKAGE_PIN BE10 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[5] ]; # Bank 67 VCCO - VCC1V2 
Net "DDR4_C1_DQ5" - IO_L2N_T0L_N3_67 +set_property -dict {PACKAGE_PIN BD10 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[4] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQ4" - IO_L2P_T0L_N2_67 +set_property -dict {PACKAGE_PIN BF8 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[1] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQS_C9" - IO_L1N_T0L_N1_DBC_67 +set_property -dict {PACKAGE_PIN BE8 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[1] ]; # Bank 67 VCCO - VCC1V2 Net "DDR4_C1_DQS_T9" - IO_L1P_T0L_N0_DBC_67 +set_property -dict {PACKAGE_PIN AM15 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[56] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ56" - IO_L23N_T3U_N9_66 +set_property -dict {PACKAGE_PIN AL15 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[57] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ57" - IO_L23P_T3U_N8_66 +set_property -dict {PACKAGE_PIN AR16 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[14]]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQS_C7" - IO_L22N_T3U_N7_DBC_AD0N_66 +set_property -dict {PACKAGE_PIN AP16 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[14]]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQS_T7" - IO_L22P_T3U_N6_DBC_AD0P_66 +#set_property -dict {PACKAGE_PIN AN18 IOSTANDARD LVCMOS12 } [get_ports c1_ddr4_event_n ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_EVENT_B" - IO_T3U_N12_66 +set_property -dict {PACKAGE_PIN AN16 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[59] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ59" - IO_L24N_T3U_N11_66 +set_property -dict {PACKAGE_PIN AN17 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[58] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ58" - IO_L24P_T3U_N10_66 +set_property -dict {PACKAGE_PIN AL16 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[63] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ63" - IO_L21N_T3L_N5_AD8N_66 +set_property -dict {PACKAGE_PIN AL17 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[62] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ62" - IO_L21P_T3L_N4_AD8P_66 +set_property -dict {PACKAGE_PIN AR18 IOSTANDARD POD12_DCI 
} [get_ports c1_ddr4_dq[60] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ60" - IO_L20N_T3L_N3_AD1N_66 +set_property -dict {PACKAGE_PIN AP18 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[61] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ61" - IO_L20P_T3L_N2_AD1P_66 +set_property -dict {PACKAGE_PIN AM16 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[15]]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQS_C16" - IO_L19N_T3L_N1_DBC_AD9N_66 +set_property -dict {PACKAGE_PIN AM17 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[15]]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQS_T16" - IO_L19P_T3L_N0_DBC_AD9P_66 +set_property -dict {PACKAGE_PIN AU16 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[50] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ50" - IO_L17N_T2U_N9_AD10N_66 +set_property -dict {PACKAGE_PIN AU17 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[51] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ51" - IO_L17P_T2U_N8_AD10P_66 +set_property -dict {PACKAGE_PIN AW18 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[12]]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQS_C6" - IO_L16N_T2U_N7_QBC_AD3N_66 +set_property -dict {PACKAGE_PIN AV18 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[12]]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQS_T6" - IO_L16P_T2U_N6_QBC_AD3P_66 +set_property -dict {PACKAGE_PIN AR17 IOSTANDARD LVCMOS12 } [get_ports c1_ddr4_reset_n ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_RESET_N" - IO_T2U_N12_66 +set_property -dict {PACKAGE_PIN AV16 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[48] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ48" - IO_L18N_T2U_N11_AD2N_66 +set_property -dict {PACKAGE_PIN AV17 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[49] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ49" - IO_L18P_T2U_N10_AD2P_66 +set_property -dict {PACKAGE_PIN AT17 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[55] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ55" - IO_L15N_T2L_N5_AD11N_66 +set_property -dict {PACKAGE_PIN AT18 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[54] ]; # Bank 66 VCCO - 
VCC1V2 Net "DDR4_C1_DQ54" - IO_L15P_T2L_N4_AD11P_66 +set_property -dict {PACKAGE_PIN BB16 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[53] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ53" - IO_L14N_T2L_N3_GC_66 +set_property -dict {PACKAGE_PIN BB17 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[52] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ52" - IO_L14P_T2L_N2_GC_66 +set_property -dict {PACKAGE_PIN AY16 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[13]]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQS_C15" - IO_L13N_T2L_N1_GC_QBC_66 +set_property -dict {PACKAGE_PIN AW16 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[13]]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQS_T15" - IO_L13P_T2L_N0_GC_QBC_66 +set_property -dict {PACKAGE_PIN AY17 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[40] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ40" - IO_L11N_T1U_N9_GC_66 +set_property -dict {PACKAGE_PIN AY18 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[42] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ42" - IO_L11P_T1U_N8_GC_66 +set_property -dict {PACKAGE_PIN BC12 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[10]]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQS_C5" - IO_L10N_T1U_N7_QBC_AD4N_66 +set_property -dict {PACKAGE_PIN BC13 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[10]]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQS_T5" - IO_L10P_T1U_N6_QBC_AD4P_66 +set_property -dict {PACKAGE_PIN BA17 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[41] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ41" - IO_L12N_T1U_N11_GC_66 +set_property -dict {PACKAGE_PIN BA18 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[43] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ43" - IO_L12P_T1U_N10_GC_66 +set_property -dict {PACKAGE_PIN BB15 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[45] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ45" - IO_L9N_T1L_N5_AD12N_66 +set_property -dict {PACKAGE_PIN BA15 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[44] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ44" - IO_L9P_T1L_N4_AD12P_66 
+set_property -dict {PACKAGE_PIN BD11 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[47] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ47" - IO_L8N_T1L_N3_AD5N_66 +set_property -dict {PACKAGE_PIN BC11 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[46] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ46" - IO_L8P_T1L_N2_AD5P_66 +set_property -dict {PACKAGE_PIN BC14 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[11]]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQS_C14" - IO_L7N_T1L_N1_QBC_AD13N_66 +set_property -dict {PACKAGE_PIN BB14 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[11]]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQS_T14" - IO_L7P_T1L_N0_QBC_AD13P_66 +set_property -dict {PACKAGE_PIN BD13 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[35] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ35" - IO_L5N_T0U_N9_AD14N_66 +set_property -dict {PACKAGE_PIN BD14 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[33] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ33" - IO_L5P_T0U_N8_AD14P_66 +set_property -dict {PACKAGE_PIN BE11 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[8] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQS_C4" - IO_L4N_T0U_N7_DBC_AD7N_66 +set_property -dict {PACKAGE_PIN BE12 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[8] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQS_T4" - IO_L4P_T0U_N6_DBC_AD7P_66 +set_property -dict {PACKAGE_PIN BF12 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[34] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ34" - IO_L6N_T0U_N11_AD6N_66 +set_property -dict {PACKAGE_PIN BE13 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[32] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ32" - IO_L6P_T0U_N10_AD6P_66 +set_property -dict {PACKAGE_PIN BD15 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[36] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ36" - IO_L3N_T0L_N5_AD15N_66 +set_property -dict {PACKAGE_PIN BD16 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[37] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ37" - IO_L3P_T0L_N4_AD15P_66 +set_property -dict {PACKAGE_PIN BF13 IOSTANDARD 
POD12_DCI } [get_ports c1_ddr4_dq[39] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ39" - IO_L2N_T0L_N3_66 +set_property -dict {PACKAGE_PIN BF14 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[38] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQ38" - IO_L2P_T0L_N2_66 +set_property -dict {PACKAGE_PIN BF15 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[9] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQS_C13" - IO_L1N_T0L_N1_DBC_66 +set_property -dict {PACKAGE_PIN BE15 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[9] ]; # Bank 66 VCCO - VCC1V2 Net "DDR4_C1_DQS_T13" - IO_L1P_T0L_N0_DBC_66 +set_property -dict {PACKAGE_PIN AM25 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[15] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR15" - IO_L22N_T3U_N7_DBC_AD0N_D05_65 +set_property -dict {PACKAGE_PIN AL25 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[14] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR14" - IO_L22P_T3U_N6_DBC_AD0P_D04_65 +set_property -dict {PACKAGE_PIN AP26 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_ba[1] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_BA1" - IO_T3U_N12_PERSTN0_65 +set_property -dict {PACKAGE_PIN AN26 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[3] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR3" - IO_L24N_T3U_N11_DOUT_CSO_B_65 +set_property -dict {PACKAGE_PIN AM26 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[10] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR10" - IO_L24P_T3U_N10_EMCCLK_65 +#set_property -dict {PACKAGE_PIN AP24 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_odt[1] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ODT1" - IO_L21N_T3L_N5_AD8N_D07_65 +#set_property -dict {PACKAGE_PIN AP23 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_cs_n[3] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_CS_B3" - IO_L21P_T3L_N4_AD8P_D06_65 +#set_property -dict {PACKAGE_PIN AM24 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[17] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR17" - IO_L20N_T3L_N3_AD1N_D09_65 +set_property -dict {PACKAGE_PIN AL24 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[13] ]; 
# Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR13" - IO_L20P_T3L_N2_AD1P_D08_65 +set_property -dict {PACKAGE_PIN AN24 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[0] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR0" - IO_L19N_T3L_N1_DBC_AD9N_D11_65 +set_property -dict {PACKAGE_PIN AN23 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[16] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR16" - IO_L19P_T3L_N0_DBC_AD9P_D10_65 +#set_property -dict {PACKAGE_PIN AV26 IOSTANDARD DIFF_SSTL12_DCI} [get_ports c1_ddr4_ck_c[1] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_CK_C1" - IO_L17N_T2U_N9_AD10N_D15_65 +#set_property -dict {PACKAGE_PIN AU26 IOSTANDARD DIFF_SSTL12_DCI} [get_ports c1_ddr4_ck_t[1] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_CK_T1" - IO_L17P_T2U_N8_AD10P_D14_65 +set_property -dict {PACKAGE_PIN AT23 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_par ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_PAR" - IO_L16N_T2U_N7_QBC_AD3N_A01_D17_65 +#set_property -dict {PACKAGE_PIN AR23 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_cs_n[2] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_CS_B2" - IO_L16P_T2U_N6_QBC_AD3P_A00_D16_65 +#set_property -dict {PACKAGE_PIN AP25 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_cs_n[1] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_CS_B1" - IO_T2U_N12_CSI_ADV_B_65 +set_property -dict {PACKAGE_PIN AU24 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_ba[0] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_BA0" - IO_L18N_T2U_N11_AD2N_D13_65 +set_property -dict {PACKAGE_PIN AT24 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[1] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR1" - IO_L18P_T2U_N10_AD2P_D12_65 +set_property -dict {PACKAGE_PIN AU25 IOSTANDARD DIFF_SSTL12_DCI} [get_ports c1_ddr4_ck_c[0] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_CK_C0" - IO_L15N_T2L_N5_AD11N_A03_D19_65 +set_property -dict {PACKAGE_PIN AT25 IOSTANDARD DIFF_SSTL12_DCI} [get_ports c1_ddr4_ck_t[0] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_CK_T0" - IO_L15P_T2L_N4_AD11P_A02_D18_65 +set_property -dict {PACKAGE_PIN AV24 IOSTANDARD SSTL12_DCI } 
[get_ports c1_ddr4_adr[6] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR6" - IO_L14N_T2L_N3_GC_A05_D21_65 +set_property -dict {PACKAGE_PIN AV23 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_cs_n[0] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_CS_B0" - IO_L14P_T2L_N2_GC_A04_D20_65 +set_property -dict {PACKAGE_PIN AW26 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_bg[1] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_BG1" - IO_L13N_T2L_N1_GC_QBC_A07_D23_65 +set_property -dict {PACKAGE_PIN AW25 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_act_n ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ACT_B" - IO_L13P_T2L_N0_GC_QBC_A06_D22_65 +set_property -dict {PACKAGE_PIN AY26 IOSTANDARD LVCMOS12 } [get_ports c1_ddr4_alert_n ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ALERT_B" - IO_L11N_T1U_N9_GC_A11_D27_65 +set_property -dict {PACKAGE_PIN AY25 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[8] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR8" - IO_L11P_T1U_N8_GC_A10_D26_65 +set_property -dict {PACKAGE_PIN AY23 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[5] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR5" - IO_L10N_T1U_N7_QBC_AD4N_A13_D29_65 +set_property -dict {PACKAGE_PIN AY22 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[4] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR4" - IO_L10P_T1U_N6_QBC_AD4P_A12_D28_65 +set_property -dict {PACKAGE_PIN BA25 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[11] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR11" - IO_T1U_N12_SMBALERT_65 +set_property -dict {PACKAGE_PIN AW24 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[2] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR2" - IO_L12N_T1U_N11_GC_A09_D25_65 +set_property -dict {PACKAGE_PIN AW23 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_odt[0] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ODT0" - IO_L12P_T1U_N10_GC_A08_D24_65 +set_property -dict {PACKAGE_PIN BB25 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_cke[0] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_CKE0" - IO_L9N_T1L_N5_AD12N_A15_D31_65 +#set_property -dict {PACKAGE_PIN BB24 IOSTANDARD 
SSTL12_DCI } [get_ports c1_ddr4_cke[1] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_CKE1" - IO_L9P_T1L_N4_AD12P_A14_D30_65 +set_property -dict {PACKAGE_PIN BA23 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[9] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR9" - IO_L8N_T1L_N3_AD5N_A17_65 +set_property -dict {PACKAGE_PIN BA22 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[7] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR7" - IO_L8P_T1L_N2_AD5P_A16_65 +set_property -dict {PACKAGE_PIN BC22 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_bg[0] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_BG0" - IO_L7N_T1L_N1_QBC_AD13N_A19_65 +set_property -dict {PACKAGE_PIN BB22 IOSTANDARD SSTL12_DCI } [get_ports c1_ddr4_adr[12] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_ADR12" - IO_L7P_T1L_N0_QBC_AD13P_A18_65 +set_property -dict {PACKAGE_PIN BF25 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[64] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_DQ64" - IO_L5N_T0U_N9_AD14N_A23_65 +set_property -dict {PACKAGE_PIN BF24 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[65] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_DQ65" - IO_L5P_T0U_N8_AD14P_A22_65 +set_property -dict {PACKAGE_PIN BD24 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[16]]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_DQS_C8" - IO_L4N_T0U_N7_DBC_AD7N_A25_65 +set_property -dict {PACKAGE_PIN BC24 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[16]]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_DQS_T8" - IO_L4P_T0U_N6_DBC_AD7P_A24_65 +set_property -dict {PACKAGE_PIN BE25 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[67] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_DQ67" - IO_L6N_T0U_N11_AD6N_A21_65 +set_property -dict {PACKAGE_PIN BD25 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[66] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_DQ66" - IO_L6P_T0U_N10_AD6P_A20_65 +set_property -dict {PACKAGE_PIN BF23 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[70] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_DQ70" - IO_L3N_T0L_N5_AD15N_A27_65 +set_property -dict {PACKAGE_PIN BE23 IOSTANDARD POD12_DCI } 
[get_ports c1_ddr4_dq[71] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_DQ71" - IO_L3P_T0L_N4_AD15P_A26_65 +set_property -dict {PACKAGE_PIN BD23 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[68] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_DQ68" - IO_L2N_T0L_N3_FWE_FCS2_B_65 +set_property -dict {PACKAGE_PIN BC23 IOSTANDARD POD12_DCI } [get_ports c1_ddr4_dq[69] ]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_DQ69" - IO_L2P_T0L_N2_FOE_B_65 +set_property -dict {PACKAGE_PIN BF22 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_c[17]]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_DQS_C17" - IO_L1N_T0L_N1_DBC_RS1_65 +set_property -dict {PACKAGE_PIN BE22 IOSTANDARD DIFF_POD12_DCI } [get_ports c1_ddr4_dqs_t[17]]; # Bank 65 VCCO - VCC1V2 Net "DDR4_C1_DQS_T17" - IO_L1P_T0L_N0_DBC_RS0_65 + +#### +### DDR4 c2 +#### + +set_property -dict {PACKAGE_PIN C26 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[25] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ25" - IO_L23N_T3U_N9_48 +set_property -dict {PACKAGE_PIN D26 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[24] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ24" - IO_L23P_T3U_N8_48 +set_property -dict {PACKAGE_PIN A28 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[6] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQS_C3" - IO_L22N_T3U_N7_DBC_AD0N_48 +set_property -dict {PACKAGE_PIN A27 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[6] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQS_T3" - IO_L22P_T3U_N6_DBC_AD0P_48 +set_property -dict {PACKAGE_PIN B27 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[26] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ26" - IO_L24N_T3U_N11_48 +set_property -dict {PACKAGE_PIN B26 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[27] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ27" - IO_L24P_T3U_N10_48 +set_property -dict {PACKAGE_PIN C28 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[31] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ31" - IO_L21N_T3L_N5_AD8N_48 +set_property -dict {PACKAGE_PIN C27 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[30] ]; # Bank 48 
VCCO - VCC1V2 Net "DDR4_C2_DQ30" - IO_L21P_T3L_N4_AD8P_48 +set_property -dict {PACKAGE_PIN A30 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[29] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ29" - IO_L20N_T3L_N3_AD1N_48 +set_property -dict {PACKAGE_PIN A29 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[28] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ28" - IO_L20P_T3L_N2_AD1P_48 +set_property -dict {PACKAGE_PIN B29 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[7] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQS_C12" - IO_L19N_T3L_N1_DBC_AD9N_48 +set_property -dict {PACKAGE_PIN C29 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[7] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQS_T12" - IO_L19P_T3L_N0_DBC_AD9P_48 +set_property -dict {PACKAGE_PIN E27 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[17] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ17" - IO_L17N_T2U_N9_AD10N_48 +set_property -dict {PACKAGE_PIN F27 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[16] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ16" - IO_L17P_T2U_N8_AD10P_48 +set_property -dict {PACKAGE_PIN D30 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[4] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQS_C2" - IO_L16N_T2U_N7_QBC_AD3N_48 +set_property -dict {PACKAGE_PIN D29 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[4] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQS_T2" - IO_L16P_T2U_N6_QBC_AD3P_48 +set_property -dict {PACKAGE_PIN D28 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[19] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ19" - IO_L18N_T2U_N11_AD2N_48 +set_property -dict {PACKAGE_PIN E28 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[18] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ18" - IO_L18P_T2U_N10_AD2P_48 +set_property -dict {PACKAGE_PIN F29 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[23] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ23" - IO_L15N_T2L_N5_AD11N_48 +set_property -dict {PACKAGE_PIN F28 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[22] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ22" - 
IO_L15P_T2L_N4_AD11P_48 +set_property -dict {PACKAGE_PIN G27 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[20] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ20" - IO_L14N_T2L_N3_GC_48 +set_property -dict {PACKAGE_PIN G26 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[21] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ21" - IO_L14P_T2L_N2_GC_48 +set_property -dict {PACKAGE_PIN H27 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[5] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQS_C11" - IO_L13N_T2L_N1_GC_QBC_48 +set_property -dict {PACKAGE_PIN H26 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[5] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQS_T11" - IO_L13P_T2L_N0_GC_QBC_48 +set_property -dict {PACKAGE_PIN H28 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[10] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ10" - IO_L11N_T1U_N9_GC_48 +set_property -dict {PACKAGE_PIN J28 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[8] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ8" - IO_L11P_T1U_N8_GC_48 +set_property -dict {PACKAGE_PIN J26 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[2] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQS_C1" - IO_L10N_T1U_N7_QBC_AD4N_48 +set_property -dict {PACKAGE_PIN J25 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[2] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQS_T1" - IO_L10P_T1U_N6_QBC_AD4P_48 +set_property -dict {PACKAGE_PIN G29 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[11] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ11" - IO_L12N_T1U_N11_GC_48 +set_property -dict {PACKAGE_PIN H29 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[9] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ9" - IO_L12P_T1U_N10_GC_48 +set_property -dict {PACKAGE_PIN K27 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[15] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ15" - IO_L9N_T1L_N5_AD12N_48 +set_property -dict {PACKAGE_PIN L27 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[13] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ13" - IO_L9P_T1L_N4_AD12P_48 +set_property -dict {PACKAGE_PIN K26 IOSTANDARD 
POD12_DCI } [get_ports c2_ddr4_dq[14] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ14" - IO_L8N_T1L_N3_AD5N_48 +set_property -dict {PACKAGE_PIN K25 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[12] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ12" - IO_L8P_T1L_N2_AD5P_48 +set_property -dict {PACKAGE_PIN L28 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[3] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQS_C10" - IO_L7N_T1L_N1_QBC_AD13N_48 +set_property -dict {PACKAGE_PIN M27 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[3] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQS_T10" - IO_L7P_T1L_N0_QBC_AD13P_48 +set_property -dict {PACKAGE_PIN P25 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[1] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ1" - IO_L5N_T0U_N9_AD14N_48 +set_property -dict {PACKAGE_PIN R25 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[0] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ0" - IO_L5P_T0U_N8_AD14P_48 +set_property -dict {PACKAGE_PIN M26 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[0] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQS_C0" - IO_L4N_T0U_N7_DBC_AD7N_48 +set_property -dict {PACKAGE_PIN N26 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[0] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQS_T0" - IO_L4P_T0U_N6_DBC_AD7P_48 +set_property -dict {PACKAGE_PIN L25 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[3] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ3" - IO_L6N_T0U_N11_AD6N_48 +set_property -dict {PACKAGE_PIN M25 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[2] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ2" - IO_L6P_T0U_N10_AD6P_48 +set_property -dict {PACKAGE_PIN P26 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[4] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ4" - IO_L3N_T0L_N5_AD15N_48 +set_property -dict {PACKAGE_PIN R26 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[5] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ5" - IO_L3P_T0L_N4_AD15P_48 +set_property -dict {PACKAGE_PIN N28 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[7] ]; # Bank 48 VCCO - VCC1V2 Net 
"DDR4_C2_DQ7" - IO_L2N_T0L_N3_48 +set_property -dict {PACKAGE_PIN N27 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[6] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQ6" - IO_L2P_T0L_N2_48 +set_property -dict {PACKAGE_PIN P28 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[1] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQS_C9" - IO_L1N_T0L_N1_DBC_48 +set_property -dict {PACKAGE_PIN R28 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[1] ]; # Bank 48 VCCO - VCC1V2 Net "DDR4_C2_DQS_T9" - IO_L1P_T0L_N0_DBC_48 +set_property -dict {PACKAGE_PIN B32 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[7] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR7" - IO_L23N_T3U_N9_47 +set_property -dict {PACKAGE_PIN B31 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_act_n ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ACT_B" - IO_L23P_T3U_N8_47 +set_property -dict {PACKAGE_PIN A35 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[14] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR14" - IO_L22N_T3U_N7_DBC_AD0N_47 +set_property -dict {PACKAGE_PIN A34 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[10] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR10" - IO_L22P_T3U_N6_DBC_AD0P_47 +set_property -dict {PACKAGE_PIN C31 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_bg[0] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_BG0" - IO_T3U_N12_47 +set_property -dict {PACKAGE_PIN A33 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[1] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR1" - IO_L24N_T3U_N11_47 +set_property -dict {PACKAGE_PIN A32 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[8] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR8" - IO_L24P_T3U_N10_47 +set_property -dict {PACKAGE_PIN C33 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[2] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR2" - IO_L21N_T3L_N5_AD8N_47 +set_property -dict {PACKAGE_PIN C32 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[6] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR6" - IO_L21P_T3L_N4_AD8P_47 +set_property -dict {PACKAGE_PIN B36 IOSTANDARD SSTL12_DCI } [get_ports 
c2_ddr4_ba[1] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_BA1" - IO_L20N_T3L_N3_AD1N_47 +set_property -dict {PACKAGE_PIN B35 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_cs_n[0] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_CS_B0" - IO_L20P_T3L_N2_AD1P_47 +set_property -dict {PACKAGE_PIN B34 IOSTANDARD DIFF_SSTL12_DCI} [get_ports c2_ddr4_ck_c[0] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_CK_C0" - IO_L19N_T3L_N1_DBC_AD9N_47 +set_property -dict {PACKAGE_PIN C34 IOSTANDARD DIFF_SSTL12_DCI} [get_ports c2_ddr4_ck_t[0] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_CK_T0" - IO_L19P_T3L_N0_DBC_AD9P_47 +set_property -dict {PACKAGE_PIN J30 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_bg[1] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_BG1" - IO_L17N_T2U_N9_AD10N_47 +set_property -dict {PACKAGE_PIN J29 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[3] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR3" - IO_L17P_T2U_N8_AD10P_47 +#set_property -dict {PACKAGE_PIN D35 IOSTANDARD DIFF_SSTL12_DCI} [get_ports c2_ddr4_ck_c[1] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_CK_C1" - IO_L16N_T2U_N7_QBC_AD3N_47 +#set_property -dict {PACKAGE_PIN D34 IOSTANDARD DIFF_SSTL12_DCI} [get_ports c2_ddr4_ck_t[1] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_CK_T1" - IO_L16P_T2U_N6_QBC_AD3P_47 +#set_property -dict {PACKAGE_PIN E30 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_cke[1] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_CKE1" - IO_T2U_N12_47 +set_property -dict {PACKAGE_PIN D31 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[9] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR9" - IO_L18N_T2U_N11_AD2N_47 +set_property -dict {PACKAGE_PIN E31 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[11] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR11" - IO_L18P_T2U_N10_AD2P_47 +#set_property -dict {PACKAGE_PIN K31 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_cs_n[3] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_CS_B3" - IO_L15N_T2L_N5_AD11N_47 +set_property -dict {PACKAGE_PIN K30 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[16] ]; # Bank 47 VCCO - VCC1V2 Net 
"DDR4_C2_ADR16" - IO_L15P_T2L_N4_AD11P_47 +set_property -dict {PACKAGE_PIN D33 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_ba[0] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_BA0" - IO_L14N_T2L_N3_GC_47 +set_property -dict {PACKAGE_PIN E33 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_odt[0] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ODT0" - IO_L14P_T2L_N2_GC_47 +set_property -dict {PACKAGE_PIN F30 IOSTANDARD LVCMOS12 } [get_ports c2_ddr4_alert_n ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ALERT_B" - IO_L11N_T1U_N9_GC_47 +set_property -dict {PACKAGE_PIN G30 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_cke[0] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_CKE0" - IO_L11P_T1U_N8_GC_47 +set_property -dict {PACKAGE_PIN G32 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[15] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR15" - IO_L10N_T1U_N7_QBC_AD4N_47 +set_property -dict {PACKAGE_PIN G31 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[5] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR5" - IO_L10P_T1U_N6_QBC_AD4P_47 +#set_property -dict {PACKAGE_PIN J31 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_cs_n[1] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_CS_B1" - IO_T1U_N12_47 +#set_property -dict {PACKAGE_PIN F34 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_odt[1] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ODT1" - IO_L12N_T1U_N11_GC_47 +set_property -dict {PACKAGE_PIN F33 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[13] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR13" - IO_L12P_T1U_N10_GC_47 +#set_property -dict {PACKAGE_PIN L30 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_cs_n[2] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_CS_B2" - IO_L9N_T1L_N5_AD12N_47 +set_property -dict {PACKAGE_PIN L29 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[0] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR0" - IO_L9P_T1L_N4_AD12P_47 +#set_property -dict {PACKAGE_PIN H32 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[17] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR17" - IO_L8N_T1L_N3_AD5N_47 +set_property -dict {PACKAGE_PIN H31 IOSTANDARD SSTL12_DCI 
} [get_ports c2_ddr4_adr[4] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR4" - IO_L8P_T1L_N2_AD5P_47 +set_property -dict {PACKAGE_PIN M30 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_adr[12] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_ADR12" - IO_L7N_T1L_N1_QBC_AD13N_47 +set_property -dict {PACKAGE_PIN M29 IOSTANDARD SSTL12_DCI } [get_ports c2_ddr4_par ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_PAR" - IO_L7P_T1L_N0_QBC_AD13P_47 +set_property -dict {PACKAGE_PIN P30 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[40] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_DQ40" - IO_L5N_T0U_N9_AD14N_47 +set_property -dict {PACKAGE_PIN R30 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[41] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_DQ41" - IO_L5P_T0U_N8_AD14P_47 +set_property -dict {PACKAGE_PIN M31 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[10]]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_DQS_C5" - IO_L4N_T0U_N7_DBC_AD7N_47 +set_property -dict {PACKAGE_PIN N31 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[10]]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_DQS_T5" - IO_L4P_T0U_N6_DBC_AD7P_47 +set_property -dict {PACKAGE_PIN N29 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[43] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_DQ43" - IO_L6N_T0U_N11_AD6N_47 +set_property -dict {PACKAGE_PIN P29 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[42] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_DQ42" - IO_L6P_T0U_N10_AD6P_47 +set_property -dict {PACKAGE_PIN N32 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[47] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_DQ47" - IO_L3N_T0L_N5_AD15N_47 +set_property -dict {PACKAGE_PIN P31 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[46] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_DQ46" - IO_L3P_T0L_N4_AD15P_47 +set_property -dict {PACKAGE_PIN L32 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[44] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_DQ44" - IO_L2N_T0L_N3_47 +set_property -dict {PACKAGE_PIN M32 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[45] ]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_DQ45" - 
IO_L2P_T0L_N2_47 +set_property -dict {PACKAGE_PIN R31 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[11]]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_DQS_C14" - IO_L1N_T0L_N1_DBC_47 +set_property -dict {PACKAGE_PIN T30 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[11]]; # Bank 47 VCCO - VCC1V2 Net "DDR4_C2_DQS_T14" - IO_L1P_T0L_N0_DBC_47 +set_property -dict {PACKAGE_PIN B37 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[65] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ65" - IO_L23N_T3U_N9_46 +set_property -dict {PACKAGE_PIN C36 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[64] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ64" - IO_L23P_T3U_N8_46 +set_property -dict {PACKAGE_PIN A39 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[16]]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQS_C8" - IO_L22N_T3U_N7_DBC_AD0N_46 +set_property -dict {PACKAGE_PIN B39 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[16]]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQS_T8" - IO_L22P_T3U_N6_DBC_AD0P_46 +#set_property -dict {PACKAGE_PIN D40 IOSTANDARD LVCMOS12 } [get_ports c2_ddr4_event_n ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_EVENT_B" - IO_T3U_N12_46 +set_property -dict {PACKAGE_PIN A38 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[67] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ67" - IO_L24N_T3U_N11_46 +set_property -dict {PACKAGE_PIN A37 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[66] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ66" - IO_L24P_T3U_N10_46 +set_property -dict {PACKAGE_PIN C39 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[68] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ68" - IO_L21N_T3L_N5_AD8N_46 +set_property -dict {PACKAGE_PIN D39 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[69] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ69" - IO_L21P_T3L_N4_AD8P_46 +set_property -dict {PACKAGE_PIN A40 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[70] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ70" - IO_L20N_T3L_N3_AD1N_46 +set_property -dict {PACKAGE_PIN B40 IOSTANDARD POD12_DCI } [get_ports 
c2_ddr4_dq[71] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ71" - IO_L20P_T3L_N2_AD1P_46 +set_property -dict {PACKAGE_PIN C38 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[17]]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQS_C17" - IO_L19N_T3L_N1_DBC_AD9N_46 +set_property -dict {PACKAGE_PIN C37 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[17]]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQS_T17" - IO_L19P_T3L_N0_DBC_AD9P_46 +set_property -dict {PACKAGE_PIN E35 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[35] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ35" - IO_L17N_T2U_N9_AD10N_46 +set_property -dict {PACKAGE_PIN F35 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[32] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ32" - IO_L17P_T2U_N8_AD10P_46 +set_property -dict {PACKAGE_PIN E40 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[8] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQS_C4" - IO_L16N_T2U_N7_QBC_AD3N_46 +set_property -dict {PACKAGE_PIN E39 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[8] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQS_T4" - IO_L16P_T2U_N6_QBC_AD3P_46 +set_property -dict {PACKAGE_PIN D36 IOSTANDARD LVCMOS12 } [get_ports c2_ddr4_reset_n ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_RESET_N" - IO_T2U_N12_46 +set_property -dict {PACKAGE_PIN D38 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[34] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ34" - IO_L18N_T2U_N11_AD2N_46 +set_property -dict {PACKAGE_PIN E38 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[33] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ33" - IO_L18P_T2U_N10_AD2P_46 +set_property -dict {PACKAGE_PIN F38 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[38] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ38" - IO_L15N_T2L_N5_AD11N_46 +set_property -dict {PACKAGE_PIN G38 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[39] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ39" - IO_L15P_T2L_N4_AD11P_46 +set_property -dict {PACKAGE_PIN E37 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[37] ]; # Bank 46 VCCO - VCC1V2 Net
"DDR4_C2_DQ37" - IO_L14N_T2L_N3_GC_46 +set_property -dict {PACKAGE_PIN E36 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[36] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ36" - IO_L14P_T2L_N2_GC_46 +set_property -dict {PACKAGE_PIN F37 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[9] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQS_C13" - IO_L13N_T2L_N1_GC_QBC_46 +set_property -dict {PACKAGE_PIN G37 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[9] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQS_T13" - IO_L13P_T2L_N0_GC_QBC_46 +set_property -dict {PACKAGE_PIN G36 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[57] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ57" - IO_L11N_T1U_N9_GC_46 +set_property -dict {PACKAGE_PIN H36 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[56] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ56" - IO_L11P_T1U_N8_GC_46 +set_property -dict {PACKAGE_PIN H38 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[14]]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQS_C7" - IO_L10N_T1U_N7_QBC_AD4N_46 +set_property -dict {PACKAGE_PIN J38 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[14]]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQS_T7" - IO_L10P_T1U_N6_QBC_AD4P_46 +set_property -dict {PACKAGE_PIN H37 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[58] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ58" - IO_L12N_T1U_N11_GC_46 +set_property -dict {PACKAGE_PIN J36 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[59] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ59" - IO_L12P_T1U_N10_GC_46 +set_property -dict {PACKAGE_PIN G35 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[62] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ62" - IO_L9N_T1L_N5_AD12N_46 +set_property -dict {PACKAGE_PIN G34 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[63] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ63" - IO_L9P_T1L_N4_AD12P_46 +set_property -dict {PACKAGE_PIN K38 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[61] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ61" - IO_L8N_T1L_N3_AD5N_46 +set_property -dict {PACKAGE_PIN K37 
IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[60] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ60" - IO_L8P_T1L_N2_AD5P_46 +set_property -dict {PACKAGE_PIN H34 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[15]]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQS_C16" - IO_L7N_T1L_N1_QBC_AD13N_46 +set_property -dict {PACKAGE_PIN H33 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[15]]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQS_T16" - IO_L7P_T1L_N0_QBC_AD13P_46 +set_property -dict {PACKAGE_PIN K33 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[51] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ51" - IO_L5N_T0U_N9_AD14N_46 +set_property -dict {PACKAGE_PIN L33 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[50] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ50" - IO_L5P_T0U_N8_AD14P_46 +set_property -dict {PACKAGE_PIN L36 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[12]]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQS_C6" - IO_L4N_T0U_N7_DBC_AD7N_46 +set_property -dict {PACKAGE_PIN L35 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[12]]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQS_T6" - IO_L4P_T0U_N6_DBC_AD7P_46 +set_property -dict {PACKAGE_PIN J35 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[48] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ48" - IO_L6N_T0U_N11_AD6N_46 +set_property -dict {PACKAGE_PIN K35 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[49] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ49" - IO_L6P_T0U_N10_AD6P_46 +set_property -dict {PACKAGE_PIN J34 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[52] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ52" - IO_L3N_T0L_N5_AD15N_46 +set_property -dict {PACKAGE_PIN J33 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[53] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ53" - IO_L3P_T0L_N4_AD15P_46 +set_property -dict {PACKAGE_PIN N34 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[54] ]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQ54" - IO_L2N_T0L_N3_46 +set_property -dict {PACKAGE_PIN P34 IOSTANDARD POD12_DCI } [get_ports c2_ddr4_dq[55] ]; # Bank 46 VCCO - 
VCC1V2 Net "DDR4_C2_DQ55" - IO_L2P_T0L_N2_46 +set_property -dict {PACKAGE_PIN L34 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_c[13]]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQS_C15" - IO_L1N_T0L_N1_DBC_46 +set_property -dict {PACKAGE_PIN M34 IOSTANDARD DIFF_POD12_DCI } [get_ports c2_ddr4_dqs_t[13]]; # Bank 46 VCCO - VCC1V2 Net "DDR4_C2_DQS_T15" - IO_L1P_T0L_N0_DBC_46 + +#### +### DDR4 c3 +#### + +set_property -dict {PACKAGE_PIN B24 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[34] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ34" - IO_L23N_T3U_N9_72 +set_property -dict {PACKAGE_PIN B25 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[35] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ35" - IO_L23P_T3U_N8_72 +set_property -dict {PACKAGE_PIN A24 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_c[8] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQS_C4" - IO_L22N_T3U_N7_DBC_AD0N_72 +set_property -dict {PACKAGE_PIN A25 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[8] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQS_T4" - IO_L22P_T3U_N6_DBC_AD0P_72 +set_property -dict {PACKAGE_PIN A22 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[33] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ33" - IO_L24N_T3U_N11_72 +set_property -dict {PACKAGE_PIN A23 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[32] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ32" - IO_L24P_T3U_N10_72 +set_property -dict {PACKAGE_PIN C23 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[39] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ39" - IO_L21N_T3L_N5_AD8N_72 +set_property -dict {PACKAGE_PIN C24 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[38] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ38" - IO_L21P_T3L_N4_AD8P_72 +set_property -dict {PACKAGE_PIN B22 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[36] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ36" - IO_L20N_T3L_N3_AD1N_72 +set_property -dict {PACKAGE_PIN C22 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[37] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ37" - IO_L20P_T3L_N2_AD1P_72 +set_property 
-dict {PACKAGE_PIN D23 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_c[9] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQS_C13" - IO_L19N_T3L_N1_DBC_AD9N_72 +set_property -dict {PACKAGE_PIN D24 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[9] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQS_T13" - IO_L19P_T3L_N0_DBC_AD9P_72 +set_property -dict {PACKAGE_PIN E22 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[57] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ57" - IO_L17N_T2U_N9_AD10N_72 +set_property -dict {PACKAGE_PIN F22 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[56] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ56" - IO_L17P_T2U_N8_AD10P_72 +set_property -dict {PACKAGE_PIN E23 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_c[14]]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQS_C7" - IO_L16N_T2U_N7_QBC_AD3N_72 +set_property -dict {PACKAGE_PIN F23 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[14]]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQS_T7" - IO_L16P_T2U_N6_QBC_AD3P_72 +set_property -dict {PACKAGE_PIN G21 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[59] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ59" - IO_L18N_T2U_N11_AD2N_72 +set_property -dict {PACKAGE_PIN G22 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[58] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ58" - IO_L18P_T2U_N10_AD2P_72 +set_property -dict {PACKAGE_PIN E25 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[61] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ61" - IO_L15N_T2L_N5_AD11N_72 +set_property -dict {PACKAGE_PIN F25 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[62] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ62" - IO_L15P_T2L_N4_AD11P_72 +set_property -dict {PACKAGE_PIN F24 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[60] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ60" - IO_L14N_T2L_N3_GC_72 +set_property -dict {PACKAGE_PIN G25 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[63] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ63" - IO_L14P_T2L_N2_GC_72 +set_property -dict {PACKAGE_PIN H22 IOSTANDARD DIFF_POD12_DCI } 
[get_ports c3_ddr4_dqs_c[15]]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQS_C16" - IO_L13N_T2L_N1_GC_QBC_72 +set_property -dict {PACKAGE_PIN H23 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[15]]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQS_T16" - IO_L13P_T2L_N0_GC_QBC_72 +set_property -dict {PACKAGE_PIN J23 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[9] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ9" - IO_L11N_T1U_N9_GC_72 +set_property -dict {PACKAGE_PIN J24 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[8] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ8" - IO_L11P_T1U_N8_GC_72 +set_property -dict {PACKAGE_PIN H21 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_c[2] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQS_C1" - IO_L10N_T1U_N7_QBC_AD4N_72 +set_property -dict {PACKAGE_PIN J21 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[2] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQS_T1" - IO_L10P_T1U_N6_QBC_AD4P_72 +set_property -dict {PACKAGE_PIN G24 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[11] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ11" - IO_L12N_T1U_N11_GC_72 +set_property -dict {PACKAGE_PIN H24 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[10] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ10" - IO_L12P_T1U_N10_GC_72 +set_property -dict {PACKAGE_PIN L23 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[13] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ13" - IO_L9N_T1L_N5_AD12N_72 +set_property -dict {PACKAGE_PIN L24 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[12] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ12" - IO_L9P_T1L_N4_AD12P_72 +set_property -dict {PACKAGE_PIN K21 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[15] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ15" - IO_L8N_T1L_N3_AD5N_72 +set_property -dict {PACKAGE_PIN K22 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[14] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ14" - IO_L8P_T1L_N2_AD5P_72 +set_property -dict {PACKAGE_PIN L22 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_c[3] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQS_C10" 
- IO_L7N_T1L_N1_QBC_AD13N_72 +set_property -dict {PACKAGE_PIN M22 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[3] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQS_T10" - IO_L7P_T1L_N0_QBC_AD13P_72 +set_property -dict {PACKAGE_PIN N24 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[1] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ1" - IO_L5N_T0U_N9_AD14N_72 +set_property -dict {PACKAGE_PIN P24 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[0] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ0" - IO_L5P_T0U_N8_AD14P_72 +set_property -dict {PACKAGE_PIN R22 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_c[0] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQS_C0" - IO_L4N_T0U_N7_DBC_AD7N_72 +set_property -dict {PACKAGE_PIN T22 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[0] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQS_T0" - IO_L4P_T0U_N6_DBC_AD7P_72 +set_property -dict {PACKAGE_PIN R23 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[3] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ3" - IO_L6N_T0U_N11_AD6N_72 +set_property -dict {PACKAGE_PIN T24 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[2] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ2" - IO_L6P_T0U_N10_AD6P_72 +set_property -dict {PACKAGE_PIN N23 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[4] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ4" - IO_L3N_T0L_N5_AD15N_72 +set_property -dict {PACKAGE_PIN P23 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[6] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ6" - IO_L3P_T0L_N4_AD15P_72 +set_property -dict {PACKAGE_PIN P21 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[5] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ5" - IO_L2N_T0L_N3_72 +set_property -dict {PACKAGE_PIN R21 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[7] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQ7" - IO_L2P_T0L_N2_72 +set_property -dict {PACKAGE_PIN N21 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_c[1] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQS_C9" - IO_L1N_T0L_N1_DBC_72 +set_property -dict {PACKAGE_PIN N22 IOSTANDARD DIFF_POD12_DCI } 
[get_ports c3_ddr4_dqs_t[1] ]; # Bank 72 VCCO - VCC1V2 Net "DDR4_C3_DQS_T9" - IO_L1P_T0L_N0_DBC_72 +set_property -dict {PACKAGE_PIN B21 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[43] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ43" - IO_L23N_T3U_N9_71 +set_property -dict {PACKAGE_PIN C21 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[42] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ42" - IO_L23P_T3U_N8_71 +set_property -dict {PACKAGE_PIN B17 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_c[10]]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQS_C5" - IO_L22N_T3U_N7_DBC_AD0N_71 +set_property -dict {PACKAGE_PIN C17 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[10]]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQS_T5" - IO_L22P_T3U_N6_DBC_AD0P_71 +#set_property -dict {PACKAGE_PIN D18 IOSTANDARD LVCMOS12 } [get_ports c3_ddr4_event_n ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_EVENT_B" - IO_T3U_N12_71 +set_property -dict {PACKAGE_PIN C18 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[41] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ41" - IO_L24N_T3U_N11_71 +set_property -dict {PACKAGE_PIN C19 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[40] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ40" - IO_L24P_T3U_N10_71 +set_property -dict {PACKAGE_PIN A20 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[46] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ46" - IO_L21N_T3L_N5_AD8N_71 +set_property -dict {PACKAGE_PIN B20 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[47] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ47" - IO_L21P_T3L_N4_AD8P_71 +set_property -dict {PACKAGE_PIN A17 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[45] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ45" - IO_L20N_T3L_N3_AD1N_71 +set_property -dict {PACKAGE_PIN A18 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[44] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ44" - IO_L20P_T3L_N2_AD1P_71 +set_property -dict {PACKAGE_PIN A19 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_c[11]]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQS_C14" - IO_L19N_T3L_N1_DBC_AD9N_71 
+set_property -dict {PACKAGE_PIN B19 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[11]]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQS_T14" - IO_L19P_T3L_N0_DBC_AD9P_71 +set_property -dict {PACKAGE_PIN E20 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[51] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ51" - IO_L17N_T2U_N9_AD10N_71 +set_property -dict {PACKAGE_PIN F20 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[49] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ49" - IO_L17P_T2U_N8_AD10P_71 +set_property -dict {PACKAGE_PIN F17 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_c[12]]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQS_C6" - IO_L16N_T2U_N7_QBC_AD3N_71 +set_property -dict {PACKAGE_PIN F18 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[12]]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQS_T6" - IO_L16P_T2U_N6_QBC_AD3P_71 +set_property -dict {PACKAGE_PIN D21 IOSTANDARD LVCMOS12 } [get_ports c3_ddr4_reset_n ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_RESET_N" - IO_T2U_N12_71 +set_property -dict {PACKAGE_PIN E17 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[48] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ48" - IO_L18N_T2U_N11_AD2N_71 +set_property -dict {PACKAGE_PIN E18 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[50] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ50" - IO_L18P_T2U_N10_AD2P_71 +set_property -dict {PACKAGE_PIN D19 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[52] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ52" - IO_L15N_T2L_N5_AD11N_71 +set_property -dict {PACKAGE_PIN D20 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[53] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ53" - IO_L15P_T2L_N4_AD11P_71 +set_property -dict {PACKAGE_PIN H18 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[54] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ54" - IO_L14N_T2L_N3_GC_71 +set_property -dict {PACKAGE_PIN J18 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[55] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ55" - IO_L14P_T2L_N2_GC_71 +set_property -dict {PACKAGE_PIN G19 IOSTANDARD DIFF_POD12_DCI } [get_ports 
c3_ddr4_dqs_c[13]]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQS_C15" - IO_L13N_T2L_N1_GC_QBC_71 +set_property -dict {PACKAGE_PIN H19 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[13]]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQS_T15" - IO_L13P_T2L_N0_GC_QBC_71 +set_property -dict {PACKAGE_PIN F19 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[18] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ18" - IO_L11N_T1U_N9_GC_71 +set_property -dict {PACKAGE_PIN G20 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[16] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ16" - IO_L11P_T1U_N8_GC_71 +set_property -dict {PACKAGE_PIN K20 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_c[4] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQS_C2" - IO_L10N_T1U_N7_QBC_AD4N_71 +set_property -dict {PACKAGE_PIN L20 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[4] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQS_T2" - IO_L10P_T1U_N6_QBC_AD4P_71 +set_property -dict {PACKAGE_PIN G17 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[19] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ19" - IO_L12N_T1U_N11_GC_71 +set_property -dict {PACKAGE_PIN H17 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[17] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ17" - IO_L12P_T1U_N10_GC_71 +set_property -dict {PACKAGE_PIN J19 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[23] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ23" - IO_L9N_T1L_N5_AD12N_71 +set_property -dict {PACKAGE_PIN J20 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[20] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ20" - IO_L9P_T1L_N4_AD12P_71 +set_property -dict {PACKAGE_PIN L18 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[22] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ22" - IO_L8N_T1L_N3_AD5N_71 +set_property -dict {PACKAGE_PIN L19 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[21] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ21" - IO_L8P_T1L_N2_AD5P_71 +set_property -dict {PACKAGE_PIN K17 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_c[5] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQS_C11" - 
IO_L7N_T1L_N1_QBC_AD13N_71 +set_property -dict {PACKAGE_PIN K18 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[5] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQS_T11" - IO_L7P_T1L_N0_QBC_AD13P_71 +set_property -dict {PACKAGE_PIN M19 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[24] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ24" - IO_L5N_T0U_N9_AD14N_71 +set_property -dict {PACKAGE_PIN M20 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[25] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ25" - IO_L5P_T0U_N8_AD14P_71 +set_property -dict {PACKAGE_PIN P18 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_c[6] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQS_C3" - IO_L4N_T0U_N7_DBC_AD7N_71 +set_property -dict {PACKAGE_PIN P19 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[6] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQS_T3" - IO_L4P_T0U_N6_DBC_AD7P_71 +set_property -dict {PACKAGE_PIN R17 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[27] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ27" - IO_L6N_T0U_N11_AD6N_71 +set_property -dict {PACKAGE_PIN R18 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[26] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ26" - IO_L6P_T0U_N10_AD6P_71 +set_property -dict {PACKAGE_PIN N18 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[30] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ30" - IO_L3N_T0L_N5_AD15N_71 +set_property -dict {PACKAGE_PIN N19 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[31] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ31" - IO_L3P_T0L_N4_AD15P_71 +set_property -dict {PACKAGE_PIN R20 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[28] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ28" - IO_L2N_T0L_N3_71 +set_property -dict {PACKAGE_PIN T20 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[29] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQ29" - IO_L2P_T0L_N2_71 +set_property -dict {PACKAGE_PIN M17 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_c[7] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQS_C12" - IO_L1N_T0L_N1_DBC_71 +set_property -dict {PACKAGE_PIN N17 IOSTANDARD 
DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[7] ]; # Bank 71 VCCO - VCC1V2 Net "DDR4_C3_DQS_T12" - IO_L1P_T0L_N0_DBC_71 +set_property -dict {PACKAGE_PIN B16 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_cs_n[0] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_CS_B0" - IO_L23N_T3U_N9_70 +set_property -dict {PACKAGE_PIN C16 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_odt[0] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ODT0" - IO_L23P_T3U_N8_70 +set_property -dict {PACKAGE_PIN C13 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[11] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR11" - IO_L22N_T3U_N7_DBC_AD0N_70 +set_property -dict {PACKAGE_PIN D13 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_bg[0] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_BG0" - IO_L22P_T3U_N6_DBC_AD0P_70 +#set_property -dict {PACKAGE_PIN D16 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_cs_n[1] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_CS_B1" - IO_T3U_N12_70 +set_property -dict {PACKAGE_PIN A13 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[9] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR9" - IO_L24N_T3U_N11_70 +set_property -dict {PACKAGE_PIN B13 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[12] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR12" - IO_L24P_T3U_N10_70 +set_property -dict {PACKAGE_PIN A15 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[3] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR3" - IO_L21N_T3L_N5_AD8N_70 +set_property -dict {PACKAGE_PIN B15 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[1] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR1" - IO_L21P_T3L_N4_AD8P_70 +set_property -dict {PACKAGE_PIN C14 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[4] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR4" - IO_L20N_T3L_N3_AD1N_70 +set_property -dict {PACKAGE_PIN D14 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[10] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR10" - IO_L20P_T3L_N2_AD1P_70 +set_property -dict {PACKAGE_PIN A14 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[5] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR5" - 
IO_L19N_T3L_N1_DBC_AD9N_70 +set_property -dict {PACKAGE_PIN B14 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[6] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR6" - IO_L19P_T3L_N0_DBC_AD9P_70 +set_property -dict {PACKAGE_PIN E15 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[15] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR15" - IO_L17N_T2U_N9_AD10N_70 +#set_property -dict {PACKAGE_PIN E16 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_odt[1] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ODT1" - IO_L17P_T2U_N8_AD10P_70 +#set_property -dict {PACKAGE_PIN G13 IOSTANDARD DIFF_SSTL12_DCI} [get_ports c3_ddr4_ck_c[1] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_CK_C1" - IO_L16N_T2U_N7_QBC_AD3N_70 +#set_property -dict {PACKAGE_PIN G14 IOSTANDARD DIFF_SSTL12_DCI} [get_ports c3_ddr4_ck_t[1] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_CK_T1" - IO_L16P_T2U_N6_QBC_AD3P_70 +set_property -dict {PACKAGE_PIN D15 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[14] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR14" - IO_T2U_N12_70 +set_property -dict {PACKAGE_PIN F14 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[2] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR2" - IO_L18N_T2U_N11_AD2N_70 +set_property -dict {PACKAGE_PIN F15 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[16] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR16" - IO_L18P_T2U_N10_AD2P_70 +set_property -dict {PACKAGE_PIN E13 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[7] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR7" - IO_L15N_T2L_N5_AD11N_70 +set_property -dict {PACKAGE_PIN F13 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[8] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR8" - IO_L15P_T2L_N4_AD11P_70 +set_property -dict {PACKAGE_PIN H13 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_act_n ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ACT_B" - IO_L14N_T2L_N3_GC_70 +set_property -dict {PACKAGE_PIN H14 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_ba[1] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_BA1" - IO_L14P_T2L_N2_GC_70 +set_property -dict {PACKAGE_PIN G15 IOSTANDARD 
LVCMOS12 } [get_ports c3_ddr4_alert_n ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ALERT_B" - IO_L11N_T1U_N9_GC_70 +#set_property -dict {PACKAGE_PIN G16 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[17] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR17" - IO_L11P_T1U_N8_GC_70 +set_property -dict {PACKAGE_PIN L13 IOSTANDARD DIFF_SSTL12_DCI} [get_ports c3_ddr4_ck_c[0] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_CK_C0" - IO_L10N_T1U_N7_QBC_AD4N_70 +set_property -dict {PACKAGE_PIN L14 IOSTANDARD DIFF_SSTL12_DCI} [get_ports c3_ddr4_ck_t[0] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_CK_T0" - IO_L10P_T1U_N6_QBC_AD4P_70 +set_property -dict {PACKAGE_PIN K13 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_cke[0] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_CKE0" - IO_T1U_N12_70 +set_property -dict {PACKAGE_PIN J13 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_bg[1] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_BG1" - IO_L12N_T1U_N11_GC_70 +set_property -dict {PACKAGE_PIN J14 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_par ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_PAR" - IO_L12P_T1U_N10_GC_70 +set_property -dict {PACKAGE_PIN J15 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_ba[0] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_BA0" - IO_L9N_T1L_N5_AD12N_70 +set_property -dict {PACKAGE_PIN K16 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[13] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR13" - IO_L9P_T1L_N4_AD12P_70 +#set_property -dict {PACKAGE_PIN M13 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_cs_n[3] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_CS_B3" - IO_L8N_T1L_N3_AD5N_70 +#set_property -dict {PACKAGE_PIN M14 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_cs_n[2] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_CS_B2" - IO_L8P_T1L_N2_AD5P_70 +set_property -dict {PACKAGE_PIN K15 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_adr[0] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_ADR0" - IO_L7N_T1L_N1_QBC_AD13N_70 +#set_property -dict {PACKAGE_PIN L15 IOSTANDARD SSTL12_DCI } [get_ports c3_ddr4_cke[1] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_CKE1" - 
IO_L7P_T1L_N0_QBC_AD13P_70 +set_property -dict {PACKAGE_PIN N13 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[66] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_DQ66" - IO_L5N_T0U_N9_AD14N_70 +set_property -dict {PACKAGE_PIN N14 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[67] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_DQ67" - IO_L5P_T0U_N8_AD14P_70 +set_property -dict {PACKAGE_PIN P15 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_c[16]]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_DQS_C8" - IO_L4N_T0U_N7_DBC_AD7N_70 +set_property -dict {PACKAGE_PIN R16 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[16]]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_DQS_T8" - IO_L4P_T0U_N6_DBC_AD7P_70 +set_property -dict {PACKAGE_PIN M16 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[64] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_DQ64" - IO_L6N_T0U_N11_AD6N_70 +set_property -dict {PACKAGE_PIN N16 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[65] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_DQ65" - IO_L6P_T0U_N10_AD6P_70 +set_property -dict {PACKAGE_PIN P13 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[70] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_DQ70" - IO_L3N_T0L_N5_AD15N_70 +set_property -dict {PACKAGE_PIN P14 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[71] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_DQ71" - IO_L3P_T0L_N4_AD15P_70 +set_property -dict {PACKAGE_PIN R15 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[69] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_DQ69" - IO_L2N_T0L_N3_70 +set_property -dict {PACKAGE_PIN T15 IOSTANDARD POD12_DCI } [get_ports c3_ddr4_dq[68] ]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_DQ68" - IO_L2P_T0L_N2_70 +set_property -dict {PACKAGE_PIN R13 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_c[17]]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_DQS_C17" - IO_L1N_T0L_N1_DBC_70 +set_property -dict {PACKAGE_PIN T13 IOSTANDARD DIFF_POD12_DCI } [get_ports c3_ddr4_dqs_t[17]]; # Bank 70 VCCO - VCC1V2 Net "DDR4_C3_DQS_T17" - IO_L1P_T0L_N0_DBC_70 \ No newline at end of file diff --git 
a/hw/constraints/u250/u250_net.xdc b/hw/constraints/u250/u250_net.xdc new file mode 100644 index 00000000..d8e1459c --- /dev/null +++ b/hw/constraints/u250/u250_net.xdc @@ -0,0 +1,83 @@ +### +### QSFP 0 +### + +# Control +set_property PACKAGE_PIN BE17 [get_ports qsfp0_resetn ] ; +set_property IOSTANDARD LVCMOS12 [get_ports qsfp0_resetn ] ; +set_property PACKAGE_PIN BD18 [get_ports qsfp0_lpmode ] ; +set_property IOSTANDARD LVCMOS12 [get_ports qsfp0_lpmode ] ; +set_property PACKAGE_PIN BE16 [get_ports qsfp0_modseln ] ; +set_property IOSTANDARD LVCMOS12 [get_ports qsfp0_modseln ] ; + +set_false_path -to [get_ports qsfp0_resetn] +set_false_path -to [get_ports qsfp0_lpmode] +set_false_path -to [get_ports qsfp0_modseln] + +# Clock (156.25 MHz) +set_property PACKAGE_PIN M10 [get_ports gt0_refclk_n ] ; +set_property PACKAGE_PIN M11 [get_ports gt0_refclk_p ] ; + +# Clock (161 MHz) +#set_property PACKAGE_PIN K10 [get_ports gt0_refclk_n ] ; +#set_property PACKAGE_PIN K11 [get_ports gt0_refclk_p ] ; + +# Transceiver +set_property PACKAGE_PIN N3 [get_ports {gt0_rxn_in[0]} ] ; +set_property PACKAGE_PIN M1 [get_ports {gt0_rxn_in[1]} ] ; +set_property PACKAGE_PIN L3 [get_ports {gt0_rxn_in[2]} ] ; +set_property PACKAGE_PIN K1 [get_ports {gt0_rxn_in[3]} ] ; +set_property PACKAGE_PIN N4 [get_ports {gt0_rxp_in[0]} ] ; +set_property PACKAGE_PIN M2 [get_ports {gt0_rxp_in[1]} ] ; +set_property PACKAGE_PIN L4 [get_ports {gt0_rxp_in[2]} ] ; +set_property PACKAGE_PIN K2 [get_ports {gt0_rxp_in[3]} ] ; +set_property PACKAGE_PIN N8 [get_ports {gt0_txn_in[0]} ] ; +set_property PACKAGE_PIN M6 [get_ports {gt0_txn_in[1]} ] ; +set_property PACKAGE_PIN L8 [get_ports {gt0_txn_in[2]} ] ; +set_property PACKAGE_PIN K6 [get_ports {gt0_txn_in[3]} ] ; +set_property PACKAGE_PIN N9 [get_ports {gt0_txp_in[0]} ] ; +set_property PACKAGE_PIN M7 [get_ports {gt0_txp_in[1]} ] ; +set_property PACKAGE_PIN L9 [get_ports {gt0_txp_in[2]} ] ; +set_property PACKAGE_PIN K7 [get_ports {gt0_txp_in[3]} ] ; + +### +### QSFP 1 
+### + +# Control +set_property PACKAGE_PIN BC18 [get_ports qsfp1_resetn ] ; +set_property IOSTANDARD LVCMOS12 [get_ports qsfp1_resetn ] ; +set_property PACKAGE_PIN AV22 [get_ports qsfp1_lpmode ] ; +set_property IOSTANDARD LVCMOS12 [get_ports qsfp1_lpmode ] ; +set_property PACKAGE_PIN AY20 [get_ports qsfp1_modseln ] ; +set_property IOSTANDARD LVCMOS12 [get_ports qsfp1_modseln ] ; + +set_false_path -to [get_ports qsfp1_resetn] +set_false_path -to [get_ports qsfp1_lpmode] +set_false_path -to [get_ports qsfp1_modseln] + +# Clock (156.25 MHz) +set_property PACKAGE_PIN T10 [get_ports gt1_refclk_n ] ; +set_property PACKAGE_PIN T11 [get_ports gt1_refclk_p ] ; + +# Clock (161 MHz) +#set_property PACKAGE_PIN P10 [get_ports gt1_refclk_n ] ; +#set_property PACKAGE_PIN P11 [get_ports gt1_refclk_p ] ; + +# Transceiver +set_property PACKAGE_PIN U3 [get_ports {gt1_rxn_in[0]} ] ; +set_property PACKAGE_PIN T1 [get_ports {gt1_rxn_in[1]} ] ; +set_property PACKAGE_PIN R3 [get_ports {gt1_rxn_in[2]} ] ; +set_property PACKAGE_PIN P1 [get_ports {gt1_rxn_in[3]} ] ; +set_property PACKAGE_PIN U4 [get_ports {gt1_rxp_in[0]} ] ; +set_property PACKAGE_PIN T2 [get_ports {gt1_rxp_in[1]} ] ; +set_property PACKAGE_PIN R4 [get_ports {gt1_rxp_in[2]} ] ; +set_property PACKAGE_PIN P2 [get_ports {gt1_rxp_in[3]} ] ; +set_property PACKAGE_PIN U8 [get_ports {gt1_txn_in[0]} ] ; +set_property PACKAGE_PIN T6 [get_ports {gt1_txn_in[1]} ] ; +set_property PACKAGE_PIN R8 [get_ports {gt1_txn_in[2]} ] ; +set_property PACKAGE_PIN P6 [get_ports {gt1_txn_in[3]} ] ; +set_property PACKAGE_PIN U9 [get_ports {gt1_txp_in[0]} ] ; +set_property PACKAGE_PIN T7 [get_ports {gt1_txp_in[1]} ] ; +set_property PACKAGE_PIN R9 [get_ports {gt1_txp_in[2]} ] ; +set_property PACKAGE_PIN P7 [get_ports {gt1_txp_in[3]} ] ; diff --git a/hw/constraints/u250/u250_pcie.xdc b/hw/constraints/u250/u250_pcie.xdc new file mode 100644 index 00000000..95da3e77 --- /dev/null +++ b/hw/constraints/u250/u250_pcie.xdc @@ -0,0 +1,82 @@ +#PCIe + +#Clock 
+set_property PACKAGE_PIN AM10 [get_ports {pcie_clk_clk_n}] ; +set_property PACKAGE_PIN AM11 [get_ports {pcie_clk_clk_p}] ; + +create_clock -period 10.000 -name pcie_ref_clk [get_ports pcie_clk_clk_p] + +set_property PACKAGE_PIN BD21 [get_ports perst_n_nb ] ; +set_property IOSTANDARD LVCMOS12 [get_ports perst_n_nb ] ; + +# Set false path +set_false_path -from [get_ports perst_n_nb] + +# Transceiver +set_property PACKAGE_PIN AJ3 [get_ports {pcie_x16_rxn[3]}] ; +set_property PACKAGE_PIN AH1 [get_ports {pcie_x16_rxn[2]}] ; +set_property PACKAGE_PIN AG3 [get_ports {pcie_x16_rxn[1]}] ; +set_property PACKAGE_PIN AF1 [get_ports {pcie_x16_rxn[0]}] ; +set_property PACKAGE_PIN AJ4 [get_ports {pcie_x16_rxp[3]}] ; +set_property PACKAGE_PIN AH2 [get_ports {pcie_x16_rxp[2]}] ; +set_property PACKAGE_PIN AG4 [get_ports {pcie_x16_rxp[1]}] ; +set_property PACKAGE_PIN AF2 [get_ports {pcie_x16_rxp[0]}] ; +set_property PACKAGE_PIN AJ8 [get_ports {pcie_x16_txn[3]}] ; +set_property PACKAGE_PIN AH6 [get_ports {pcie_x16_txn[2]}] ; +set_property PACKAGE_PIN AG8 [get_ports {pcie_x16_txn[1]}] ; +set_property PACKAGE_PIN AF6 [get_ports {pcie_x16_txn[0]}] ; +set_property PACKAGE_PIN AJ9 [get_ports {pcie_x16_txp[3]}] ; +set_property PACKAGE_PIN AH7 [get_ports {pcie_x16_txp[2]}] ; +set_property PACKAGE_PIN AG9 [get_ports {pcie_x16_txp[1]}] ; +set_property PACKAGE_PIN AF7 [get_ports {pcie_x16_txp[0]}] ; + +set_property PACKAGE_PIN AN3 [get_ports {pcie_x16_rxn[7]}] ; +set_property PACKAGE_PIN AM1 [get_ports {pcie_x16_rxn[6]}] ; +set_property PACKAGE_PIN AL3 [get_ports {pcie_x16_rxn[5]}] ; +set_property PACKAGE_PIN AK1 [get_ports {pcie_x16_rxn[4]}] ; +set_property PACKAGE_PIN AN4 [get_ports {pcie_x16_rxp[7]}] ; +set_property PACKAGE_PIN AM2 [get_ports {pcie_x16_rxp[6]}] ; +set_property PACKAGE_PIN AL4 [get_ports {pcie_x16_rxp[5]}] ; +set_property PACKAGE_PIN AK2 [get_ports {pcie_x16_rxp[4]}] ; +set_property PACKAGE_PIN AN8 [get_ports {pcie_x16_txn[7]}] ; +set_property PACKAGE_PIN AM6 [get_ports 
{pcie_x16_txn[6]}] ; +set_property PACKAGE_PIN AL8 [get_ports {pcie_x16_txn[5]}] ; +set_property PACKAGE_PIN AK6 [get_ports {pcie_x16_txn[4]}] ; +set_property PACKAGE_PIN AN9 [get_ports {pcie_x16_txp[7]}] ; +set_property PACKAGE_PIN AM7 [get_ports {pcie_x16_txp[6]}] ; +set_property PACKAGE_PIN AL9 [get_ports {pcie_x16_txp[5]}] ; +set_property PACKAGE_PIN AK7 [get_ports {pcie_x16_txp[4]}] ; + +set_property PACKAGE_PIN AU3 [get_ports {pcie_x16_rxn[11]} ] ; +set_property PACKAGE_PIN AT1 [get_ports {pcie_x16_rxn[10]} ] ; +set_property PACKAGE_PIN AR3 [get_ports {pcie_x16_rxn[9]} ] ; +set_property PACKAGE_PIN AP1 [get_ports {pcie_x16_rxn[8]} ] ; +set_property PACKAGE_PIN AU4 [get_ports {pcie_x16_rxp[11]} ] ; +set_property PACKAGE_PIN AT2 [get_ports {pcie_x16_rxp[10]} ] ; +set_property PACKAGE_PIN AR4 [get_ports {pcie_x16_rxp[9]} ] ; +set_property PACKAGE_PIN AP2 [get_ports {pcie_x16_rxp[8]} ] ; +set_property PACKAGE_PIN AU8 [get_ports {pcie_x16_txn[11]} ] ; +set_property PACKAGE_PIN AT6 [get_ports {pcie_x16_txn[10]} ] ; +set_property PACKAGE_PIN AR8 [get_ports {pcie_x16_txn[9]} ] ; +set_property PACKAGE_PIN AP6 [get_ports {pcie_x16_txn[8]} ] ; +set_property PACKAGE_PIN AU9 [get_ports {pcie_x16_txp[11]} ] ; +set_property PACKAGE_PIN AT7 [get_ports {pcie_x16_txp[10]} ] ; +set_property PACKAGE_PIN AR9 [get_ports {pcie_x16_txp[9]} ] ; +set_property PACKAGE_PIN AP7 [get_ports {pcie_x16_txp[8]} ] ; + +set_property PACKAGE_PIN BC1 [get_ports {pcie_x16_rxn[15]} ] ; +set_property PACKAGE_PIN BA1 [get_ports {pcie_x16_rxn[14]} ] ; +set_property PACKAGE_PIN AW3 [get_ports {pcie_x16_rxn[13]} ] ; +set_property PACKAGE_PIN AV1 [get_ports {pcie_x16_rxn[12]} ] ; +set_property PACKAGE_PIN BC2 [get_ports {pcie_x16_rxp[15]} ] ; +set_property PACKAGE_PIN BA2 [get_ports {pcie_x16_rxp[14]} ] ; +set_property PACKAGE_PIN AW4 [get_ports {pcie_x16_rxp[13]} ] ; +set_property PACKAGE_PIN AV2 [get_ports {pcie_x16_rxp[12]} ] ; +set_property PACKAGE_PIN BF4 [get_ports {pcie_x16_txn[15]} ] ; 
+set_property PACKAGE_PIN BD4 [get_ports {pcie_x16_txn[14]} ] ; +set_property PACKAGE_PIN BB4 [get_ports {pcie_x16_txn[13]} ] ; +set_property PACKAGE_PIN AV6 [get_ports {pcie_x16_txn[12]} ] ; +set_property PACKAGE_PIN BF5 [get_ports {pcie_x16_txp[15]} ] ; +set_property PACKAGE_PIN BD5 [get_ports {pcie_x16_txp[14]} ] ; +set_property PACKAGE_PIN BB5 [get_ports {pcie_x16_txp[13]} ] ; +set_property PACKAGE_PIN AV7 [get_ports {pcie_x16_txp[12]} ] ; diff --git a/hw/constraints/u280/u280_base.xdc b/hw/constraints/u280/u280_base.xdc new file mode 100644 index 00000000..feb9479c --- /dev/null +++ b/hw/constraints/u280/u280_base.xdc @@ -0,0 +1,29 @@ +# Power constraint +set_operating_conditions -design_power_budget 160 + +set_property BITSTREAM.GENERAL.COMPRESS TRUE [current_design] + +# Clocks and reset +set_property PACKAGE_PIN L30 [get_ports resetn_0_nb] ; +set_property IOSTANDARD LVCMOS18 [get_ports resetn_0_nb] ; + +# Reset false path +set_false_path -from [get_ports resetn_0_nb] + +# User general purpose (156.25 MHz) +set_property PACKAGE_PIN F30 [get_ports user_si570_clk_n] ; +set_property IOSTANDARD LVDS [get_ports user_si570_clk_n] ; +set_property PACKAGE_PIN G30 [get_ports user_si570_clk_p] ; +set_property IOSTANDARD LVDS [get_ports user_si570_clk_p] ; + +# HBM (100 MHz) +set_property PACKAGE_PIN F31 [get_ports sysclk3_100_n] ; +set_property IOSTANDARD LVDS [get_ports sysclk3_100_n] ; +set_property PACKAGE_PIN G31 [get_ports sysclk3_100_p] ; +set_property IOSTANDARD LVDS [get_ports sysclk3_100_p] ; + +create_clock -period 10.000 -name sysclk3 [get_ports sysclk3_100_p] + +# fpga_burn pin (NOTE(review): D32 is presumably the U280 HBM_CATTRIP over-temperature trip signal - confirm against the board XDC) +set_property PACKAGE_PIN D32 [get_ports fpga_burn] ; +set_property IOSTANDARD LVCMOS18 [get_ports fpga_burn] ; \ No newline at end of file diff --git a/hw/constraints/u280/u280_ddr.xdc b/hw/constraints/u280/u280_ddr.xdc new file mode 100644 index 00000000..5568a599 --- /dev/null +++ b/hw/constraints/u280/u280_ddr.xdc @@ -0,0 +1,641 @@ +# DDR clocks +set_property PACKAGE_PIN BJ44 
[get_ports c0_sys_clk_n] ; +set_property IOSTANDARD LVDS [get_ports c0_sys_clk_n] ; +set_property PACKAGE_PIN BJ43 [get_ports c0_sys_clk_p] ; +set_property IOSTANDARD LVDS [get_ports c0_sys_clk_p] ; +set_property DQS_BIAS TRUE [get_ports c0_sys_clk_p] ; + +set_property PACKAGE_PIN BJ6 [get_ports c1_sys_clk_n] ; +set_property IOSTANDARD LVDS [get_ports c1_sys_clk_n] ; +set_property PACKAGE_PIN BH6 [get_ports c1_sys_clk_p] ; +set_property IOSTANDARD LVDS [get_ports c1_sys_clk_p] ; +set_property DQS_BIAS TRUE [get_ports c1_sys_clk_p] ; + +#### +### DDR4 c0 +#### +set_property PACKAGE_PIN BE51 [ get_ports {c0_ddr4_dq[42]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ42" - IO_L24N_T3U_N11_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[42]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ42" - IO_L24N_T3U_N11_66 +set_property PACKAGE_PIN BD51 [ get_ports {c0_ddr4_dq[43]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ43" - IO_L24P_T3U_N10_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[43]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ43" - IO_L24P_T3U_N10_66 +set_property PACKAGE_PIN BE50 [ get_ports {c0_ddr4_dq[40]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ40" - IO_L23N_T3U_N9_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[40]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ40" - IO_L23N_T3U_N9_66 +set_property PACKAGE_PIN BE49 [ get_ports {c0_ddr4_dq[41]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ41" - IO_L23P_T3U_N8_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[41]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ41" - IO_L23P_T3U_N8_66 +set_property PACKAGE_PIN BF48 [ get_ports {c0_ddr4_dqs_c[10]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_C5" - IO_L22N_T3U_N7_DBC_AD0N_66 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[10]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_C5" - IO_L22N_T3U_N7_DBC_AD0N_66 +set_property PACKAGE_PIN BF47 [ get_ports {c0_ddr4_dqs_t[10]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_T5" - 
IO_L22P_T3U_N6_DBC_AD0P_66 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[10]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_T5" - IO_L22P_T3U_N6_DBC_AD0P_66 +set_property PACKAGE_PIN BF52 [ get_ports {c0_ddr4_dq[44]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ44" - IO_L21N_T3L_N5_AD8N_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[44]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ44" - IO_L21N_T3L_N5_AD8N_66 +set_property PACKAGE_PIN BF51 [ get_ports {c0_ddr4_dq[45]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ45" - IO_L21P_T3L_N4_AD8P_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[45]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ45" - IO_L21P_T3L_N4_AD8P_66 +set_property PACKAGE_PIN BG50 [ get_ports {c0_ddr4_dq[46]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ46" - IO_L20N_T3L_N3_AD1N_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[46]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ46" - IO_L20N_T3L_N3_AD1N_66 +set_property PACKAGE_PIN BF50 [ get_ports {c0_ddr4_dq[47]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ47" - IO_L20P_T3L_N2_AD1P_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[47]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ47" - IO_L20P_T3L_N2_AD1P_66 +set_property PACKAGE_PIN BG49 [ get_ports {c0_ddr4_dqs_c[11]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_C14" - IO_L19N_T3L_N1_DBC_AD9N_66 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[11]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_C14" - IO_L19N_T3L_N1_DBC_AD9N_66 +set_property PACKAGE_PIN BG48 [ get_ports {c0_ddr4_dqs_t[11]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_T14" - IO_L19P_T3L_N0_DBC_AD9P_66 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[11]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_T14" - IO_L19P_T3L_N0_DBC_AD9P_66 +#set_property PACKAGE_PIN BG47 #N/A ;# Bank 66 VCCO - VCC1V2 Net "Not Connected" - IO_T3U_N12_66 +#set_property IOSTANDARD LVCMOS18 #N/A ;# Bank 66 VCCO - VCC1V2 Net "Not 
Connected" - IO_T3U_N12_66 +#set_property PACKAGE_PIN BF53 #N/A ;# Bank 66 VCCO - VCC1V2 Net "Not Connected" - IO_T2U_N12_66 +#set_property IOSTANDARD LVCMOS18 #N/A ;# Bank 66 VCCO - VCC1V2 Net "Not Connected" - IO_T2U_N12_66 +set_property PACKAGE_PIN BE54 [ get_ports {c0_ddr4_dq[67]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ67" - IO_L18N_T2U_N11_AD2N_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[67]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ67" - IO_L18N_T2U_N11_AD2N_66 +set_property PACKAGE_PIN BE53 [ get_ports {c0_ddr4_dq[66]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ66" - IO_L18P_T2U_N10_AD2P_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[66]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ66" - IO_L18P_T2U_N10_AD2P_66 +set_property PACKAGE_PIN BG54 [ get_ports {c0_ddr4_dq[64]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ64" - IO_L17N_T2U_N9_AD10N_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[64]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ64" - IO_L17N_T2U_N9_AD10N_66 +set_property PACKAGE_PIN BG53 [ get_ports {c0_ddr4_dq[65]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ65" - IO_L17P_T2U_N8_AD10P_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[65]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ65" - IO_L17P_T2U_N8_AD10P_66 +set_property PACKAGE_PIN BJ54 [ get_ports {c0_ddr4_dqs_c[16]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_C8" - IO_L16N_T2U_N7_QBC_AD3N_66 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[16]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_C8" - IO_L16N_T2U_N7_QBC_AD3N_66 +set_property PACKAGE_PIN BH54 [ get_ports {c0_ddr4_dqs_t[16]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_T8" - IO_L16P_T2U_N6_QBC_AD3P_66 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[16]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_T8" - IO_L16P_T2U_N6_QBC_AD3P_66 +set_property PACKAGE_PIN BK54 [ get_ports {c0_ddr4_dq[70]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ70" - 
IO_L15N_T2L_N5_AD11N_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[70]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ70" - IO_L15N_T2L_N5_AD11N_66 +set_property PACKAGE_PIN BK53 [ get_ports {c0_ddr4_dq[71]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ71" - IO_L15P_T2L_N4_AD11P_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[71]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ71" - IO_L15P_T2L_N4_AD11P_66 +set_property PACKAGE_PIN BH52 [ get_ports {c0_ddr4_dq[68]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ68" - IO_L14N_T2L_N3_GC_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[68]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ68" - IO_L14N_T2L_N3_GC_66 +set_property PACKAGE_PIN BG52 [ get_ports {c0_ddr4_dq[69]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ69" - IO_L14P_T2L_N2_GC_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[69]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ69" - IO_L14P_T2L_N2_GC_66 +set_property PACKAGE_PIN BJ53 [ get_ports {c0_ddr4_dqs_c[17]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_C17" - IO_L13N_T2L_N1_GC_QBC_66 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[17]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_C17" - IO_L13N_T2L_N1_GC_QBC_66 +set_property PACKAGE_PIN BJ52 [ get_ports {c0_ddr4_dqs_t[17]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_T17" - IO_L13P_T2L_N0_GC_QBC_66 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[17]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_T17" - IO_L13P_T2L_N0_GC_QBC_66 +set_property PACKAGE_PIN BH50 [ get_ports {c0_ddr4_dq[48]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ48" - IO_L12N_T1U_N11_GC_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[48]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ48" - IO_L12N_T1U_N11_GC_66 +set_property PACKAGE_PIN BH49 [ get_ports {c0_ddr4_dq[51]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ51" - IO_L12P_T1U_N10_GC_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[51]} ] ;# Bank 66 
VCCO - VCC1V2 Net "DDR4_C0_DQ51" - IO_L12P_T1U_N10_GC_66 +set_property PACKAGE_PIN BJ51 [ get_ports {c0_ddr4_dq[49]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ49" - IO_L11N_T1U_N9_GC_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[49]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ49" - IO_L11N_T1U_N9_GC_66 +set_property PACKAGE_PIN BH51 [ get_ports {c0_ddr4_dq[50]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ50" - IO_L11P_T1U_N8_GC_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[50]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ50" - IO_L11P_T1U_N8_GC_66 +set_property PACKAGE_PIN BJ47 [ get_ports {c0_ddr4_dqs_c[12]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_C6" - IO_L10N_T1U_N7_QBC_AD4N_66 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[12]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_C6" - IO_L10N_T1U_N7_QBC_AD4N_66 +set_property PACKAGE_PIN BH47 [ get_ports {c0_ddr4_dqs_t[12]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_T6" - IO_L10P_T1U_N6_QBC_AD4P_66 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[12]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_T6" - IO_L10P_T1U_N6_QBC_AD4P_66 +set_property PACKAGE_PIN BJ49 [ get_ports {c0_ddr4_dq[54]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ54" - IO_L9N_T1L_N5_AD12N_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[54]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ54" - IO_L9N_T1L_N5_AD12N_66 +set_property PACKAGE_PIN BJ48 [ get_ports {c0_ddr4_dq[55]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ55" - IO_L9P_T1L_N4_AD12P_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[55]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ55" - IO_L9P_T1L_N4_AD12P_66 +set_property PACKAGE_PIN BK51 [ get_ports {c0_ddr4_dq[53]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ53" - IO_L8N_T1L_N3_AD5N_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[53]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ53" - IO_L8N_T1L_N3_AD5N_66 +set_property PACKAGE_PIN BK50 [ get_ports 
{c0_ddr4_dq[52]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ52" - IO_L8P_T1L_N2_AD5P_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[52]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ52" - IO_L8P_T1L_N2_AD5P_66 +set_property PACKAGE_PIN BK49 [ get_ports {c0_ddr4_dqs_c[13]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_C15" - IO_L7N_T1L_N1_QBC_AD13N_66 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[13]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_C15" - IO_L7N_T1L_N1_QBC_AD13N_66 +set_property PACKAGE_PIN BK48 [ get_ports {c0_ddr4_dqs_t[13]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_T15" - IO_L7P_T1L_N0_QBC_AD13P_66 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[13]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_T15" - IO_L7P_T1L_N0_QBC_AD13P_66 +#set_property PACKAGE_PIN BL48 #N/A ;# Bank 66 VCCO - VCC1V2 Net "Not Connected" - IO_T1U_N12_66 +#set_property IOSTANDARD LVCMOS18 #N/A ;# Bank 66 VCCO - VCC1V2 Net "Not Connected" - IO_T1U_N12_66 +#set_property PACKAGE_PIN BL50 #N/A ;# Bank 66 VCCO - VCC1V2 Net "VRP_61" - IO_T0U_N12_VRP_66 +#set_property IOSTANDARD LVCMOS18 #N/A ;# Bank 66 VCCO - VCC1V2 Net "VRP_61" - IO_T0U_N12_VRP_66 +set_property PACKAGE_PIN BL53 [ get_ports {c0_ddr4_dq[33]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ33" - IO_L6N_T0U_N11_AD6N_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[33]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ33" - IO_L6N_T0U_N11_AD6N_66 +set_property PACKAGE_PIN BL52 [ get_ports {c0_ddr4_dq[34]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ34" - IO_L6P_T0U_N10_AD6P_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[34]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ34" - IO_L6P_T0U_N10_AD6P_66 +set_property PACKAGE_PIN BM52 [ get_ports {c0_ddr4_dq[32]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ32" - IO_L5N_T0U_N9_AD14N_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[32]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ32" - IO_L5N_T0U_N9_AD14N_66 
+set_property PACKAGE_PIN BL51 [ get_ports {c0_ddr4_dq[35]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ35" - IO_L5P_T0U_N8_AD14P_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[35]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ35" - IO_L5P_T0U_N8_AD14P_66 +set_property PACKAGE_PIN BM50 [ get_ports {c0_ddr4_dqs_c[8]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_C4" - IO_L4N_T0U_N7_DBC_AD7N_66 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[8]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_C4" - IO_L4N_T0U_N7_DBC_AD7N_66 +set_property PACKAGE_PIN BM49 [ get_ports {c0_ddr4_dqs_t[8]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_T4" - IO_L4P_T0U_N6_DBC_AD7P_66 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[8]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_T4" - IO_L4P_T0U_N6_DBC_AD7P_66 +set_property PACKAGE_PIN BN49 [ get_ports {c0_ddr4_dq[38]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ38" - IO_L3N_T0L_N5_AD15N_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[38]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ38" - IO_L3N_T0L_N5_AD15N_66 +set_property PACKAGE_PIN BM48 [ get_ports {c0_ddr4_dq[39]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ39" - IO_L3P_T0L_N4_AD15P_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[39]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ39" - IO_L3P_T0L_N4_AD15P_66 +set_property PACKAGE_PIN BN51 [ get_ports {c0_ddr4_dq[37]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ37" - IO_L2N_T0L_N3_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[37]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ37" - IO_L2N_T0L_N3_66 +set_property PACKAGE_PIN BN50 [ get_ports {c0_ddr4_dq[36]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ36" - IO_L2P_T0L_N2_66 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[36]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQ36" - IO_L2P_T0L_N2_66 +set_property PACKAGE_PIN BP49 [ get_ports {c0_ddr4_dqs_c[9]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_C13" - 
IO_L1N_T0L_N1_DBC_66 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[9]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_C13" - IO_L1N_T0L_N1_DBC_66 +set_property PACKAGE_PIN BP48 [ get_ports {c0_ddr4_dqs_t[9]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_T13" - IO_L1P_T0L_N0_DBC_66 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[9]} ] ;# Bank 66 VCCO - VCC1V2 Net "DDR4_C0_DQS_T13" - IO_L1P_T0L_N0_DBC_66 +set_property PACKAGE_PIN BE44 [ get_ports {c0_ddr4_adr[13]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR13" - IO_L24N_T3U_N11_DOUT_CSO_B_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[13]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR13" - IO_L24N_T3U_N11_DOUT_CSO_B_65 +set_property PACKAGE_PIN BE43 [ get_ports {c0_ddr4_adr[14]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR14" - IO_L24P_T3U_N10_EMCCLK_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[14]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR14" - IO_L24P_T3U_N10_EMCCLK_65 +#set_property PACKAGE_PIN BD42 [ get_ports {c0_ddr4_cs_n[2]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CS_B2" - IO_L23N_T3U_N9_PERSTN1_I2C_SDA_65 +#set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_cs_n[2]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CS_B2" - IO_L23N_T3U_N9_PERSTN1_I2C_SDA_65 +#set_property PACKAGE_PIN BC42 [ get_ports {c0_ddr4_alert_n} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ALERT_B" - IO_L23P_T3U_N8_I2C_SCLK_65 +#set_property IOSTANDARD LVCMOS12 [ get_ports {c0_ddr4_alert_n} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ALERT_B" - IO_L23P_T3U_N8_I2C_SCLK_65 +#set_property PACKAGE_PIN BE46 [ get_ports {c0_ddr4_odt[1]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ODT1" - IO_L22N_T3U_N7_DBC_AD0N_D05_65 +#set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_odt[1]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ODT1" - IO_L22N_T3U_N7_DBC_AD0N_D05_65 +#set_property PACKAGE_PIN BE45 [ get_ports {c0_ddr4_cs_n[1]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CS_B1" - 
IO_L22P_T3U_N6_DBC_AD0P_D04_65 +#set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_cs_n[1]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CS_B1" - IO_L22P_T3U_N6_DBC_AD0P_D04_65 +set_property PACKAGE_PIN BF43 [ get_ports {c0_ddr4_adr[5]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR5" - IO_L21N_T3L_N5_AD8N_D07_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[5]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR5" - IO_L21N_T3L_N5_AD8N_D07_65 +set_property PACKAGE_PIN BF42 [ get_ports {c0_ddr4_adr[3]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR3" - IO_L21P_T3L_N4_AD8P_D06_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[3]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR3" - IO_L21P_T3L_N4_AD8P_D06_65 +set_property PACKAGE_PIN BF46 [ get_ports {c0_ddr4_adr[0]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR0" - IO_L20N_T3L_N3_AD1N_D09_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[0]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR0" - IO_L20N_T3L_N3_AD1N_D09_65 +set_property PACKAGE_PIN BF45 [ get_ports {c0_ddr4_par} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_PAR" - IO_L20P_T3L_N2_AD1P_D08_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_par} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_PAR" - IO_L20P_T3L_N2_AD1P_D08_65 +set_property PACKAGE_PIN BE41 [ get_ports {c0_ddr4_bg[1]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_BG1" - IO_L19N_T3L_N1_DBC_AD9N_D11_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_bg[1]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_BG1" - IO_L19N_T3L_N1_DBC_AD9N_D11_65 +set_property PACKAGE_PIN BD41 [ get_ports {c0_ddr4_adr[11]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR11" - IO_L19P_T3L_N0_DBC_AD9P_D10_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[11]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR11" - IO_L19P_T3L_N0_DBC_AD9P_D10_65 +set_property PACKAGE_PIN BF41 [ get_ports {c0_ddr4_bg[0]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_BG0" - IO_T3U_N12_PERSTN0_65 +set_property IOSTANDARD SSTL12_DCI 
[ get_ports {c0_ddr4_bg[0]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_BG0" - IO_T3U_N12_PERSTN0_65 +set_property PACKAGE_PIN BH41 [ get_ports {c0_ddr4_act_n} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ACT_B" - IO_T2U_N12_CSI_ADV_B_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_act_n} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ACT_B" - IO_T2U_N12_CSI_ADV_B_65 +set_property PACKAGE_PIN BG45 [ get_ports {c0_ddr4_adr[10]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR10" - IO_L18N_T2U_N11_AD2N_D13_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[10]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR10" - IO_L18N_T2U_N11_AD2N_D13_65 +set_property PACKAGE_PIN BG44 [ get_ports {c0_ddr4_odt[0]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ODT0" - IO_L18P_T2U_N10_AD2P_D12_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_odt[0]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ODT0" - IO_L18P_T2U_N10_AD2P_D12_65 +set_property PACKAGE_PIN BG43 [ get_ports {c0_ddr4_adr[1]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR1" - IO_L17N_T2U_N9_AD10N_D15_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[1]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR1" - IO_L17N_T2U_N9_AD10N_D15_65 +set_property PACKAGE_PIN BG42 [ get_ports {c0_ddr4_adr[6]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR6" - IO_L17P_T2U_N8_AD10P_D14_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[6]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR6" - IO_L17P_T2U_N8_AD10P_D14_65 +set_property PACKAGE_PIN BJ46 [ get_ports {c0_ddr4_ck_c[0]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CK_C0" - IO_L16N_T2U_N7_QBC_AD3N_A01_D17_65 +set_property IOSTANDARD DIFF_SSTL12_DCI [ get_ports {c0_ddr4_ck_c[0]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CK_C0" - IO_L16N_T2U_N7_QBC_AD3N_A01_D17_65 +set_property PACKAGE_PIN BH46 [ get_ports {c0_ddr4_ck_t[0]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CK_T0" - IO_L16P_T2U_N6_QBC_AD3P_A00_D16_65 +set_property IOSTANDARD DIFF_SSTL12_DCI [ get_ports {c0_ddr4_ck_t[0]} ] ;# 
Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CK_T0" - IO_L16P_T2U_N6_QBC_AD3P_A00_D16_65 +#set_property PACKAGE_PIN BK41 [ get_ports {c0_ddr4_ck_c[1]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CK_C1" - IO_L15N_T2L_N5_AD11N_A03_D19_65 +#set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_ck_c[1]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CK_C1" - IO_L15N_T2L_N5_AD11N_A03_D19_65 +#set_property PACKAGE_PIN BJ41 [ get_ports {c0_ddr4_ck_t[1]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CK_T1" - IO_L15P_T2L_N4_AD11P_A02_D18_65 +#set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_ck_t[1]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CK_T1" - IO_L15P_T2L_N4_AD11P_A02_D18_65 +set_property PACKAGE_PIN BH45 [ get_ports {c0_ddr4_ba[0]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_BA0" - IO_L14N_T2L_N3_GC_A05_D21_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_ba[0]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_BA0" - IO_L14N_T2L_N3_GC_A05_D21_65 +set_property PACKAGE_PIN BH44 [ get_ports {c0_ddr4_adr[16]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR16" - IO_L14P_T2L_N2_GC_A04_D20_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[16]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR16" - IO_L14P_T2L_N2_GC_A04_D20_65 +#set_property PACKAGE_PIN BJ42 [ get_ports {c0_ddr4_cke[1]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CKE1" - IO_L13N_T2L_N1_GC_QBC_A07_D23_65 +#set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_cke[1]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CKE1" - IO_L13N_T2L_N1_GC_QBC_A07_D23_65 +set_property PACKAGE_PIN BH42 [ get_ports {c0_ddr4_cke[0]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CKE0" - IO_L13P_T2L_N0_GC_QBC_A06_D22_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_cke[0]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CKE0" - IO_L13P_T2L_N0_GC_QBC_A06_D22_65 +## Clocks at top of XDC +#set_property PACKAGE_PIN BJ44 [ get_ports {sys_clk0_n} ] ;# Bank 65 VCCO - VCC1V2 Net "SYSCLK0_N" - IO_L12N_T1U_N11_GC_A09_D25_65 +#set_property IOSTANDARD LVDS [ get_ports 
{sys_clk0_n} ] ;# Bank 65 VCCO - VCC1V2 Net "SYSCLK0_N" - IO_L12N_T1U_N11_GC_A09_D25_65 +#set_property PACKAGE_PIN BJ43 [ get_ports {sys_clk0_p} ] ;# Bank 65 VCCO - VCC1V2 Net "SYSCLK0_P" - IO_L12P_T1U_N10_GC_A08_D24_65 +#set_property IOSTANDARD LVDS [ get_ports {sys_clk0_p} ] ;# Bank 65 VCCO - VCC1V2 Net "SYSCLK0_P" - IO_L12P_T1U_N10_GC_A08_D24_65 +### <<<>>> No external BIAS on AC coupled LVDS clock inputs to 1.2V bank so this constraint is added to recenter LVDS signal on 1.2V IO standard. +#set_property DQS_BIAS TRUE [ get_ports {sys_clk0_p} ] ;# Bank 65 VCCO - VCC1V2 Net "SYSCLK0_P" - IO_L12P_T1U_N10_GC_A08_D24_65 +#set_property PACKAGE_PIN BK44 #N/A ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CS_B3" - IO_L11N_T1U_N9_GC_A11_D27_65 +#set_property IOSTANDARD LVCMOS18 #N/A ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CS_B3" - IO_L11N_T1U_N9_GC_A11_D27_65 +set_property PACKAGE_PIN BK43 [ get_ports {c0_ddr4_adr[8]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR8" - IO_L11P_T1U_N8_GC_A10_D26_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[8]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR8" - IO_L11P_T1U_N8_GC_A10_D26_65 +set_property PACKAGE_PIN BK46 [ get_ports {c0_ddr4_cs_n[0]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CS_B0" - IO_L10N_T1U_N7_QBC_AD4N_A13_D29_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_cs_n[0]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_CS_B0" - IO_L10N_T1U_N7_QBC_AD4N_A13_D29_65 +set_property PACKAGE_PIN BK45 [ get_ports {c0_ddr4_adr[2]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR2" - IO_L10P_T1U_N6_QBC_AD4P_A12_D28_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[2]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR2" - IO_L10P_T1U_N6_QBC_AD4P_A12_D28_65 +set_property PACKAGE_PIN BL43 [ get_ports {c0_ddr4_adr[7]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR7" - IO_L9N_T1L_N5_AD12N_A15_D31_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[7]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR7" - 
IO_L9N_T1L_N5_AD12N_A15_D31_65 +set_property PACKAGE_PIN BL42 [ get_ports {c0_ddr4_adr[12]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR12" - IO_L9P_T1L_N4_AD12P_A14_D30_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[12]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR12" - IO_L9P_T1L_N4_AD12P_A14_D30_65 +set_property PACKAGE_PIN BL46 [ get_ports {c0_ddr4_adr[15]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR15" - IO_L8N_T1L_N3_AD5N_A17_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[15]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR15" - IO_L8N_T1L_N3_AD5N_A17_65 +set_property PACKAGE_PIN BL45 [ get_ports {c0_ddr4_adr[4]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR4" - IO_L8P_T1L_N2_AD5P_A16_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[4]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR4" - IO_L8P_T1L_N2_AD5P_A16_65 +set_property PACKAGE_PIN BM47 [ get_ports {c0_ddr4_ba[1]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_BA1" - IO_L7N_T1L_N1_QBC_AD13N_A19_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_ba[1]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_BA1" - IO_L7N_T1L_N1_QBC_AD13N_A19_65 +#set_property PACKAGE_PIN BL47 [ get_ports {c0_ddr4_adr[17]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR17" - IO_L7P_T1L_N0_QBC_AD13P_A18_65 +#set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[17]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR17" - IO_L7P_T1L_N0_QBC_AD13P_A18_65 +set_property PACKAGE_PIN BM42 [ get_ports {c0_ddr4_adr[9]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR9" - IO_T1U_N12_SMBALERT_65 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c0_ddr4_adr[9]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_ADR9" - IO_T1U_N12_SMBALERT_65 +set_property PACKAGE_PIN BN45 [ get_ports {c0_ddr4_dq[57]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQ57" - IO_L6N_T0U_N11_AD6N_A21_65 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[57]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQ57" - IO_L6N_T0U_N11_AD6N_A21_65 +set_property 
PACKAGE_PIN BM45 [ get_ports {c0_ddr4_dq[59]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQ59" - IO_L6P_T0U_N10_AD6P_A20_65 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[59]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQ59" - IO_L6P_T0U_N10_AD6P_A20_65 +set_property PACKAGE_PIN BN44 [ get_ports {c0_ddr4_dq[56]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQ56" - IO_L5N_T0U_N9_AD14N_A23_65 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[56]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQ56" - IO_L5N_T0U_N9_AD14N_A23_65 +set_property PACKAGE_PIN BM44 [ get_ports {c0_ddr4_dq[58]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQ58" - IO_L5P_T0U_N8_AD14P_A22_65 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[58]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQ58" - IO_L5P_T0U_N8_AD14P_A22_65 +set_property PACKAGE_PIN BP46 [ get_ports {c0_ddr4_dqs_c[14]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQS_C7" - IO_L4N_T0U_N7_DBC_AD7N_A25_65 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[14]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQS_C7" - IO_L4N_T0U_N7_DBC_AD7N_A25_65 +set_property PACKAGE_PIN BN46 [ get_ports {c0_ddr4_dqs_t[14]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQS_T7" - IO_L4P_T0U_N6_DBC_AD7P_A24_65 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[14]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQS_T7" - IO_L4P_T0U_N6_DBC_AD7P_A24_65 +set_property PACKAGE_PIN BP44 [ get_ports {c0_ddr4_dq[61]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQ61" - IO_L3N_T0L_N5_AD15N_A27_65 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[61]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQ61" - IO_L3N_T0L_N5_AD15N_A27_65 +set_property PACKAGE_PIN BP43 [ get_ports {c0_ddr4_dq[60]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQ60" - IO_L3P_T0L_N4_AD15P_A26_65 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[60]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQ60" - IO_L3P_T0L_N4_AD15P_A26_65 +set_property PACKAGE_PIN BP47 [ get_ports 
{c0_ddr4_dq[63]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQ63" - IO_L2N_T0L_N3_FWE_FCS2_B_65 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[63]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQ63" - IO_L2N_T0L_N3_FWE_FCS2_B_65 +set_property PACKAGE_PIN BN47 [ get_ports {c0_ddr4_dq[62]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQ62" - IO_L2P_T0L_N2_FOE_B_65 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[62]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQ62" - IO_L2P_T0L_N2_FOE_B_65 +set_property PACKAGE_PIN BP42 [ get_ports {c0_ddr4_dqs_c[15]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQS_C16" - IO_L1N_T0L_N1_DBC_RS1_65 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[15]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQS_C16" - IO_L1N_T0L_N1_DBC_RS1_65 +set_property PACKAGE_PIN BN42 [ get_ports {c0_ddr4_dqs_t[15]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQS_T16" - IO_L1P_T0L_N0_DBC_RS0_65 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[15]} ] ;# Bank 65 VCCO - VCC1V2 Net "DDR4_C0_DQS_T16" - IO_L1P_T0L_N0_DBC_RS0_65 +set_property PACKAGE_PIN BJ31 [ get_ports {c0_ddr4_dq[8]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ8" - IO_L24N_T3U_N11_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[8]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ8" - IO_L24N_T3U_N11_64 +set_property PACKAGE_PIN BH31 [ get_ports {c0_ddr4_dq[9]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ9" - IO_L24P_T3U_N10_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[9]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ9" - IO_L24P_T3U_N10_64 +set_property PACKAGE_PIN BF33 [ get_ports {c0_ddr4_dq[11]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ11" - IO_L23N_T3U_N9_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[11]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ11" - IO_L23N_T3U_N9_64 +set_property PACKAGE_PIN BF32 [ get_ports {c0_ddr4_dq[10]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ10" - IO_L23P_T3U_N8_64 +set_property IOSTANDARD POD12_DCI [ 
get_ports {c0_ddr4_dq[10]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ10" - IO_L23P_T3U_N8_64 +set_property PACKAGE_PIN BK30 [ get_ports {c0_ddr4_dqs_c[2]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_C1" - IO_L22N_T3U_N7_DBC_AD0N_64 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[2]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_C1" - IO_L22N_T3U_N7_DBC_AD0N_64 +set_property PACKAGE_PIN BJ29 [ get_ports {c0_ddr4_dqs_t[2]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_T1" - IO_L22P_T3U_N6_DBC_AD0P_64 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[2]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_T1" - IO_L22P_T3U_N6_DBC_AD0P_64 +set_property PACKAGE_PIN BG32 [ get_ports {c0_ddr4_dq[15]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ15" - IO_L21N_T3L_N5_AD8N_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[15]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ15" - IO_L21N_T3L_N5_AD8N_64 +set_property PACKAGE_PIN BF31 [ get_ports {c0_ddr4_dq[14]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ14" - IO_L21P_T3L_N4_AD8P_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[14]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ14" - IO_L21P_T3L_N4_AD8P_64 +set_property PACKAGE_PIN BH30 [ get_ports {c0_ddr4_dq[13]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ13" - IO_L20N_T3L_N3_AD1N_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[13]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ13" - IO_L20N_T3L_N3_AD1N_64 +set_property PACKAGE_PIN BH29 [ get_ports {c0_ddr4_dq[12]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ12" - IO_L20P_T3L_N2_AD1P_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[12]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ12" - IO_L20P_T3L_N2_AD1P_64 +set_property PACKAGE_PIN BG30 [ get_ports {c0_ddr4_dqs_c[3]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_C10" - IO_L19N_T3L_N1_DBC_AD9N_64 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[3]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_C10" - 
IO_L19N_T3L_N1_DBC_AD9N_64 +set_property PACKAGE_PIN BG29 [ get_ports {c0_ddr4_dqs_t[3]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_T10" - IO_L19P_T3L_N0_DBC_AD9P_64 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[3]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_T10" - IO_L19P_T3L_N0_DBC_AD9P_64 +#set_property PACKAGE_PIN BK29 [ get_ports {c0_ddr4_event_n} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_EVENT_B" - IO_T3U_N12_64 +#set_property IOSTANDARD LVCMOS12 [ get_ports {c0_ddr4_event_n} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_EVENT_B" - IO_T3U_N12_64 +set_property PACKAGE_PIN BG33 [ get_ports {c0_ddr4_reset_n} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_RESET_N" - IO_T2U_N12_64 +set_property IOSTANDARD LVCMOS12 [ get_ports {c0_ddr4_reset_n} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_RESET_N" - IO_T2U_N12_64 +set_property PACKAGE_PIN BH35 [ get_ports {c0_ddr4_dq[25]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ25" - IO_L18N_T2U_N11_AD2N_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[25]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ25" - IO_L18N_T2U_N11_AD2N_64 +set_property PACKAGE_PIN BH34 [ get_ports {c0_ddr4_dq[24]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ24" - IO_L18P_T2U_N10_AD2P_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[24]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ24" - IO_L18P_T2U_N10_AD2P_64 +set_property PACKAGE_PIN BF36 [ get_ports {c0_ddr4_dq[27]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ27" - IO_L17N_T2U_N9_AD10N_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[27]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ27" - IO_L17N_T2U_N9_AD10N_64 +set_property PACKAGE_PIN BF35 [ get_ports {c0_ddr4_dq[26]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ26" - IO_L17P_T2U_N8_AD10P_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[26]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ26" - IO_L17P_T2U_N8_AD10P_64 +set_property PACKAGE_PIN BK35 [ get_ports {c0_ddr4_dqs_c[6]} ] ;# Bank 64 VCCO - VCC1V2 Net 
"DDR4_C0_DQS_C3" - IO_L16N_T2U_N7_QBC_AD3N_64 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[6]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_C3" - IO_L16N_T2U_N7_QBC_AD3N_64 +set_property PACKAGE_PIN BK34 [ get_ports {c0_ddr4_dqs_t[6]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_T3" - IO_L16P_T2U_N6_QBC_AD3P_64 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[6]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_T3" - IO_L16P_T2U_N6_QBC_AD3P_64 +set_property PACKAGE_PIN BG35 [ get_ports {c0_ddr4_dq[31]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ31" - IO_L15N_T2L_N5_AD11N_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[31]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ31" - IO_L15N_T2L_N5_AD11N_64 +set_property PACKAGE_PIN BG34 [ get_ports {c0_ddr4_dq[30]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ30" - IO_L15P_T2L_N4_AD11P_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[30]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ30" - IO_L15P_T2L_N4_AD11P_64 +set_property PACKAGE_PIN BJ34 [ get_ports {c0_ddr4_dq[29]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ29" - IO_L14N_T2L_N3_GC_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[29]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ29" - IO_L14N_T2L_N3_GC_64 +set_property PACKAGE_PIN BJ33 [ get_ports {c0_ddr4_dq[28]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ28" - IO_L14P_T2L_N2_GC_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[28]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ28" - IO_L14P_T2L_N2_GC_64 +set_property PACKAGE_PIN BJ32 [ get_ports {c0_ddr4_dqs_c[7]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_C12" - IO_L13N_T2L_N1_GC_QBC_64 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[7]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_C12" - IO_L13N_T2L_N1_GC_QBC_64 +set_property PACKAGE_PIN BH32 [ get_ports {c0_ddr4_dqs_t[7]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_T12" - IO_L13P_T2L_N0_GC_QBC_64 +set_property IOSTANDARD 
DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[7]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_T12" - IO_L13P_T2L_N0_GC_QBC_64 +set_property PACKAGE_PIN BL33 [ get_ports {c0_ddr4_dq[19]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ19" - IO_L12N_T1U_N11_GC_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[19]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ19" - IO_L12N_T1U_N11_GC_64 +set_property PACKAGE_PIN BK33 [ get_ports {c0_ddr4_dq[18]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ18" - IO_L12P_T1U_N10_GC_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[18]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ18" - IO_L12P_T1U_N10_GC_64 +set_property PACKAGE_PIN BL31 [ get_ports {c0_ddr4_dq[17]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ17" - IO_L11N_T1U_N9_GC_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[17]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ17" - IO_L11N_T1U_N9_GC_64 +set_property PACKAGE_PIN BK31 [ get_ports {c0_ddr4_dq[16]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ16" - IO_L11P_T1U_N8_GC_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[16]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ16" - IO_L11P_T1U_N8_GC_64 +set_property PACKAGE_PIN BM35 [ get_ports {c0_ddr4_dqs_c[4]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_C2" - IO_L10N_T1U_N7_QBC_AD4N_64 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[4]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_C2" - IO_L10N_T1U_N7_QBC_AD4N_64 +set_property PACKAGE_PIN BL35 [ get_ports {c0_ddr4_dqs_t[4]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_T2" - IO_L10P_T1U_N6_QBC_AD4P_64 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[4]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_T2" - IO_L10P_T1U_N6_QBC_AD4P_64 +set_property PACKAGE_PIN BM33 [ get_ports {c0_ddr4_dq[21]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ21" - IO_L9N_T1L_N5_AD12N_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[21]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ21" - 
IO_L9N_T1L_N5_AD12N_64 +set_property PACKAGE_PIN BL32 [ get_ports {c0_ddr4_dq[20]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ20" - IO_L9P_T1L_N4_AD12P_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[20]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ20" - IO_L9P_T1L_N4_AD12P_64 +set_property PACKAGE_PIN BP34 [ get_ports {c0_ddr4_dq[23]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ23" - IO_L8N_T1L_N3_AD5N_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[23]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ23" - IO_L8N_T1L_N3_AD5N_64 +set_property PACKAGE_PIN BN34 [ get_ports {c0_ddr4_dq[22]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ22" - IO_L8P_T1L_N2_AD5P_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[22]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ22" - IO_L8P_T1L_N2_AD5P_64 +set_property PACKAGE_PIN BN35 [ get_ports {c0_ddr4_dqs_c[5]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_C11" - IO_L7N_T1L_N1_QBC_AD13N_64 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[5]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_C11" - IO_L7N_T1L_N1_QBC_AD13N_64 +set_property PACKAGE_PIN BM34 [ get_ports {c0_ddr4_dqs_t[5]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_T11" - IO_L7P_T1L_N0_QBC_AD13P_64 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[5]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_T11" - IO_L7P_T1L_N0_QBC_AD13P_64 +#set_property PACKAGE_PIN BP33 #N/A ;# Bank 64 VCCO - VCC1V2 Net "Not Connected" - IO_T1U_N12_64 +#set_property IOSTANDARD LVCMOS12 #N/A ;# Bank 64 VCCO - VCC1V2 Net "Not Connected" - IO_T1U_N12_64 +set_property PACKAGE_PIN BP32 [ get_ports {c0_ddr4_dq[1]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ1" - IO_L6N_T0U_N11_AD6N_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[1]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ1" - IO_L6N_T0U_N11_AD6N_64 +set_property PACKAGE_PIN BN32 [ get_ports {c0_ddr4_dq[0]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ0" - IO_L6P_T0U_N10_AD6P_64 +set_property 
IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[0]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ0" - IO_L6P_T0U_N10_AD6P_64 +set_property PACKAGE_PIN BM30 [ get_ports {c0_ddr4_dq[3]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ3" - IO_L5N_T0U_N9_AD14N_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[3]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ3" - IO_L5N_T0U_N9_AD14N_64 +set_property PACKAGE_PIN BL30 [ get_ports {c0_ddr4_dq[2]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ2" - IO_L5P_T0U_N8_AD14P_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[2]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ2" - IO_L5P_T0U_N8_AD14P_64 +set_property PACKAGE_PIN BN30 [ get_ports {c0_ddr4_dqs_c[0]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_C0" - IO_L4N_T0U_N7_DBC_AD7N_64 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[0]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_C0" - IO_L4N_T0U_N7_DBC_AD7N_64 +set_property PACKAGE_PIN BN29 [ get_ports {c0_ddr4_dqs_t[0]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_T0" - IO_L4P_T0U_N6_DBC_AD7P_64 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[0]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_T0" - IO_L4P_T0U_N6_DBC_AD7P_64 +set_property PACKAGE_PIN BP31 [ get_ports {c0_ddr4_dq[6]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ6" - IO_L3N_T0L_N5_AD15N_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[6]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ6" - IO_L3N_T0L_N5_AD15N_64 +set_property PACKAGE_PIN BN31 [ get_ports {c0_ddr4_dq[7]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ7" - IO_L3P_T0L_N4_AD15P_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[7]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ7" - IO_L3P_T0L_N4_AD15P_64 +set_property PACKAGE_PIN BP29 [ get_ports {c0_ddr4_dq[4]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ4" - IO_L2N_T0L_N3_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[4]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ4" - IO_L2N_T0L_N3_64 
+set_property PACKAGE_PIN BP28 [ get_ports {c0_ddr4_dq[5]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ5" - IO_L2P_T0L_N2_64 +set_property IOSTANDARD POD12_DCI [ get_ports {c0_ddr4_dq[5]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQ5" - IO_L2P_T0L_N2_64 +set_property PACKAGE_PIN BM29 [ get_ports {c0_ddr4_dqs_c[1]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_C9" - IO_L1N_T0L_N1_DBC_64 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_c[1]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_C9" - IO_L1N_T0L_N1_DBC_64 +set_property PACKAGE_PIN BM28 [ get_ports {c0_ddr4_dqs_t[1]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_T9" - IO_L1P_T0L_N0_DBC_64 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c0_ddr4_dqs_t[1]} ] ;# Bank 64 VCCO - VCC1V2 Net "DDR4_C0_DQS_T9" - IO_L1P_T0L_N0_DBC_64 + +#### +### DDR4 c1 +#### + +set_property PACKAGE_PIN A8 [ get_ports {c1_ddr4_dq[3]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ3" - IO_L24N_T3U_N11_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[3]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ3" - IO_L24N_T3U_N11_70 +set_property PACKAGE_PIN A9 [ get_ports {c1_ddr4_dq[2]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ2" - IO_L24P_T3U_N10_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[2]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ2" - IO_L24P_T3U_N10_70 +set_property PACKAGE_PIN A10 [ get_ports {c1_ddr4_dq[1]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ1" - IO_L23N_T3U_N9_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[1]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ1" - IO_L23N_T3U_N9_70 +set_property PACKAGE_PIN A11 [ get_ports {c1_ddr4_dq[0]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ0" - IO_L23P_T3U_N8_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[0]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ0" - IO_L23P_T3U_N8_70 +set_property PACKAGE_PIN A13 [ get_ports {c1_ddr4_dqs_c[0]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_C0" - IO_L22N_T3U_N7_DBC_AD0N_70 +set_property IOSTANDARD 
DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[0]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_C0" - IO_L22N_T3U_N7_DBC_AD0N_70 +set_property PACKAGE_PIN B13 [ get_ports {c1_ddr4_dqs_t[0]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_T0" - IO_L22P_T3U_N6_DBC_AD0P_70 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[0]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_T0" - IO_L22P_T3U_N6_DBC_AD0P_70 +set_property PACKAGE_PIN B12 [ get_ports {c1_ddr4_dq[4]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ4" - IO_L21N_T3L_N5_AD8N_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[4]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ4" - IO_L21N_T3L_N5_AD8N_70 +set_property PACKAGE_PIN C12 [ get_ports {c1_ddr4_dq[6]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ6" - IO_L21P_T3L_N4_AD8P_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[6]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ6" - IO_L21P_T3L_N4_AD8P_70 +set_property PACKAGE_PIN B10 [ get_ports {c1_ddr4_dq[5]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ5" - IO_L20N_T3L_N3_AD1N_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[5]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ5" - IO_L20N_T3L_N3_AD1N_70 +set_property PACKAGE_PIN B11 [ get_ports {c1_ddr4_dq[7]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ7" - IO_L20P_T3L_N2_AD1P_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[7]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ7" - IO_L20P_T3L_N2_AD1P_70 +set_property PACKAGE_PIN C9 [ get_ports {c1_ddr4_dqs_c[1]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_C9" - IO_L19N_T3L_N1_DBC_AD9N_70 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[1]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_C9" - IO_L19N_T3L_N1_DBC_AD9N_70 +set_property PACKAGE_PIN C10 [ get_ports {c1_ddr4_dqs_t[1]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_T9" - IO_L19P_T3L_N0_DBC_AD9P_70 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[1]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_T9" - 
IO_L19P_T3L_N0_DBC_AD9P_70 +#set_property PACKAGE_PIN C13 #N/A ;# Bank 70 VCCO - VCC1V2 Net "Not Connected" - IO_T3U_N12_70 +#set_property IOSTANDARD LVCMOS12 #N/A ;# Bank 70 VCCO - VCC1V2 Net "Not Connected" - IO_T3U_N12_70 +#set_property PACKAGE_PIN C14 #N/A ;# Bank 70 VCCO - VCC1V2 Net "Not Connected" - IO_T2U_N12_70 +#set_property IOSTANDARD LVCMOS12 #N/A ;# Bank 70 VCCO - VCC1V2 Net "Not Connected" - IO_T2U_N12_70 +set_property PACKAGE_PIN A14 [ get_ports {c1_ddr4_dq[24]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ27" - IO_L18N_T2U_N11_AD2N_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[24]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ27" - IO_L18N_T2U_N11_AD2N_70 +set_property PACKAGE_PIN A15 [ get_ports {c1_ddr4_dq[26]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ25" - IO_L18P_T2U_N10_AD2P_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[26]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ25" - IO_L18P_T2U_N10_AD2P_70 +set_property PACKAGE_PIN B15 [ get_ports {c1_ddr4_dq[27]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ24" - IO_L17N_T2U_N9_AD10N_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[27]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ24" - IO_L17N_T2U_N9_AD10N_70 +set_property PACKAGE_PIN C15 [ get_ports {c1_ddr4_dq[25]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ26" - IO_L17P_T2U_N8_AD10P_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[25]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ26" - IO_L17P_T2U_N8_AD10P_70 +set_property PACKAGE_PIN D14 [ get_ports {c1_ddr4_dqs_c[6]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_C3" - IO_L16N_T2U_N7_QBC_AD3N_70 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[6]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_C3" - IO_L16N_T2U_N7_QBC_AD3N_70 +set_property PACKAGE_PIN D15 [ get_ports {c1_ddr4_dqs_t[6]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_T3" - IO_L16P_T2U_N6_QBC_AD3P_70 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[6]} ] ;# Bank 70 
VCCO - VCC1V2 Net "DDR4_C1_DQS_T3" - IO_L16P_T2U_N6_QBC_AD3P_70 +set_property PACKAGE_PIN E14 [ get_ports {c1_ddr4_dq[29]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ29" - IO_L15N_T2L_N5_AD11N_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[29]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ29" - IO_L15N_T2L_N5_AD11N_70 +set_property PACKAGE_PIN F15 [ get_ports {c1_ddr4_dq[28]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ31" - IO_L15P_T2L_N4_AD11P_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[28]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ31" - IO_L15P_T2L_N4_AD11P_70 +set_property PACKAGE_PIN F13 [ get_ports {c1_ddr4_dq[31]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ28" - IO_L14N_T2L_N3_GC_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[31]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ28" - IO_L14N_T2L_N3_GC_70 +set_property PACKAGE_PIN F14 [ get_ports {c1_ddr4_dq[30]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ30" - IO_L14P_T2L_N2_GC_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[30]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ30" - IO_L14P_T2L_N2_GC_70 +set_property PACKAGE_PIN D12 [ get_ports {c1_ddr4_dqs_c[7]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_C12" - IO_L13N_T2L_N1_GC_QBC_70 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[7]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_C12" - IO_L13N_T2L_N1_GC_QBC_70 +set_property PACKAGE_PIN E13 [ get_ports {c1_ddr4_dqs_t[7]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_T12" - IO_L13P_T2L_N0_GC_QBC_70 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[7]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_T12" - IO_L13P_T2L_N0_GC_QBC_70 +set_property PACKAGE_PIN E11 [ get_ports {c1_ddr4_dq[8]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ8" - IO_L12N_T1U_N11_GC_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[8]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ8" - IO_L12N_T1U_N11_GC_70 +set_property PACKAGE_PIN F11 [ get_ports 
{c1_ddr4_dq[11]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ11" - IO_L12P_T1U_N10_GC_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[11]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ11" - IO_L12P_T1U_N10_GC_70 +set_property PACKAGE_PIN D11 [ get_ports {c1_ddr4_dq[9]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ9" - IO_L11N_T1U_N9_GC_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[9]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ9" - IO_L11N_T1U_N9_GC_70 +set_property PACKAGE_PIN E12 [ get_ports {c1_ddr4_dq[10]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ10" - IO_L11P_T1U_N8_GC_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[10]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ10" - IO_L11P_T1U_N8_GC_70 +set_property PACKAGE_PIN D9 [ get_ports {c1_ddr4_dqs_c[2]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_C1" - IO_L10N_T1U_N7_QBC_AD4N_70 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[2]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_C1" - IO_L10N_T1U_N7_QBC_AD4N_70 +set_property PACKAGE_PIN D10 [ get_ports {c1_ddr4_dqs_t[2]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_T1" - IO_L10P_T1U_N6_QBC_AD4P_70 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[2]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_T1" - IO_L10P_T1U_N6_QBC_AD4P_70 +set_property PACKAGE_PIN E9 [ get_ports {c1_ddr4_dq[13]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ13" - IO_L9N_T1L_N5_AD12N_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[13]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ13" - IO_L9N_T1L_N5_AD12N_70 +set_property PACKAGE_PIN F9 [ get_ports {c1_ddr4_dq[14]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ14" - IO_L9P_T1L_N4_AD12P_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[14]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ14" - IO_L9P_T1L_N4_AD12P_70 +set_property PACKAGE_PIN F10 [ get_ports {c1_ddr4_dq[12]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ12" - IO_L8N_T1L_N3_AD5N_70 +set_property IOSTANDARD 
POD12_DCI [ get_ports {c1_ddr4_dq[12]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ12" - IO_L8N_T1L_N3_AD5N_70 +set_property PACKAGE_PIN G11 [ get_ports {c1_ddr4_dq[15]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ15" - IO_L8P_T1L_N2_AD5P_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[15]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ15" - IO_L8P_T1L_N2_AD5P_70 +set_property PACKAGE_PIN G10 [ get_ports {c1_ddr4_dqs_c[3]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_C10" - IO_L7N_T1L_N1_QBC_AD13N_70 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[3]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_C10" - IO_L7N_T1L_N1_QBC_AD13N_70 +set_property PACKAGE_PIN H10 [ get_ports {c1_ddr4_dqs_t[3]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_T10" - IO_L7P_T1L_N0_QBC_AD13P_70 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[3]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_T10" - IO_L7P_T1L_N0_QBC_AD13P_70 +#set_property PACKAGE_PIN H9 #N/A ;# Bank 70 VCCO - VCC1V2 Net "Not Connected" - IO_T1U_N12_70 +#set_property IOSTANDARD LVCMOS18 #N/A ;# Bank 70 VCCO - VCC1V2 Net "Not Connected" - IO_T1U_N12_70 +set_property PACKAGE_PIN G13 [ get_ports {c1_ddr4_dq[17]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ17" - IO_L6N_T0U_N11_AD6N_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[17]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ17" - IO_L6N_T0U_N11_AD6N_70 +set_property PACKAGE_PIN H14 [ get_ports {c1_ddr4_dq[19]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ19" - IO_L6P_T0U_N10_AD6P_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[19]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ19" - IO_L6P_T0U_N10_AD6P_70 +set_property PACKAGE_PIN H12 [ get_ports {c1_ddr4_dq[16]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ16" - IO_L5N_T0U_N9_AD14N_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[16]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ16" - IO_L5N_T0U_N9_AD14N_70 +set_property PACKAGE_PIN H13 [ get_ports {c1_ddr4_dq[18]} 
] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ18" - IO_L5P_T0U_N8_AD14P_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[18]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ18" - IO_L5P_T0U_N8_AD14P_70 +set_property PACKAGE_PIN G15 [ get_ports {c1_ddr4_dqs_c[4]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_C2" - IO_L4N_T0U_N7_DBC_AD7N_70 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[4]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_C2" - IO_L4N_T0U_N7_DBC_AD7N_70 +set_property PACKAGE_PIN H15 [ get_ports {c1_ddr4_dqs_t[4]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_T2" - IO_L4P_T0U_N6_DBC_AD7P_70 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[4]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_T2" - IO_L4P_T0U_N6_DBC_AD7P_70 +set_property PACKAGE_PIN J11 [ get_ports {c1_ddr4_dq[20]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ20" - IO_L3N_T0L_N5_AD15N_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[20]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ20" - IO_L3N_T0L_N5_AD15N_70 +set_property PACKAGE_PIN J12 [ get_ports {c1_ddr4_dq[21]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ21" - IO_L3P_T0L_N4_AD15P_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[21]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ21" - IO_L3P_T0L_N4_AD15P_70 +set_property PACKAGE_PIN J14 [ get_ports {c1_ddr4_dq[23]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ23" - IO_L2N_T0L_N3_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[23]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ23" - IO_L2N_T0L_N3_70 +set_property PACKAGE_PIN J15 [ get_ports {c1_ddr4_dq[22]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ22" - IO_L2P_T0L_N2_70 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[22]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQ22" - IO_L2P_T0L_N2_70 +set_property PACKAGE_PIN K13 [ get_ports {c1_ddr4_dqs_c[5]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_C11" - IO_L1N_T0L_N1_DBC_70 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports 
{c1_ddr4_dqs_c[5]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_C11" - IO_L1N_T0L_N1_DBC_70 +set_property PACKAGE_PIN K14 [ get_ports {c1_ddr4_dqs_t[5]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_T11" - IO_L1P_T0L_N0_DBC_70 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[5]} ] ;# Bank 70 VCCO - VCC1V2 Net "DDR4_C1_DQS_T11" - IO_L1P_T0L_N0_DBC_70 +#set_property PACKAGE_PIN BF1 [ get_ports {c1_ddr4_cke[1]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CKE1" - IO_L24N_T3U_N11_69 +#set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_cke[1]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CKE1" - IO_L24N_T3U_N11_69 +set_property PACKAGE_PIN BE1 [ get_ports {c1_ddr4_cke[0]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CKE0" - IO_L24P_T3U_N10_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_cke[0]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CKE0" - IO_L24P_T3U_N10_69 +set_property PACKAGE_PIN BE3 [ get_ports {c1_ddr4_adr[4]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR4" - IO_L23N_T3U_N9_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[4]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR4" - IO_L23N_T3U_N9_69 +set_property PACKAGE_PIN BE4 [ get_ports {c1_ddr4_adr[11]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR11" - IO_L23P_T3U_N8_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[11]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR11" - IO_L23P_T3U_N8_69 +set_property PACKAGE_PIN BE5 [ get_ports {c1_ddr4_adr[6]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR6" - IO_L22N_T3U_N7_DBC_AD0N_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[6]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR6" - IO_L22N_T3U_N7_DBC_AD0N_69 +set_property PACKAGE_PIN BE6 [ get_ports {c1_ddr4_adr[5]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR5" - IO_L22P_T3U_N6_DBC_AD0P_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[5]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR5" - IO_L22P_T3U_N6_DBC_AD0P_69 +set_property PACKAGE_PIN BF2 [ get_ports 
{c1_ddr4_bg[1]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_BG1" - IO_L21N_T3L_N5_AD8N_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_bg[1]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_BG1" - IO_L21N_T3L_N5_AD8N_69 +set_property PACKAGE_PIN BF3 [ get_ports {c1_ddr4_bg[0]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_BG0" - IO_L21P_T3L_N4_AD8P_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_bg[0]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_BG0" - IO_L21P_T3L_N4_AD8P_69 +set_property PACKAGE_PIN BG2 [ get_ports {c1_ddr4_adr[9]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR9" - IO_L20N_T3L_N3_AD1N_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[9]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR9" - IO_L20N_T3L_N3_AD1N_69 +set_property PACKAGE_PIN BG3 [ get_ports {c1_ddr4_act_n} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ACT_B" - IO_L20P_T3L_N2_AD1P_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_act_n} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ACT_B" - IO_L20P_T3L_N2_AD1P_69 +#set_property PACKAGE_PIN BG4 [ get_ports {c1_ddr4_cs_n[3]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CS_B3" - IO_L19N_T3L_N1_DBC_AD9N_69 +#set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_cs_n[3]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CS_B3" - IO_L19N_T3L_N1_DBC_AD9N_69 +#set_property PACKAGE_PIN BG5 [ get_ports {c1_ddr4_cs_n[2]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CS_B2" - IO_L19P_T3L_N0_DBC_AD9P_69 +#set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_cs_n[2]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CS_B2" - IO_L19P_T3L_N0_DBC_AD9P_69 +set_property PACKAGE_PIN BF5 [ get_ports {c1_ddr4_adr[3]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR3" - IO_T3U_N12_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[3]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR3" - IO_T3U_N12_69 +set_property PACKAGE_PIN BF6 [ get_ports {c1_ddr4_adr[2]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR2" - IO_T2U_N12_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports 
{c1_ddr4_adr[2]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR2" - IO_T2U_N12_69 +set_property PACKAGE_PIN BF7 [ get_ports {c1_ddr4_adr[0]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR0" - IO_L18N_T2U_N11_AD2N_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[0]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR0" - IO_L18N_T2U_N11_AD2N_69 +set_property PACKAGE_PIN BF8 [ get_ports {c1_ddr4_adr[16]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR16" - IO_L18P_T2U_N10_AD2P_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[16]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR16" - IO_L18P_T2U_N10_AD2P_69 +set_property PACKAGE_PIN BG7 [ get_ports {c1_ddr4_adr[7]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR7" - IO_L17N_T2U_N9_AD10N_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[7]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR7" - IO_L17N_T2U_N9_AD10N_69 +set_property PACKAGE_PIN BG8 [ get_ports {c1_ddr4_ba[0]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_BA0" - IO_L17P_T2U_N8_AD10P_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_ba[0]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_BA0" - IO_L17P_T2U_N8_AD10P_69 +#set_property PACKAGE_PIN BJ7 [ get_ports {c1_ddr4_ck_c[1]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CK_C1" - IO_L16N_T2U_N7_QBC_AD3N_69 +#set_property IOSTANDARD DIFF_SSTL12_DCI [ get_ports {c1_ddr4_ck_c[1]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CK_C1" - IO_L16N_T2U_N7_QBC_AD3N_69 +#set_property PACKAGE_PIN BH7 [ get_ports {c1_ddr4_ck_t[1]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CK_T1" - IO_L16P_T2U_N6_QBC_AD3P_69 +#set_property IOSTANDARD DIFF_SSTL12_DCI [ get_ports {c1_ddr4_ck_t[1]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CK_T1" - IO_L16P_T2U_N6_QBC_AD3P_69 +set_property PACKAGE_PIN BK8 [ get_ports {c1_ddr4_adr[14]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR14" - IO_L15N_T2L_N5_AD11N_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[14]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR14" - IO_L15N_T2L_N5_AD11N_69 
+set_property PACKAGE_PIN BJ8 [ get_ports {c1_ddr4_adr[10]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR10" - IO_L15P_T2L_N4_AD11P_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[10]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR10" - IO_L15P_T2L_N4_AD11P_69 +#set_property PACKAGE_PIN BH4 [ get_ports {c1_ddr4_odt[1]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ODT1" - IO_L14N_T2L_N3_GC_69 +#set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_odt[1]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ODT1" - IO_L14N_T2L_N3_GC_69 +#set_property PACKAGE_PIN BH5 [ get_ports {c1_ddr4_cs_n[1]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CS_B1" - IO_L14P_T2L_N2_GC_69 +#set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_cs_n[1]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CS_B1" - IO_L14P_T2L_N2_GC_69 +## Clocks at top of XDC +#set_property PACKAGE_PIN BJ6 [ get_ports {sys_clk1_n} ] ;# Bank 69 VCCO - VCC1V2 Net "SYSCLK1_N" - IO_L13N_T2L_N1_GC_QBC_69 +#set_property IOSTANDARD LVCMOS18 [ get_ports {sys_clk1_n} ] ;# Bank 69 VCCO - VCC1V2 Net "SYSCLK1_N" - IO_L13N_T2L_N1_GC_QBC_69 +#set_property PACKAGE_PIN BH6 [ get_ports {sys_clk1_p} ] ;# Bank 69 VCCO - VCC1V2 Net "SYSCLK1_P" - IO_L13P_T2L_N0_GC_QBC_69 +#set_property IOSTANDARD LVCMOS18 [ get_ports {sys_clk1_p} ] ;# Bank 69 VCCO - VCC1V2 Net "SYSCLK1_P" - IO_L13P_T2L_N0_GC_QBC_69 +### <<<>>> No external BIAS on AC coupled LVDS clock inputs to 1.2V bank so this constraint is added to recenter LVDS signal on 1.2V IO standard. 
+#set_property DQS_BIAS TRUE [ get_ports {sys_clk1_p} ] ;# Bank 69 VCCO - VCC1V2 Net "SYSCLK1_P" - IO_L13P_T2L_N0_GC_QBC_69 +set_property PACKAGE_PIN BK4 [ get_ports {c1_ddr4_ba[1]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_BA1" - IO_L12N_T1U_N11_GC_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_ba[1]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_BA1" - IO_L12N_T1U_N11_GC_69 +set_property PACKAGE_PIN BK5 [ get_ports {c1_ddr4_adr[13]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR13" - IO_L12P_T1U_N10_GC_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[13]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR13" - IO_L12P_T1U_N10_GC_69 +#set_property PACKAGE_PIN BK3 [ get_ports {c1_ddr4_alert_n} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ALERT_B" - IO_L11N_T1U_N9_GC_69 +#set_property IOSTANDARD LVCMOS12 [ get_ports {c1_ddr4_alert_n} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ALERT_B" - IO_L11N_T1U_N9_GC_69 +set_property PACKAGE_PIN BJ4 [ get_ports {c1_ddr4_adr[15]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR15" - IO_L11P_T1U_N8_GC_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[15]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR15" - IO_L11P_T1U_N8_GC_69 +set_property PACKAGE_PIN BJ2 [ get_ports {c1_ddr4_ck_c[0]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CK_C0" - IO_L10N_T1U_N7_QBC_AD4N_69 +set_property IOSTANDARD DIFF_SSTL12_DCI [ get_ports {c1_ddr4_ck_c[0]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CK_C0" - IO_L10N_T1U_N7_QBC_AD4N_69 +set_property PACKAGE_PIN BJ3 [ get_ports {c1_ddr4_ck_t[0]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CK_T0" - IO_L10P_T1U_N6_QBC_AD4P_69 +set_property IOSTANDARD DIFF_SSTL12_DCI [ get_ports {c1_ddr4_ck_t[0]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CK_T0" - IO_L10P_T1U_N6_QBC_AD4P_69 +set_property PACKAGE_PIN BH1 [ get_ports {c1_ddr4_par} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_PAR" - IO_L9N_T1L_N5_AD12N_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_par} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_PAR" - 
IO_L9N_T1L_N5_AD12N_69 +set_property PACKAGE_PIN BH2 [ get_ports {c1_ddr4_odt[0]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ODT0" - IO_L9P_T1L_N4_AD12P_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_odt[0]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ODT0" - IO_L9P_T1L_N4_AD12P_69 +set_property PACKAGE_PIN BK1 [ get_ports {c1_ddr4_adr[1]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR1" - IO_L8N_T1L_N3_AD5N_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[1]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR1" - IO_L8N_T1L_N3_AD5N_69 +set_property PACKAGE_PIN BJ1 [ get_ports {c1_ddr4_adr[8]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR8" - IO_L8P_T1L_N2_AD5P_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[8]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR8" - IO_L8P_T1L_N2_AD5P_69 +set_property PACKAGE_PIN BL2 [ get_ports {c1_ddr4_adr[12]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR12" - IO_L7N_T1L_N1_QBC_AD13N_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[12]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR12" - IO_L7N_T1L_N1_QBC_AD13N_69 +set_property PACKAGE_PIN BL3 [ get_ports {c1_ddr4_cs_n[0]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CS_B0" - IO_L7P_T1L_N0_QBC_AD13P_69 +set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_cs_n[0]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_CS_B0" - IO_L7P_T1L_N0_QBC_AD13P_69 +#set_property PACKAGE_PIN BK6 [ get_ports {c1_ddr4_adr[17]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR17" - IO_T1U_N12_69 +#set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_adr[17]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_ADR17" - IO_T1U_N12_69 +set_property PACKAGE_PIN BM3 [ get_ports {c1_ddr4_dq[32]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQ32" - IO_L6N_T0U_N11_AD6N_69 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[32]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQ32" - IO_L6N_T0U_N11_AD6N_69 +set_property PACKAGE_PIN BM4 [ get_ports {c1_ddr4_dq[33]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQ33" - 
IO_L6P_T0U_N10_AD6P_69 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[33]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQ33" - IO_L6P_T0U_N10_AD6P_69 +set_property PACKAGE_PIN BM5 [ get_ports {c1_ddr4_dq[34]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQ34" - IO_L5N_T0U_N9_AD14N_69 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[34]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQ34" - IO_L5N_T0U_N9_AD14N_69 +set_property PACKAGE_PIN BL6 [ get_ports {c1_ddr4_dq[35]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQ35" - IO_L5P_T0U_N8_AD14P_69 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[35]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQ35" - IO_L5P_T0U_N8_AD14P_69 +set_property PACKAGE_PIN BM7 [ get_ports {c1_ddr4_dqs_c[8]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQS_C4" - IO_L4N_T0U_N7_DBC_AD7N_69 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[8]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQS_C4" - IO_L4N_T0U_N7_DBC_AD7N_69 +set_property PACKAGE_PIN BL7 [ get_ports {c1_ddr4_dqs_t[8]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQS_T4" - IO_L4P_T0U_N6_DBC_AD7P_69 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[8]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQS_T4" - IO_L4P_T0U_N6_DBC_AD7P_69 +set_property PACKAGE_PIN BN4 [ get_ports {c1_ddr4_dq[36]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQ36" - IO_L3N_T0L_N5_AD15N_69 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[36]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQ36" - IO_L3N_T0L_N5_AD15N_69 +set_property PACKAGE_PIN BN5 [ get_ports {c1_ddr4_dq[37]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQ37" - IO_L3P_T0L_N4_AD15P_69 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[37]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQ37" - IO_L3P_T0L_N4_AD15P_69 +set_property PACKAGE_PIN BN6 [ get_ports {c1_ddr4_dq[38]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQ38" - IO_L2N_T0L_N3_69 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[38]} ] ;# Bank 69 VCCO - 
VCC1V2 Net "DDR4_C1_DQ38" - IO_L2N_T0L_N3_69 +set_property PACKAGE_PIN BN7 [ get_ports {c1_ddr4_dq[39]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQ39" - IO_L2P_T0L_N2_69 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[39]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQ39" - IO_L2P_T0L_N2_69 +set_property PACKAGE_PIN BP6 [ get_ports {c1_ddr4_dqs_c[9]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQS_C13" - IO_L1N_T0L_N1_DBC_69 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[9]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQS_C13" - IO_L1N_T0L_N1_DBC_69 +set_property PACKAGE_PIN BP7 [ get_ports {c1_ddr4_dqs_t[9]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQS_T13" - IO_L1P_T0L_N0_DBC_69 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[9]} ] ;# Bank 69 VCCO - VCC1V2 Net "DDR4_C1_DQS_T13" - IO_L1P_T0L_N0_DBC_69 +set_property PACKAGE_PIN BE9 [ get_ports {c1_ddr4_dq[64]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ65" - IO_L24N_T3U_N11_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[64]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ65" - IO_L24N_T3U_N11_68 +set_property PACKAGE_PIN BE10 [ get_ports {c1_ddr4_dq[65]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ67" - IO_L24P_T3U_N10_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[65]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ67" - IO_L24P_T3U_N10_68 +set_property PACKAGE_PIN BF10 [ get_ports {c1_ddr4_dq[66]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ66" - IO_L23N_T3U_N9_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[66]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ66" - IO_L23N_T3U_N9_68 +set_property PACKAGE_PIN BE11 [ get_ports {c1_ddr4_dq[67]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ64" - IO_L23P_T3U_N8_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[67]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ64" - IO_L23P_T3U_N8_68 +set_property PACKAGE_PIN BF11 [ get_ports {c1_ddr4_dqs_c[16]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_C8" - 
IO_L22N_T3U_N7_DBC_AD0N_68 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[16]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_C8" - IO_L22N_T3U_N7_DBC_AD0N_68 +set_property PACKAGE_PIN BF12 [ get_ports {c1_ddr4_dqs_t[16]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_T8" - IO_L22P_T3U_N6_DBC_AD0P_68 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[16]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_T8" - IO_L22P_T3U_N6_DBC_AD0P_68 +set_property PACKAGE_PIN BG9 [ get_ports {c1_ddr4_dq[70]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ71" - IO_L21N_T3L_N5_AD8N_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[70]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ71" - IO_L21N_T3L_N5_AD8N_68 +set_property PACKAGE_PIN BG10 [ get_ports {c1_ddr4_dq[71]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ69" - IO_L21P_T3L_N4_AD8P_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[71]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ69" - IO_L21P_T3L_N4_AD8P_68 +set_property PACKAGE_PIN BG12 [ get_ports {c1_ddr4_dq[69]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ70" - IO_L20N_T3L_N3_AD1N_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[69]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ70" - IO_L20N_T3L_N3_AD1N_68 +set_property PACKAGE_PIN BG13 [ get_ports {c1_ddr4_dq[68]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ68" - IO_L20P_T3L_N2_AD1P_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[68]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ68" - IO_L20P_T3L_N2_AD1P_68 +set_property PACKAGE_PIN BH9 [ get_ports {c1_ddr4_dqs_c[17]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_C17" - IO_L19N_T3L_N1_DBC_AD9N_68 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[17]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_C17" - IO_L19N_T3L_N1_DBC_AD9N_68 +set_property PACKAGE_PIN BH10 [ get_ports {c1_ddr4_dqs_t[17]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_T17" - IO_L19P_T3L_N0_DBC_AD9P_68 +set_property IOSTANDARD DIFF_POD12_DCI 
[ get_ports {c1_ddr4_dqs_t[17]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_T17" - IO_L19P_T3L_N0_DBC_AD9P_68 +#set_property PACKAGE_PIN BH11 [ get_ports {c1_ddr4_event_n} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_EVENT_B" - IO_T3U_N12_68 +#set_property IOSTANDARD SSTL12_DCI [ get_ports {c1_ddr4_event_n} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_EVENT_B" - IO_T3U_N12_68 +set_property PACKAGE_PIN BH12 [ get_ports {c1_ddr4_reset_n} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_RESET_N" - IO_T2U_N12_68 +set_property IOSTANDARD LVCMOS12 [ get_ports {c1_ddr4_reset_n} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_RESET_N" - IO_T2U_N12_68 +set_property PACKAGE_PIN BH14 [ get_ports {c1_ddr4_dq[59]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ59" - IO_L18N_T2U_N11_AD2N_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[59]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ59" - IO_L18N_T2U_N11_AD2N_68 +set_property PACKAGE_PIN BH15 [ get_ports {c1_ddr4_dq[58]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ58" - IO_L18P_T2U_N10_AD2P_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[58]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ58" - IO_L18P_T2U_N10_AD2P_68 +set_property PACKAGE_PIN BJ12 [ get_ports {c1_ddr4_dq[57]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ57" - IO_L17N_T2U_N9_AD10N_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[57]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ57" - IO_L17N_T2U_N9_AD10N_68 +set_property PACKAGE_PIN BJ13 [ get_ports {c1_ddr4_dq[56]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ56" - IO_L17P_T2U_N8_AD10P_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[56]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ56" - IO_L17P_T2U_N8_AD10P_68 +set_property PACKAGE_PIN BK13 [ get_ports {c1_ddr4_dqs_c[14]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_C7" - IO_L16N_T2U_N7_QBC_AD3N_68 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[14]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_C7" - IO_L16N_T2U_N7_QBC_AD3N_68 +set_property 
PACKAGE_PIN BJ14 [ get_ports {c1_ddr4_dqs_t[14]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_T7" - IO_L16P_T2U_N6_QBC_AD3P_68 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[14]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_T7" - IO_L16P_T2U_N6_QBC_AD3P_68 +set_property PACKAGE_PIN BK14 [ get_ports {c1_ddr4_dq[60]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ60" - IO_L15N_T2L_N5_AD11N_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[60]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ60" - IO_L15N_T2L_N5_AD11N_68 +set_property PACKAGE_PIN BK15 [ get_ports {c1_ddr4_dq[61]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ61" - IO_L15P_T2L_N4_AD11P_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[61]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ61" - IO_L15P_T2L_N4_AD11P_68 +set_property PACKAGE_PIN BL12 [ get_ports {c1_ddr4_dq[62]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ62" - IO_L14N_T2L_N3_GC_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[62]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ62" - IO_L14N_T2L_N3_GC_68 +set_property PACKAGE_PIN BL13 [ get_ports {c1_ddr4_dq[63]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ63" - IO_L14P_T2L_N2_GC_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[63]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ63" - IO_L14P_T2L_N2_GC_68 +set_property PACKAGE_PIN BK11 [ get_ports {c1_ddr4_dqs_c[15]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_C16" - IO_L13N_T2L_N1_GC_QBC_68 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[15]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_C16" - IO_L13N_T2L_N1_GC_QBC_68 +set_property PACKAGE_PIN BJ11 [ get_ports {c1_ddr4_dqs_t[15]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_T16" - IO_L13P_T2L_N0_GC_QBC_68 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[15]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_T16" - IO_L13P_T2L_N0_GC_QBC_68 +set_property PACKAGE_PIN BK9 [ get_ports {c1_ddr4_dq[41]} ] ;# Bank 68 VCCO - VCC1V2 Net 
"DDR4_C1_DQ41" - IO_L12N_T1U_N11_GC_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[41]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ41" - IO_L12N_T1U_N11_GC_68 +set_property PACKAGE_PIN BJ9 [ get_ports {c1_ddr4_dq[40]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ40" - IO_L12P_T1U_N10_GC_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[40]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ40" - IO_L12P_T1U_N10_GC_68 +set_property PACKAGE_PIN BL10 [ get_ports {c1_ddr4_dq[43]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ43" - IO_L11N_T1U_N9_GC_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[43]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ43" - IO_L11N_T1U_N9_GC_68 +set_property PACKAGE_PIN BK10 [ get_ports {c1_ddr4_dq[42]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ42" - IO_L11P_T1U_N8_GC_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[42]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ42" - IO_L11P_T1U_N8_GC_68 +set_property PACKAGE_PIN BM8 [ get_ports {c1_ddr4_dqs_c[10]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_C5" - IO_L10N_T1U_N7_QBC_AD4N_68 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[10]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_C5" - IO_L10N_T1U_N7_QBC_AD4N_68 +set_property PACKAGE_PIN BL8 [ get_ports {c1_ddr4_dqs_t[10]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_T5" - IO_L10P_T1U_N6_QBC_AD4P_68 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[10]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_T5" - IO_L10P_T1U_N6_QBC_AD4P_68 +set_property PACKAGE_PIN BN9 [ get_ports {c1_ddr4_dq[45]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ45" - IO_L9N_T1L_N5_AD12N_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[45]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ45" - IO_L9N_T1L_N5_AD12N_68 +set_property PACKAGE_PIN BM9 [ get_ports {c1_ddr4_dq[44]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ44" - IO_L9P_T1L_N4_AD12P_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[44]} ] ;# 
Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ44" - IO_L9P_T1L_N4_AD12P_68 +set_property PACKAGE_PIN BN10 [ get_ports {c1_ddr4_dq[46]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ46" - IO_L8N_T1L_N3_AD5N_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[46]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ46" - IO_L8N_T1L_N3_AD5N_68 +set_property PACKAGE_PIN BM10 [ get_ports {c1_ddr4_dq[47]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ47" - IO_L8P_T1L_N2_AD5P_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[47]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ47" - IO_L8P_T1L_N2_AD5P_68 +set_property PACKAGE_PIN BP8 [ get_ports {c1_ddr4_dqs_c[11]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_C14" - IO_L7N_T1L_N1_QBC_AD13N_68 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[11]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_C14" - IO_L7N_T1L_N1_QBC_AD13N_68 +set_property PACKAGE_PIN BP9 [ get_ports {c1_ddr4_dqs_t[11]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_T14" - IO_L7P_T1L_N0_QBC_AD13P_68 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[11]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_T14" - IO_L7P_T1L_N0_QBC_AD13P_68 +#set_property PACKAGE_PIN BL11 #N/A ;# Bank 68 VCCO - VCC1V2 Net "Not Connected" - IO_T1U_N12_68 +#set_property IOSTANDARD LVCMOS18 #N/A ;# Bank 68 VCCO - VCC1V2 Net "Not Connected" - IO_T1U_N12_68 +set_property PACKAGE_PIN BM15 [ get_ports {c1_ddr4_dq[48]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ48" - IO_L6N_T0U_N11_AD6N_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[48]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ48" - IO_L6N_T0U_N11_AD6N_68 +set_property PACKAGE_PIN BL15 [ get_ports {c1_ddr4_dq[50]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ50" - IO_L6P_T0U_N10_AD6P_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[50]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ50" - IO_L6P_T0U_N10_AD6P_68 +set_property PACKAGE_PIN BM13 [ get_ports {c1_ddr4_dq[51]} ] ;# Bank 68 VCCO - VCC1V2 Net 
"DDR4_C1_DQ51" - IO_L5N_T0U_N9_AD14N_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[51]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ51" - IO_L5N_T0U_N9_AD14N_68 +set_property PACKAGE_PIN BM14 [ get_ports {c1_ddr4_dq[49]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ49" - IO_L5P_T0U_N8_AD14P_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[49]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ49" - IO_L5P_T0U_N8_AD14P_68 +set_property PACKAGE_PIN BN14 [ get_ports {c1_ddr4_dqs_c[12]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_C6" - IO_L4N_T0U_N7_DBC_AD7N_68 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[12]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_C6" - IO_L4N_T0U_N7_DBC_AD7N_68 +set_property PACKAGE_PIN BN15 [ get_ports {c1_ddr4_dqs_t[12]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_T6" - IO_L4P_T0U_N6_DBC_AD7P_68 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[12]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_T6" - IO_L4P_T0U_N6_DBC_AD7P_68 +set_property PACKAGE_PIN BN12 [ get_ports {c1_ddr4_dq[52]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ52" - IO_L3N_T0L_N5_AD15N_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[52]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ52" - IO_L3N_T0L_N5_AD15N_68 +set_property PACKAGE_PIN BM12 [ get_ports {c1_ddr4_dq[53]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ53" - IO_L3P_T0L_N4_AD15P_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[53]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ53" - IO_L3P_T0L_N4_AD15P_68 +set_property PACKAGE_PIN BP13 [ get_ports {c1_ddr4_dq[54]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ54" - IO_L2N_T0L_N3_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[54]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ54" - IO_L2N_T0L_N3_68 +set_property PACKAGE_PIN BP14 [ get_ports {c1_ddr4_dq[55]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQ55" - IO_L2P_T0L_N2_68 +set_property IOSTANDARD POD12_DCI [ get_ports {c1_ddr4_dq[55]} ] ;# Bank 
68 VCCO - VCC1V2 Net "DDR4_C1_DQ55" - IO_L2P_T0L_N2_68 +set_property PACKAGE_PIN BP11 [ get_ports {c1_ddr4_dqs_c[13]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_C15" - IO_L1N_T0L_N1_DBC_68 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_c[13]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_C15" - IO_L1N_T0L_N1_DBC_68 +set_property PACKAGE_PIN BP12 [ get_ports {c1_ddr4_dqs_t[13]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_T15" - IO_L1P_T0L_N0_DBC_68 +set_property IOSTANDARD DIFF_POD12_DCI [ get_ports {c1_ddr4_dqs_t[13]} ] ;# Bank 68 VCCO - VCC1V2 Net "DDR4_C1_DQS_T15" - IO_L1P_T0L_N0_DBC_68 \ No newline at end of file diff --git a/hw/constraints/u280/u280_net.xdc b/hw/constraints/u280/u280_net.xdc new file mode 100644 index 00000000..66df2b03 --- /dev/null +++ b/hw/constraints/u280/u280_net.xdc @@ -0,0 +1,59 @@ +### +### QSFP 0 +### + +# Clock (156.25 MHz) +set_property PACKAGE_PIN T43 [get_ports gt0_refclk_n ] ; +set_property PACKAGE_PIN T42 [get_ports gt0_refclk_p ] ; + +# Clock (161 MHz) +#set_property PACKAGE_PIN R41 [get_ports gt0_refclk_n ] ; +#set_property PACKAGE_PIN R40 [get_ports gt0_refclk_p ] ; + +# Transceiver +set_property PACKAGE_PIN L54 [get_ports {gt0_rxn_in[0]} ] ; +set_property PACKAGE_PIN K52 [get_ports {gt0_rxn_in[1]} ] ; +set_property PACKAGE_PIN J54 [get_ports {gt0_rxn_in[2]} ] ; +set_property PACKAGE_PIN H52 [get_ports {gt0_rxn_in[3]} ] ; +set_property PACKAGE_PIN L53 [get_ports {gt0_rxp_in[0]} ] ; +set_property PACKAGE_PIN K51 [get_ports {gt0_rxp_in[1]} ] ; +set_property PACKAGE_PIN J53 [get_ports {gt0_rxp_in[2]} ] ; +set_property PACKAGE_PIN H51 [get_ports {gt0_rxp_in[3]} ] ; +set_property PACKAGE_PIN L49 [get_ports {gt0_txn_in[0]} ] ; +set_property PACKAGE_PIN L45 [get_ports {gt0_txn_in[1]} ] ; +set_property PACKAGE_PIN K47 [get_ports {gt0_txn_in[2]} ] ; +set_property PACKAGE_PIN J49 [get_ports {gt0_txn_in[3]} ] ; +set_property PACKAGE_PIN L48 [get_ports {gt0_txp_in[0]} ] ; +set_property PACKAGE_PIN L44 [get_ports 
{gt0_txp_in[1]} ] ; +set_property PACKAGE_PIN K46 [get_ports {gt0_txp_in[2]} ] ; +set_property PACKAGE_PIN J48 [get_ports {gt0_txp_in[3]} ] ; + +### +### QSFP 1 +### + +# Clock (156.25 MHz) +set_property PACKAGE_PIN P43 [get_ports gt1_refclk_n ] ; +set_property PACKAGE_PIN P42 [get_ports gt1_refclk_p ] ; + +# Clock (161 MHz) +#set_property PACKAGE_PIN M43 [get_ports gt1_refclk_n ] ; +#set_property PACKAGE_PIN M42 [get_ports gt1_refclk_p ] ; + +# Transceiver +set_property PACKAGE_PIN G54 [get_ports {gt1_rxn_in[0]} ] ; +set_property PACKAGE_PIN F52 [get_ports {gt1_rxn_in[1]} ] ; +set_property PACKAGE_PIN E54 [get_ports {gt1_rxn_in[2]} ] ; +set_property PACKAGE_PIN D52 [get_ports {gt1_rxn_in[3]} ] ; +set_property PACKAGE_PIN G53 [get_ports {gt1_rxp_in[0]} ] ; +set_property PACKAGE_PIN F51 [get_ports {gt1_rxp_in[1]} ] ; +set_property PACKAGE_PIN E53 [get_ports {gt1_rxp_in[2]} ] ; +set_property PACKAGE_PIN D51 [get_ports {gt1_rxp_in[3]} ] ; +set_property PACKAGE_PIN G49 [get_ports {gt1_txn_in[0]} ] ; +set_property PACKAGE_PIN E49 [get_ports {gt1_txn_in[1]} ] ; +set_property PACKAGE_PIN C49 [get_ports {gt1_txn_in[2]} ] ; +set_property PACKAGE_PIN A50 [get_ports {gt1_txn_in[3]} ] ; +set_property PACKAGE_PIN G48 [get_ports {gt1_txp_in[0]} ] ; +set_property PACKAGE_PIN E48 [get_ports {gt1_txp_in[1]} ] ; +set_property PACKAGE_PIN C48 [get_ports {gt1_txp_in[2]} ] ; +set_property PACKAGE_PIN A49 [get_ports {gt1_txp_in[3]} ] ; \ No newline at end of file diff --git a/hw/constraints/u280/u280_pcie.xdc b/hw/constraints/u280/u280_pcie.xdc new file mode 100644 index 00000000..58203937 --- /dev/null +++ b/hw/constraints/u280/u280_pcie.xdc @@ -0,0 +1,82 @@ +#PCIe + +#Clock +set_property PACKAGE_PIN AR14 [get_ports {pcie_clk_clk_n}] ; +set_property PACKAGE_PIN AR15 [get_ports {pcie_clk_clk_p}] ; + +create_clock -period 10.000 -name pcie_ref_clk [get_ports pcie_clk_clk_p] + +set_property PACKAGE_PIN BH26 [get_ports perst_n_nb ] ; +set_property IOSTANDARD LVCMOS18 [get_ports perst_n_nb ] ;
+ +# Set false path +set_false_path -from [get_ports perst_n_nb] + +# Transceiver +set_property PACKAGE_PIN AN1 [get_ports {pcie_x16_rxn[3]}] ; +set_property PACKAGE_PIN AN5 [get_ports {pcie_x16_rxn[2]}] ; +set_property PACKAGE_PIN AM3 [get_ports {pcie_x16_rxn[1]}] ; +set_property PACKAGE_PIN AL1 [get_ports {pcie_x16_rxn[0]}] ; +set_property PACKAGE_PIN AN2 [get_ports {pcie_x16_rxp[3]}] ; +set_property PACKAGE_PIN AN6 [get_ports {pcie_x16_rxp[2]}] ; +set_property PACKAGE_PIN AM4 [get_ports {pcie_x16_rxp[1]}] ; +set_property PACKAGE_PIN AL2 [get_ports {pcie_x16_rxp[0]}] ; +set_property PACKAGE_PIN AP8 [get_ports {pcie_x16_txn[3]}] ; +set_property PACKAGE_PIN AN10 [get_ports {pcie_x16_txn[2]}] ; +set_property PACKAGE_PIN AM8 [get_ports {pcie_x16_txn[1]}] ; +set_property PACKAGE_PIN AL10 [get_ports {pcie_x16_txn[0]}] ; +set_property PACKAGE_PIN AP9 [get_ports {pcie_x16_txp[3]}] ; +set_property PACKAGE_PIN AN11 [get_ports {pcie_x16_txp[2]}] ; +set_property PACKAGE_PIN AM9 [get_ports {pcie_x16_txp[1]}] ; +set_property PACKAGE_PIN AL11 [get_ports {pcie_x16_txp[0]}] ; + +set_property PACKAGE_PIN AU1 [get_ports {pcie_x16_rxn[7]}] ; +set_property PACKAGE_PIN AT3 [get_ports {pcie_x16_rxn[6]}] ; +set_property PACKAGE_PIN AR1 [get_ports {pcie_x16_rxn[5]}] ; +set_property PACKAGE_PIN AP3 [get_ports {pcie_x16_rxn[4]}] ; +set_property PACKAGE_PIN AU2 [get_ports {pcie_x16_rxp[7]}] ; +set_property PACKAGE_PIN AT4 [get_ports {pcie_x16_rxp[6]}] ; +set_property PACKAGE_PIN AR2 [get_ports {pcie_x16_rxp[5]}] ; +set_property PACKAGE_PIN AP4 [get_ports {pcie_x16_rxp[4]}] ; +set_property PACKAGE_PIN AU10 [get_ports {pcie_x16_txn[7]}] ; +set_property PACKAGE_PIN AT8 [get_ports {pcie_x16_txn[6]}] ; +set_property PACKAGE_PIN AR6 [get_ports {pcie_x16_txn[5]}] ; +set_property PACKAGE_PIN AR10 [get_ports {pcie_x16_txn[4]}] ; +set_property PACKAGE_PIN AU11 [get_ports {pcie_x16_txp[7]}] ; +set_property PACKAGE_PIN AT9 [get_ports {pcie_x16_txp[6]}] ; +set_property PACKAGE_PIN AR7 [get_ports 
{pcie_x16_txp[5]}] ; +set_property PACKAGE_PIN AR11 [get_ports {pcie_x16_txp[4]}] ; + +set_property PACKAGE_PIN AY3 [get_ports {pcie_x16_rxn[11]} ] ; +set_property PACKAGE_PIN AW1 [get_ports {pcie_x16_rxn[10]} ] ; +set_property PACKAGE_PIN AW5 [get_ports {pcie_x16_rxn[9]} ] ; +set_property PACKAGE_PIN AV3 [get_ports {pcie_x16_rxn[8]} ] ; +set_property PACKAGE_PIN AY4 [get_ports {pcie_x16_rxp[11]} ] ; +set_property PACKAGE_PIN AW2 [get_ports {pcie_x16_rxp[10]} ] ; +set_property PACKAGE_PIN AW6 [get_ports {pcie_x16_rxp[9]} ] ; +set_property PACKAGE_PIN AV4 [get_ports {pcie_x16_rxp[8]} ] ; +set_property PACKAGE_PIN AY8 [get_ports {pcie_x16_txn[11]} ] ; +set_property PACKAGE_PIN AW10 [get_ports {pcie_x16_txn[10]} ] ; +set_property PACKAGE_PIN AV8 [get_ports {pcie_x16_txn[9]} ] ; +set_property PACKAGE_PIN AU6 [get_ports {pcie_x16_txn[8]} ] ; +set_property PACKAGE_PIN AY9 [get_ports {pcie_x16_txp[11]} ] ; +set_property PACKAGE_PIN AW11 [get_ports {pcie_x16_txp[10]} ] ; +set_property PACKAGE_PIN AV9 [get_ports {pcie_x16_txp[9]} ] ; +set_property PACKAGE_PIN AU7 [get_ports {pcie_x16_txp[8]} ] ; + +set_property PACKAGE_PIN BC1 [get_ports {pcie_x16_rxn[15]} ] ; +set_property PACKAGE_PIN BB3 [get_ports {pcie_x16_rxn[14]} ] ; +set_property PACKAGE_PIN BA1 [get_ports {pcie_x16_rxn[13]} ] ; +set_property PACKAGE_PIN BA5 [get_ports {pcie_x16_rxn[12]} ] ; +set_property PACKAGE_PIN BC2 [get_ports {pcie_x16_rxp[15]} ] ; +set_property PACKAGE_PIN BB4 [get_ports {pcie_x16_rxp[14]} ] ; +set_property PACKAGE_PIN BA2 [get_ports {pcie_x16_rxp[13]} ] ; +set_property PACKAGE_PIN BA6 [get_ports {pcie_x16_rxp[12]} ] ; +set_property PACKAGE_PIN BC6 [get_ports {pcie_x16_txn[15]} ] ; +set_property PACKAGE_PIN BC10 [get_ports {pcie_x16_txn[14]} ] ; +set_property PACKAGE_PIN BB8 [get_ports {pcie_x16_txn[13]} ] ; +set_property PACKAGE_PIN BA10 [get_ports {pcie_x16_txn[12]} ] ; +set_property PACKAGE_PIN BC7 [get_ports {pcie_x16_txp[15]} ] ; +set_property PACKAGE_PIN BC11 [get_ports 
{pcie_x16_txp[14]} ] ; +set_property PACKAGE_PIN BB9 [get_ports {pcie_x16_txp[13]} ] ; +set_property PACKAGE_PIN BA11 [get_ports {pcie_x16_txp[12]} ] ; \ No newline at end of file diff --git a/hw/constraints/vcu118/vcu_base.xdc b/hw/constraints/vcu118/vcu_base.xdc new file mode 100644 index 00000000..d756ad52 --- /dev/null +++ b/hw/constraints/vcu118/vcu_base.xdc @@ -0,0 +1,14 @@ +set_property BITSTREAM.GENERAL.COMPRESS TRUE [current_design] + +# Clock and reset + +# Use the 300MHz system clock. + +# Reset +set_property PACKAGE_PIN BB24 [get_ports reset_0_nb] +set_property IOSTANDARD LVCMOS18 [get_ports reset_0_nb] + +# Reset false path +set_false_path -from [get_ports reset_0_nb] +set_false_path -from [get_pins {design_static_i/proc_sys_reset_1/U0/ACTIVE_LOW_PR_OUT_DFF[0].FDRE_PER_N/C}] + diff --git a/hw/constraints/vcu118/vcu_ddr.xdc b/hw/constraints/vcu118/vcu_ddr.xdc new file mode 100644 index 00000000..41b9a80a --- /dev/null +++ b/hw/constraints/vcu118/vcu_ddr.xdc @@ -0,0 +1,619 @@ +#### DDR4 + +### +### DDR4 Channel 0 +### + +## 250 MHz clock +set_property PACKAGE_PIN E12 [get_ports c0_sys_clk_p] +set_property PACKAGE_PIN D12 [get_ports c0_sys_clk_n] + +# ACT +set_property PACKAGE_PIN E13 [get_ports c0_ddr4_act_n] +#set_property IOSTANDARD SSTL12_DCI [get_ports c0_ddr4_act_n] + +# ADDR c0_ddr4_adr +set_property PACKAGE_PIN D14 [get_ports {c0_ddr4_adr[0]}] +set_property PACKAGE_PIN B15 [get_ports {c0_ddr4_adr[1]}] +set_property PACKAGE_PIN B16 [get_ports {c0_ddr4_adr[2]}] +set_property PACKAGE_PIN C14 [get_ports {c0_ddr4_adr[3]}] +set_property PACKAGE_PIN C15 [get_ports {c0_ddr4_adr[4]}] +set_property PACKAGE_PIN A13 [get_ports {c0_ddr4_adr[5]}] +set_property PACKAGE_PIN A14 [get_ports {c0_ddr4_adr[6]}] +set_property PACKAGE_PIN A15 [get_ports {c0_ddr4_adr[7]}] +set_property PACKAGE_PIN A16 [get_ports {c0_ddr4_adr[8]}] +set_property PACKAGE_PIN B12 [get_ports {c0_ddr4_adr[9]}] +set_property PACKAGE_PIN C12 [get_ports {c0_ddr4_adr[10]}] +set_property
PACKAGE_PIN B13 [get_ports {c0_ddr4_adr[11]}] +set_property PACKAGE_PIN C13 [get_ports {c0_ddr4_adr[12]}] +set_property PACKAGE_PIN D15 [get_ports {c0_ddr4_adr[13]}] +set_property PACKAGE_PIN H14 [get_ports {c0_ddr4_adr[14]}] +set_property PACKAGE_PIN H15 [get_ports {c0_ddr4_adr[15]}] +set_property PACKAGE_PIN F15 [get_ports {c0_ddr4_adr[16]}] + +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[0]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[1]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[2]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[3]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[4]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[5]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[6]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[7]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[8]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[9]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[10]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[11]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[12]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[13]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[14]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[15]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_adr[16]}] + +# BA +set_property PACKAGE_PIN G13 [get_ports {c0_ddr4_ba[1]}] +set_property PACKAGE_PIN G15 [get_ports {c0_ddr4_ba[0]}] + +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_ba[0]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_ba[1]}] + +# BG +set_property PACKAGE_PIN H13 [get_ports {c0_ddr4_bg[0]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_bg[0]}] + +# CKE +set_property PACKAGE_PIN A10 [get_ports {c0_ddr4_cke[0]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_cke[0]}] + 
+# ODT +set_property PACKAGE_PIN C8 [get_ports {c0_ddr4_odt[0]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_odt[0]}] + +# CS_n +set_property PACKAGE_PIN F13 [get_ports {c0_ddr4_cs_n[0]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c0_ddr4_cs_n[0]}] + +# CKC + +#set_property IOSTANDARD DIFF_SSTL12_DCI [get_ports {c0_ddr4_ck_t[0]}] +#set_property IOSTANDARD DIFF_SSTL12_DCI [get_ports {c0_ddr4_ck_c[0]}] + +# RESET +set_property PACKAGE_PIN N20 [get_ports c0_ddr4_reset_n] +#set_property IOSTANDARD LVCMOS12 [get_ports c0_ddr4_reset_n] +#set_property DRIVE 8 [get_ports c0_ddr4_reset_n] + +# DM +#set_property PACKAGE_PIN C9 [get_ports {c0_ddr4_dm_dbi_n[9]}] +set_property PACKAGE_PIN G22 [get_ports {c0_ddr4_dm_dbi_n[7]}] +set_property PACKAGE_PIN L23 [get_ports {c0_ddr4_dm_dbi_n[6]}] +set_property PACKAGE_PIN P20 [get_ports {c0_ddr4_dm_dbi_n[5]}] +set_property PACKAGE_PIN B18 [get_ports {c0_ddr4_dm_dbi_n[4]}] +set_property PACKAGE_PIN G18 [get_ports {c0_ddr4_dm_dbi_n[3]}] +set_property PACKAGE_PIN K17 [get_ports {c0_ddr4_dm_dbi_n[2]}] +set_property PACKAGE_PIN R18 [get_ports {c0_ddr4_dm_dbi_n[1]}] +set_property PACKAGE_PIN G11 [get_ports {c0_ddr4_dm_dbi_n[0]}] + +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dm_dbi_n[0]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dm_dbi_n[1]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dm_dbi_n[2]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dm_dbi_n[3]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dm_dbi_n[4]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dm_dbi_n[5]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dm_dbi_n[6]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dm_dbi_n[7]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dm_dbi_n[8]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dm_dbi_n[9]}] + + +# DQ +#set_property PACKAGE_PIN A11 [get_ports {c0_ddr4_dq[79]}] +#set_property PACKAGE_PIN B11 
[get_ports {c0_ddr4_dq[78]}] +#set_property PACKAGE_PIN B10 [get_ports {c0_ddr4_dq[77]}] +#set_property PACKAGE_PIN C10 [get_ports {c0_ddr4_dq[76]}] +#set_property PACKAGE_PIN B7 [get_ports {c0_ddr4_dq[75]}] +#set_property PACKAGE_PIN B8 [get_ports {c0_ddr4_dq[74]}] +#set_property PACKAGE_PIN C7 [get_ports {c0_ddr4_dq[73]}] +#set_property PACKAGE_PIN D7 [get_ports {c0_ddr4_dq[72]}] +set_property PACKAGE_PIN F23 [get_ports {c0_ddr4_dq[63]}] +set_property PACKAGE_PIN F24 [get_ports {c0_ddr4_dq[62]}] +set_property PACKAGE_PIN E21 [get_ports {c0_ddr4_dq[61]}] +set_property PACKAGE_PIN F21 [get_ports {c0_ddr4_dq[60]}] +set_property PACKAGE_PIN E22 [get_ports {c0_ddr4_dq[59]}] +set_property PACKAGE_PIN E23 [get_ports {c0_ddr4_dq[58]}] +set_property PACKAGE_PIN H22 [get_ports {c0_ddr4_dq[57]}] +set_property PACKAGE_PIN H23 [get_ports {c0_ddr4_dq[56]}] +set_property PACKAGE_PIN J22 [get_ports {c0_ddr4_dq[55]}] +set_property PACKAGE_PIN K22 [get_ports {c0_ddr4_dq[54]}] +set_property PACKAGE_PIN J21 [get_ports {c0_ddr4_dq[53]}] +set_property PACKAGE_PIN K21 [get_ports {c0_ddr4_dq[52]}] +set_property PACKAGE_PIN L21 [get_ports {c0_ddr4_dq[51]}] +set_property PACKAGE_PIN M21 [get_ports {c0_ddr4_dq[50]}] +set_property PACKAGE_PIN J24 [get_ports {c0_ddr4_dq[49]}] +set_property PACKAGE_PIN K24 [get_ports {c0_ddr4_dq[48]}] +set_property PACKAGE_PIN R23 [get_ports {c0_ddr4_dq[47]}] +set_property PACKAGE_PIN T23 [get_ports {c0_ddr4_dq[46]}] +set_property PACKAGE_PIN P22 [get_ports {c0_ddr4_dq[45]}] +set_property PACKAGE_PIN R22 [get_ports {c0_ddr4_dq[44]}] +set_property PACKAGE_PIN P21 [get_ports {c0_ddr4_dq[43]}] +set_property PACKAGE_PIN R21 [get_ports {c0_ddr4_dq[42]}] +set_property PACKAGE_PIN M23 [get_ports {c0_ddr4_dq[41]}] +set_property PACKAGE_PIN N23 [get_ports {c0_ddr4_dq[40]}] +set_property PACKAGE_PIN B20 [get_ports {c0_ddr4_dq[39]}] +set_property PACKAGE_PIN C20 [get_ports {c0_ddr4_dq[38]}] +set_property PACKAGE_PIN D19 [get_ports {c0_ddr4_dq[37]}] +set_property 
PACKAGE_PIN D20 [get_ports {c0_ddr4_dq[36]}] +set_property PACKAGE_PIN C18 [get_ports {c0_ddr4_dq[35]}] +set_property PACKAGE_PIN C19 [get_ports {c0_ddr4_dq[34]}] +set_property PACKAGE_PIN C17 [get_ports {c0_ddr4_dq[33]}] +set_property PACKAGE_PIN D17 [get_ports {c0_ddr4_dq[32]}] +set_property PACKAGE_PIN D16 [get_ports {c0_ddr4_dq[31]}] +set_property PACKAGE_PIN E17 [get_ports {c0_ddr4_dq[30]}] +set_property PACKAGE_PIN F20 [get_ports {c0_ddr4_dq[29]}] +set_property PACKAGE_PIN G20 [get_ports {c0_ddr4_dq[28]}] +set_property PACKAGE_PIN E18 [get_ports {c0_ddr4_dq[27]}] +set_property PACKAGE_PIN E19 [get_ports {c0_ddr4_dq[26]}] +set_property PACKAGE_PIN F18 [get_ports {c0_ddr4_dq[25]}] +set_property PACKAGE_PIN F19 [get_ports {c0_ddr4_dq[24]}] +set_property PACKAGE_PIN H18 [get_ports {c0_ddr4_dq[23]}] +set_property PACKAGE_PIN H19 [get_ports {c0_ddr4_dq[22]}] +set_property PACKAGE_PIN H17 [get_ports {c0_ddr4_dq[21]}] +set_property PACKAGE_PIN J17 [get_ports {c0_ddr4_dq[20]}] +set_property PACKAGE_PIN K18 [get_ports {c0_ddr4_dq[19]}] +set_property PACKAGE_PIN L18 [get_ports {c0_ddr4_dq[18]}] +set_property PACKAGE_PIN K16 [get_ports {c0_ddr4_dq[17]}] +set_property PACKAGE_PIN L16 [get_ports {c0_ddr4_dq[16]}] +set_property PACKAGE_PIN M16 [get_ports {c0_ddr4_dq[15]}] +set_property PACKAGE_PIN N17 [get_ports {c0_ddr4_dq[14]}] +set_property PACKAGE_PIN N18 [get_ports {c0_ddr4_dq[13]}] +set_property PACKAGE_PIN N19 [get_ports {c0_ddr4_dq[12]}] +set_property PACKAGE_PIN M17 [get_ports {c0_ddr4_dq[11]}] +set_property PACKAGE_PIN M18 [get_ports {c0_ddr4_dq[10]}] +set_property PACKAGE_PIN P19 [get_ports {c0_ddr4_dq[9]}] +set_property PACKAGE_PIN R19 [get_ports {c0_ddr4_dq[8]}] +set_property PACKAGE_PIN D9 [get_ports {c0_ddr4_dq[7]}] +set_property PACKAGE_PIN E9 [get_ports {c0_ddr4_dq[6]}] +set_property PACKAGE_PIN G12 [get_ports {c0_ddr4_dq[5]}] +set_property PACKAGE_PIN H12 [get_ports {c0_ddr4_dq[4]}] +set_property PACKAGE_PIN F9 [get_ports {c0_ddr4_dq[3]}] +set_property 
PACKAGE_PIN F10 [get_ports {c0_ddr4_dq[2]}] +set_property PACKAGE_PIN E11 [get_ports {c0_ddr4_dq[1]}] +set_property PACKAGE_PIN F11 [get_ports {c0_ddr4_dq[0]}] + +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[0]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[1]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[2]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[3]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[4]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[5]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[6]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[7]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[8]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[9]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[10]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[11]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[12]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[13]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[14]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[15]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[16]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[17]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[18]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[19]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[20]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[21]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[22]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[23]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[24]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[25]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[26]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[27]}] +#set_property IOSTANDARD 
POD12_DCI [get_ports {c0_ddr4_dq[28]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[29]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[30]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[31]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[32]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[33]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[34]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[35]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[36]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[37]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[38]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[39]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[40]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[41]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[42]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[43]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[44]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[45]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[46]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[47]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[48]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[49]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[50]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[51]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[52]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[53]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[54]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[55]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[56]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[57]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[58]}] 
+#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[59]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[60]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[61]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[62]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[63]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[64]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[65]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[66]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[67]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[68]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[69]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[70]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[71]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[72]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[73]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[74]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[75]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[76]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[77]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[78]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c0_ddr4_dq[79]}] + +# DQS_C +#set_property PACKAGE_PIN A8 [get_ports {c0_ddr4_dqs_c[9]}] +#set_property PACKAGE_PIN A9 [get_ports {c0_ddr4_dqs_t[9]}] + +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_c[0]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_t[0]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_c[1]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_t[1]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_c[2]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_t[2]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports 
{c0_ddr4_dqs_c[3]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_t[3]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_c[4]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_t[4]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_c[5]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_t[5]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_c[6]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_t[6]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_c[7]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_t[7]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_c[8]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_t[8]}] +##set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_c[9]}] +##set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c0_ddr4_dqs_t[9]}] + +### End of DDR4 channel 0 + +### +### DDR4 Channel 1 +### + +## 250 MHz clock +set_property PACKAGE_PIN AW26 [get_ports c1_sys_clk_p] +set_property PACKAGE_PIN AW27 [get_ports c1_sys_clk_n] + +# ACT +set_property PACKAGE_PIN AN25 [get_ports c1_ddr4_act_n] +#set_property IOSTANDARD SSTL12_DCI [get_ports c1_ddr4_act_n] + +# ADDR c1_ddr4_adr +set_property PACKAGE_PIN AM27 [get_ports {c1_ddr4_adr[0]}] +set_property PACKAGE_PIN AL27 [get_ports {c1_ddr4_adr[1]}] +set_property PACKAGE_PIN AP26 [get_ports {c1_ddr4_adr[2]}] +set_property PACKAGE_PIN AP25 [get_ports {c1_ddr4_adr[3]}] +set_property PACKAGE_PIN AN28 [get_ports {c1_ddr4_adr[4]}] +set_property PACKAGE_PIN AM28 [get_ports {c1_ddr4_adr[5]}] +set_property PACKAGE_PIN AP28 [get_ports {c1_ddr4_adr[6]}] +set_property PACKAGE_PIN AP27 [get_ports {c1_ddr4_adr[7]}] +set_property PACKAGE_PIN AN26 [get_ports {c1_ddr4_adr[8]}] +set_property PACKAGE_PIN AM26 [get_ports {c1_ddr4_adr[9]}] +set_property PACKAGE_PIN AR28 [get_ports {c1_ddr4_adr[10]}] +set_property PACKAGE_PIN AR27
[get_ports {c1_ddr4_adr[11]}] +set_property PACKAGE_PIN AV25 [get_ports {c1_ddr4_adr[12]}] +set_property PACKAGE_PIN AT25 [get_ports {c1_ddr4_adr[13]}] +set_property PACKAGE_PIN AV28 [get_ports {c1_ddr4_adr[14]}] +set_property PACKAGE_PIN AU26 [get_ports {c1_ddr4_adr[15]}] +set_property PACKAGE_PIN AV26 [get_ports {c1_ddr4_adr[16]}] + +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[0]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[1]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[2]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[3]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[4]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[5]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[6]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[7]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[8]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[9]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[10]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[11]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[12]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[13]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[14]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[15]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_adr[16]}] + +# BA +set_property PACKAGE_PIN AR25 [get_ports {c1_ddr4_ba[0]}] +set_property PACKAGE_PIN AU28 [get_ports {c1_ddr4_ba[1]}] + +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_ba[0]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_ba[1]}] + +# BG +set_property PACKAGE_PIN AU27 [get_ports {c1_ddr4_bg[0]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_bg[0]}] + +# CKE +set_property PACKAGE_PIN AW28 [get_ports {c1_ddr4_cke[0]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_cke[0]}] + +# ODT 
+set_property PACKAGE_PIN BB29 [get_ports {c1_ddr4_odt[0]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_odt[0]}] + +# CS_n +set_property PACKAGE_PIN AY29 [get_ports {c1_ddr4_cs_n[0]}] +#set_property IOSTANDARD SSTL12_DCI [get_ports {c1_ddr4_cs_n[0]}] + +# CKC + +#set_property IOSTANDARD DIFF_SSTL12_DCI [get_ports {c1_ddr4_ck_t[0]}] +#set_property IOSTANDARD DIFF_SSTL12_DCI [get_ports {c1_ddr4_ck_c[0]}] + +# RESET +set_property PACKAGE_PIN BD35 [get_ports c1_ddr4_reset_n] +#set_property IOSTANDARD LVCMOS12 [get_ports c1_ddr4_reset_n] +#set_property DRIVE 8 [get_ports c1_ddr4_reset_n] + +# DM +set_property PACKAGE_PIN BE32 [get_ports {c1_ddr4_dm_dbi_n[0]}] +set_property PACKAGE_PIN BB31 [get_ports {c1_ddr4_dm_dbi_n[1]}] +set_property PACKAGE_PIN AV33 [get_ports {c1_ddr4_dm_dbi_n[2]}] +set_property PACKAGE_PIN AR32 [get_ports {c1_ddr4_dm_dbi_n[3]}] +set_property PACKAGE_PIN BC34 [get_ports {c1_ddr4_dm_dbi_n[4]}] +set_property PACKAGE_PIN BE40 [get_ports {c1_ddr4_dm_dbi_n[5]}] +set_property PACKAGE_PIN AY37 [get_ports {c1_ddr4_dm_dbi_n[6]}] +set_property PACKAGE_PIN AV35 [get_ports {c1_ddr4_dm_dbi_n[7]}] +#set_property PACKAGE_PIN BA29 [get_ports {c1_ddr4_dm_dbi_n[9]}] + +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dm_dbi_n[0]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dm_dbi_n[1]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dm_dbi_n[2]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dm_dbi_n[3]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dm_dbi_n[4]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dm_dbi_n[5]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dm_dbi_n[6]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dm_dbi_n[7]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dm_dbi_n[8]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dm_dbi_n[9]}] + + +# DQ +set_property PACKAGE_PIN BD30 [get_ports {c1_ddr4_dq[0]}] +set_property PACKAGE_PIN
BE30 [get_ports {c1_ddr4_dq[1]}] +set_property PACKAGE_PIN BD32 [get_ports {c1_ddr4_dq[2]}] +set_property PACKAGE_PIN BE33 [get_ports {c1_ddr4_dq[3]}] +set_property PACKAGE_PIN BC33 [get_ports {c1_ddr4_dq[4]}] +set_property PACKAGE_PIN BD33 [get_ports {c1_ddr4_dq[5]}] +set_property PACKAGE_PIN BC31 [get_ports {c1_ddr4_dq[6]}] +set_property PACKAGE_PIN BD31 [get_ports {c1_ddr4_dq[7]}] +set_property PACKAGE_PIN BA32 [get_ports {c1_ddr4_dq[8]}] +set_property PACKAGE_PIN BB33 [get_ports {c1_ddr4_dq[9]}] +set_property PACKAGE_PIN BA30 [get_ports {c1_ddr4_dq[10]}] +set_property PACKAGE_PIN BA31 [get_ports {c1_ddr4_dq[11]}] +set_property PACKAGE_PIN AW31 [get_ports {c1_ddr4_dq[12]}] +set_property PACKAGE_PIN AW32 [get_ports {c1_ddr4_dq[13]}] +set_property PACKAGE_PIN AY32 [get_ports {c1_ddr4_dq[14]}] +set_property PACKAGE_PIN AY33 [get_ports {c1_ddr4_dq[15]}] +set_property PACKAGE_PIN AV30 [get_ports {c1_ddr4_dq[16]}] +set_property PACKAGE_PIN AW30 [get_ports {c1_ddr4_dq[17]}] +set_property PACKAGE_PIN AU33 [get_ports {c1_ddr4_dq[18]}] +set_property PACKAGE_PIN AU34 [get_ports {c1_ddr4_dq[19]}] +set_property PACKAGE_PIN AT31 [get_ports {c1_ddr4_dq[20]}] +set_property PACKAGE_PIN AU32 [get_ports {c1_ddr4_dq[21]}] +set_property PACKAGE_PIN AU31 [get_ports {c1_ddr4_dq[22]}] +set_property PACKAGE_PIN AV31 [get_ports {c1_ddr4_dq[23]}] +set_property PACKAGE_PIN AR33 [get_ports {c1_ddr4_dq[24]}] +set_property PACKAGE_PIN AT34 [get_ports {c1_ddr4_dq[25]}] +set_property PACKAGE_PIN AT29 [get_ports {c1_ddr4_dq[26]}] +set_property PACKAGE_PIN AT30 [get_ports {c1_ddr4_dq[27]}] +set_property PACKAGE_PIN AP30 [get_ports {c1_ddr4_dq[28]}] +set_property PACKAGE_PIN AR30 [get_ports {c1_ddr4_dq[29]}] +set_property PACKAGE_PIN AN30 [get_ports {c1_ddr4_dq[30]}] +set_property PACKAGE_PIN AN31 [get_ports {c1_ddr4_dq[31]}] +set_property PACKAGE_PIN BE34 [get_ports {c1_ddr4_dq[32]}] +set_property PACKAGE_PIN BF34 [get_ports {c1_ddr4_dq[33]}] +set_property PACKAGE_PIN BC35 [get_ports 
{c1_ddr4_dq[34]}] +set_property PACKAGE_PIN BC36 [get_ports {c1_ddr4_dq[35]}] +set_property PACKAGE_PIN BD36 [get_ports {c1_ddr4_dq[36]}] +set_property PACKAGE_PIN BE37 [get_ports {c1_ddr4_dq[37]}] +set_property PACKAGE_PIN BF36 [get_ports {c1_ddr4_dq[38]}] +set_property PACKAGE_PIN BF37 [get_ports {c1_ddr4_dq[39]}] +set_property PACKAGE_PIN BD37 [get_ports {c1_ddr4_dq[40]}] +set_property PACKAGE_PIN BE38 [get_ports {c1_ddr4_dq[41]}] +set_property PACKAGE_PIN BC39 [get_ports {c1_ddr4_dq[42]}] +set_property PACKAGE_PIN BD40 [get_ports {c1_ddr4_dq[43]}] +set_property PACKAGE_PIN BB38 [get_ports {c1_ddr4_dq[44]}] +set_property PACKAGE_PIN BB39 [get_ports {c1_ddr4_dq[45]}] +set_property PACKAGE_PIN BC38 [get_ports {c1_ddr4_dq[46]}] +set_property PACKAGE_PIN BD38 [get_ports {c1_ddr4_dq[47]}] +set_property PACKAGE_PIN BB36 [get_ports {c1_ddr4_dq[48]}] +set_property PACKAGE_PIN BB37 [get_ports {c1_ddr4_dq[49]}] +set_property PACKAGE_PIN BA39 [get_ports {c1_ddr4_dq[50]}] +set_property PACKAGE_PIN BA40 [get_ports {c1_ddr4_dq[51]}] +set_property PACKAGE_PIN AW40 [get_ports {c1_ddr4_dq[52]}] +set_property PACKAGE_PIN AY40 [get_ports {c1_ddr4_dq[53]}] +set_property PACKAGE_PIN AY38 [get_ports {c1_ddr4_dq[54]}] +set_property PACKAGE_PIN AY39 [get_ports {c1_ddr4_dq[55]}] +set_property PACKAGE_PIN AW35 [get_ports {c1_ddr4_dq[56]}] +set_property PACKAGE_PIN AW36 [get_ports {c1_ddr4_dq[57]}] +set_property PACKAGE_PIN AU40 [get_ports {c1_ddr4_dq[58]}] +set_property PACKAGE_PIN AV40 [get_ports {c1_ddr4_dq[59]}] +set_property PACKAGE_PIN AU38 [get_ports {c1_ddr4_dq[60]}] +set_property PACKAGE_PIN AU39 [get_ports {c1_ddr4_dq[61]}] +set_property PACKAGE_PIN AV38 [get_ports {c1_ddr4_dq[62]}] +set_property PACKAGE_PIN AV39 [get_ports {c1_ddr4_dq[63]}] +#set_property PACKAGE_PIN BC25 [get_ports {c1_ddr4_dq[72]}] +#set_property PACKAGE_PIN BC26 [get_ports {c1_ddr4_dq[73]}] +#set_property PACKAGE_PIN BB28 [get_ports {c1_ddr4_dq[74]}] +#set_property PACKAGE_PIN BC28 [get_ports 
{c1_ddr4_dq[75]}] +#set_property PACKAGE_PIN AY27 [get_ports {c1_ddr4_dq[76]}] +#set_property PACKAGE_PIN AY28 [get_ports {c1_ddr4_dq[77]}] +#set_property PACKAGE_PIN BA27 [get_ports {c1_ddr4_dq[78]}] +#set_property PACKAGE_PIN BB27 [get_ports {c1_ddr4_dq[79]}] + +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[0]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[1]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[2]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[3]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[4]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[5]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[6]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[7]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[8]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[9]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[10]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[11]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[12]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[13]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[14]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[15]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[16]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[17]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[18]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[19]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[20]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[21]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[22]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[23]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[24]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[25]}] +#set_property IOSTANDARD POD12_DCI [get_ports 
{c1_ddr4_dq[26]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[27]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[28]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[29]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[30]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[31]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[32]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[33]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[34]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[35]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[36]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[37]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[38]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[39]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[40]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[41]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[42]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[43]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[44]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[45]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[46]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[47]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[48]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[49]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[50]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[51]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[52]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[53]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[54]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[55]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[56]}] +#set_property IOSTANDARD 
POD12_DCI [get_ports {c1_ddr4_dq[57]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[58]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[59]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[60]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[61]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[62]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[63]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[64]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[65]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[66]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[67]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[68]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[69]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[70]}] +#set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[71]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[72]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[73]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[74]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[75]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[76]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[77]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[78]}] +##set_property IOSTANDARD POD12_DCI [get_ports {c1_ddr4_dq[79]}] + +# DQS_C +#set_property PACKAGE_PIN BA26 [get_ports {c1_ddr4_dqs_t[9]}] +#set_property PACKAGE_PIN BB26 [get_ports {c1_ddr4_dqs_c[9]}] + +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_c[0]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_t[0]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_c[1]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_t[1]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_c[2]}] +#set_property 
IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_t[2]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_c[3]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_t[3]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_c[4]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_t[4]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_c[5]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_t[5]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_c[6]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_t[6]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_c[7]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_t[7]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_c[8]}] +#set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_t[8]}] +##set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_c[9]}] +##set_property IOSTANDARD DIFF_POD12_DCI [get_ports {c1_ddr4_dqs_t[9]}] + +### End of DDR4 channel 1 + +set_property PACKAGE_PIN BE39 [get_ports {c1_ddr4_dqs_t[5]}] +set_property PACKAGE_PIN BF39 [get_ports {c1_ddr4_dqs_c[5]}] +set_property PACKAGE_PIN BE35 [get_ports {c1_ddr4_dqs_t[4]}] +set_property PACKAGE_PIN BF35 [get_ports {c1_ddr4_dqs_c[4]}] +set_property PACKAGE_PIN AT26 [get_ports {c1_ddr4_ck_t[0]}] +set_property PACKAGE_PIN AT27 [get_ports {c1_ddr4_ck_c[0]}] +set_property PACKAGE_PIN AY34 [get_ports {c1_ddr4_dqs_t[1]}] +set_property PACKAGE_PIN BA34 [get_ports {c1_ddr4_dqs_c[1]}] +set_property PACKAGE_PIN AP31 [get_ports {c1_ddr4_dqs_t[3]}] +set_property PACKAGE_PIN AP32 [get_ports {c1_ddr4_dqs_c[3]}] +set_property PACKAGE_PIN AU29 [get_ports {c1_ddr4_dqs_t[2]}] +set_property PACKAGE_PIN AV29 [get_ports {c1_ddr4_dqs_c[2]}] +set_property PACKAGE_PIN BF30 [get_ports {c1_ddr4_dqs_t[0]}] +set_property PACKAGE_PIN BF31 [get_ports {c1_ddr4_dqs_c[0]}] +set_property PACKAGE_PIN BA35 
[get_ports {c1_ddr4_dqs_t[6]}] +set_property PACKAGE_PIN BA36 [get_ports {c1_ddr4_dqs_c[6]}] +set_property PACKAGE_PIN AW37 [get_ports {c1_ddr4_dqs_t[7]}] +set_property PACKAGE_PIN AW38 [get_ports {c1_ddr4_dqs_c[7]}] +set_property IOSTANDARD DIFF_SSTL12 [get_ports c1_sys_clk_p] +set_property IOSTANDARD DIFF_SSTL12 [get_ports c1_sys_clk_n] + +set_property PACKAGE_PIN N22 [get_ports {c0_ddr4_dqs_t[5]}] +set_property PACKAGE_PIN M22 [get_ports {c0_ddr4_dqs_c[5]}] +set_property PACKAGE_PIN A19 [get_ports {c0_ddr4_dqs_t[4]}] +set_property PACKAGE_PIN A18 [get_ports {c0_ddr4_dqs_c[4]}] +set_property PACKAGE_PIN F14 [get_ports {c0_ddr4_ck_t[0]}] +set_property PACKAGE_PIN E14 [get_ports {c0_ddr4_ck_c[0]}] +set_property PACKAGE_PIN P17 [get_ports {c0_ddr4_dqs_t[1]}] +set_property PACKAGE_PIN P16 [get_ports {c0_ddr4_dqs_c[1]}] +set_property PACKAGE_PIN F16 [get_ports {c0_ddr4_dqs_t[3]}] +set_property PACKAGE_PIN E16 [get_ports {c0_ddr4_dqs_c[3]}] +set_property PACKAGE_PIN K19 [get_ports {c0_ddr4_dqs_t[2]}] +set_property PACKAGE_PIN J19 [get_ports {c0_ddr4_dqs_c[2]}] +set_property PACKAGE_PIN D11 [get_ports {c0_ddr4_dqs_t[0]}] +set_property PACKAGE_PIN D10 [get_ports {c0_ddr4_dqs_c[0]}] +set_property PACKAGE_PIN M20 [get_ports {c0_ddr4_dqs_t[6]}] +set_property PACKAGE_PIN L20 [get_ports {c0_ddr4_dqs_c[6]}] +set_property PACKAGE_PIN H24 [get_ports {c0_ddr4_dqs_t[7]}] +set_property PACKAGE_PIN G23 [get_ports {c0_ddr4_dqs_c[7]}] +set_property IOSTANDARD DIFF_SSTL12 [get_ports c0_sys_clk_p] +set_property IOSTANDARD DIFF_SSTL12 [get_ports c0_sys_clk_n] + + diff --git a/hw/constraints/vcu118/vcu_net.xdc b/hw/constraints/vcu118/vcu_net.xdc new file mode 100644 index 00000000..88cea9bc --- /dev/null +++ b/hw/constraints/vcu118/vcu_net.xdc @@ -0,0 +1,72 @@ +# Network clock +set_property IOSTANDARD LVDS [get_ports dclk_p] +set_property IOSTANDARD LVDS [get_ports dclk_n] + +set_property PACKAGE_PIN AY24 [get_ports dclk_p] +set_property PACKAGE_PIN AY23 [get_ports dclk_n] + +create_clock 
-period 8.000 -name dclk_clk [get_pins dclk_BUFG_inst/O] + +### These are sample constraints, please use correct constraints for your device +### update the gt_refclk pin location accordingly and un-comment the below two lines +set_property PACKAGE_PIN W8 [get_ports gt_refclk_n] +set_property PACKAGE_PIN W9 [get_ports gt_refclk_p] + +#QSFP28 Connector1 +#set_property PACKAGE_PIN Y2 [get_ports {gt_rxp_in[0]}] +#set_property PACKAGE_PIN Y1 [get_ports {gt_rxn_in[0]}] +#set_property PACKAGE_PIN V7 [get_ports {gt_txp_out[0]}] +#set_property PACKAGE_PIN V6 [get_ports {gt_txn_out[0]}] + +#set_property PACKAGE_PIN W4 [get_ports {gt_rxp_in[1]}] +#set_property PACKAGE_PIN W3 [get_ports {gt_rxn_in[1]}] +#set_property PACKAGE_PIN T7 [get_ports {gt_txp_out[1]}] +#set_property PACKAGE_PIN T6 [get_ports {gt_txn_out[1]}] + +#set_property PACKAGE_PIN V2 [get_ports {gt_rxp_in[2]}] +#set_property PACKAGE_PIN V1 [get_ports {gt_rxn_in[2]}] +#set_property PACKAGE_PIN P7 [get_ports {gt_txp_out[2]}] +#set_property PACKAGE_PIN P6 [get_ports {gt_txn_out[2]}] + +#set_property PACKAGE_PIN U4 [get_ports {gt_rxp_in[3]}] +#set_property PACKAGE_PIN U3 [get_ports {gt_rxn_in[3]}] +#set_property PACKAGE_PIN M7 [get_ports {gt_txp_out[3]}] +#set_property PACKAGE_PIN M6 [get_ports {gt_txn_out[3]}] + +#QSFP28 Connector2 +set_property PACKAGE_PIN T2 [get_ports {gt_rxp_in[0]}] +set_property PACKAGE_PIN T1 [get_ports {gt_rxn_in[0]}] +set_property PACKAGE_PIN L5 [get_ports {gt_txp_out[0]}] +set_property PACKAGE_PIN L4 [get_ports {gt_txn_out[0]}] + +set_property PACKAGE_PIN R4 [get_ports {gt_rxp_in[1]}] +set_property PACKAGE_PIN R3 [get_ports {gt_rxn_in[1]}] +set_property PACKAGE_PIN K7 [get_ports {gt_txp_out[1]}] +set_property PACKAGE_PIN K6 [get_ports {gt_txn_out[1]}] + +set_property PACKAGE_PIN P2 [get_ports {gt_rxp_in[2]}] +set_property PACKAGE_PIN P1 [get_ports {gt_rxn_in[2]}] +set_property PACKAGE_PIN J5 [get_ports {gt_txp_out[2]}] +set_property PACKAGE_PIN J4 [get_ports {gt_txn_out[2]}] + 
+set_property PACKAGE_PIN M2 [get_ports {gt_rxp_in[3]}] +set_property PACKAGE_PIN M1 [get_ports {gt_rxn_in[3]}] +set_property PACKAGE_PIN H7 [get_ports {gt_txp_out[3]}] +set_property PACKAGE_PIN H6 [get_ports {gt_txn_out[3]}] + +#set_property IOSTANDARD LVDS [get_ports uclk_p] +#set_property IOSTANDARD LVDS [get_ports uclk_n] + +#set_property PACKAGE_PIN AW22 [get_ports uclk_n] +#set_property PACKAGE_PIN AW23 [get_ports uclk_p] + +#create_clock -period 6.400 -name uclk_clk [get_pins uclk_BUFG_inst/O] + +set_max_delay -datapath_only -from [get_clocks -of_objects [get_pins -hierarchical -filter {NAME =~ */channel_inst/*_CHANNEL_PRIM_INST/RXOUTCLK}]] -to [get_clocks -of_objects [get_pins -hierarchical -filter {NAME =~ */channel_inst/*_CHANNEL_PRIM_INST/TXOUTCLK}]] 6.400 +set_max_delay -datapath_only -from [get_clocks -of_objects [get_pins -hierarchical -filter {NAME =~ */channel_inst/*_CHANNEL_PRIM_INST/TXOUTCLK}]] -to [get_clocks -of_objects [get_pins -hierarchical -filter {NAME =~ */channel_inst/*_CHANNEL_PRIM_INST/RXOUTCLK}]] 6.400 + +set_max_delay -datapath_only -from [get_clocks dclk_clk] -to [get_clocks -of_objects [get_pins -hierarchical -filter {NAME =~ */channel_inst/*_CHANNEL_PRIM_INST/TXOUTCLK}]] 8.000 +set_max_delay -datapath_only -from [get_clocks dclk_clk] -to [get_clocks -of_objects [get_pins -hierarchical -filter {NAME =~ */channel_inst/*_CHANNEL_PRIM_INST/RXOUTCLK}]] 8.000 + +set_max_delay -datapath_only -from [get_clocks -of_objects [get_pins -hierarchical -filter {NAME =~ */channel_inst/*_CHANNEL_PRIM_INST/RXOUTCLK}]] -to [get_clocks dclk_clk] 6.400 +set_max_delay -datapath_only -from [get_clocks -of_objects [get_pins -hierarchical -filter {NAME =~ */channel_inst/*_CHANNEL_PRIM_INST/TXOUTCLK}]] -to [get_clocks dclk_clk] 6.400 \ No newline at end of file diff --git a/hw/constraints/vcu118/vcu_pcie.xdc b/hw/constraints/vcu118/vcu_pcie.xdc new file mode 100644 index 00000000..1d64afaf --- /dev/null +++ b/hw/constraints/vcu118/vcu_pcie.xdc @@ -0,0 +1,82 
@@ +#### PCIe + +# Clock +set_property PACKAGE_PIN AL8 [get_ports {pcie_clk_clk_n[0]}] +set_property PACKAGE_PIN AL9 [get_ports {pcie_clk_clk_p[0]}] + +# Reset +set_property PACKAGE_PIN AM17 [get_ports perst_n_nb] +set_property IOSTANDARD LVCMOS18 [get_ports perst_n_nb] +set_property PULLUP true [get_ports perst_n_nb] + +# PCIe ext. +set_property PACKAGE_PIN AA4 [get_ports {pcie_x16_rxp[0]}] +set_property PACKAGE_PIN AA3 [get_ports {pcie_x16_rxn[0]}] +set_property PACKAGE_PIN Y7 [get_ports {pcie_x16_txp[0]}] +set_property PACKAGE_PIN Y6 [get_ports {pcie_x16_txn[0]}] +set_property PACKAGE_PIN AB2 [get_ports {pcie_x16_rxp[1]}] +set_property PACKAGE_PIN AB1 [get_ports {pcie_x16_rxn[1]}] +set_property PACKAGE_PIN AB7 [get_ports {pcie_x16_txp[1]}] +set_property PACKAGE_PIN AB6 [get_ports {pcie_x16_txn[1]}] +set_property PACKAGE_PIN AC4 [get_ports {pcie_x16_rxp[2]}] +set_property PACKAGE_PIN AC3 [get_ports {pcie_x16_rxn[2]}] +set_property PACKAGE_PIN AD7 [get_ports {pcie_x16_txp[2]}] +set_property PACKAGE_PIN AD6 [get_ports {pcie_x16_txn[2]}] +set_property PACKAGE_PIN AD2 [get_ports {pcie_x16_rxp[3]}] +set_property PACKAGE_PIN AD1 [get_ports {pcie_x16_rxn[3]}] +set_property PACKAGE_PIN AF7 [get_ports {pcie_x16_txp[3]}] +set_property PACKAGE_PIN AF6 [get_ports {pcie_x16_txn[3]}] +set_property PACKAGE_PIN AE4 [get_ports {pcie_x16_rxp[4]}] +set_property PACKAGE_PIN AE3 [get_ports {pcie_x16_rxn[4]}] +set_property PACKAGE_PIN AH7 [get_ports {pcie_x16_txp[4]}] +set_property PACKAGE_PIN AH6 [get_ports {pcie_x16_txn[4]}] +set_property PACKAGE_PIN AF2 [get_ports {pcie_x16_rxp[5]}] +set_property PACKAGE_PIN AF1 [get_ports {pcie_x16_rxn[5]}] +set_property PACKAGE_PIN AK7 [get_ports {pcie_x16_txp[5]}] +set_property PACKAGE_PIN AK6 [get_ports {pcie_x16_txn[5]}] +set_property PACKAGE_PIN AG4 [get_ports {pcie_x16_rxp[6]}] +set_property PACKAGE_PIN AG3 [get_ports {pcie_x16_rxn[6]}] +set_property PACKAGE_PIN AM7 [get_ports {pcie_x16_txp[6]}] +set_property PACKAGE_PIN AM6 [get_ports 
{pcie_x16_txn[6]}] +set_property PACKAGE_PIN AH2 [get_ports {pcie_x16_rxp[7]}] +set_property PACKAGE_PIN AH1 [get_ports {pcie_x16_rxn[7]}] +set_property PACKAGE_PIN AN5 [get_ports {pcie_x16_txp[7]}] +set_property PACKAGE_PIN AN4 [get_ports {pcie_x16_txn[7]}] + +#x16 +set_property PACKAGE_PIN AJ4 [get_ports {pcie_x16_rxp[8]}] +set_property PACKAGE_PIN AJ3 [get_ports {pcie_x16_rxn[8]}] +set_property PACKAGE_PIN AP7 [get_ports {pcie_x16_txp[8]}] +set_property PACKAGE_PIN AP6 [get_ports {pcie_x16_txn[8]}] +set_property PACKAGE_PIN AK2 [get_ports {pcie_x16_rxp[9]}] +set_property PACKAGE_PIN AK1 [get_ports {pcie_x16_rxn[9]}] +set_property PACKAGE_PIN AR5 [get_ports {pcie_x16_txp[9]}] +set_property PACKAGE_PIN AR4 [get_ports {pcie_x16_txn[9]}] +set_property PACKAGE_PIN AM2 [get_ports {pcie_x16_rxp[10]}] +set_property PACKAGE_PIN AM1 [get_ports {pcie_x16_rxn[10]}] +set_property PACKAGE_PIN AT7 [get_ports {pcie_x16_txp[10]}] +set_property PACKAGE_PIN AT6 [get_ports {pcie_x16_txn[10]}] +set_property PACKAGE_PIN AP2 [get_ports {pcie_x16_rxp[11]}] +set_property PACKAGE_PIN AP1 [get_ports {pcie_x16_rxn[11]}] +set_property PACKAGE_PIN AU5 [get_ports {pcie_x16_txp[11]}] +set_property PACKAGE_PIN AU4 [get_ports {pcie_x16_txn[11]}] +set_property PACKAGE_PIN AT2 [get_ports {pcie_x16_rxp[12]}] +set_property PACKAGE_PIN AT1 [get_ports {pcie_x16_rxn[12]}] +set_property PACKAGE_PIN AW5 [get_ports {pcie_x16_txp[12]}] +set_property PACKAGE_PIN AW4 [get_ports {pcie_x16_txn[12]}] +set_property PACKAGE_PIN AV2 [get_ports {pcie_x16_rxp[13]}] +set_property PACKAGE_PIN AV1 [get_ports {pcie_x16_rxn[13]}] +set_property PACKAGE_PIN BA5 [get_ports {pcie_x16_txp[13]}] +set_property PACKAGE_PIN BA4 [get_ports {pcie_x16_txn[13]}] +set_property PACKAGE_PIN AY2 [get_ports {pcie_x16_rxp[14]}] +set_property PACKAGE_PIN AY1 [get_ports {pcie_x16_rxn[14]}] +set_property PACKAGE_PIN BC5 [get_ports {pcie_x16_txp[14]}] +set_property PACKAGE_PIN BC4 [get_ports {pcie_x16_txn[14]}] +set_property PACKAGE_PIN BB2 
[get_ports {pcie_x16_rxp[15]}] +set_property PACKAGE_PIN BB1 [get_ports {pcie_x16_rxn[15]}] +set_property PACKAGE_PIN BE5 [get_ports {pcie_x16_txp[15]}] +set_property PACKAGE_PIN BE4 [get_ports {pcie_x16_txn[15]}] + +# Set false path +set_false_path -from [get_ports perst_n_nb] + diff --git a/hw/ext/network b/hw/ext/network new file mode 160000 index 00000000..e5b38fe5 --- /dev/null +++ b/hw/ext/network @@ -0,0 +1 @@ +Subproject commit e5b38fe5c8328c714e77bfafb964157480929557 diff --git a/hw/hdl/cdma/axi_dma_rd.sv b/hw/hdl/cdma/axi_dma_rd.sv new file mode 100644 index 00000000..ca5c4c44 --- /dev/null +++ b/hw/hdl/cdma/axi_dma_rd.sv @@ -0,0 +1,269 @@ +// /******************************************************************************* +// Copyright (c) 2018, Xilinx, Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// +// 3. Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// *******************************************************************************/
+
+import lynxTypes::*;
+
+//`define DEBUG_CDMA_RD
+
+/**
+ * @brief AXI4 read DMA engine: memory-mapped read -> AXI4-Stream.
+ *
+ * A command (ctrl_addr, ctrl_len, ctrl_ctl) is accepted in the cycle where
+ * ctrl_valid and stat_ready are both high. The engine then issues INCR read
+ * bursts of up to AXI_MAX_BURST_LEN beats on the AR channel, advancing the
+ * address by AXI_MAX_BURST_LEN*AXI_DATA_BYTES after every accepted burst, and
+ * forwards the R channel unchanged onto axis_out_* (tkeep is all ones).
+ *
+ * stat_ready is high while no AR transactions remain to be issued.
+ * stat_done / axis_out_tlast are raised on an rlast beat whose burst was
+ * tagged "final" (last burst of a command that had ctrl_ctl set).
+ *
+ * Only ctrl_len bits [LEN_BITS-1:LOG_DATA_LEN] are used, i.e. the length is
+ * interpreted in whole data beats; the low LOG_DATA_LEN bits are ignored.
+ *
+ * NOTE(review): if ctrl_len holds less than one data beat, both
+ * num_full_bursts and num_partial_bursts are zero and num_transactions wraps
+ * to all ones. Callers appear to be expected to pass a non-zero, beat-aligned
+ * length - confirm against the command source.
+ *
+ * NOTE(review): rresp and rid are not inspected anywhere in this module.
+ */
+module axi_dma_rd (
+    // Clock and reset
+    input  wire                         aclk,
+    input  wire                         aresetn,
+
+    // Control and status
+    input  wire                         ctrl_valid,
+    output wire                         stat_ready,
+    input  wire [AXI_ADDR_BITS-1:0]     ctrl_addr,
+    input  wire [LEN_BITS-1:0]          ctrl_len,
+    input  wire                         ctrl_ctl,
+    output wire                         stat_done,
+
+    // AXI4 master interface
+    output wire                         arvalid,
+    input  wire                         arready,
+    output wire [AXI_ADDR_BITS-1:0]     araddr,
+    output wire [0:0]                   arid,
+    output wire [7:0]                   arlen,
+    output wire [2:0]                   arsize,
+    output wire [1:0]                   arburst,
+    output wire [0:0]                   arlock,
+    output wire [3:0]                   arcache,
+    output wire [2:0]                   arprot,
+    input  wire                         rvalid,
+    output wire                         rready,
+    input  wire [AXI_DATA_BITS-1:0]     rdata,
+    input  wire                         rlast,
+    input  wire [0:0]                   rid,
+    input  wire [1:0]                   rresp,
+
+    // AXI4-Stream master interface
+    output wire                         axis_out_tvalid,
+    input  wire                         axis_out_tready,
+    output wire [AXI_DATA_BITS-1:0]     axis_out_tdata,
+    output wire [AXI_DATA_BITS/8-1:0]   axis_out_tkeep,
+    output wire                         axis_out_tlast
+);
+
+///////////////////////////////////////////////////////////////////////////////
+// Local Parameters
+///////////////////////////////////////////////////////////////////////////////
+localparam integer MAX_OUTSTANDING = 8;
+localparam integer AXI_MAX_BURST_LEN = 64;
+localparam integer AXI_DATA_BYTES = AXI_DATA_BITS / 8;
+localparam integer LOG_DATA_LEN = $clog2(AXI_DATA_BYTES);
+localparam integer LOG_BURST_LEN = $clog2(AXI_MAX_BURST_LEN);
+localparam integer LP_MAX_OUTSTANDING_CNTR_WIDTH = $clog2(MAX_OUTSTANDING+1);
+localparam integer LP_TRANSACTION_CNTR_WIDTH = LEN_BITS-LOG_BURST_LEN-LOG_DATA_LEN;
+
+logic [LP_TRANSACTION_CNTR_WIDTH-1:0] num_full_bursts;
+logic num_partial_bursts;
+
+logic ctrl_accept;
+logic start;
+logic [LP_TRANSACTION_CNTR_WIDTH-1:0] num_transactions;
+logic has_partial_burst;
+logic [LOG_BURST_LEN-1:0] final_burst_len;
+logic single_transaction;
+
+// AR
+logic arvalid_r;
+logic [AXI_ADDR_BITS-1:0] addr_r;
+logic ctl_r;
+logic ar_done;
+logic ar_idle;
+
+logic arxfer;
+logic ar_final_transaction;
+logic [LP_TRANSACTION_CNTR_WIDTH-1:0] ar_transactions_to_go;
+
+// R
+logic rxfer;
+logic r_final_transaction;
+
+logic burst_ready_snk;
+
+///////////////////////////////////////////////////////////////////////////////
+// Ctrl
+///////////////////////////////////////////////////////////////////////////////
+assign stat_done = rxfer & rlast & r_final_transaction;
+assign stat_ready = ar_idle;
+
+// A new command is taken in exactly the cycle where both sides agree.
+assign ctrl_accept = ctrl_valid & stat_ready;
+
+// Determine how many full burst to issue and if there are any partial bursts.
+assign num_full_bursts = ctrl_len[LOG_DATA_LEN+LOG_BURST_LEN+:LEN_BITS-LOG_DATA_LEN-LOG_BURST_LEN];
+assign num_partial_bursts = ctrl_len[LOG_DATA_LEN+:LOG_BURST_LEN] ? 1'b1 : 1'b0;
+
+// One-cycle delayed command-accept pulse; loads the AR transaction counter.
+always_ff @(posedge aclk, negedge aresetn) begin
+    if(~aresetn) begin
+        start <= 0;
+    end
+    else begin
+        start <= ctrl_accept;
+    end
+end
+
+// Per-command parameters. These registers have no reset value, so they are
+// kept out of the asynchronously reset block above: sharing a block with
+// reset flops would make aresetn an implied clock enable for them.
+always_ff @(posedge aclk) begin
+    if(ctrl_accept) begin
+        // Counter counts down to zero, hence "number of bursts - 1".
+        num_transactions <= (num_partial_bursts == 1'b0) ? num_full_bursts - 1'b1 : num_full_bursts;
+        has_partial_burst <= num_partial_bursts;
+        // AXI length encoding is beats-1; wraps to AXI_MAX_BURST_LEN-1 when
+        // there is no partial burst, which is the length of a full burst.
+        final_burst_len <= ctrl_len[LOG_DATA_LEN+:LOG_BURST_LEN] - 1'b1;
+    end
+end
+
+// Special case if there is only 1 AXI transaction.
+assign single_transaction = (num_transactions == {LP_TRANSACTION_CNTR_WIDTH{1'b0}}) ? 1'b1 : 1'b0;
+
+///////////////////////////////////////////////////////////////////////////////
+// AXI Read Address Channel
+///////////////////////////////////////////////////////////////////////////////
+assign arvalid = arvalid_r;
+assign araddr = addr_r;
+assign arlen = ar_final_transaction ? final_burst_len : AXI_MAX_BURST_LEN - 1;
+assign arsize = LOG_DATA_LEN;
+assign arid = 1'b0;
+
+assign arburst = 2'b01;
+assign arlock = 1'b0;
+assign arcache = 4'b0011;
+assign arprot = 3'b010;
+
+assign arxfer = arvalid & arready;
+
+// Send ar_valid: raised when work is pending and the burst queue can take
+// another entry, dropped again once the address has been accepted.
+always_ff @(posedge aclk, negedge aresetn) begin
+    if (~aresetn) begin
+        arvalid_r <= 1'b0;
+    end
+    else begin
+        arvalid_r <= ~ar_idle & ~arvalid_r & burst_ready_snk ? 1'b1 :
+                     arready ? 1'b0 : arvalid_r;
+    end
+end
+
+// When ar_idle, there are no transactions to issue.
+always_ff @(posedge aclk, negedge aresetn) begin
+    if (~aresetn) begin
+        ar_idle <= 1'b1;
+    end
+    else begin
+        ar_idle <= ctrl_accept ? 1'b0 :
+                   ar_done ? 1'b1 : ar_idle;
+    end
+end
+
+// Ctl latching.
+always_ff @(posedge aclk, negedge aresetn) begin
+    if (~aresetn) begin
+        ctl_r <= 1'b0;
+    end
+    else begin
+        ctl_r <= ctrl_accept ? ctrl_ctl : ctl_r;
+    end
+end
+
+// Increment to next address after each transaction is issued. No reset value,
+// therefore in its own block without the asynchronous reset.
+always_ff @(posedge aclk) begin
+    addr_r <= ctrl_accept ? ctrl_addr :
+              arxfer ? addr_r + AXI_MAX_BURST_LEN*AXI_DATA_BYTES : addr_r;
+end
+
+// Counts down the number of transactions to send.
+krnl_counter #(
+    .C_WIDTH ( LP_TRANSACTION_CNTR_WIDTH ) ,
+    .C_INIT ( {LP_TRANSACTION_CNTR_WIDTH{1'b0}} )
+)
+inst_ar_transaction_cntr (
+    .aclk ( aclk ) ,
+    .clken ( 1'b1 ) ,
+    .aresetn ( aresetn ) ,
+    .load ( start ) ,
+    .incr ( 1'b0 ) ,
+    .decr ( arxfer ) ,
+    .load_value ( num_transactions ) ,
+    .count ( ar_transactions_to_go ) ,
+    .is_zero ( ar_final_transaction )
+);
+
+assign ar_done = ar_final_transaction && arxfer;
+
+///////////////////////////////////////////////////////////////////////////////
+// AXI Read Channel
+///////////////////////////////////////////////////////////////////////////////
+assign axis_out_tvalid = rvalid;
+assign axis_out_tdata = rdata;
+assign axis_out_tkeep = ~0;
+assign axis_out_tlast = rlast & r_final_transaction;
+assign rready = axis_out_tready;
+
+assign rxfer = rready & rvalid;
+
+// One entry is pushed per issued burst and one is popped per completed burst
+// (rlast). The entry carries the "final burst of a ctl-tagged command" flag
+// from the AR side to the R side.
+// NOTE(review): presumably `queue` is an in-order FIFO whose data_src shows the
+// oldest entry - confirm against the queue module.
+queue #(
+    .QTYPE(logic),
+    .QDEPTH(MAX_OUTSTANDING)
+) burst_seq (
+    .aclk(aclk),
+    .aresetn(aresetn),
+    .val_snk(arxfer),
+    .rdy_snk(burst_ready_snk),
+    .data_snk(ctl_r & ar_final_transaction),
+    .val_src(rlast & rxfer),
+    .rdy_src(),
+    .data_src(r_final_transaction)
+);
+
+/////////////////////////////////////////////////////////////////////////////
+// DEBUG
+/////////////////////////////////////////////////////////////////////////////
+`ifdef DEBUG_CDMA_RD
+    ila_dma_rd inst_ila_dma_rd (
+        .clk(aclk),
+        .probe0(num_full_bursts),
+        .probe1(num_partial_bursts),
+        .probe2(start),
+        .probe3(num_transactions),
+        .probe4(has_partial_burst),
+        .probe5(final_burst_len),
+        .probe6(single_transaction),
+        .probe7(arvalid_r),
+        .probe8(addr_r),
+        .probe9(burst_ready_snk),
+        .probe10(ar_done),
+        .probe11(ar_idle),
+        .probe12(0),
+        .probe13(0),
+        .probe14(arxfer),
+        .probe15(ar_final_transaction),
+        .probe16(ar_transactions_to_go),
+        .probe17(0),
+        .probe18(rxfer),
+        .probe19(r_final_transaction),
+        .probe20(0),
+        .probe21(rvalid),
+        .probe22(rready),
+        .probe23(rlast)
+    );
+`endif
+
+endmodule
\ No newline at end of file
diff --git 
a/hw/hdl/cdma/axi_dma_wr.sv b/hw/hdl/cdma/axi_dma_wr.sv
new file mode 100644
index 00000000..5ae9b0c4
--- /dev/null
+++ b/hw/hdl/cdma/axi_dma_wr.sv
@@ -0,0 +1,370 @@
+// /*******************************************************************************
+// Copyright (c) 2018, Xilinx, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+//
+// 3. Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// *******************************************************************************/
+
+import lynxTypes::*;
+
+//`define DEBUG_CDMA_WR
+
+// axi_dma_wr - AXI4 write engine of the CDMA.
+//
+// A request (ctrl_addr, ctrl_len, ctrl_ctl) is accepted on ctrl_valid & stat_ready.
+// It is split into bursts of AXI_MAX_BURST_LEN beats, plus one shorter final
+// burst when the beat count is not a multiple of AXI_MAX_BURST_LEN. Write data
+// comes from the AXI4-Stream input. stat_done is asserted on the write response
+// whose b_seq entry was pushed with ctl_r set on the final burst of a request.
+//
+// NOTE(review): the low LOG_DATA_LEN bits of ctrl_len are never read, so lengths
+// are presumably multiples of AXI_DATA_BYTES, and ctrl_len == 0 makes
+// num_transactions wrap to all ones - confirm callers never issue such requests.
+// NOTE(review): bursts advance linearly from ctrl_addr without a 4 KB boundary
+// check - presumably ctrl_addr is suitably aligned; confirm against callers.
+// NOTE(review): axis_in_tlast, bid and bresp are not used inside this module.
+module axi_dma_wr (
+    // AXI Interface
+    input  wire                        aclk,
+    input  wire                        aresetn,
+
+    // Control interface
+    input  wire                        ctrl_valid,
+    output wire                        stat_ready,
+    input  wire [AXI_ADDR_BITS-1:0]    ctrl_addr,
+    input  wire [LEN_BITS-1:0]         ctrl_len,
+    input  wire                        ctrl_ctl,
+    output wire                        stat_done,
+
+    output wire                        awvalid,
+    input  wire                        awready,
+    output wire [AXI_ADDR_BITS-1:0]    awaddr,
+    output wire [0:0]                  awid,
+    output wire [7:0]                  awlen,
+    output wire [2:0]                  awsize,
+    output wire [1:0]                  awburst,
+    output wire [0:0]                  awlock,
+    output wire [3:0]                  awcache,
+    output wire [2:0]                  awprot,
+    output wire [AXI_DATA_BITS-1:0]    wdata,
+    output wire [AXI_DATA_BITS/8-1:0]  wstrb,
+    output wire                        wlast,
+    output wire                        wvalid,
+    input  wire                        wready,
+    input  wire                        bid,
+    input  wire [1:0]                  bresp,
+    input  wire                        bvalid,
+    output wire                        bready,
+
+    // AXI4-Stream slave interface
+    input  wire                        axis_in_tvalid,
+    output wire                        axis_in_tready,
+    input  wire [AXI_DATA_BITS-1:0]    axis_in_tdata,
+    input  wire [AXI_DATA_BITS/8-1:0]  axis_in_tkeep,
+    input  wire                        axis_in_tlast
+);
+
+///////////////////////////////////////////////////////////////////////////////
+// Local Parameters
+///////////////////////////////////////////////////////////////////////////////
+localparam integer MAX_OUTSTANDING = 8;
+localparam integer AXI_MAX_BURST_LEN = 64;
+localparam integer AXI_DATA_BYTES = AXI_DATA_BITS / 8;
+localparam integer LOG_DATA_LEN = $clog2(AXI_DATA_BYTES);
+localparam integer LOG_BURST_LEN = $clog2(AXI_MAX_BURST_LEN);
+localparam integer LP_MAX_OUTSTANDING_CNTR_WIDTH = $clog2(MAX_OUTSTANDING+1);
+localparam integer LP_TRANSACTION_CNTR_WIDTH = LEN_BITS-LOG_BURST_LEN-LOG_DATA_LEN;
+
+logic [LP_TRANSACTION_CNTR_WIDTH-1:0] num_full_bursts;
+logic num_partial_bursts;
+
+logic ctrl_xfer;
+logic start;
+logic [LP_TRANSACTION_CNTR_WIDTH-1:0] num_transactions;
+logic has_partial_burst;
+logic [LOG_BURST_LEN-1:0] final_burst_len;
+logic single_transaction;
+
+// AW
+logic awvalid_r;
+logic [AXI_ADDR_BITS-1:0] addr_r;
+logic ctl_r;
+logic aw_done;
+logic aw_idle;
+
+logic awxfer;
+logic aw_final_transaction;
+logic [LP_TRANSACTION_CNTR_WIDTH-1:0] aw_transactions_to_go;
+
+// W
+logic wxfer;
+logic [LOG_BURST_LEN-1:0] wxfers_to_go;
+
+logic burst_load;
+logic burst_active;
+logic burst_ready_snk;
+logic burst_ready_src;
+logic [LOG_BURST_LEN-1:0] burst_len;
+
+// B
+logic bxfer;
+logic b_final_transaction;
+
+logic b_ready_snk;
+
+/////////////////////////////////////////////////////////////////////////////
+// Control logic
+/////////////////////////////////////////////////////////////////////////////
+assign stat_done = bxfer & b_final_transaction;
+assign stat_ready = aw_idle;
+
+// A new request is accepted in the cycle ctrl_valid meets stat_ready.
+assign ctrl_xfer = ctrl_valid & stat_ready;
+
+// Count the number of transfers and assert done when the last bvalid is received.
+assign num_full_bursts = ctrl_len[LOG_DATA_LEN+LOG_BURST_LEN+:LEN_BITS-LOG_DATA_LEN-LOG_BURST_LEN];
+assign num_partial_bursts = ctrl_len[LOG_DATA_LEN+:LOG_BURST_LEN] ? 1'b1 : 1'b0;
+
+// Latch the per-request burst bookkeeping on the cycle a request is accepted.
+// Every register of this block is reset: a register left out of the reset branch
+// of an async-reset always_ff holds its value while aresetn is low, which turns
+// the reset into a clock enable for that flop.
+always_ff @(posedge aclk, negedge aresetn) begin
+    if(~aresetn) begin
+        start <= 0;
+        num_transactions <= '0;
+        has_partial_burst <= 1'b0;
+        final_burst_len <= '0;
+    end
+    else begin
+        start <= ctrl_xfer;
+        if(ctrl_xfer) begin
+            // Number of bursts minus one: the transaction counter reads zero on the last burst.
+            num_transactions <= (num_partial_bursts == 1'b0) ? num_full_bursts - 1'b1 : num_full_bursts;
+            has_partial_burst <= num_partial_bursts;
+            // awlen encoding (beats - 1); wraps to AXI_MAX_BURST_LEN-1 when the last burst is full.
+            final_burst_len <= ctrl_len[LOG_DATA_LEN+:LOG_BURST_LEN] - 1'b1;
+        end
+    end
+end
+
+// Special case if there is only 1 AXI transaction.
+assign single_transaction = (num_transactions == {LP_TRANSACTION_CNTR_WIDTH{1'b0}}) ? 1'b1 : 1'b0;
+
+///////////////////////////////////////////////////////////////////////////////
+// AXI Write Address Channel
+///////////////////////////////////////////////////////////////////////////////
+assign awvalid = awvalid_r;
+assign awaddr = addr_r;
+assign awlen = aw_final_transaction ? final_burst_len : AXI_MAX_BURST_LEN - 1;
+assign awsize = LOG_DATA_LEN;
+assign awid = 1'b0;
+
+assign awburst = 2'b01;
+assign awlock = 1'b0;
+assign awcache = 4'b0011;
+assign awprot = 3'b010;
+
+assign awxfer = awvalid & awready;
+
+// Send aw_valid
+// awvalid is raised only while b_ready_snk is high, i.e. (presumably) while the
+// b_seq queue still has room for another outstanding burst.
+always_ff @(posedge aclk, negedge aresetn) begin
+    if (~aresetn) begin
+        awvalid_r <= 1'b0;
+    end
+    else begin
+        awvalid_r <= ~aw_idle & ~awvalid_r & b_ready_snk ? 1'b1 :
+                     awready ? 1'b0 : awvalid_r;
+    end
+end
+
+// When aw_idle, there are no transactions to issue.
+always_ff @(posedge aclk, negedge aresetn) begin
+    if (~aresetn) begin
+        aw_idle <= 1'b1;
+    end
+    else begin
+        aw_idle <= ctrl_xfer ? 1'b0 :
+                   aw_done ? 1'b1 : aw_idle;
+    end
+end
+
+// Increment to next address after each transaction is issued. Ctl latching.
+// addr_r is reset together with ctl_r so that aresetn does not act as a clock enable for it.
+always_ff @(posedge aclk, negedge aresetn) begin
+    if (~aresetn) begin
+        addr_r <= '0;
+        ctl_r <= 1'b0;
+    end
+    else begin
+        addr_r <= ctrl_xfer ? ctrl_addr :
+                  awxfer ? addr_r + AXI_MAX_BURST_LEN*AXI_DATA_BYTES : addr_r;
+        ctl_r <= ctrl_xfer ? ctrl_ctl : ctl_r;
+    end
+end
+
+// Counts down the number of transactions to send.
+krnl_counter #(
+    .C_WIDTH ( LP_TRANSACTION_CNTR_WIDTH ) ,
+    .C_INIT ( {LP_TRANSACTION_CNTR_WIDTH{1'b0}} )
+)
+inst_aw_transaction_cntr (
+    .aclk ( aclk ) ,
+    .clken ( 1'b1 ) ,
+    .aresetn ( aresetn ) ,
+    .load ( start ) ,
+    .incr ( 1'b0 ) ,
+    .decr ( awxfer ) ,
+    .load_value ( num_transactions ) ,
+    .count ( aw_transactions_to_go ) ,
+    .is_zero ( aw_final_transaction )
+);
+
+assign aw_done = aw_final_transaction && awxfer;
+
+/////////////////////////////////////////////////////////////////////////////
+// AXI Write Data Channel
+/////////////////////////////////////////////////////////////////////////////
+assign wvalid = axis_in_tvalid & burst_active;
+assign wdata = axis_in_tdata;
+assign wstrb = axis_in_tkeep;
+assign axis_in_tready = wready & burst_active;
+
+assign wxfer = wvalid & wready;
+
+// Fetch the next burst length when the burst queue offers one (burst_ready_src) and
+// the W channel is either idle or transferring the last beat of its current burst.
+assign burst_load = burst_ready_src && ((wlast & wxfer) || ~burst_active);
+
+always_ff @(posedge aclk, negedge aresetn) begin
+    if (~aresetn) begin
+        burst_active <= 1'b0;
+    end
+    else begin
+        burst_active <= burst_load ? 1'b1 :
+                        (wlast & wxfer) ? 1'b0 : burst_active;
+    end
+end
+
+// Beats left in the current W burst; the counter's is_zero output drives wlast.
+krnl_counter #(
+    .C_WIDTH ( LOG_BURST_LEN ) ,
+    .C_INIT ( {LOG_BURST_LEN{1'b1}} )
+)
+inst_burst_cntr (
+    .aclk ( aclk ) ,
+    .clken ( 1'b1 ) ,
+    .aresetn ( aresetn ) ,
+    .load ( burst_load ) ,
+    .incr ( 1'b0 ) ,
+    .decr ( wxfer ) ,
+    .load_value ( burst_len ) ,
+    .count ( wxfers_to_go ) ,
+    .is_zero ( wlast )
+);
+
+// Burst lengths (awlen) pushed on every AW transfer and taken by the W channel on burst_load.
+queue #(
+    .QTYPE(logic[LOG_BURST_LEN-1:0]),
+    .QDEPTH(MAX_OUTSTANDING)
+) burst_seq (
+    .aclk(aclk),
+    .aresetn(aresetn),
+    .val_snk(awxfer),
+    .rdy_snk(burst_ready_snk),
+    .data_snk(awlen[0+:LOG_BURST_LEN]),
+    .val_src(burst_load),
+    .rdy_src(burst_ready_src),
+    .data_src(burst_len)
+);
+
+/////////////////////////////////////////////////////////////////////////////
+// AXI Write Response Channel
+/////////////////////////////////////////////////////////////////////////////
+assign bready = 1'b1;
+assign bxfer = bready & bvalid;
+
+// One entry per AW transfer: set for the final burst of a request that had ctrl_ctl set.
+// The entry read on a write response (bxfer) decides whether stat_done is raised.
+queue #(
+    .QTYPE(logic),
+    .QDEPTH(MAX_OUTSTANDING)
+) b_seq (
+    .aclk(aclk),
+    .aresetn(aresetn),
+    .val_snk(awxfer),
+    .rdy_snk(b_ready_snk),
+    .data_snk(ctl_r & aw_final_transaction),
+    .val_src(bxfer),
+    .rdy_src(),
+    .data_src(b_final_transaction)
+);
+
+
+/////////////////////////////////////////////////////////////////////////////
+// DEBUG
+/////////////////////////////////////////////////////////////////////////////
+`ifdef DEBUG_CDMA_WR
+    ila_dma_wr inst_ila_dma_wr (
+        .clk(aclk),
+        .probe0(num_full_bursts),
+        .probe1(num_partial_bursts),
+        .probe2(start),
+        .probe3(num_transactions),
+        .probe4(has_partial_burst),
+        .probe5(final_burst_len),
+        .probe6(single_transaction),
+        .probe7(awvalid_r),
+        .probe8(addr_r),
+        .probe9(aw_done),
+        .probe10(aw_idle),
+        .probe11(awxfer),
+        .probe12(aw_final_transaction),
+        .probe13(aw_transactions_to_go),
+        .probe14(wxfer),
+        .probe15(wxfers_to_go),
+        .probe16(burst_load),
+        .probe17(burst_active),
+        .probe18(burst_ready_snk),
+        .probe19(burst_ready_src),
+        .probe20(burst_len),
+        .probe21(bxfer),
+        .probe22(b_final_transaction),
+        .probe23(b_ready_snk),
+        .probe24(wvalid),
+        .probe25(wready),
+        .probe26(wlast)
+    );
+`endif
+
+endmodule
+
diff --git a/hw/hdl/cdma/cdma.sv b/hw/hdl/cdma/cdma.sv
new file mode 100644
index 00000000..8f2178ec
--- /dev/null
+++ b/hw/hdl/cdma/cdma.sv
@@ -0,0 +1,154 @@
+/*
+ * CDMA
+ *
+ * Read and write DMA requests are buffered in one request queue each and then
+ * handed to axi_dma_rd / axi_dma_wr, which master the AXI4 port axi_ddr_in.
+ * Read data leaves on axis_ddr_out, write data enters on axis_ddr_in. The done
+ * flags of both engines are passed straight through to rdCDMA / wrCDMA.
+ */
+
+import lynxTypes::*;
+
+module cdma (
+    input logic aclk,
+    input logic aresetn,
+
+    dmaIntf.s rdCDMA,
+    dmaIntf.s wrCDMA,
+
+    AXI4.m axi_ddr_in,
+
+    AXI4S.s axis_ddr_in,
+    AXI4S.m axis_ddr_out
+);
+
+// Decoupling
+dmaIntf rdCDMA_int ();
+dmaIntf wrCDMA_int ();
+
+// RD ------------------------------------------------------------------------------------------
+// CDMA completion
+assign rdCDMA.done = rdCDMA_int.done;
+
+// Request queue
+queue_stream #(.QTYPE(dma_req_t)) inst_rddma_out (
+    .aclk(aclk),
+    .aresetn(aresetn),
+    .val_snk(rdCDMA.valid),
+    .rdy_snk(rdCDMA.ready),
+    .data_snk(rdCDMA.req),
+    .val_src(rdCDMA_int.valid),
+    .rdy_src(rdCDMA_int.ready),
+    .data_src(rdCDMA_int.req)
+);
+
+// WR ------------------------------------------------------------------------------------------
+// CDMA completion
+assign wrCDMA.done = wrCDMA_int.done;
+
+// Request queue
+queue_stream #(.QTYPE(dma_req_t)) inst_wrdma_out (
+    .aclk(aclk),
+    .aresetn(aresetn),
+    .val_snk(wrCDMA.valid),
+    .rdy_snk(wrCDMA.ready),
+    .data_snk(wrCDMA.req),
+    .val_src(wrCDMA_int.valid),
+    .rdy_src(wrCDMA_int.ready),
+    .data_src(wrCDMA_int.req)
+);
+
+//
+// CDMA
+//
+
+// RD channel
+axi_dma_rd axi_dma_rd_inst (
+    .aclk(aclk),
+    .aresetn(aresetn),
+
+    // CS
+    .ctrl_valid(rdCDMA_int.valid),
+    .stat_ready(rdCDMA_int.ready),
+    .ctrl_addr(rdCDMA_int.req.paddr),
+    .ctrl_len(rdCDMA_int.req.len),
+    .ctrl_ctl(rdCDMA_int.req.ctl),
+    .stat_done(rdCDMA_int.done),
+
+    // AXI
+    .arvalid(axi_ddr_in.arvalid),
+    .arready(axi_ddr_in.arready),
+    .araddr(axi_ddr_in.araddr),
+    .arid(axi_ddr_in.arid),
+    .arlen(axi_ddr_in.arlen),
+    .arsize(axi_ddr_in.arsize),
+    .arburst(axi_ddr_in.arburst),
+    .arlock(axi_ddr_in.arlock),
+    .arcache(axi_ddr_in.arcache),
+    .arprot(axi_ddr_in.arprot),
+    .rvalid(axi_ddr_in.rvalid),
+    .rready(axi_ddr_in.rready),
+    .rdata(axi_ddr_in.rdata),
+    .rlast(axi_ddr_in.rlast),
+    .rid(axi_ddr_in.rid),
+    .rresp(axi_ddr_in.rresp),
+
+    // AXIS
+    .axis_out_tdata(axis_ddr_out.tdata),
+    .axis_out_tkeep(axis_ddr_out.tkeep),
+    .axis_out_tvalid(axis_ddr_out.tvalid),
+    .axis_out_tready(axis_ddr_out.tready),
+    .axis_out_tlast(axis_ddr_out.tlast)
+);
+
+// Tie-off RD
+assign axi_ddr_in.arqos = 0;
+assign axi_ddr_in.arregion = 0;
+
+// WR channel
+axi_dma_wr axi_dma_wr_inst (
+    .aclk(aclk),
+    .aresetn(aresetn),
+
+    // CS
+    .ctrl_valid(wrCDMA_int.valid),
+    .stat_ready(wrCDMA_int.ready),
+    .ctrl_addr(wrCDMA_int.req.paddr),
+    .ctrl_len(wrCDMA_int.req.len),
+    .ctrl_ctl(wrCDMA_int.req.ctl),
+    .stat_done(wrCDMA_int.done),
+
+    // AXI
+    .awvalid(axi_ddr_in.awvalid),
+    .awready(axi_ddr_in.awready),
+    .awaddr(axi_ddr_in.awaddr),
+    .awid(axi_ddr_in.awid),
+    .awlen(axi_ddr_in.awlen),
+    .awsize(axi_ddr_in.awsize),
+    .awburst(axi_ddr_in.awburst),
+    .awlock(axi_ddr_in.awlock),
+    .awcache(axi_ddr_in.awcache),
+    .awprot(axi_ddr_in.awprot),
+    .wdata(axi_ddr_in.wdata),
+    .wstrb(axi_ddr_in.wstrb),
+    .wlast(axi_ddr_in.wlast),
+    .wvalid(axi_ddr_in.wvalid),
+    .wready(axi_ddr_in.wready),
+    .bid(axi_ddr_in.bid),
+    .bresp(axi_ddr_in.bresp),
+    .bvalid(axi_ddr_in.bvalid),
+    .bready(axi_ddr_in.bready),
+
+    // AXIS
+    .axis_in_tdata(axis_ddr_in.tdata),
+    .axis_in_tkeep(axis_ddr_in.tkeep),
+    .axis_in_tvalid(axis_ddr_in.tvalid),
+    .axis_in_tready(axis_ddr_in.tready),
+    .axis_in_tlast(axis_ddr_in.tlast)
+);
+
+// Tie-off WR
+assign axi_ddr_in.awqos = 0;
+assign axi_ddr_in.awregion = 0;
+
+endmodule
\ No newline at end of file
diff --git a/hw/hdl/cdma/krnl_counter.sv b/hw/hdl/cdma/krnl_counter.sv
new file mode 100644
index 00000000..624aaad1
--- /dev/null
+++ b/hw/hdl/cdma/krnl_counter.sv
@@ -0,0 +1,105 @@
+// /*******************************************************************************
+// Copyright (c) 2018, Xilinx, Inc.
+// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// +// 3. Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// *******************************************************************************/ + +//----------------------------------------------------------------------------- +// Simple up/down counter with reset. 
+//-----------------------------------------------------------------------------
+`default_nettype none
+`timescale 1ps/1ps
+module krnl_counter #(
+    parameter integer C_WIDTH = 4,
+    parameter [C_WIDTH-1:0] C_INIT = {C_WIDTH{1'b0}}
+)
+(
+    input  wire               aclk,
+    input  wire               clken,
+    input  wire               aresetn,
+    input  wire               load,
+    input  wire               incr,
+    input  wire               decr,
+    input  wire [C_WIDTH-1:0] load_value,
+    output wire [C_WIDTH-1:0] count,
+    output wire               is_zero
+);
+
+    // Reference values for the zero / wrap-around comparisons.
+    localparam [C_WIDTH-1:0] LP_ZERO = {C_WIDTH{1'b0}};
+    localparam [C_WIDTH-1:0] LP_ONE = {{C_WIDTH-1{1'b0}},1'b1};
+    localparam [C_WIDTH-1:0] LP_MAX = {C_WIDTH{1'b1}};
+
+    // Counter state; is_zero_r is kept as its own register rather than decoded from count_r.
+    reg [C_WIDTH-1:0] count_r = C_INIT;
+    reg is_zero_r = (C_INIT == LP_ZERO);
+
+    // A step is taken only when exactly one of incr / decr is asserted.
+    wire step_up = incr & ~decr;
+    wire step_down = decr & ~incr;
+
+    assign count = count_r;
+    assign is_zero = is_zero_r;
+
+    // Count register: load has priority over stepping; clken low freezes the value.
+    always_ff @(posedge aclk, negedge aresetn) begin
+        if (~aresetn)
+            count_r <= C_INIT;
+        else if (clken) begin
+            if (load)
+                count_r <= load_value;
+            else if (step_up)
+                count_r <= count_r + 1'b1;
+            else if (step_down)
+                count_r <= count_r - 1'b1;
+        end
+    end
+
+    // Zero flag, updated in the same cycle as count_r so that both stay aligned:
+    //  - load      : flag follows load_value
+    //  - step down : the next count is zero iff the current count is one
+    //  - step up   : the next count is zero iff the current count wraps from its maximum
+    always_ff @(posedge aclk, negedge aresetn) begin
+        if (~aresetn)
+            is_zero_r <= (C_INIT == LP_ZERO);
+        else if (clken) begin
+            if (load)
+                is_zero_r <= (load_value == LP_ZERO);
+            else if (step_down)
+                is_zero_r <= (count_r == LP_ONE);
+            else if (step_up)
+                is_zero_r <= (count_r == LP_MAX);
+        end
+    end
+
+
+endmodule : krnl_counter
+`default_nettype wire
diff --git a/hw/hdl/cdma_unaglined/axi_dma.sv b/hw/hdl/cdma_unaglined/axi_dma.sv
new file mode 100644
index 00000000..cf9c8e1d
--- /dev/null
+++ b/hw/hdl/cdma_unaglined/axi_dma.sv
@@ -0,0 +1,236 @@
+/*
+ * CDMA top level: request queues and ctl sequencing around axi_dma_rd / axi_dma_wr
+ */
+
+import lynxTypes::*;
+
+`timescale 1ns / 1ps
+
+module cdma (
+    input wire aclk,
+    input wire aresetn,
+
+    dmaIntf.s rdCDMA,
+    dmaIntf.s wrCDMA,
+
+    AXI4.m axi_ddr_in,
+
+    AXI4S.s axis_ddr_in,
+    AXI4S.m axis_ddr_out
+);
+
+// RD ------------------------------------------------------------------------------------------
+dmaIntf rdCDMA_que ();   // request at the output of the read request queue
+dmaIntf rdCDMA_int ();   // request / completion as seen by the read engine
+
+logic rd_ctl_push;       // push strobe of the read ctl queue
+logic rd_ctl_space;      // rdy_snk of the read ctl queue
+logic rd_ctl_flag;       // ctl flag at the output of the read ctl queue
+logic rd_eng_tlast;      // tlast as produced by the read engine
+
+// Read request queue
+queue_stream #(.QTYPE(dma_req_t)) inst_rddma_out (
+    .aclk     (aclk),
+    .aresetn  (aresetn),
+    .data_snk (rdCDMA.req),
+    .val_snk  (rdCDMA.valid),
+    .rdy_snk  (rdCDMA.ready),
+    .data_src (rdCDMA_que.req),
+    .val_src  (rdCDMA_que.valid),
+    .rdy_src  (rdCDMA_que.ready)
+);
+
+// Read ctl queue: req.ctl is pushed at issue, rdy_src is driven by the engine's done (val_src left open)
+queue_stream #(.QTYPE(logic)) inst_ctl_seq_rd (
+    .aclk     (aclk),
+    .aresetn  (aresetn),
+    .data_snk (rdCDMA_que.req.ctl),
+    .val_snk  (rd_ctl_push),
+    .rdy_snk  (rd_ctl_space),
+    .data_src (rd_ctl_flag),
+    .val_src  (),
+    .rdy_src  (rdCDMA_int.done)
+);
+
+// Issue side: a request is handed over only in a cycle where one is pending and both
+// the read engine and the ctl queue can take it; the same strobe pushes the ctl flag.
+assign rdCDMA_que.ready = rdCDMA_que.valid & rdCDMA_int.ready & rd_ctl_space;
+assign rdCDMA_int.valid = rdCDMA_que.ready;
+assign rd_ctl_push = rdCDMA_que.ready;
+assign rdCDMA_int.req = rdCDMA_que.req;
+
+// Completion side: the engine's done is passed through to rdCDMA_que and is only
+// reported on rdCDMA when the ctl flag at the output of the ctl queue is set.
+assign rdCDMA_que.done = rdCDMA_int.done;
+assign rdCDMA.done = rdCDMA_int.done & rd_ctl_flag;
+
+// The engine's tlast is masked with the same ctl flag.
+assign axis_ddr_out.tlast = rd_eng_tlast & rd_ctl_flag;
+
+// WR ------------------------------------------------------------------------------------------
+dmaIntf wrCDMA_que ();   // request at the output of the write request queue
+dmaIntf wrCDMA_int ();   // request / completion as seen by the write engine
+logic wr_ctl_push;       // push strobe of the write ctl queue
+logic wr_ctl_space;      // rdy_snk of the write ctl queue
+logic wr_ctl_flag;       // ctl flag at the output of the write ctl queue
+
+// Write request queue
+queue_stream #(.QTYPE(dma_req_t)) inst_wrdma_out (
+    .aclk     (aclk),
+    .aresetn  (aresetn),
+    .data_snk (wrCDMA.req),
+    .val_snk  (wrCDMA.valid),
+    .rdy_snk  (wrCDMA.ready),
+    .data_src (wrCDMA_que.req),
+    .val_src  (wrCDMA_que.valid),
+    .rdy_src  (wrCDMA_que.ready)
+);
+
+// Write ctl queue: req.ctl is pushed at issue, rdy_src is driven by the engine's done (val_src left open)
+queue_stream #(.QTYPE(logic)) inst_ctl_seq_wr (
+    .aclk     (aclk),
+    .aresetn  (aresetn),
+    .data_snk (wrCDMA_que.req.ctl),
+    .val_snk  (wr_ctl_push),
+    .rdy_snk  (wr_ctl_space),
+    .data_src (wr_ctl_flag),
+    .val_src  (),
+    .rdy_src  (wrCDMA_int.done)
+);
+
+// Issue side: a request is handed over only in a cycle where one is pending and both
+// the write engine and the ctl queue can take it; the same strobe pushes the ctl flag.
+assign wrCDMA_que.ready = wrCDMA_que.valid & wrCDMA_int.ready & wr_ctl_space;
+assign wrCDMA_int.valid = wrCDMA_que.ready;
+assign wr_ctl_push = wrCDMA_que.ready;
+assign wrCDMA_int.req = wrCDMA_que.req;
+
+// Completion side: the engine's done is passed through to wrCDMA_que and is only
+// reported on wrCDMA when the ctl flag at the output of the ctl queue is set.
+// (No tlast handling here: the write engine consumes the stream.)
+assign wrCDMA_que.done = wrCDMA_int.done;
+assign wrCDMA.done = wrCDMA_int.done & wr_ctl_flag;
+
+//
+// DMA engines
+//
+axi_dma_rd #(
+    .AXI_DATA_WIDTH    (AXI_DATA_BITS),
+    .AXI_ADDR_WIDTH    (AXI_ADDR_BITS),
+    .AXI_STRB_WIDTH    (AXI_DATA_BITS/8),
+    .AXI_MAX_BURST_LEN (64),
+    .AXIS_DATA_WIDTH   (AXI_DATA_BITS),
+    .AXIS_KEEP_ENABLE  (1),
+    .AXIS_KEEP_WIDTH   (AXI_DATA_BITS/8),
+    .AXIS_LAST_ENABLE  (1'b1),
+    .LEN_WIDTH         (LEN_BITS)
+)
+axi_dma_rd_inst (
+    .aclk    (aclk),
+    .aresetn (aresetn),
+
+    //
+    // Read descriptor in
+    //
+    .s_axis_read_desc_valid (rdCDMA_int.valid),
+    .s_axis_read_desc_ready (rdCDMA_int.ready),
+    .s_axis_read_desc_addr  (rdCDMA_int.req.paddr),
+    .s_axis_read_desc_len   (rdCDMA_int.req.len),
+
+    //
+    // Read descriptor status out
+    //
+    .m_axis_read_desc_status_valid (rdCDMA_int.done),
+
+    //
+    // Read data stream out (tlast goes through the ctl mask above)
+    //
+    .m_axis_read_data_tvalid (axis_ddr_out.tvalid),
+    .m_axis_read_data_tready (axis_ddr_out.tready),
+    .m_axis_read_data_tdata  (axis_ddr_out.tdata),
+    .m_axis_read_data_tkeep  (axis_ddr_out.tkeep),
+    .m_axis_read_data_tlast  (rd_eng_tlast),
+
+    //
+    // AXI master: AR and R channels
+    //
+    .m_axi_arvalid (axi_ddr_in.arvalid),
+    .m_axi_arready (axi_ddr_in.arready),
+    .m_axi_arid    (axi_ddr_in.arid),
+    .m_axi_araddr  (axi_ddr_in.araddr),
+    .m_axi_arlen   (axi_ddr_in.arlen),
+    .m_axi_arsize  (axi_ddr_in.arsize),
+    .m_axi_arburst (axi_ddr_in.arburst),
+    .m_axi_arlock  (axi_ddr_in.arlock),
+    .m_axi_arcache (axi_ddr_in.arcache),
+    .m_axi_arprot  (axi_ddr_in.arprot),
+    .m_axi_rvalid  (axi_ddr_in.rvalid),
+    .m_axi_rready  (axi_ddr_in.rready),
+    .m_axi_rid     (axi_ddr_in.rid),
+    .m_axi_rdata   (axi_ddr_in.rdata),
+    .m_axi_rresp   (axi_ddr_in.rresp),
+    .m_axi_rlast   (axi_ddr_in.rlast)
+);
+
+axi_dma_wr #(
+    .AXI_DATA_WIDTH    (AXI_DATA_BITS),
+    .AXI_ADDR_WIDTH    (AXI_ADDR_BITS),
+    .AXI_STRB_WIDTH    (AXI_DATA_BITS/8),
+    .AXI_MAX_BURST_LEN (64),
+    .AXIS_DATA_WIDTH   (AXI_DATA_BITS),
+    .AXIS_KEEP_ENABLE  (1),
+    .AXIS_KEEP_WIDTH   (AXI_DATA_BITS/8),
+    .AXIS_LAST_ENABLE  (0),
+    .LEN_WIDTH         (LEN_BITS)
+)
+axi_dma_wr_inst (
+    .aclk    (aclk),
+    .aresetn (aresetn),
+
+    //
+    // Write descriptor in
+    //
+    .s_axis_write_desc_valid (wrCDMA_int.valid),
+    .s_axis_write_desc_ready (wrCDMA_int.ready),
+    .s_axis_write_desc_addr  (wrCDMA_int.req.paddr),
+    .s_axis_write_desc_len   (wrCDMA_int.req.len),
+
+    //
+    // Write descriptor status out
+    //
+    .m_axis_write_desc_status_valid (wrCDMA_int.done),
+
+    //
+    // Write data stream in
+    //
+    .s_axis_write_data_tvalid (axis_ddr_in.tvalid),
+    .s_axis_write_data_tready (axis_ddr_in.tready),
+    .s_axis_write_data_tdata  (axis_ddr_in.tdata),
+    .s_axis_write_data_tkeep  (axis_ddr_in.tkeep),
+    .s_axis_write_data_tlast  (axis_ddr_in.tlast),
+
+    //
+    // AXI master: AW, W and B channels
+    //
+    .m_axi_awvalid (axi_ddr_in.awvalid),
+    .m_axi_awready (axi_ddr_in.awready),
+    .m_axi_awid    (axi_ddr_in.awid),
+    .m_axi_awaddr  (axi_ddr_in.awaddr),
+    .m_axi_awlen   (axi_ddr_in.awlen),
+    .m_axi_awsize  (axi_ddr_in.awsize),
+    .m_axi_awburst (axi_ddr_in.awburst),
+    .m_axi_awlock  (axi_ddr_in.awlock),
+    .m_axi_awcache (axi_ddr_in.awcache),
+    .m_axi_awprot  (axi_ddr_in.awprot),
+    .m_axi_wvalid  (axi_ddr_in.wvalid),
+    .m_axi_wready  (axi_ddr_in.wready),
+    .m_axi_wdata   (axi_ddr_in.wdata),
+    .m_axi_wstrb   (axi_ddr_in.wstrb),
+    .m_axi_wlast   (axi_ddr_in.wlast),
+    .m_axi_bvalid  (axi_ddr_in.bvalid),
+    .m_axi_bready  (axi_ddr_in.bready),
+    .m_axi_bid     (axi_ddr_in.bid),
+    .m_axi_bresp   (axi_ddr_in.bresp)
+);
+
+endmodule
diff --git a/hw/hdl/cdma_unaglined/axi_dma_rd.sv b/hw/hdl/cdma_unaglined/axi_dma_rd.sv
new file mode 100644
index 00000000..a3fe8423
--- /dev/null
+++ b/hw/hdl/cdma_unaglined/axi_dma_rd.sv
@@ -0,0 +1,559 @@
+/*
+Copyright (c) 2018 Alex Forencich
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/ + +// Language: Verilog 2001 + +`timescale 1ns / 1ps + +/* + * AXI4 DMA + */ +module axi_dma_rd # +( + // Width of AXI data bus in bits + parameter AXI_DATA_WIDTH = 32, + // Width of AXI address bus in bits + parameter AXI_ADDR_WIDTH = 16, + // Width of AXI wstrb (width of data bus in words) + parameter AXI_STRB_WIDTH = (AXI_DATA_WIDTH/8), + // Maximum AXI burst length to generate + parameter AXI_MAX_BURST_LEN = 16, + // Width of AXI stream interfaces in bits + parameter AXIS_DATA_WIDTH = AXI_DATA_WIDTH, + // Use AXI stream tkeep signal + parameter AXIS_KEEP_ENABLE = (AXIS_DATA_WIDTH>8), + // AXI stream tkeep signal width (words per cycle) + parameter AXIS_KEEP_WIDTH = (AXIS_DATA_WIDTH/8), + // Use AXI stream tlast signal + parameter AXIS_LAST_ENABLE = 1, + // Width of length field + parameter LEN_WIDTH = 20 +) +( + input logic aclk, + input logic aresetn, + + /* + * AXI read descriptor input + */ + input logic [AXI_ADDR_WIDTH-1:0] s_axis_read_desc_addr, + input logic [LEN_WIDTH-1:0] s_axis_read_desc_len, + input logic s_axis_read_desc_valid, + output logic s_axis_read_desc_ready, + + /* + * AXI read descriptor status output + */ + output logic m_axis_read_desc_status_valid, + + /* + * AXI stream read data output + */ + output logic [AXIS_DATA_WIDTH-1:0] m_axis_read_data_tdata, + output logic [AXIS_KEEP_WIDTH-1:0] m_axis_read_data_tkeep, + output logic m_axis_read_data_tvalid, + input logic m_axis_read_data_tready, + output logic m_axis_read_data_tlast, + + /* + * AXI master interface + */ + output logic [0:0] m_axi_arid, + output logic [AXI_ADDR_WIDTH-1:0] m_axi_araddr, + output logic [7:0] m_axi_arlen, + output logic [2:0] m_axi_arsize, + output logic [1:0] m_axi_arburst, + output logic m_axi_arlock, + output logic [3:0] m_axi_arcache, + output logic [2:0] m_axi_arprot, + output logic m_axi_arvalid, + input logic m_axi_arready, + input logic [0:0] m_axi_rid, + input logic [AXI_DATA_WIDTH-1:0] m_axi_rdata, + input logic [1:0] m_axi_rresp, + input logic 
m_axi_rlast, + input logic m_axi_rvalid, + output logic m_axi_rready +); + +localparam AXI_WORD_WIDTH = AXI_STRB_WIDTH; +localparam AXI_WORD_SIZE = AXI_DATA_WIDTH/AXI_WORD_WIDTH; +localparam AXI_BURST_SIZE = $clog2(AXI_STRB_WIDTH); +localparam AXI_MAX_BURST_SIZE = AXI_MAX_BURST_LEN << AXI_BURST_SIZE; + +localparam AXIS_KEEP_WIDTH_INT = AXIS_KEEP_ENABLE ? AXIS_KEEP_WIDTH : 1; +localparam AXIS_WORD_WIDTH = AXIS_KEEP_WIDTH_INT; +localparam AXIS_WORD_SIZE = AXIS_DATA_WIDTH/AXIS_WORD_WIDTH; + +localparam OFFSET_WIDTH = AXI_STRB_WIDTH > 1 ? $clog2(AXI_STRB_WIDTH) : 1; +localparam OFFSET_MASK = AXI_STRB_WIDTH > 1 ? {OFFSET_WIDTH{1'b1}} : 0; +localparam ADDR_MASK = {AXI_ADDR_WIDTH{1'b1}} << $clog2(AXI_STRB_WIDTH); +localparam CYCLE_COUNT_WIDTH = LEN_WIDTH - AXI_BURST_SIZE + 1; + +localparam [0:0] + AXI_STATE_IDLE = 1'd0, + AXI_STATE_START = 1'd1; + +logic [0:0] axi_state_reg = AXI_STATE_IDLE, axi_state_next; + +localparam [0:0] + AXIS_STATE_IDLE = 1'd0, + AXIS_STATE_READ = 1'd1; + +logic [0:0] axis_state_reg = AXIS_STATE_IDLE, axis_state_next; + +// datapath control signals +logic transfer_in_save; + +logic [AXI_ADDR_WIDTH-1:0] addr_reg = {AXI_ADDR_WIDTH{1'b0}}, addr_next; +logic [LEN_WIDTH-1:0] op_word_count_reg = {LEN_WIDTH{1'b0}}, op_word_count_next; +logic [LEN_WIDTH-1:0] tr_word_count_reg = {LEN_WIDTH{1'b0}}, tr_word_count_next; + +typedef struct packed { + logic [OFFSET_WIDTH-1:0] axis_cmd_offset; + logic [OFFSET_WIDTH-1:0] axis_cmd_last_cycle_offset; + logic axis_cmd_bubble_cycle; + logic [CYCLE_COUNT_WIDTH-1:0] axis_cmd_input_cycle_count; + logic [CYCLE_COUNT_WIDTH-1:0] axis_cmd_output_cycle_count; +} cdma_rd_cmd_t; + +logic ost_snk_valid, ost_snk_ready; +cdma_rd_cmd_t ost_snk_data; +logic ost_src_valid, ost_src_ready; +cdma_rd_cmd_t ost_src_data; + +logic [OFFSET_WIDTH-1:0] offset_reg = {OFFSET_WIDTH{1'b0}}, offset_next; +logic [OFFSET_WIDTH-1:0] last_cycle_offset_reg = {OFFSET_WIDTH{1'b0}}, last_cycle_offset_next; +logic [CYCLE_COUNT_WIDTH-1:0] 
input_cycle_count_reg = {CYCLE_COUNT_WIDTH{1'b0}}, input_cycle_count_next; +logic [CYCLE_COUNT_WIDTH-1:0] output_cycle_count_reg = {CYCLE_COUNT_WIDTH{1'b0}}, output_cycle_count_next; +logic input_active_reg = 1'b0, input_active_next; +logic output_active_reg = 1'b0, output_active_next; +logic bubble_cycle_reg = 1'b0, bubble_cycle_next; +logic first_cycle_reg = 1'b0, first_cycle_next; +logic output_last_cycle_reg = 1'b0, output_last_cycle_next; + +logic m_axis_read_desc_status_valid_reg = 1'b0, m_axis_read_desc_status_valid_next; + +logic [AXI_ADDR_WIDTH-1:0] m_axi_araddr_reg = {AXI_ADDR_WIDTH{1'b0}}, m_axi_araddr_next; +logic [7:0] m_axi_arlen_reg = 8'd0, m_axi_arlen_next; +logic m_axi_arvalid_reg = 1'b0, m_axi_arvalid_next; +logic m_axi_rready_reg = 1'b0, m_axi_rready_next; + +logic [AXI_DATA_WIDTH-1:0] save_axi_rdata_reg = {AXI_DATA_WIDTH{1'b0}}; + +logic [AXI_DATA_WIDTH-1:0] shift_axi_rdata = {m_axi_rdata, save_axi_rdata_reg} >> ((AXI_STRB_WIDTH-offset_reg)*AXI_WORD_SIZE); + +// internal datapath +logic [AXIS_DATA_WIDTH-1:0] m_axis_read_data_tdata_int; +logic [AXIS_KEEP_WIDTH-1:0] m_axis_read_data_tkeep_int; +logic m_axis_read_data_tvalid_int; +logic m_axis_read_data_tready_int_reg = 1'b0; +logic m_axis_read_data_tlast_int; +logic m_axis_read_data_tready_int_early; + +assign m_axis_read_desc_status_valid = m_axis_read_desc_status_valid_reg; + +assign m_axi_arid = 0; +assign m_axi_araddr = m_axi_araddr_reg; +assign m_axi_arlen = m_axi_arlen_reg; +assign m_axi_arsize = AXI_BURST_SIZE; +assign m_axi_arburst = 2'b01; +assign m_axi_arlock = 1'b0; +assign m_axi_arcache = 4'b0011; +assign m_axi_arprot = 3'b010; +assign m_axi_arvalid = m_axi_arvalid_reg; +assign m_axi_rready = m_axi_rready_reg; + +logic [AXI_ADDR_WIDTH-1:0] addr_plus_max_burst = addr_reg + AXI_MAX_BURST_SIZE; +logic [AXI_ADDR_WIDTH-1:0] addr_plus_count = addr_reg + op_word_count_reg; + +// Outstanding queue +queue_stream #(.QTYPE(cdma_rd_cmd_t)) inst_outstanding_que ( + .aclk(aclk), + .aresetn(aresetn), 
+ .val_snk(ost_snk_valid), + .rdy_snk(ost_snk_ready), + .data_snk(ost_snk_data), + .val_src(ost_src_valid), + .rdy_src(ost_src_ready), + .data_src(ost_src_data) +); + +// NSL Requests +always_comb begin + axi_state_next = AXI_STATE_IDLE; + + m_axi_araddr_next = m_axi_araddr_reg; + m_axi_arlen_next = m_axi_arlen_reg; + m_axi_arvalid_next = m_axi_arvalid_reg && !m_axi_arready; + + addr_next = addr_reg; + op_word_count_next = op_word_count_reg; + tr_word_count_next = tr_word_count_reg; + + s_axis_read_desc_ready = 1'b0; + + ost_snk_valid = 1'b0; + ost_snk_data = 0; + + case (axi_state_reg) + AXI_STATE_IDLE: begin + // idle state - load new descriptor to start operation + s_axis_read_desc_ready = ost_snk_ready; + + if (s_axis_read_desc_valid & s_axis_read_desc_ready) begin + ost_snk_valid = 1'b1; + + addr_next = s_axis_read_desc_addr; + op_word_count_next = s_axis_read_desc_len; + + ost_snk_data.axis_cmd_offset = AXI_STRB_WIDTH > 1 ? AXI_STRB_WIDTH - (s_axis_read_desc_addr & OFFSET_MASK) : 0; + ost_snk_data.axis_cmd_last_cycle_offset = s_axis_read_desc_len & OFFSET_MASK; + ost_snk_data.axis_cmd_bubble_cycle = ost_snk_data.axis_cmd_offset > 0; + ost_snk_data.axis_cmd_input_cycle_count = (op_word_count_next + (s_axis_read_desc_addr & OFFSET_MASK) - 1) >> AXI_BURST_SIZE; + ost_snk_data.axis_cmd_output_cycle_count = (op_word_count_next - 1) >> AXI_BURST_SIZE; + + axi_state_next = AXI_STATE_START; + end else begin + axi_state_next = AXI_STATE_IDLE; + end + end + + AXI_STATE_START: begin + // start state - initiate new AXI transfer + if (!m_axi_arvalid) begin + if (op_word_count_reg <= AXI_MAX_BURST_SIZE - (addr_reg & OFFSET_MASK) || AXI_MAX_BURST_SIZE >= 4096) begin + // packet smaller than max burst size + if (addr_reg[12] != addr_plus_count[12]) begin + // crosses 4k boundary + tr_word_count_next = 13'h1000 - addr_reg[11:0]; + end else begin + // does not cross 4k boundary + tr_word_count_next = op_word_count_reg; + end + end else begin + // packet larger than max burst 
size + if (addr_reg[12] != addr_plus_max_burst[12]) begin + // crosses 4k boundary + tr_word_count_next = 13'h1000 - addr_reg[11:0]; + end else begin + // does not cross 4k boundary + tr_word_count_next = AXI_MAX_BURST_SIZE - (addr_reg & OFFSET_MASK); + end + end + + m_axi_araddr_next = addr_reg; + m_axi_arlen_next = (tr_word_count_next + (addr_reg & OFFSET_MASK) - 1) >> AXI_BURST_SIZE; + m_axi_arvalid_next = 1'b1; + + addr_next = addr_reg + tr_word_count_next; + op_word_count_next = op_word_count_reg - tr_word_count_next; + + if (op_word_count_next > 0) begin + axi_state_next = AXI_STATE_START; + end else begin + axi_state_next = AXI_STATE_IDLE; + end + end else begin + axi_state_next = AXI_STATE_START; + end + end + endcase +end + +// NSL Data +always_comb begin + axis_state_next = AXIS_STATE_IDLE; + + m_axis_read_desc_status_valid_next = 1'b0; + + m_axis_read_data_tdata_int = shift_axi_rdata; + m_axis_read_data_tkeep_int = {AXIS_KEEP_WIDTH{1'b1}}; + m_axis_read_data_tlast_int = 1'b0; + m_axis_read_data_tvalid_int = 1'b0; + + m_axi_rready_next = 1'b0; + + transfer_in_save = 1'b0; + + offset_next = offset_reg; + last_cycle_offset_next = last_cycle_offset_reg; + input_cycle_count_next = input_cycle_count_reg; + output_cycle_count_next = output_cycle_count_reg; + bubble_cycle_next = bubble_cycle_reg; + + output_last_cycle_next = output_last_cycle_reg; + input_active_next = input_active_reg; + output_active_next = output_active_reg; + first_cycle_next = first_cycle_reg; + + ost_src_ready = 1'b0; + + case (axis_state_reg) + AXIS_STATE_IDLE: begin + // idle state - load new descriptor to start operation + m_axi_rready_next = 1'b0; + + // store transfer parameters + offset_next = ost_src_data.axis_cmd_offset; + last_cycle_offset_next = ost_src_data.axis_cmd_last_cycle_offset; + input_cycle_count_next = ost_src_data.axis_cmd_input_cycle_count; + output_cycle_count_next = ost_src_data.axis_cmd_output_cycle_count; + bubble_cycle_next = ost_src_data.axis_cmd_bubble_cycle; + 
+ output_last_cycle_next = output_cycle_count_next == 0; + input_active_next = 1'b1; + output_active_next = 1'b1; + first_cycle_next = 1'b1; + + if (ost_src_valid) begin + ost_src_ready = 1'b1; + + m_axi_rready_next = m_axis_read_data_tready_int_early; + axis_state_next = AXIS_STATE_READ; + end + end + AXIS_STATE_READ: begin + // handle AXI read data + m_axi_rready_next = m_axis_read_data_tready_int_early && input_active_reg; + + if (m_axis_read_data_tready_int_reg && ((m_axi_rready && m_axi_rvalid) || !input_active_reg)) begin + // transfer in AXI read data + transfer_in_save = m_axi_rready && m_axi_rvalid; + + if (first_cycle_reg && bubble_cycle_reg) begin + if (input_active_reg) begin + input_cycle_count_next = input_cycle_count_reg - 1; + input_active_next = input_cycle_count_reg > 0; + end + bubble_cycle_next = 1'b0; + first_cycle_next = 1'b0; + + m_axi_rready_next = m_axis_read_data_tready_int_early && input_active_next; + axis_state_next = AXIS_STATE_READ; + end else begin + // update counters + if (input_active_reg) begin + input_cycle_count_next = input_cycle_count_reg - 1; + input_active_next = input_cycle_count_reg > 0; + end + if (output_active_reg) begin + output_cycle_count_next = output_cycle_count_reg - 1; + output_active_next = output_cycle_count_reg > 0; + end + output_last_cycle_next = output_cycle_count_next == 0; + bubble_cycle_next = 1'b0; + first_cycle_next = 1'b0; + + // pass through read data + m_axis_read_data_tdata_int = shift_axi_rdata; + m_axis_read_data_tkeep_int = {AXIS_KEEP_WIDTH_INT{1'b1}}; + m_axis_read_data_tvalid_int = 1'b1; + + if (output_last_cycle_reg) begin + // no more data to transfer, finish operation + if (last_cycle_offset_reg > 0) begin + m_axis_read_data_tkeep_int = {AXIS_KEEP_WIDTH_INT{1'b1}} >> (AXIS_KEEP_WIDTH_INT - last_cycle_offset_reg); + end + m_axis_read_data_tlast_int = 1'b1; + + m_axis_read_desc_status_valid_next = 1'b1; + + m_axi_rready_next = 1'b0; + axis_state_next = AXIS_STATE_IDLE; + end else begin + // 
more cycles in AXI transfer + m_axi_rready_next = m_axis_read_data_tready_int_early && input_active_next; + axis_state_next = AXIS_STATE_READ; + end + end + end else begin + axis_state_next = AXIS_STATE_READ; + end + end + endcase +end + +always_ff @(posedge aclk) begin + if (~aresetn) begin + axi_state_reg <= AXI_STATE_IDLE; + axis_state_reg <= AXIS_STATE_IDLE; + m_axis_read_desc_status_valid_reg <= 1'b0; + m_axi_arvalid_reg <= 1'b0; + m_axi_rready_reg <= 1'b0; + end else begin + axi_state_reg <= axi_state_next; + axis_state_reg <= axis_state_next; + m_axis_read_desc_status_valid_reg <= m_axis_read_desc_status_valid_next; + m_axi_arvalid_reg <= m_axi_arvalid_next; + m_axi_rready_reg <= m_axi_rready_next; + end + + m_axi_araddr_reg <= m_axi_araddr_next; + m_axi_arlen_reg <= m_axi_arlen_next; + + addr_reg <= addr_next; + op_word_count_reg <= op_word_count_next; + tr_word_count_reg <= tr_word_count_next; + + offset_reg <= offset_next; + last_cycle_offset_reg <= last_cycle_offset_next; + input_cycle_count_reg <= input_cycle_count_next; + output_cycle_count_reg <= output_cycle_count_next; + input_active_reg <= input_active_next; + output_active_reg <= output_active_next; + bubble_cycle_reg <= bubble_cycle_next; + first_cycle_reg <= first_cycle_next; + output_last_cycle_reg <= output_last_cycle_next; + + if (transfer_in_save) begin + save_axi_rdata_reg <= m_axi_rdata; + end +end + +// output datapath logic +logic [AXIS_DATA_WIDTH-1:0] m_axis_read_data_tdata_reg = {AXIS_DATA_WIDTH{1'b0}}; +logic [AXIS_KEEP_WIDTH-1:0] m_axis_read_data_tkeep_reg = {AXIS_KEEP_WIDTH{1'b0}}; +logic m_axis_read_data_tvalid_reg = 1'b0, m_axis_read_data_tvalid_next; +logic m_axis_read_data_tlast_reg = 1'b0; + +logic [AXIS_DATA_WIDTH-1:0] temp_m_axis_read_data_tdata_reg = {AXIS_DATA_WIDTH{1'b0}}; +logic [AXIS_KEEP_WIDTH-1:0] temp_m_axis_read_data_tkeep_reg = {AXIS_KEEP_WIDTH{1'b0}}; +logic temp_m_axis_read_data_tvalid_reg = 1'b0, temp_m_axis_read_data_tvalid_next; +logic 
temp_m_axis_read_data_tlast_reg = 1'b0; + +// datapath control +logic store_axis_int_to_output; +logic store_axis_int_to_temp; +logic store_axis_temp_to_output; + +assign m_axis_read_data_tdata = m_axis_read_data_tdata_reg; +assign m_axis_read_data_tkeep = AXIS_KEEP_ENABLE ? m_axis_read_data_tkeep_reg : {AXIS_KEEP_WIDTH{1'b1}}; +assign m_axis_read_data_tvalid = m_axis_read_data_tvalid_reg; +assign m_axis_read_data_tlast = AXIS_LAST_ENABLE ? m_axis_read_data_tlast_reg : 1'b1; + +// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) +assign m_axis_read_data_tready_int_early = m_axis_read_data_tready || (!temp_m_axis_read_data_tvalid_reg && (!m_axis_read_data_tvalid_reg || !m_axis_read_data_tvalid_int)); + +always_comb begin + // transfer sink ready state to source + m_axis_read_data_tvalid_next = m_axis_read_data_tvalid_reg; + temp_m_axis_read_data_tvalid_next = temp_m_axis_read_data_tvalid_reg; + + store_axis_int_to_output = 1'b0; + store_axis_int_to_temp = 1'b0; + store_axis_temp_to_output = 1'b0; + + if (m_axis_read_data_tready_int_reg) begin + // input is ready + if (m_axis_read_data_tready || !m_axis_read_data_tvalid_reg) begin + // output is ready or currently not valid, transfer data to output + m_axis_read_data_tvalid_next = m_axis_read_data_tvalid_int; + store_axis_int_to_output = 1'b1; + end else begin + // output is not ready, store input in temp + temp_m_axis_read_data_tvalid_next = m_axis_read_data_tvalid_int; + store_axis_int_to_temp = 1'b1; + end + end else if (m_axis_read_data_tready) begin + // input is not ready, but output is ready + m_axis_read_data_tvalid_next = temp_m_axis_read_data_tvalid_reg; + temp_m_axis_read_data_tvalid_next = 1'b0; + store_axis_temp_to_output = 1'b1; + end +end + +always_ff @(posedge aclk) begin + if (~aresetn) begin + m_axis_read_data_tvalid_reg <= 1'b0; + m_axis_read_data_tready_int_reg <= 1'b0; + temp_m_axis_read_data_tvalid_reg <= 1'b0; + 
end else begin + m_axis_read_data_tvalid_reg <= m_axis_read_data_tvalid_next; + m_axis_read_data_tready_int_reg <= m_axis_read_data_tready_int_early; + temp_m_axis_read_data_tvalid_reg <= temp_m_axis_read_data_tvalid_next; + end + + // datapath + if (store_axis_int_to_output) begin + m_axis_read_data_tdata_reg <= m_axis_read_data_tdata_int; + m_axis_read_data_tkeep_reg <= m_axis_read_data_tkeep_int; + m_axis_read_data_tlast_reg <= m_axis_read_data_tlast_int; + end else if (store_axis_temp_to_output) begin + m_axis_read_data_tdata_reg <= temp_m_axis_read_data_tdata_reg; + m_axis_read_data_tkeep_reg <= temp_m_axis_read_data_tkeep_reg; + m_axis_read_data_tlast_reg <= temp_m_axis_read_data_tlast_reg; + end + + if (store_axis_int_to_temp) begin + temp_m_axis_read_data_tdata_reg <= m_axis_read_data_tdata_int; + temp_m_axis_read_data_tkeep_reg <= m_axis_read_data_tkeep_int; + temp_m_axis_read_data_tlast_reg <= m_axis_read_data_tlast_int; + end +end + +/* +// DEBUG +ila_ddr_rd inst_ila_ddr_rd ( + .clk(aclk), + .probe0(axi_state_reg), // + .probe1(axis_state_reg), // + .probe2(input_cycle_count_reg), // 23 + .probe3(output_cycle_count_reg), // 23 + .probe4(op_word_count_reg), // 28 + .probe5(tr_word_count_reg), // 28 + .probe6(m_axis_read_data_tvalid), // + .probe7(m_axis_read_data_tready), // + .probe8(m_axis_read_data_tlast), // + .probe9(m_axis_read_data_tkeep), // 64 + .probe10(m_axis_read_data_tdata), // 512 + .probe11(m_axis_read_desc_status_valid), // + .probe12(transfer_in_save), // + .probe13(ost_snk_valid), // + .probe14(ost_snk_ready), // + .probe15(ost_src_valid), // + .probe16(ost_src_ready), // + .probe17(addr_reg), // 64 + .probe18(ost_snk_data.axis_cmd_offset), // 6 + .probe19(ost_snk_data.axis_cmd_last_cycle_offset), // 6 + .probe20(ost_snk_data.axis_cmd_bubble_cycle), // + .probe21(offset_reg), // 6 + .probe22(last_cycle_offset_reg), // 6 + .probe23(input_active_reg),// + .probe24(output_active_reg), // + .probe25(bubble_cycle_reg), // + 
.probe26(first_cycle_reg), // + .probe27(output_last_cycle_reg), // + .probe28(s_axis_read_desc_ready), // + .probe29(s_axis_read_desc_valid), // + .probe30(m_axis_read_desc_status_valid_reg), // + .probe31(m_axi_araddr_reg), // 64 + .probe32(m_axi_arlen_reg), // 8 + .probe33(m_axi_arvalid_reg), // + .probe34(m_axi_rready_reg), // + .probe35(m_axis_read_data_tvalid_int), // + .probe36(m_axis_read_data_tready_int_reg), // + .probe37(m_axis_read_data_tlast_int), // + .probe38(m_axis_read_data_tready_int_early), // + .probe39(save_axi_rdata_reg), // 512 + .probe40(shift_axi_rdata) // 512 +); +*/ + +endmodule \ No newline at end of file diff --git a/hw/hdl/cdma_unaglined/axi_dma_wr.sv b/hw/hdl/cdma_unaglined/axi_dma_wr.sv new file mode 100644 index 00000000..988fe17d --- /dev/null +++ b/hw/hdl/cdma_unaglined/axi_dma_wr.sv @@ -0,0 +1,666 @@ +/* +Copyright (c) 2018 Alex Forencich +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +// Language: SystemVerilog (the module uses logic, always_comb and always_ff) + +`timescale 1ns / 1ps + +/* + * AXI4 DMA - write engine: consumes a write descriptor (address, length) and AXI stream data and issues AXI4 write bursts + */ +module axi_dma_wr # +( + // Width of AXI data bus in bits + parameter AXI_DATA_WIDTH = 32, + // Width of AXI address bus in bits + parameter AXI_ADDR_WIDTH = 16, + // Width of AXI wstrb (width of data bus in words) + parameter AXI_STRB_WIDTH = (AXI_DATA_WIDTH/8), + // Maximum AXI burst length to generate + parameter AXI_MAX_BURST_LEN = 16, + // Width of AXI stream interfaces in bits + parameter AXIS_DATA_WIDTH = AXI_DATA_WIDTH, + // Use AXI stream tkeep signal + parameter AXIS_KEEP_ENABLE = (AXIS_DATA_WIDTH>8), + // AXI stream tkeep signal width (words per cycle) + parameter AXIS_KEEP_WIDTH = (AXIS_DATA_WIDTH/8), + // Use AXI stream tlast signal + parameter AXIS_LAST_ENABLE = 0, + // Width of length field + parameter LEN_WIDTH = 20 +) +( + input logic aclk, + input logic aresetn, // active low, sampled synchronously on posedge aclk + + /* + * AXI write descriptor input + */ + input logic [AXI_ADDR_WIDTH-1:0] s_axis_write_desc_addr, + input logic [LEN_WIDTH-1:0] s_axis_write_desc_len, + input logic s_axis_write_desc_valid, + output logic s_axis_write_desc_ready, + + /* + * AXI write descriptor status output (asserted for one cycle when the B response of a descriptor's last burst is accepted) + */ + output logic m_axis_write_desc_status_valid, + + /* + * AXI stream write data input + */ + input logic [AXIS_DATA_WIDTH-1:0] s_axis_write_data_tdata, + input logic [AXIS_KEEP_WIDTH-1:0] s_axis_write_data_tkeep, + input logic s_axis_write_data_tvalid, + output logic s_axis_write_data_tready, + input logic s_axis_write_data_tlast, + + /* + * AXI master interface + */ + output logic [0:0] m_axi_awid, + output logic [AXI_ADDR_WIDTH-1:0] m_axi_awaddr, + output logic [7:0] m_axi_awlen, + output logic [2:0] m_axi_awsize, + output logic [1:0] m_axi_awburst, + output logic m_axi_awlock, + output logic [3:0] m_axi_awcache, + output logic [2:0] m_axi_awprot, + output logic m_axi_awvalid, + input logic m_axi_awready, + output logic [AXI_DATA_WIDTH-1:0] m_axi_wdata, + output logic [AXI_STRB_WIDTH-1:0] m_axi_wstrb, + output logic 
m_axi_wlast, + output logic m_axi_wvalid, + input logic m_axi_wready, + input logic [0:0] m_axi_bid, + input logic [1:0] m_axi_bresp, + input logic m_axi_bvalid, + output logic m_axi_bready +); + +localparam AXI_WORD_WIDTH = AXI_STRB_WIDTH; +localparam AXI_WORD_SIZE = AXI_DATA_WIDTH/AXI_WORD_WIDTH; +localparam AXI_BURST_SIZE = $clog2(AXI_STRB_WIDTH); // log2(bytes per AXI beat), driven onto m_axi_awsize +localparam AXI_MAX_BURST_SIZE = AXI_MAX_BURST_LEN << AXI_BURST_SIZE; // largest burst in bytes + +localparam AXIS_KEEP_WIDTH_INT = AXIS_KEEP_ENABLE ? AXIS_KEEP_WIDTH : 1; +localparam AXIS_WORD_WIDTH = AXIS_KEEP_WIDTH_INT; +localparam AXIS_WORD_SIZE = AXIS_DATA_WIDTH/AXIS_WORD_WIDTH; + +localparam OFFSET_WIDTH = AXI_STRB_WIDTH > 1 ? $clog2(AXI_STRB_WIDTH) : 1; +localparam OFFSET_MASK = AXI_STRB_WIDTH > 1 ? {OFFSET_WIDTH{1'b1}} : 0; // selects the byte offset of an address within one AXI beat +localparam ADDR_MASK = {AXI_ADDR_WIDTH{1'b1}} << $clog2(AXI_STRB_WIDTH); +localparam CYCLE_COUNT_WIDTH = LEN_WIDTH - AXI_BURST_SIZE + 1; + +localparam STATUS_FIFO_ADDR_WIDTH = 5; // status FIFO depth is 2**STATUS_FIFO_ADDR_WIDTH entries + +localparam [1:0] + STATE_IDLE = 2'd0, // wait for a write descriptor + STATE_START = 2'd1, // size and issue the next AW burst + STATE_WRITE = 2'd2; // stream the W beats of the current burst + +logic[1:0] state_reg = STATE_IDLE, state_next; + +// datapath control signals +logic transfer_in_save; +logic flush_save; +logic status_fifo_we; + +integer i; +logic [OFFSET_WIDTH:0] cycle_size; + +logic [AXI_ADDR_WIDTH-1:0] addr_reg = {AXI_ADDR_WIDTH{1'b0}}, addr_next; +logic [LEN_WIDTH-1:0] op_word_count_reg = {LEN_WIDTH{1'b0}}, op_word_count_next; +logic [LEN_WIDTH-1:0] tr_word_count_reg = {LEN_WIDTH{1'b0}}, tr_word_count_next; + +logic [OFFSET_WIDTH-1:0] offset_reg = {OFFSET_WIDTH{1'b0}}, offset_next; +logic [AXI_STRB_WIDTH-1:0] strb_offset_mask_reg = {AXI_STRB_WIDTH{1'b1}}, strb_offset_mask_next; +logic zero_offset_reg = 1'b1, zero_offset_next; +logic [OFFSET_WIDTH-1:0] last_cycle_offset_reg = {OFFSET_WIDTH{1'b0}}, last_cycle_offset_next; +logic [LEN_WIDTH-1:0] length_reg = {LEN_WIDTH{1'b0}}, length_next; +logic [CYCLE_COUNT_WIDTH-1:0] input_cycle_count_reg = {CYCLE_COUNT_WIDTH{1'b0}}, input_cycle_count_next; +logic [CYCLE_COUNT_WIDTH-1:0] 
output_cycle_count_reg = {CYCLE_COUNT_WIDTH{1'b0}}, output_cycle_count_next; +logic input_active_reg = 1'b0, input_active_next; +logic first_cycle_reg = 1'b0, first_cycle_next; +logic input_last_cycle_reg = 1'b0, input_last_cycle_next; +logic output_last_cycle_reg = 1'b0, output_last_cycle_next; +logic last_transfer_reg = 1'b0, last_transfer_next; + +logic [STATUS_FIFO_ADDR_WIDTH+1-1:0] status_fifo_wr_ptr_reg = 0, status_fifo_wr_ptr_next; +logic [STATUS_FIFO_ADDR_WIDTH+1-1:0] status_fifo_rd_ptr_reg = 0, status_fifo_rd_ptr_next; +logic status_fifo_last[(2**STATUS_FIFO_ADDR_WIDTH)-1:0]; +logic status_fifo_wr_last; + +logic s_axis_write_desc_ready_reg = 1'b0, s_axis_write_desc_ready_next; + +logic m_axis_write_desc_status_valid_reg = 1'b0, m_axis_write_desc_status_valid_next; + +logic [AXI_ADDR_WIDTH-1:0] m_axi_awaddr_reg = {AXI_ADDR_WIDTH{1'b0}}, m_axi_awaddr_next; +logic [7:0] m_axi_awlen_reg = 8'd0, m_axi_awlen_next; +logic m_axi_awvalid_reg = 1'b0, m_axi_awvalid_next; +logic m_axi_bready_reg = 1'b0, m_axi_bready_next; + +logic s_axis_write_data_tready_reg = 1'b0, s_axis_write_data_tready_next; + +logic [AXIS_DATA_WIDTH-1:0] save_axis_tdata_reg = {AXIS_DATA_WIDTH{1'b0}}; +logic [AXIS_KEEP_WIDTH_INT-1:0] save_axis_tkeep_reg = {AXIS_KEEP_WIDTH_INT{1'b0}}; +logic save_axis_tlast_reg = 1'b0; + +logic [AXIS_DATA_WIDTH-1:0] shift_axis_tdata; +logic [AXIS_KEEP_WIDTH_INT-1:0] shift_axis_tkeep; +logic shift_axis_tvalid; +logic shift_axis_tlast; +logic shift_axis_input_tready; +logic shift_axis_extra_cycle_reg = 1'b0; + +// internal datapath +logic [AXI_DATA_WIDTH-1:0] m_axi_wdata_int; +logic [AXI_STRB_WIDTH-1:0] m_axi_wstrb_int; +logic m_axi_wlast_int; +logic m_axi_wvalid_int; +logic m_axi_wready_int_reg = 1'b0; +logic m_axi_wready_int_early; + +logic [14:0] tmp_loc_reg, tmp_loc_next; + +assign s_axis_write_desc_ready = s_axis_write_desc_ready_reg; + +assign m_axis_write_desc_status_valid = m_axis_write_desc_status_valid_reg; + +assign s_axis_write_data_tready = 
s_axis_write_data_tready_reg; + +assign m_axi_awid = 0; +assign m_axi_awaddr = m_axi_awaddr_reg; +assign m_axi_awlen = m_axi_awlen_reg; +assign m_axi_awsize = AXI_BURST_SIZE; +assign m_axi_awburst = 2'b01; // INCR burst +assign m_axi_awlock = 1'b0; +assign m_axi_awcache = 4'b0011; +assign m_axi_awprot = 3'b010; +assign m_axi_awvalid = m_axi_awvalid_reg; +assign m_axi_bready = m_axi_bready_reg; +// 4 KB boundary look-ahead. These must be nets: a 'logic' declaration with '= expr' is only a time-0 initial value and would never follow addr_reg / op_word_count_reg. +wire [AXI_ADDR_WIDTH-1:0] addr_plus_max_burst = addr_reg + AXI_MAX_BURST_SIZE; +wire [AXI_ADDR_WIDTH-1:0] addr_plus_count = addr_reg + op_word_count_reg; + +always_comb begin + shift_axis_tdata = {s_axis_write_data_tdata, save_axis_tdata_reg} >> ((AXIS_KEEP_WIDTH_INT-offset_reg)*AXIS_WORD_SIZE); + shift_axis_tkeep = {s_axis_write_data_tkeep, save_axis_tkeep_reg} >> (AXIS_KEEP_WIDTH_INT-offset_reg); + shift_axis_tvalid = s_axis_write_data_tvalid; + shift_axis_tlast = 1'b0; + shift_axis_input_tready = 1'b1; +end + +always_comb begin + state_next = STATE_IDLE; + + s_axis_write_desc_ready_next = 1'b0; + + m_axis_write_desc_status_valid_next = 1'b0; + + s_axis_write_data_tready_next = 1'b0; + + m_axi_awaddr_next = m_axi_awaddr_reg; + m_axi_awlen_next = m_axi_awlen_reg; + m_axi_awvalid_next = m_axi_awvalid_reg && !m_axi_awready; + m_axi_wdata_int = shift_axis_tdata; + m_axi_wstrb_int = shift_axis_tkeep; + m_axi_wlast_int = 1'b0; + m_axi_wvalid_int = 1'b0; + m_axi_bready_next = 1'b0; + + transfer_in_save = 1'b0; + flush_save = 1'b0; + status_fifo_we = 1'b0; + + cycle_size = AXIS_KEEP_WIDTH_INT; + + addr_next = addr_reg; + offset_next = offset_reg; + strb_offset_mask_next = strb_offset_mask_reg; + zero_offset_next = zero_offset_reg; + last_cycle_offset_next = last_cycle_offset_reg; + length_next = length_reg; + op_word_count_next = op_word_count_reg; + tr_word_count_next = tr_word_count_reg; + input_cycle_count_next = input_cycle_count_reg; + output_cycle_count_next = output_cycle_count_reg; + input_active_next = input_active_reg; + first_cycle_next = first_cycle_reg; + input_last_cycle_next 
= input_last_cycle_reg; + output_last_cycle_next = output_last_cycle_reg; + last_transfer_next = last_transfer_reg; + + status_fifo_rd_ptr_next = status_fifo_rd_ptr_reg; + + status_fifo_wr_last = 1'b0; + + tmp_loc_next = 0; + + case (state_reg) + STATE_IDLE: begin + // idle state - load new descriptor to start operation + flush_save = 1'b1; + s_axis_write_desc_ready_next = 1'b1; + + addr_next = s_axis_write_desc_addr; + offset_next = s_axis_write_desc_addr & OFFSET_MASK; + strb_offset_mask_next = {AXI_STRB_WIDTH{1'b1}} << (s_axis_write_desc_addr & OFFSET_MASK); + zero_offset_next = (s_axis_write_desc_addr & OFFSET_MASK) == 0; + last_cycle_offset_next = offset_next + (s_axis_write_desc_len & OFFSET_MASK); + + op_word_count_next = s_axis_write_desc_len; + first_cycle_next = 1'b1; + length_next = 0; + + if (s_axis_write_desc_ready && s_axis_write_desc_valid) begin + s_axis_write_desc_ready_next = 1'b0; + state_next = STATE_START; + end else begin + state_next = STATE_IDLE; + end + end + STATE_START: begin + // start state - initiate new AXI transfer + if (op_word_count_reg <= AXI_MAX_BURST_SIZE - (addr_reg & OFFSET_MASK) || AXI_MAX_BURST_SIZE >= 4096) begin + // packet smaller than max burst size + if (addr_reg[12] != addr_plus_count[12]) begin + // crosses 4k boundary + tr_word_count_next = 13'h1000 - addr_reg[11:0]; + end else begin + // does not cross 4k boundary + tr_word_count_next = op_word_count_reg; + end + end else begin + // packet larger than max burst size + if (addr_reg[12] != addr_plus_max_burst[12]) begin + // crosses 4k boundary + tr_word_count_next = 13'h1000 - addr_reg[11:0]; + end else begin + // does not cross 4k boundary + tr_word_count_next = AXI_MAX_BURST_SIZE - (addr_reg & OFFSET_MASK); + end + end + + input_cycle_count_next = (tr_word_count_next - 1) >> $clog2(AXIS_KEEP_WIDTH_INT); + input_last_cycle_next = input_cycle_count_next == 0; + + output_cycle_count_next = (tr_word_count_next + (addr_reg & OFFSET_MASK) - 1) >> AXI_BURST_SIZE; + 
output_last_cycle_next = output_cycle_count_next == 0; + + last_transfer_next = tr_word_count_next == op_word_count_reg; + input_active_next = 1'b1; + + if (!first_cycle_reg && last_transfer_next) begin + if (offset_reg >= last_cycle_offset_reg && last_cycle_offset_reg > 0) begin + // last cycle will be served by stored partial cycle + input_active_next = input_cycle_count_next > 0; + input_cycle_count_next = input_cycle_count_next - 1; + end + end + + if (!m_axi_awvalid_reg) begin + m_axi_awaddr_next = addr_reg; + m_axi_awlen_next = output_cycle_count_next; + m_axi_awvalid_next = s_axis_write_data_tvalid || !first_cycle_reg; + + if (m_axi_awvalid_next) begin + addr_next = addr_reg + tr_word_count_next; + op_word_count_next = op_word_count_reg - tr_word_count_next; + + s_axis_write_data_tready_next = m_axi_wready_int_early && input_active_next; + state_next = STATE_WRITE; + end else begin + state_next = STATE_START; + end + end else begin + state_next = STATE_START; + end + end + + STATE_WRITE: begin + //s_axis_write_data_tready_next = m_axi_wready_int_early && (last_transfer_reg || input_active_reg) && shift_axis_input_tready; + s_axis_write_data_tready_next = m_axi_wready_int_early && (input_active_reg) && shift_axis_input_tready; + + tmp_loc_next[0] = 1'b1; + + //if (m_axi_wready_int_reg && ((s_axis_write_data_tready && shift_axis_tvalid) || (!input_active_reg && !last_transfer_reg) || !shift_axis_input_tready)) begin + if (m_axi_wready_int_reg && ((s_axis_write_data_tready && shift_axis_tvalid) || (!input_active_reg) || !shift_axis_input_tready)) begin + tmp_loc_next[1] = 1'b1; + + if (s_axis_write_data_tready && s_axis_write_data_tvalid) begin + transfer_in_save = 1'b1; + + tmp_loc_next[2] = 1'b1; + end + + // update counters + if (first_cycle_reg) begin + length_next = length_reg + (AXIS_KEEP_WIDTH_INT - offset_reg); + + tmp_loc_next[3] = 1'b1; + end else begin + length_next = length_reg + AXIS_KEEP_WIDTH_INT; + + tmp_loc_next[4] = 1'b1; + end + if 
(input_active_reg) begin + input_cycle_count_next = input_cycle_count_reg - 1; + input_active_next = input_cycle_count_reg > 0; + + tmp_loc_next[5] = 1'b1; + end + input_last_cycle_next = input_cycle_count_next == 0; + output_cycle_count_next = output_cycle_count_reg - 1; + output_last_cycle_next = output_cycle_count_next == 0; + first_cycle_next = 1'b0; + strb_offset_mask_next = {AXI_STRB_WIDTH{1'b1}}; + + m_axi_wdata_int = shift_axis_tdata; + m_axi_wstrb_int = strb_offset_mask_reg; + m_axi_wvalid_int = 1'b1; + + if (output_last_cycle_reg) begin + m_axi_wlast_int = 1'b1; + + tmp_loc_next[6] = 1'b1; + + if (op_word_count_reg > 0) begin + // current AXI transfer complete, but there is more data to transfer + // enqueue status FIFO entry for write completion + status_fifo_we = 1'b1; + status_fifo_wr_last = 1'b0; + + tmp_loc_next[7] = 1'b1; + + s_axis_write_data_tready_next = 1'b0; + state_next = STATE_START; + end else begin + // no more data to transfer, finish operation + if (last_cycle_offset_reg > 0) begin + m_axi_wstrb_int = strb_offset_mask_reg & {AXI_STRB_WIDTH{1'b1}} >> (AXI_STRB_WIDTH - last_cycle_offset_reg); + + tmp_loc_next[8] = 1'b1; + + if (first_cycle_reg) begin + length_next = length_reg + (last_cycle_offset_reg - offset_reg); + + tmp_loc_next[9] = 1'b1; + end else begin + length_next = length_reg + last_cycle_offset_reg; + + tmp_loc_next[10] = 1'b1; + end + end + + tmp_loc_next[11] = 1'b1; + + // enqueue status FIFO entry for write completion + status_fifo_we = 1'b1; + status_fifo_wr_last = 1'b1; + + tmp_loc_next[12] = 1'b1; + + // no framing; return to idle + s_axis_write_data_tready_next = 1'b0; + s_axis_write_desc_ready_next = 1'b1; + state_next = STATE_IDLE; + end + end else begin + tmp_loc_next[13] = 1'b1; + + s_axis_write_data_tready_next = m_axi_wready_int_early && input_active_next && shift_axis_input_tready; + state_next = STATE_WRITE; + end + end else begin + tmp_loc_next[14] = 1'b1; + + state_next = STATE_WRITE; + end + end + endcase + + 
if (status_fifo_rd_ptr_reg != status_fifo_wr_ptr_reg) begin + // status FIFO not empty + if (m_axi_bready && m_axi_bvalid) begin + // got write completion, pop and return status + m_axis_write_desc_status_valid_next = status_fifo_last[status_fifo_rd_ptr_reg[STATUS_FIFO_ADDR_WIDTH-1:0]]; + status_fifo_rd_ptr_next = status_fifo_rd_ptr_reg + 1; + m_axi_bready_next = 1'b0; + end else begin + // wait for write completion + m_axi_bready_next = 1'b1; + end + end +end + +always_ff @(posedge aclk) begin + if (~aresetn) begin + state_reg <= STATE_IDLE; + s_axis_write_desc_ready_reg <= 1'b0; + m_axis_write_desc_status_valid_reg <= 1'b0; + s_axis_write_data_tready_reg <= 1'b0; + m_axi_awvalid_reg <= 1'b0; + m_axi_bready_reg <= 1'b0; + save_axis_tlast_reg <= 1'b0; + shift_axis_extra_cycle_reg <= 1'b0; + + status_fifo_wr_ptr_reg <= 0; + status_fifo_rd_ptr_reg <= 0; + + tmp_loc_reg <= 0; + end else begin + state_reg <= state_next; + s_axis_write_desc_ready_reg <= s_axis_write_desc_ready_next; + m_axis_write_desc_status_valid_reg <= m_axis_write_desc_status_valid_next; + s_axis_write_data_tready_reg <= s_axis_write_data_tready_next; + m_axi_awvalid_reg <= m_axi_awvalid_next; + m_axi_bready_reg <= m_axi_bready_next; + + tmp_loc_reg <= tmp_loc_next; + + // datapath + if (flush_save) begin + save_axis_tlast_reg <= 1'b0; + shift_axis_extra_cycle_reg <= 1'b0; + end else if (transfer_in_save) begin + save_axis_tlast_reg <= s_axis_write_data_tlast; + shift_axis_extra_cycle_reg <= s_axis_write_data_tlast & ((s_axis_write_data_tkeep >> (AXIS_KEEP_WIDTH_INT-offset_reg)) != 0); + end + + if (status_fifo_we) begin + status_fifo_wr_ptr_reg <= status_fifo_wr_ptr_reg + 1; + end + status_fifo_rd_ptr_reg <= status_fifo_rd_ptr_next; + end + + m_axi_awaddr_reg <= m_axi_awaddr_next; + m_axi_awlen_reg <= m_axi_awlen_next; + + addr_reg <= addr_next; + offset_reg <= offset_next; + strb_offset_mask_reg <= strb_offset_mask_next; + zero_offset_reg <= zero_offset_next; + last_cycle_offset_reg <= 
last_cycle_offset_next; + length_reg <= length_next; + op_word_count_reg <= op_word_count_next; + tr_word_count_reg <= tr_word_count_next; + input_cycle_count_reg <= input_cycle_count_next; + output_cycle_count_reg <= output_cycle_count_next; + input_active_reg <= input_active_next; + first_cycle_reg <= first_cycle_next; + input_last_cycle_reg <= input_last_cycle_next; + output_last_cycle_reg <= output_last_cycle_next; + last_transfer_reg <= last_transfer_next; + + if (flush_save) begin + save_axis_tkeep_reg <= {AXIS_KEEP_WIDTH_INT{1'b0}}; + end else if (transfer_in_save) begin + save_axis_tdata_reg <= s_axis_write_data_tdata; + save_axis_tkeep_reg <= AXIS_KEEP_ENABLE ? s_axis_write_data_tkeep : {AXIS_KEEP_WIDTH_INT{1'b1}}; + end + + if (status_fifo_we) begin + status_fifo_last[status_fifo_wr_ptr_reg[STATUS_FIFO_ADDR_WIDTH-1:0]] <= status_fifo_wr_last; + // the write pointer is advanced once, in the reset-qualified branch above; repeating the increment here would override its synchronous reset + end +end + +// output datapath logic +logic [AXI_DATA_WIDTH-1:0] m_axi_wdata_reg = {AXI_DATA_WIDTH{1'b0}}; +logic [AXI_STRB_WIDTH-1:0] m_axi_wstrb_reg = {AXI_STRB_WIDTH{1'b0}}; +logic m_axi_wlast_reg = 1'b0; +logic m_axi_wvalid_reg = 1'b0, m_axi_wvalid_next; + +logic [AXI_DATA_WIDTH-1:0] temp_m_axi_wdata_reg = {AXI_DATA_WIDTH{1'b0}}; +logic [AXI_STRB_WIDTH-1:0] temp_m_axi_wstrb_reg = {AXI_STRB_WIDTH{1'b0}}; +logic temp_m_axi_wlast_reg = 1'b0; +logic temp_m_axi_wvalid_reg = 1'b0, temp_m_axi_wvalid_next; + +// datapath control +logic store_axi_w_int_to_output; +logic store_axi_w_int_to_temp; +logic store_axi_w_temp_to_output; + +assign m_axi_wdata = m_axi_wdata_reg; +assign m_axi_wstrb = m_axi_wstrb_reg; +assign m_axi_wvalid = m_axi_wvalid_reg; +assign m_axi_wlast = m_axi_wlast_reg; + +// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) +assign m_axi_wready_int_early = m_axi_wready || (!temp_m_axi_wvalid_reg && (!m_axi_wvalid_reg || !m_axi_wvalid_int)); + +always_comb begin + // 
transfer sink ready state to source + m_axi_wvalid_next = m_axi_wvalid_reg; + temp_m_axi_wvalid_next = temp_m_axi_wvalid_reg; + + store_axi_w_int_to_output = 1'b0; + store_axi_w_int_to_temp = 1'b0; + store_axi_w_temp_to_output = 1'b0; + + if (m_axi_wready_int_reg) begin + // input is ready + if (m_axi_wready || !m_axi_wvalid_reg) begin + // output is ready or currently not valid, transfer data to output + m_axi_wvalid_next = m_axi_wvalid_int; + store_axi_w_int_to_output = 1'b1; + end else begin + // output is not ready, store input in temp + temp_m_axi_wvalid_next = m_axi_wvalid_int; + store_axi_w_int_to_temp = 1'b1; + end + end else if (m_axi_wready) begin + // input is not ready, but output is ready + m_axi_wvalid_next = temp_m_axi_wvalid_reg; + temp_m_axi_wvalid_next = 1'b0; + store_axi_w_temp_to_output = 1'b1; + end +end + +always_ff @(posedge aclk) begin + if (~aresetn) begin + m_axi_wvalid_reg <= 1'b0; + m_axi_wready_int_reg <= 1'b0; + temp_m_axi_wvalid_reg <= 1'b0; + end else begin + m_axi_wvalid_reg <= m_axi_wvalid_next; + m_axi_wready_int_reg <= m_axi_wready_int_early; + temp_m_axi_wvalid_reg <= temp_m_axi_wvalid_next; + end + + // datapath + if (store_axi_w_int_to_output) begin + m_axi_wdata_reg <= m_axi_wdata_int; + m_axi_wstrb_reg <= m_axi_wstrb_int; + m_axi_wlast_reg <= m_axi_wlast_int; + end else if (store_axi_w_temp_to_output) begin + m_axi_wdata_reg <= temp_m_axi_wdata_reg; + m_axi_wstrb_reg <= temp_m_axi_wstrb_reg; + m_axi_wlast_reg <= temp_m_axi_wlast_reg; + end + + if (store_axi_w_int_to_temp) begin + temp_m_axi_wdata_reg <= m_axi_wdata_int; + temp_m_axi_wstrb_reg <= m_axi_wstrb_int; + temp_m_axi_wlast_reg <= m_axi_wlast_int; + end +end + +/* +// DEBUG +ila_ddr_wr inst_ila_ddr_wr ( + .clk(aclk), + .probe0(state_reg), + .probe1(input_cycle_count_reg), + .probe2(output_cycle_count_reg), + .probe3(op_word_count_reg), + .probe4(tr_word_count_reg), + .probe5(s_axis_write_data_tvalid), + .probe6(s_axis_write_data_tready), + 
.probe7(s_axis_write_data_tlast), + .probe8(s_axis_write_data_tkeep), + .probe9(s_axis_write_data_tdata), + .probe10(m_axis_write_desc_status_valid), + .probe11(transfer_in_save), + .probe12(flush_save), + .probe13(status_fifo_we), + .probe14(cycle_size), // 7 + .probe15(addr_reg), // 64 + .probe16(offset_reg), // 6 + .probe17(strb_offset_mask_reg), // 64 + .probe18(zero_offset_reg), + .probe19(last_cycle_offset_reg), // 6 + .probe20(length_reg), // 28 + .probe21(input_active_reg), + .probe22(first_cycle_reg), + .probe23(input_last_cycle_reg), + .probe24(output_last_cycle_reg), + .probe25(last_transfer_reg), + .probe26(status_fifo_wr_ptr_reg), // 6 + .probe27(status_fifo_rd_ptr_reg), // 6 + .probe28(status_fifo_wr_last), + .probe29(s_axis_write_desc_ready_reg), + .probe30(m_axis_write_desc_status_valid_reg), + .probe31(m_axi_awaddr_reg), // 64 + .probe32(m_axi_awlen_reg), // 8 + .probe33(m_axi_awvalid_reg), + .probe34(m_axi_bready_reg), + .probe35(s_axis_write_data_tready_reg), + .probe36(save_axis_tlast_reg), + .probe37(shift_axis_tvalid), + .probe38(shift_axis_tlast), + .probe39(shift_axis_input_tready), + .probe40(shift_axis_extra_cycle_reg), + .probe41(m_axi_wlast_int), + .probe42(m_axi_wvalid_int), + .probe43(m_axi_wready_int_reg), + .probe44(m_axi_wready_int_early), + .probe45(tmp_loc_reg), // 15 + .probe46(save_axis_tdata_reg), // 512 + .probe47(save_axis_tkeep_reg), // 64 + .probe48(shift_axis_tdata), // 512 + .probe49(shift_axis_tkeep), // 64 + .probe50(m_axi_wdata_int), // 512 + .probe51(m_axi_wstrb_int) // 64 +); +*/ + +endmodule \ No newline at end of file diff --git a/hw/hdl/mmu/tlb_arbiter.sv b/hw/hdl/mmu/tlb_arbiter.sv new file mode 100644 index 00000000..ef991430 --- /dev/null +++ b/hw/hdl/mmu/tlb_arbiter.sv @@ -0,0 +1,173 @@ +import lynxTypes::*; + +/** + * TLB request arbiter - Round Robin + */ +module tlb_arbiter #( + parameter integer ARB_DATA_BITS = AXI_DATA_BITS +) ( + input logic aclk, + input logic aresetn, + + // User logic + dmaIntf.s 
req_snk [N_REGIONS], + dmaIntf.m req_src, + + // Multiplexing + muxUserIntf.s mux_user +); + +localparam integer BEAT_LOG_BITS = $clog2(ARB_DATA_BITS/8); + +logic [N_REGIONS-1:0] ready_snk; +logic [N_REGIONS-1:0] valid_snk; +dma_req_t [N_REGIONS-1:0] request_snk; +logic [N_REGIONS-1:0] done_snk; +logic [N_REGIONS-1:0] done_snk_r; + +logic ready_src; +logic valid_src; +dma_req_t request_src; +logic done_src; + +logic [N_REGIONS_BITS-1:0] rr_reg; +logic [N_REGIONS_BITS-1:0] id; + +metaIntf #(.DATA_BITS(N_REGIONS_BITS+LEN_BITS-BEAT_LOG_BITS)) user_seq_in (); +metaIntf #(.DATA_BITS(N_REGIONS_BITS)) done_seq_in (); + +logic [N_REGIONS_BITS-1:0] done_seq_out_data; + +logic [LEN_BITS-BEAT_LOG_BITS-1:0] n_tr; + +// -------------------------------------------------------------------------------- +// IO: unpack the per-region request interfaces into vectors; done is handed back registered (done_snk_r), i.e. one cycle after done_snk +// -------------------------------------------------------------------------------- +for(genvar i = 0; i < N_REGIONS; i++) begin + assign valid_snk[i] = req_snk[i].valid; + assign req_snk[i].ready = ready_snk[i]; + assign request_snk[i] = req_snk[i].req; + assign req_snk[i].done = done_snk_r[i]; +end + +assign req_src.valid = valid_src; +assign ready_src = req_src.ready; +assign req_src.req = request_src; +assign done_src = req_src.done; + +// -------------------------------------------------------------------------------- +// RR: rr_reg is the region index the scan starts from; it advances by one (wrapping to 0 after N_REGIONS-1) on every accepted request (valid_src & ready_src) +// -------------------------------------------------------------------------------- +always_ff @(posedge aclk or negedge aresetn) begin + if(aresetn == 1'b0) begin + rr_reg <= 0; + done_snk_r <= 0; + end else begin + if(valid_src & ready_src) begin + rr_reg <= rr_reg + 1; + if(rr_reg >= N_REGIONS-1) + rr_reg <= 0; + end + + done_snk_r <= done_snk; + end +end + +// DP: scan the regions starting at rr_reg and select the first one with a valid request; the request is only offered downstream while both sequence queues can accept an entry +always_comb begin + ready_snk = 0; + valid_src = 1'b0; + id = 0; + + done_snk = 0; + + for(int i = 0; i < N_REGIONS; i++) begin + if(i+rr_reg >= N_REGIONS) begin + if(valid_snk[i+rr_reg-N_REGIONS]) begin + valid_src = valid_snk[i+rr_reg-N_REGIONS] && user_seq_in.ready && 
done_seq_in.ready; + id = i+rr_reg-N_REGIONS; + break; + end + end + else begin + if(valid_snk[i+rr_reg]) begin + valid_src = valid_snk[i+rr_reg] && user_seq_in.ready && done_seq_in.ready; + id = i+rr_reg; + break; + end + end + end + + ready_snk[id] = ready_src && user_seq_in.ready && done_seq_in.ready; + request_src = request_snk[id]; + + done_snk[done_seq_out_data] = done_src; +end + +assign n_tr = (request_snk[id].len - 1) >> BEAT_LOG_BITS; +assign user_seq_in.valid = valid_src & ready_src; +assign user_seq_in.data = {id, n_tr}; + +assign done_seq_in.valid = valid_src & ready_src & request_src.ctl; +assign done_seq_in.data = id; + +// Multiplexer sequence: every accepted request enqueues {region id, (len - 1) >> BEAT_LOG_BITS}, which is presented on mux_user.id / mux_user.len +queue #( + .QTYPE(logic [N_REGIONS_BITS+LEN_BITS-BEAT_LOG_BITS-1:0]) +) inst_seq_que_user ( + .aclk(aclk), + .aresetn(aresetn), + .val_snk(user_seq_in.valid), + .rdy_snk(user_seq_in.ready), + .data_snk(user_seq_in.data), + .val_src(mux_user.valid), + .rdy_src(mux_user.ready), + .data_src({mux_user.id, mux_user.len}) +); + +// Completion sequence: region ids of accepted requests that have ctl set; the head entry selects which done_snk bit receives done_src. NOTE(review): done_src is also assigned from req_src.done above and rdy_src is left open - confirm the port directions of queue +queue #( + .QTYPE(logic [N_REGIONS_BITS-1:0]) +) inst_seq_que_done ( + .aclk(aclk), + .aresetn(aresetn), + .val_snk(done_seq_in.valid), + .rdy_snk(done_seq_in.ready), + .data_snk(done_seq_in.data), + .val_src(done_src), + .rdy_src(), + .data_src(done_seq_out_data) +); + +/* +ila_arbiter inst_ila_arbiter ( + .clk(aclk), + .probe0(ready_snk[0]), + .probe1(ready_snk[1]), + .probe2(ready_snk[2]), + .probe3(valid_snk[0]), + .probe4(valid_snk[1]), + .probe5(valid_snk[2]), + .probe6(done_snk[0]), + .probe7(done_snk[1]), + .probe8(done_snk[2]), + .probe9(ready_src), + .probe10(valid_src), + .probe11(done_src), + .probe12(rr_reg), + .probe13(id), + .probe14(user_seq_in.valid), + .probe15(user_seq_in.ready), + .probe16(user_seq_in.data), + .probe17(done_seq_in.ready), + .probe18(done_seq_in.valid), + .probe19(done_seq_in.data), + .probe20(mux_user.ready), + .probe21(mux_user.valid), + .probe22(mux_user.id), + .probe23(mux_user.len), + .probe24(done_seq_out_data) +); +*/ + 
+endmodule \ No newline at end of file diff --git a/hw/hdl/mmu/tlb_arbiter_isr.sv b/hw/hdl/mmu/tlb_arbiter_isr.sv new file mode 100644 index 00000000..899d4275 --- /dev/null +++ b/hw/hdl/mmu/tlb_arbiter_isr.sv @@ -0,0 +1,131 @@ +import lynxTypes::*; + +/** + * TLB request arbiter - Round Robin + */ +module tlb_arbiter_isr #( + parameter integer RDWR = 0 +) ( + input logic aclk, + input logic aresetn, + + // User logic + dmaIsrIntf.s req_snk [N_REGIONS], + dmaIntf.m req_src_host, + dmaIntf.m req_src_card +); + +logic [N_REGIONS-1:0] ready_snk; +logic [N_REGIONS-1:0] valid_snk; +dma_isr_req_t [N_REGIONS-1:0] request_snk; +logic [N_REGIONS-1:0] done_snk; +logic [N_REGIONS-1:0] done_snk_r; +logic [N_REGIONS-1:0] isr_return_snk; + +logic ready_src; +logic valid_src; +dma_isr_req_t request_src; +logic done_src; + +logic [N_REGIONS_BITS-1:0] rr_reg; +logic [N_REGIONS_BITS-1:0] id; + +metaIntf #(.DATA_BITS(N_REGIONS_BITS)) done_seq_in (); + +logic [N_REGIONS_BITS-1:0] done_seq_out_data; + +// -------------------------------------------------------------------------------- +// IO +// -------------------------------------------------------------------------------- +for(genvar i = 0; i < N_REGIONS; i++) begin + assign valid_snk[i] = req_snk[i].valid; + assign req_snk[i].ready = ready_snk[i]; + assign request_snk[i] = req_snk[i].req; + assign req_snk[i].done = done_snk_r[i]; + assign req_snk[i].isr_return = 1'b0; +end + +assign req_src_host.valid = ready_src & valid_src; +assign req_src_card.valid = ready_src & valid_src; +assign req_src_host.req.paddr = request_src.paddr_host; +assign req_src_card.req.paddr = request_src.paddr_card; +assign req_src_host.req.len = request_src.len; +assign req_src_card.req.len = request_src.len; +assign req_src_host.req.ctl = request_src.ctl; +assign req_src_card.req.ctl = request_src.ctl; +assign req_src_host.req.rsrvd = 0; +assign req_src_card.req.rsrvd = 0; + +assign ready_src = req_src_host.ready & req_src_card.ready; +if(RDWR == 0) + 
assign done_src = req_src_card.done; +else + assign done_src = req_src_host.done; + +// -------------------------------------------------------------------------------- +// RR +// -------------------------------------------------------------------------------- +always_ff @(posedge aclk or negedge aresetn) begin + if(aresetn == 1'b0) begin + rr_reg <= 0; + done_snk_r <= 0; + end else begin + if(valid_src & ready_src) begin + rr_reg <= rr_reg + 1; + if(rr_reg >= N_REGIONS-1) + rr_reg <= 0; + end + + done_snk_r <= done_snk; + end +end + +// DP +always_comb begin + ready_snk = 0; + valid_src = 1'b0; + id = 0; + + done_snk = 0; + + for(int i = 0; i < N_REGIONS; i++) begin + if(i+rr_reg >= N_REGIONS) begin + if(valid_snk[i+rr_reg-N_REGIONS]) begin + valid_src = valid_snk[i+rr_reg-N_REGIONS] && done_seq_in.ready; + id = i+rr_reg-N_REGIONS; + break; + end + end + else begin + if(valid_snk[i+rr_reg]) begin + valid_src = valid_snk[i+rr_reg] && done_seq_in.ready; + id = i+rr_reg; + break; + end + end + end + + ready_snk[id] = ready_src && done_seq_in.ready; + request_src = request_snk[id]; + + done_snk[done_seq_out_data] = done_src; +end + +assign done_seq_in.valid = valid_src & ready_src & request_src.ctl; +assign done_seq_in.data = id; + +// Completion sequence +queue #( + .QTYPE(logic [N_REGIONS_BITS-1:0]) +) inst_seq_que_done ( + .aclk(aclk), + .aresetn(aresetn), + .val_snk(done_seq_in.valid), + .rdy_snk(done_seq_in.ready), + .data_snk(done_seq_in.data), + .val_src(done_src), + .rdy_src(), + .data_src(done_seq_out_data) +); + +endmodule \ No newline at end of file diff --git a/hw/hdl/mmu/tlb_assign_isr.sv b/hw/hdl/mmu/tlb_assign_isr.sv new file mode 100644 index 00000000..54297a02 --- /dev/null +++ b/hw/hdl/mmu/tlb_assign_isr.sv @@ -0,0 +1,40 @@ +import lynxTypes::*; + +/** + * TLB assign when single region present + */ +module tlb_assign_isr #( + parameter integer RDWR = 0 +) ( + input logic aclk, + input logic aresetn, + + // User logic + dmaIsrIntf.s req_snk, + 
dmaIntf.m req_src_host, + dmaIntf.m req_src_card +); + +// Assign +always_comb begin + + req_snk.ready = req_src_host.ready & req_src_card.ready; + if(RDWR == 0) + req_snk.done = req_src_card.done; + else + req_snk.done = req_src_host.done; + req_snk.isr_return = 1'b0; + + req_src_host.valid = req_snk.valid & req_snk.ready; + req_src_card.valid = req_snk.valid & req_snk.ready; + req_src_host.req.paddr = req_snk.req.paddr_host; + req_src_card.req.paddr = req_snk.req.paddr_card; + req_src_host.req.len = req_snk.req.len; + req_src_card.req.len = req_snk.req.len; + req_src_host.req.ctl = req_snk.req.ctl; + req_src_card.req.ctl = req_snk.req.ctl; + req_src_host.req.dest = req_snk.req.dest; + req_src_card.req.dest = req_snk.req.dest; +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/mmu/tlb_credits_rd.sv b/hw/hdl/mmu/tlb_credits_rd.sv new file mode 100644 index 00000000..58419911 --- /dev/null +++ b/hw/hdl/mmu/tlb_credits_rd.sv @@ -0,0 +1,186 @@ +import lynxTypes::*; + +/** + * Single region requests credits + */ +module tlb_credits_rd #( + parameter integer ID_REG = 0, + parameter integer CRED_DATA_BITS = AXI_DATA_BITS +) ( + input logic aclk, + input logic aresetn, + + // Requests + dmaIntf.s req_in, + dmaIntf.m req_out, + + // Data read + input logic rxfer, + output logic [3:0] rd_dest +); + +// -- Constants +localparam integer BEAT_LOG_BITS = $clog2(CRED_DATA_BITS/8); + +// -- FSM +typedef enum logic[0:0] {ST_IDLE, ST_READ} state_t; +logic [0:0] state_C, state_N; + +logic [7:0] cred_reg_C, cred_reg_N; +logic [LEN_BITS-BEAT_LOG_BITS-1:0] cnt_C, cnt_N; +logic [LEN_BITS-BEAT_LOG_BITS-1:0] n_beats_C, n_beats_N; +logic [3:0] dest_C, dest_N; + +logic req_sent; +logic req_done; + +logic [LEN_BITS-BEAT_LOG_BITS-1:0] rd_len; + +metaIntf #(.DATA_BITS(4+LEN_BITS-BEAT_LOG_BITS)) req_que_in (); +metaIntf #(.DATA_BITS(4+LEN_BITS-BEAT_LOG_BITS)) req_que_out (); + +// -- REG +always_ff @(posedge aclk, negedge aresetn) begin: PROC_REG +if (aresetn == 1'b0) begin + 
cred_reg_C <= N_OUTSTANDING; + state_C <= ST_IDLE; +end else begin // every register below updates only outside reset, so state_C really resets to ST_IDLE + cred_reg_C <= cred_reg_N; + state_C <= state_N; + cnt_C <= cnt_N; + n_beats_C <= n_beats_N; + dest_C <= dest_N; +end +end + +// -- NSL +always_comb begin: NSL + state_N = state_C; + + case(state_C) + ST_IDLE: + state_N = req_que_out.valid ? ST_READ : ST_IDLE; + + ST_READ: + state_N = req_done ? (req_que_out.valid ? ST_READ : ST_IDLE) : ST_READ; + + endcase // state_C +end + +// -- DP: credit accounting, request forwarding and read beat counting +always_comb begin + cred_reg_N = cred_reg_C; + cnt_N = cnt_C; + n_beats_N = n_beats_C; + dest_N = dest_C; + + // IO + req_in.ready = 1'b0; + req_in.done = req_out.done; + + req_out.valid = 1'b0; + req_out.req.paddr = req_in.req.paddr; + req_out.req.len = req_in.req.len; + req_out.req.ctl = req_in.req.ctl; + req_out.req.rsrvd = 0; + + // Status - req_done is evaluated first because req_sent reads it within this same block + req_done = (cnt_C == n_beats_C) && rxfer; + req_sent = req_in.valid && req_out.ready && req_que_in.ready && ((cred_reg_C > 0) || req_done); + + // Outstanding queue + req_que_in.valid = 1'b0; + rd_len = (req_in.req.len - 1) >> BEAT_LOG_BITS; + req_que_in.data = {req_in.req.dest, rd_len}; + req_que_out.ready = 1'b0; + + if(req_sent && !req_done) // request issued, none completed: one credit consumed + cred_reg_N = cred_reg_C - 1; + else if(req_done && !req_sent) // request completed, none issued: one credit returned + cred_reg_N = cred_reg_C + 1; + + if(req_sent) begin + req_in.ready = 1'b1; + req_out.valid = 1'b1; + req_que_in.valid = 1'b1; + end + + case(state_C) + ST_IDLE: begin + cnt_N = 0; + if(req_que_out.valid) begin + req_que_out.ready = 1'b1; + n_beats_N = req_que_out.data[LEN_BITS-BEAT_LOG_BITS-1:0]; + dest_N = req_que_out.data[LEN_BITS-BEAT_LOG_BITS+:4]; + end + end + + ST_READ: begin + if(req_done) begin + cnt_N = 0; + if(req_que_out.valid) begin + req_que_out.ready = 1'b1; + n_beats_N = req_que_out.data[LEN_BITS-BEAT_LOG_BITS-1:0]; + dest_N = req_que_out.data[LEN_BITS-BEAT_LOG_BITS+:4]; + end + end + else begin + cnt_N = rxfer ?
cnt_C + 1 : cnt_C; + end + end + + endcase +end + +// Output dest +assign rd_dest = dest_C; + +// Outstanding +queue_stream #(.QTYPE(logic [4+LEN_BITS-BEAT_LOG_BITS-1:0])) inst_dque ( + .aclk(aclk), + .aresetn(aresetn), + .val_snk(req_que_in.valid), + .rdy_snk(req_que_in.ready), + .data_snk(req_que_in.data), + .val_src(req_que_out.valid), + .rdy_src(req_que_out.ready), + .data_src(req_que_out.data) +); + +/* +// DEBUG +if(ID_REG == 0) begin +logic [15:0] cnt_req_in; +logic [15:0] cnt_req_out; + +ila_rd_cred inst_ila_rd_cred ( + .clk(aclk), + .probe0(state_C), + .probe1(req_in.valid), + .probe2(req_in.ready), + .probe3(req_in.req.len), + .probe4(cred_reg_C), + .probe5(cnt_C), + .probe6(n_beats_C), + .probe7(req_sent), + .probe8(rxfer), + .probe9(req_sent), + .probe10(req_done), + .probe11(cnt_req_in), + .probe12(cnt_req_out) +); + +always_ff @(posedge aclk or negedge aresetn) begin + if(aresetn == 1'b0) begin + cnt_req_in <= 0; + cnt_req_out <= 0; + end + else begin + cnt_req_in <= (req_in.valid & req_in.ready) ? cnt_req_in + 1 : cnt_req_in; + cnt_req_out <= (req_out.valid & req_out.ready) ? 
cnt_req_out + 1 : cnt_req_out; + end +end +end +*/ + +endmodule \ No newline at end of file diff --git a/hw/hdl/mmu/tlb_credits_wr.sv b/hw/hdl/mmu/tlb_credits_wr.sv new file mode 100644 index 00000000..664e9b6f --- /dev/null +++ b/hw/hdl/mmu/tlb_credits_wr.sv @@ -0,0 +1,99 @@ +import lynxTypes::*; + +/** + * Single region requests credits + */ +module tlb_credits_wr #( + parameter integer ID_REG = 0, + parameter integer CRED_DATA_BITS = AXI_DATA_BITS +) ( + input logic aclk, + input logic aresetn, + + // Requests + dmaIntf.s req_in, + dmaIntf.m req_out, + + // Data write + input logic wxfer +); + +// -- Constants +localparam integer BEAT_LOG_BITS = $clog2(CRED_DATA_BITS/8); + +logic [LEN_BITS-BEAT_LOG_BITS:0] cnt_C, cnt_N; + +logic [LEN_BITS-BEAT_LOG_BITS:0] n_beats; + +// -- REG +always_ff @(posedge aclk, negedge aresetn) begin: PROC_REG +if (aresetn == 1'b0) begin + cnt_C <= 0; +end +else + cnt_C <= cnt_N; +end + +// -- DP +always_comb begin + cnt_N = cnt_C; + + // IO + req_in.ready = 1'b0; + req_in.done = req_out.done; + + req_out.valid = 1'b0; + req_out.req.paddr = req_in.req.paddr; + req_out.req.len = req_in.req.len; + req_out.req.ctl = req_in.req.ctl; + req_out.req.rsrvd = 0; + + n_beats = (req_in.req.len - 1) >> BEAT_LOG_BITS; + + if(req_in.valid && req_out.ready && (cnt_C >= n_beats)) begin + req_in.ready = 1'b1; + req_out.valid = 1'b1; + + cnt_N = wxfer ? cnt_C - (n_beats - 1) : cnt_C - n_beats; + end + else begin + cnt_N = wxfer ? 
cnt_C + 1 : cnt_C; + end + +end + +/* +// DEBUG +if(ID_REG == 0) begin +logic [15:0] cnt_req_in; +logic [15:0] cnt_req_out; + +ila_wr_cred inst_ila_wr_cred ( + .clk(aclk), + .probe0(req_in.valid), + .probe1(req_in.ready), + .probe2(req_in.req.len), + .probe3(req_out.valid), + .probe4(req_out.ready), + .probe5(n_beats), + .probe6(cnt_C), + .probe7(wxfer), + .probe8(cnt_req_in), + .probe9(cnt_req_out) +); + +always_ff @(posedge aclk or negedge aresetn) begin + if(aresetn == 1'b0) begin + cnt_req_in <= 0; + cnt_req_out <= 0; + end + else begin + cnt_req_in <= (req_in.valid & req_in.ready) ? cnt_req_in + 1 : cnt_req_in; + cnt_req_out <= (req_out.valid & req_out.ready) ? cnt_req_out + 1 : cnt_req_out; + end +end +end +*/ + + +endmodule \ No newline at end of file diff --git a/hw/hdl/mmu/tlb_fsm_rd.sv b/hw/hdl/mmu/tlb_fsm_rd.sv new file mode 100644 index 00000000..e4e359b5 --- /dev/null +++ b/hw/hdl/mmu/tlb_fsm_rd.sv @@ -0,0 +1,583 @@ +/** + * TLB FSM read + * + * Request channels + * @param: + * - RDWR : Read(0) or write(1) request channel + */ + +import lynxTypes::*; + +//`define DEBUG_TLB_FSM_RD + +module tlb_fsm_rd #( + parameter integer ID_REG = 0, + parameter integer RDWR = 0 +) ( + input logic aclk, + input logic aresetn, + + // TLBs + tlbIntf.m lTlb, + tlbIntf.m sTlb, + + // User logic + cnfgIntf.s cnfg, + + // Requests + reqIntf.s req_in, + + // DMA - host +`ifdef EN_STRM + dmaIntf.m HDMA, // Host +`endif + + // DMA - card +`ifdef EN_DDR + dmaIntf.m DDMA, // Card + dmaIsrIntf.m IDMA, // Page fault, sync +`endif + + // Mutex + output logic lock, + output logic unlock, + input logic [1:0] mutex +); + +// ---------------------------------------------------------------------------------------------------------- +// -- Decl +// ---------------------------------------------------------------------------------------------------------- + +// -- Constants +localparam integer PG_L_SIZE = 1 << PG_L_BITS; +localparam integer PG_S_SIZE = 1 << PG_S_BITS; +localparam integer 
HASH_L_BITS = TLB_L_ORDER; +localparam integer HASH_S_BITS = TLB_S_ORDER; +localparam integer TAG_L_BITS = VADDR_BITS - HASH_L_BITS - PG_L_BITS; +localparam integer TAG_S_BITS = VADDR_BITS - HASH_S_BITS - PG_S_BITS; +localparam integer PHY_L_BITS = PADDR_BITS - PG_L_BITS; +localparam integer PHY_S_BITS = PADDR_BITS - PG_S_BITS; +localparam integer HIT_L_IDX_BITS = $clog2(N_L_ASSOC); +localparam integer HIT_S_IDX_BITS = $clog2(N_S_ASSOC); + +// -- FSM --------------------------------------------------------------------------------------------------- +typedef enum logic[3:0] {ST_IDLE, ST_MUTEX, ST_CHECK, + ST_HIT_LARGE, ST_HIT_SMALL, ST_CALC_LARGE, ST_CALC_SMALL, // timing extra states +`ifdef EN_STRM + ST_HOST_SEND, +`endif +`ifdef EN_DDR + ST_ISR_WAIT, + ST_CARD_SEND, ST_SYNC_SEND, ST_ISR_SEND, +`endif + ST_MISS} state_t; +logic [3:0] state_C, state_N; + +// -- Internal registers ------------------------------------------------------------------------------------ +// Request +logic [LEN_BITS-1:0] len_C, len_N; +logic [VADDR_BITS-1:0] vaddr_C, vaddr_N; +logic sync_C, sync_N; +logic ctl_C, ctl_N; +logic strm_C, strm_N; +logic [3:0] dest_C, dest_N; + +// TLB data +logic [TLB_DATA_BITS-1:0] data_host_C, data_host_N; +logic [TLB_DATA_BITS-1:0] data_card_C, data_card_N; + +// Page fault +logic unlock_C, unlock_N; +logic miss_C, miss_N; +logic [LEN_BITS-1:0] len_miss_C, len_miss_N; +logic [VADDR_BITS-1:0] vaddr_miss_C, vaddr_miss_N; +logic isr_C, isr_N; + +// -- Out +logic [LEN_BITS-1:0] plen_C, plen_N; +logic [PADDR_BITS-1:0] paddr_host_C, paddr_host_N; +logic [PADDR_BITS-1:0] paddr_card_C, paddr_card_N; + +// -- Internal signals -------------------------------------------------------------------------------------- +logic [N_L_ASSOC-1:0] tag_cmp_card_l; +logic [N_S_ASSOC-1:0] tag_cmp_card_s; + +logic [N_L_ASSOC-1:0] tag_cmp_host_l; +logic [N_S_ASSOC-1:0] tag_cmp_host_s; + +logic hitL; +logic hitS; + +logic [HIT_L_IDX_BITS-1:0] hitL_card_idx; +logic [HIT_S_IDX_BITS-1:0] 
hitS_card_idx; + +logic [HIT_L_IDX_BITS-1:0] hitL_host_idx; +logic [HIT_S_IDX_BITS-1:0] hitS_host_idx; + +// ---------------------------------------------------------------------------------------------------------- +// -- Def +// ---------------------------------------------------------------------------------------------------------- + +// REG - state and datapath registers, asynchronous active-low reset +always_ff @(posedge aclk, negedge aresetn) begin: PROC_REG +if (aresetn == 1'b0) begin + state_C <= ST_IDLE; + + // ISR + miss_C <= 0; + unlock_C <= 0; + isr_C <= 0; +end else begin // every register below updates only outside reset, so the ISR flags keep their reset value + state_C <= state_N; + + // Requests + len_C <= len_N; + vaddr_C <= vaddr_N; + sync_C <= sync_N; + ctl_C <= ctl_N; + strm_C <= strm_N; + dest_C <= dest_N; + // TLB + plen_C <= plen_N; + paddr_host_C <= paddr_host_N; + data_host_C <= data_host_N; +`ifdef EN_DDR + paddr_card_C <= paddr_card_N; + data_card_C <= data_card_N; +`endif + // ISR + miss_C <= miss_N; + unlock_C <= unlock_N; + isr_C <= isr_N; + len_miss_C <= len_miss_N; + vaddr_miss_C <= vaddr_miss_N; +end +end + +// NSL +always_comb begin: NSL + state_N = state_C; + + case(state_C) + // Wait until request queue is not empty + ST_IDLE: + state_N = (req_in.valid) ? ST_MUTEX : ST_IDLE; + + // Obtain mutex + ST_MUTEX: + state_N = ((mutex[1] == RDWR) && (mutex[0] == 1'b0)) ? ST_CHECK : ST_MUTEX; + + // Check hits + ST_CHECK: + state_N = hitL ? ST_HIT_LARGE : hitS ? ST_HIT_SMALL : ST_MISS; + + // Page parsing + ST_HIT_LARGE: + state_N = ST_CALC_LARGE; + ST_HIT_SMALL: + state_N = ST_CALC_SMALL; + + // Calc. + ST_CALC_LARGE: +`ifdef EN_STRM + `ifdef EN_DDR + if(strm_C) + state_N = ST_HOST_SEND; + else + state_N = isr_C ? ST_ISR_SEND : sync_C ? ST_SYNC_SEND : ST_CARD_SEND; + `else + state_N = ST_HOST_SEND; + `endif +`else + state_N = isr_C ? ST_ISR_SEND : sync_C ? ST_SYNC_SEND : ST_CARD_SEND; +`endif + ST_CALC_SMALL: +`ifdef EN_STRM + `ifdef EN_DDR + if(strm_C) + state_N = ST_HOST_SEND; + else + state_N = isr_C ? ST_ISR_SEND : sync_C ?
ST_SYNC_SEND : ST_CARD_SEND; + `else + state_N = ST_HOST_SEND; + `endif +`else + state_N = isr_C ? ST_ISR_SEND : sync_C ? ST_SYNC_SEND : ST_CARD_SEND; +`endif + + // Send DMA requests +`ifdef EN_STRM + ST_HOST_SEND: + if(HDMA.ready) + state_N = len_C ? ST_MUTEX : ST_IDLE; +`endif + +`ifdef EN_DDR + ST_CARD_SEND: + if(DDMA.ready) + state_N = len_C ? ST_MUTEX : ST_IDLE; + ST_SYNC_SEND: + if(IDMA.ready) + state_N = len_C ? ST_MUTEX : ST_IDLE; + ST_ISR_SEND: + if(IDMA.ready) + state_N = len_C ? ST_MUTEX : ST_ISR_WAIT; + + // Wait until data is fetched + ST_ISR_WAIT: + state_N = IDMA.done && IDMA.isr_return ? ST_MUTEX : ST_ISR_WAIT; +`endif + + // Page fault + ST_MISS: + state_N = cnfg.restart ? ST_CHECK : ST_MISS; + + endcase // state_C +end + +// DP +always_comb begin: DP + // Requests + len_N = len_C; + vaddr_N = vaddr_C; + sync_N = sync_C; + ctl_N = ctl_C; + strm_N = strm_C; + dest_N = dest_C; + + // TLB + data_host_N = data_host_C; +`ifdef EN_DDR + data_card_N = data_card_C; +`endif + + // Out + plen_N = plen_C; + paddr_host_N = paddr_host_C; +`ifdef EN_DDR + paddr_card_N = paddr_card_C; +`endif + + // ISR + unlock_N = 1'b0; + miss_N = 1'b0; + vaddr_miss_N = vaddr_miss_C; + len_miss_N = len_miss_C; + isr_N = isr_C; + + // mutex + lock = 1'b0; + unlock = unlock_C; + + // Requests + req_in.ready = 1'b0; + + // Config +`ifdef EN_STRM + cnfg.done_host = HDMA.done; +`else + cnfg.done_host = 1'b0; +`endif + +`ifdef EN_DDR + cnfg.done_card = DDMA.done; + cnfg.done_sync = IDMA.done & ~IDMA.isr_return; +`else + cnfg.done_card = 1'b0; + cnfg.done_sync = 1'b0; +`endif + + cnfg.pf.miss = miss_C; + cnfg.pf.vaddr = vaddr_miss_C; + cnfg.pf.len = len_miss_C; + + // TLB + lTlb.addr = vaddr_C; + sTlb.addr = vaddr_C; + +`ifdef EN_STRM + // HDMA + HDMA.req.paddr = paddr_host_C; + HDMA.req.len = plen_C; + HDMA.req.ctl = 1'b0; + HDMA.req.dest = dest_C; + HDMA.req.rsrvd = 0; + HDMA.valid = 1'b0; +`endif + +`ifdef EN_DDR + // DDMA + DDMA.req.paddr = paddr_card_C; + DDMA.req.len = plen_C; 
+ DDMA.req.ctl = 1'b0; + DDMA.req.dest = dest_C; + DDMA.req.rsrvd = 0; + DDMA.valid = 1'b0; + + // IDMA + IDMA.req.paddr_card = paddr_card_C; + IDMA.req.paddr_host = paddr_host_C; + IDMA.req.len = plen_C; + IDMA.req.ctl = 1'b0; + IDMA.req.dest = dest_C; + IDMA.req.isr = 1'b0; + IDMA.req.rsrvd = 0; + IDMA.valid = 1'b0; +`endif + + case(state_C) + ST_IDLE: begin + isr_N = 1'b0; + req_in.ready = 1'b1; + if(req_in.valid) begin // RR + // Lock the mutex + lock = 1'b1; + + // Request + len_N = req_in.req.len; + vaddr_N = req_in.req.vaddr; + sync_N = req_in.req.sync; + ctl_N = req_in.req.ctl; + strm_N = req_in.req.stream; + dest_N = req_in.req.dest; + end + end + + ST_MUTEX: + lock = 1'b1; + + ST_CHECK: +`ifdef EN_STRM + `ifdef EN_DDR + if(hitS || hitL) begin + if(strm_C) + unlock_N = 1'b1; + else + unlock_N = (isr_C || sync_C) ? 1'b0 : 1'b1; + end + `else + if(hitS || hitL) begin + unlock_N = 1'b1; + end + `endif +`else + if(hitS || hitL) begin + unlock_N = (isr_C || sync_C) ? 1'b0 : 1'b1; + end +`endif + else begin + miss_N = 1'b1; + vaddr_miss_N = vaddr_C; + len_miss_N = len_C; + isr_N = 1'b1; + end + + ST_HIT_LARGE: begin + data_host_N = lTlb.data[hitL_host_idx]; +`ifdef EN_DDR + data_card_N = lTlb.data[hitL_card_idx]; +`endif + end + + ST_HIT_SMALL: begin + data_host_N = sTlb.data[hitS_host_idx]; +`ifdef EN_DDR + data_card_N = sTlb.data[hitS_card_idx]; +`endif + end + + ST_CALC_LARGE: begin + paddr_host_N = {data_host_C[PHY_L_BITS-1:0], vaddr_C[PG_L_BITS-1:0]}; +`ifdef EN_DDR + paddr_card_N = {data_card_C[PHY_L_BITS-1:0], vaddr_C[PG_L_BITS-1:0]}; +`endif + if(len_C + vaddr_C[PG_L_BITS-1:0] > PG_L_SIZE) begin + plen_N = PG_L_SIZE - vaddr_C[PG_L_BITS-1:0]; + len_N = len_C - (PG_L_SIZE - vaddr_C[PG_L_BITS-1:0]); + vaddr_N += PG_L_SIZE - vaddr_C[PG_L_BITS-1:0]; + end + else begin + plen_N = len_C; + len_N = 0; + end + end + + ST_CALC_SMALL: begin + paddr_host_N = {data_host_C[PHY_S_BITS-1:0], vaddr_C[PG_S_BITS-1:0]}; +`ifdef EN_DDR + paddr_card_N = 
{data_card_C[PHY_S_BITS-1:0], vaddr_C[PG_S_BITS-1:0]}; +`endif + if(len_C + vaddr_C[PG_S_BITS-1:0] > PG_S_SIZE) begin + plen_N = PG_S_SIZE - vaddr_C[PG_S_BITS-1:0]; + len_N = len_C - (PG_S_SIZE - vaddr_C[PG_S_BITS-1:0]); + vaddr_N += PG_S_SIZE - vaddr_C[PG_S_BITS-1:0]; + end + else begin + plen_N = len_C; + len_N = 0; + end + end + +`ifdef EN_STRM + ST_HOST_SEND: begin + HDMA.valid = HDMA.ready; + HDMA.req.ctl = HDMA.valid && !len_C && ctl_C; + end +`endif + +`ifdef EN_DDR + ST_CARD_SEND: begin + DDMA.valid = DDMA.ready; + DDMA.req.ctl = DDMA.valid && !len_C && ctl_C; + end + + ST_SYNC_SEND: begin + IDMA.valid = IDMA.ready; + IDMA.req.ctl = IDMA.valid && !len_C && ctl_C; + IDMA.req.isr = 1'b0; + unlock_N = IDMA.valid && !len_C; + end + + ST_ISR_SEND: begin + IDMA.valid = IDMA.ready; + IDMA.req.ctl = IDMA.valid && !len_C; + IDMA.req.isr = 1'b1; + unlock_N = IDMA.valid && !len_C; + end + + ST_ISR_WAIT: begin + vaddr_N = vaddr_miss_C; + len_N = len_miss_C; + isr_N = 1'b0; + lock = IDMA.done && IDMA.isr_return; + end +`endif + + default: ; + + endcase // state_C +end + +// Hit/Miss combinational logic +always_comb begin + + hitL = 1'b0; + hitS = 1'b0; + + hitL_host_idx = 0; + hitS_host_idx = 0; + + tag_cmp_host_s = 0; + tag_cmp_host_l = 0; + +`ifdef EN_DDR + hitL_card_idx = 0; + hitS_card_idx = 0; + + tag_cmp_card_s = 0; + tag_cmp_card_l = 0; +`endif + + // Small pages + for (int i = 0; i < N_S_ASSOC; i++) begin + // tag cmp host + tag_cmp_host_s[i] = + (sTlb.data[i][TAG_S_BITS+PHY_S_BITS-1:PHY_S_BITS] == vaddr_C[VADDR_BITS-1:HASH_S_BITS+PG_S_BITS]) && // tag hit + sTlb.data[i][TLB_DATA_BITS-1] && // valid + ~sTlb.data[i][TLB_DATA_BITS-2]; // host hit + + if(tag_cmp_host_s[i]) begin + hitS = 1'b1; + hitS_host_idx = i; + end + +`ifdef EN_DDR + // tag cmp card + tag_cmp_card_s[i] = + (sTlb.data[i][TAG_S_BITS+PHY_S_BITS-1:PHY_S_BITS] == vaddr_C[VADDR_BITS-1:HASH_S_BITS+PG_S_BITS]) && // tag hit + sTlb.data[i][TLB_DATA_BITS-1] && // valid + sTlb.data[i][TLB_DATA_BITS-2]; 
// card hit + + if(tag_cmp_card_s[i]) begin + hitS = 1'b1; + hitS_card_idx = i; + end +`endif + + end + // Large pages + for (int i = 0; i < N_L_ASSOC; i++) begin + // tag cmp host + tag_cmp_host_l[i] = + (lTlb.data[i][TAG_L_BITS+PHY_L_BITS-1:PHY_L_BITS] == vaddr_C[VADDR_BITS-1:HASH_L_BITS+PG_L_BITS]) && // tag hit + lTlb.data[i][TLB_DATA_BITS-1] && // valid + ~lTlb.data[i][TLB_DATA_BITS-2]; // host hit + + if(tag_cmp_host_l[i]) begin + hitL = 1'b1; + hitL_host_idx = i; + end + +`ifdef EN_DDR + // tag cmp card + tag_cmp_card_l[i] = + (lTlb.data[i][TAG_L_BITS+PHY_L_BITS-1:PHY_L_BITS] == vaddr_C[VADDR_BITS-1:HASH_L_BITS+PG_L_BITS]) && // tag hit + lTlb.data[i][TLB_DATA_BITS-1] && // valid + lTlb.data[i][TLB_DATA_BITS-2]; // card hit + + if(tag_cmp_card_l[i]) begin + hitL = 1'b1; + hitL_card_idx = i; + end +`endif + + end +end + +// ILA ****************************************************************** +`ifdef DEBUG_TLB_FSM_RD +if(ID_REG == 0) begin + logic [15:0] cnt_req_in; + + always @( posedge aclk ) begin + if ( aresetn == 1'b0 ) begin + cnt_req_in <= 0; + end + else begin + cnt_req_in <= (req_in.valid & req_in.ready) ? 
cnt_req_in + 1 : cnt_req_in; + end + end + + ila_fsm_rd inst_ila_rd ( + .clk(aclk), + .probe0(state_C), + .probe1(len_C), + .probe2(vaddr_C), + .probe3(sync_C), + .probe4(0), + .probe5(data_host_C), + .probe6(data_card_C), + .probe7(vaddr_miss_C), + .probe8(len_miss_C), + .probe9(isr_C), + .probe10(unlock_C), + .probe11(miss_C), + .probe12(plen_C), + .probe13(paddr_host_C), + .probe14(paddr_card_C), + .probe15(DDMA.valid), + .probe16(DDMA.ready), + .probe17(DDMA.req.ctl), + .probe18(IDMA.valid), + .probe19(IDMA.ready), + .probe20(IDMA.req.ctl), + .probe21(cnt_req_in), + .probe22(DDMA.done), + .probe23(IDMA.done), + .probe24(IDMA.isr_return) + ); + end +`endif +// ********************************************************************** + +endmodule \ No newline at end of file diff --git a/hw/hdl/mmu/tlb_fsm_wr.sv b/hw/hdl/mmu/tlb_fsm_wr.sv new file mode 100644 index 00000000..ce08e88d --- /dev/null +++ b/hw/hdl/mmu/tlb_fsm_wr.sv @@ -0,0 +1,599 @@ +/** + * TLB FSM write + * + * Request channels + * @param: + * - RDWR : Read(0) or write(1) request channel + */ + +import lynxTypes::*; + +//`define DEBUG_TLB_FSM_WR + +module tlb_fsm_wr #( + parameter integer ID_REG = 0, + parameter integer RDWR = 1 +) ( + input logic aclk, + input logic aresetn, + + // TLBs + tlbIntf.m lTlb, + tlbIntf.m sTlb, + + // User logic + cnfgIntf.s cnfg, + + // Requests + reqIntf.s req_in, + + // DMA - host +`ifdef EN_STRM + dmaIntf.m HDMA, // Host +`endif + + // DMA - card +`ifdef EN_DDR + dmaIntf.m DDMA, // Card + dmaIsrIntf.m IDMA, // Page fault + dmaIsrIntf.m SDMA, // Sync +`endif + + // Mutex + output logic lock, + output logic unlock, + input logic [1:0] mutex +); + +// ---------------------------------------------------------------------------------------------------------- +// -- Decl +// ---------------------------------------------------------------------------------------------------------- + +// -- Constants +localparam integer PG_L_SIZE = 1 << PG_L_BITS; +localparam integer PG_S_SIZE 
= 1 << PG_S_BITS; +localparam integer HASH_L_BITS = TLB_L_ORDER; +localparam integer HASH_S_BITS = TLB_S_ORDER; +localparam integer TAG_L_BITS = VADDR_BITS - HASH_L_BITS - PG_L_BITS; +localparam integer TAG_S_BITS = VADDR_BITS - HASH_S_BITS - PG_S_BITS; +localparam integer PHY_L_BITS = PADDR_BITS - PG_L_BITS; +localparam integer PHY_S_BITS = PADDR_BITS - PG_S_BITS; +localparam integer HIT_L_IDX_BITS = $clog2(N_L_ASSOC); +localparam integer HIT_S_IDX_BITS = $clog2(N_S_ASSOC); + +// -- FSM --------------------------------------------------------------------------------------------------- +typedef enum logic[3:0] {ST_IDLE, ST_MUTEX, ST_CHECK, + ST_HIT_LARGE, ST_HIT_SMALL, ST_CALC_LARGE, ST_CALC_SMALL, // timing extra states +`ifdef EN_STRM + ST_HOST_SEND, +`endif +`ifdef EN_DDR + ST_ISR_WAIT, + ST_CARD_SEND, ST_SYNC_SEND, ST_ISR_SEND, +`endif + ST_MISS} state_t; +logic [3:0] state_C, state_N; + +// -- Internal registers ------------------------------------------------------------------------------------ +// Request +logic [LEN_BITS-1:0] len_C, len_N; +logic [VADDR_BITS-1:0] vaddr_C, vaddr_N; +logic sync_C, sync_N; +logic ctl_C, ctl_N; +logic strm_C, strm_N; +logic [3:0] dest_C, dest_N; + +// TLB data +logic [TLB_DATA_BITS-1:0] data_host_C, data_host_N; +logic [TLB_DATA_BITS-1:0] data_card_C, data_card_N; + +// Page fault +logic unlock_C, unlock_N; +logic miss_C, miss_N; +logic [LEN_BITS-1:0] len_miss_C, len_miss_N; +logic [VADDR_BITS-1:0] vaddr_miss_C, vaddr_miss_N; +logic isr_C, isr_N; + +// -- Out +logic [LEN_BITS-1:0] plen_C, plen_N; +logic [PADDR_BITS-1:0] paddr_host_C, paddr_host_N; +logic [PADDR_BITS-1:0] paddr_card_C, paddr_card_N; + +// -- Internal signals -------------------------------------------------------------------------------------- +logic [N_L_ASSOC-1:0] tag_cmp_card_l; +logic [N_S_ASSOC-1:0] tag_cmp_card_s; + +logic [N_L_ASSOC-1:0] tag_cmp_host_l; +logic [N_S_ASSOC-1:0] tag_cmp_host_s; + +logic hitL; +logic hitS; + +logic [HIT_L_IDX_BITS-1:0] 
hitL_card_idx; +logic [HIT_S_IDX_BITS-1:0] hitS_card_idx; + +logic [HIT_L_IDX_BITS-1:0] hitL_host_idx; +logic [HIT_S_IDX_BITS-1:0] hitS_host_idx; + +// ---------------------------------------------------------------------------------------------------------- +// -- Def +// ---------------------------------------------------------------------------------------------------------- + +// REG - state and datapath registers, asynchronous active-low reset +always_ff @(posedge aclk, negedge aresetn) begin: PROC_REG +if (aresetn == 1'b0) begin + state_C <= ST_IDLE; + + // ISR + miss_C <= 0; + unlock_C <= 0; + isr_C <= 0; +end else begin // every register below updates only outside reset, so the ISR flags keep their reset value + state_C <= state_N; + + // Requests + len_C <= len_N; + vaddr_C <= vaddr_N; + sync_C <= sync_N; + ctl_C <= ctl_N; + strm_C <= strm_N; + dest_C <= dest_N; + // TLB + plen_C <= plen_N; + paddr_host_C <= paddr_host_N; + data_host_C <= data_host_N; +`ifdef EN_DDR + paddr_card_C <= paddr_card_N; + data_card_C <= data_card_N; +`endif + // ISR + miss_C <= miss_N; + unlock_C <= unlock_N; + isr_C <= isr_N; + len_miss_C <= len_miss_N; + vaddr_miss_C <= vaddr_miss_N; +end +end + +// NSL +always_comb begin: NSL + state_N = state_C; + + case(state_C) + // Wait until request queue is not empty + ST_IDLE: + state_N = (req_in.valid) ? ST_MUTEX : ST_IDLE; + + // Obtain mutex + ST_MUTEX: + state_N = ((mutex[1] == RDWR) && (mutex[0] == 1'b0)) ? ST_CHECK : ST_MUTEX; + + // Check hits + ST_CHECK: + state_N = hitL ? ST_HIT_LARGE : hitS ? ST_HIT_SMALL : ST_MISS; + + // Page parsing + ST_HIT_LARGE: + state_N = ST_CALC_LARGE; + ST_HIT_SMALL: + state_N = ST_CALC_SMALL; + + // Calc. + ST_CALC_LARGE: +`ifdef EN_STRM + `ifdef EN_DDR + if(strm_C) + state_N = ST_HOST_SEND; + else + state_N = isr_C ? ST_ISR_SEND : sync_C ? ST_SYNC_SEND : ST_CARD_SEND; + `else + state_N = ST_HOST_SEND; + `endif +`else + state_N = isr_C ? ST_ISR_SEND : sync_C ? ST_SYNC_SEND : ST_CARD_SEND; +`endif + ST_CALC_SMALL: +`ifdef EN_STRM + `ifdef EN_DDR + if(strm_C) + state_N = ST_HOST_SEND; + else + state_N = isr_C ? ST_ISR_SEND : sync_C ?
ST_SYNC_SEND : ST_CARD_SEND; + `else + state_N = ST_HOST_SEND; + `endif +`else + state_N = isr_C ? ST_ISR_SEND : sync_C ? ST_SYNC_SEND : ST_CARD_SEND; +`endif + + // Send DMA requests +`ifdef EN_STRM + ST_HOST_SEND: + if(HDMA.ready) + state_N = len_C ? ST_MUTEX : ST_IDLE; +`endif + +`ifdef EN_DDR + ST_CARD_SEND: + if(DDMA.ready) + state_N = len_C ? ST_MUTEX : ST_IDLE; + ST_SYNC_SEND: + if(SDMA.ready) + state_N = len_C ? ST_MUTEX : ST_IDLE; + ST_ISR_SEND: + if(IDMA.ready) + state_N = len_C ? ST_MUTEX : ST_ISR_WAIT; + + // Wait until data is fetched + ST_ISR_WAIT: + state_N = IDMA.done && IDMA.isr_return ? ST_MUTEX : ST_ISR_WAIT; +`endif + + + + // Page fault + ST_MISS: + state_N = cnfg.restart ? ST_CHECK : ST_MISS; + + endcase // state_C +end + +// DP +always_comb begin: DP + // Requests + len_N = len_C; + vaddr_N = vaddr_C; + sync_N = sync_C; + ctl_N = ctl_C; + strm_N = strm_C; + dest_N = dest_C; + + // TLB + data_host_N = data_host_C; +`ifdef EN_DDR + data_card_N = data_card_C; +`endif + + // Out + plen_N = plen_C; + paddr_host_N = paddr_host_C; +`ifdef EN_DDR + paddr_card_N = paddr_card_C; +`endif + + // ISR + unlock_N = 1'b0; + miss_N = 1'b0; + vaddr_miss_N = vaddr_miss_C; + len_miss_N = len_miss_C; + isr_N = isr_C; + + // mutex + lock = 1'b0; + unlock = unlock_C; + + // Requests + req_in.ready = 1'b0; + + // Config +`ifdef EN_STRM + cnfg.done_host = HDMA.done; +`else + cnfg.done_host = 1'b0; +`endif + +`ifdef EN_DDR + cnfg.done_card = DDMA.done; + cnfg.done_sync = SDMA.done; +`else + cnfg.done_card = 1'b0; + cnfg.done_sync = 1'b0; +`endif + + cnfg.pf.miss = miss_C; + cnfg.pf.vaddr = vaddr_miss_C; + cnfg.pf.len = len_miss_C; + + // TLB + lTlb.addr = vaddr_C; + sTlb.addr = vaddr_C; + +`ifdef EN_STRM + // HDMA + HDMA.req.paddr = paddr_host_C; + HDMA.req.len = plen_C; + HDMA.req.ctl = 1'b0; + HDMA.req.dest = dest_C; + HDMA.req.rsrvd = 0; + HDMA.valid = 1'b0; +`endif + +`ifdef EN_DDR + // DDMA + DDMA.req.paddr = paddr_card_C; + DDMA.req.len = plen_C; + DDMA.req.ctl 
= 1'b0; + DDMA.req.dest = dest_C; + DDMA.req.rsrvd = 0; + DDMA.valid = 1'b0; + + // IDMA + IDMA.req.paddr_card = paddr_card_C; + IDMA.req.paddr_host = paddr_host_C; + IDMA.req.len = plen_C; + IDMA.req.ctl = 1'b0; + IDMA.req.dest = dest_C; + IDMA.req.isr = 1'b0; + IDMA.req.rsrvd = 0; + IDMA.valid = 1'b0; + + // SDMA + SDMA.req.paddr_card = paddr_card_C; + SDMA.req.paddr_host = paddr_host_C; + SDMA.req.len = plen_C; + SDMA.req.ctl = 1'b0; + SDMA.req.dest = dest_C; + SDMA.req.isr = 1'b0; + SDMA.req.rsrvd = 0; + SDMA.valid = 1'b0; +`endif + + case(state_C) + ST_IDLE: begin + isr_N = 1'b0; + req_in.ready = 1'b1; + if(req_in.valid) begin // RR + // Lock the mutex + lock = 1'b1; + + // Request + len_N = req_in.req.len; + vaddr_N = req_in.req.vaddr; + sync_N = req_in.req.sync; + ctl_N = req_in.req.ctl; + strm_N = req_in.req.stream; + dest_N = req_in.req.dest; + end + end + + ST_MUTEX: + lock = 1'b1; + + ST_CHECK: +`ifdef EN_STRM + `ifdef EN_DDR + if(hitS || hitL) begin + if(strm_C) + unlock_N = 1'b1; + else + unlock_N = (isr_C || sync_C) ? 1'b0 : 1'b1; + end + `else + if(hitS || hitL) begin + unlock_N = 1'b1; + end + `endif +`else + if(hitS || hitL) begin + unlock_N = (isr_C || sync_C) ? 
1'b0 : 1'b1; + end +`endif + else begin + miss_N = 1'b1; + vaddr_miss_N = vaddr_C; + len_miss_N = len_C; + isr_N = 1'b1; + end + + ST_HIT_LARGE: begin + data_host_N = lTlb.data[hitL_host_idx]; +`ifdef EN_DDR + data_card_N = lTlb.data[hitL_card_idx]; +`endif + end + + ST_HIT_SMALL: begin + data_host_N = sTlb.data[hitS_host_idx]; +`ifdef EN_DDR + data_card_N = sTlb.data[hitS_card_idx]; +`endif + end + + ST_CALC_LARGE: begin + paddr_host_N = {data_host_C[PHY_L_BITS-1:0], vaddr_C[PG_L_BITS-1:0]}; +`ifdef EN_DDR + paddr_card_N = {data_card_C[PHY_L_BITS-1:0], vaddr_C[PG_L_BITS-1:0]}; +`endif + if(len_C + vaddr_C[PG_L_BITS-1:0] > PG_L_SIZE) begin + plen_N = PG_L_SIZE - vaddr_C[PG_L_BITS-1:0]; + len_N = len_C - (PG_L_SIZE - vaddr_C[PG_L_BITS-1:0]); + vaddr_N += PG_L_SIZE - vaddr_C[PG_L_BITS-1:0]; + end + else begin + plen_N = len_C; + len_N = 0; + end + end + + ST_CALC_SMALL: begin + paddr_host_N = {data_host_C[PHY_S_BITS-1:0], vaddr_C[PG_S_BITS-1:0]}; +`ifdef EN_DDR + paddr_card_N = {data_card_C[PHY_S_BITS-1:0], vaddr_C[PG_S_BITS-1:0]}; +`endif + if(len_C + vaddr_C[PG_S_BITS-1:0] > PG_S_SIZE) begin + plen_N = PG_S_SIZE - vaddr_C[PG_S_BITS-1:0]; + len_N = len_C - (PG_S_SIZE - vaddr_C[PG_S_BITS-1:0]); + vaddr_N += PG_S_SIZE - vaddr_C[PG_S_BITS-1:0]; + end + else begin + plen_N = len_C; + len_N = 0; + end + end + +`ifdef EN_STRM + ST_HOST_SEND: begin + HDMA.valid = HDMA.ready; + HDMA.req.ctl = HDMA.valid && !len_C && ctl_C; + end +`endif + +`ifdef EN_DDR + ST_CARD_SEND: begin + DDMA.valid = DDMA.ready; + DDMA.req.ctl = DDMA.valid && !len_C && ctl_C; + end + + ST_SYNC_SEND: begin + SDMA.valid = SDMA.ready; + SDMA.req.ctl = SDMA.valid && !len_C && ctl_C; + unlock_N = SDMA.valid && !len_C; + end + + ST_ISR_SEND: begin + IDMA.valid = IDMA.ready; + IDMA.req.ctl = IDMA.valid && !len_C; + IDMA.req.isr = 1'b1; + unlock_N = IDMA.valid && !len_C; + end + + ST_ISR_WAIT: begin + vaddr_N = vaddr_miss_C; + len_N = len_miss_C; + isr_N = 1'b0; + lock = IDMA.done && IDMA.isr_return; + end 
+`endif + + default: ; + + endcase // state_C +end + +// Hit/Miss combinational logic +always_comb begin + + hitL = 1'b0; + hitS = 1'b0; + + hitL_host_idx = 0; + hitS_host_idx = 0; + + tag_cmp_host_s = 0; + tag_cmp_host_l = 0; + +`ifdef EN_DDR + hitL_card_idx = 0; + hitS_card_idx = 0; + + tag_cmp_card_s = 0; + tag_cmp_card_l = 0; +`endif + + // Small pages + for (int i = 0; i < N_S_ASSOC; i++) begin + // tag cmp host + tag_cmp_host_s[i] = + (sTlb.data[i][TAG_S_BITS+PHY_S_BITS-1:PHY_S_BITS] == vaddr_C[VADDR_BITS-1:HASH_S_BITS+PG_S_BITS]) && // tag hit + sTlb.data[i][TLB_DATA_BITS-1] && // valid + ~sTlb.data[i][TLB_DATA_BITS-2]; // host hit + + if(tag_cmp_host_s[i]) begin + hitS = 1'b1; + hitS_host_idx = i; + end + +`ifdef EN_DDR + // tag cmp card + tag_cmp_card_s[i] = + (sTlb.data[i][TAG_S_BITS+PHY_S_BITS-1:PHY_S_BITS] == vaddr_C[VADDR_BITS-1:HASH_S_BITS+PG_S_BITS]) && // tag hit + sTlb.data[i][TLB_DATA_BITS-1] && // valid + sTlb.data[i][TLB_DATA_BITS-2]; // card hit + + if(tag_cmp_card_s[i]) begin + hitS = 1'b1; + hitS_card_idx = i; + end +`endif + + end + // Large pages + for (int i = 0; i < N_L_ASSOC; i++) begin + // tag cmp host + tag_cmp_host_l[i] = + (lTlb.data[i][TAG_L_BITS+PHY_L_BITS-1:PHY_L_BITS] == vaddr_C[VADDR_BITS-1:HASH_L_BITS+PG_L_BITS]) && // tag hit + lTlb.data[i][TLB_DATA_BITS-1] && // valid + ~lTlb.data[i][TLB_DATA_BITS-2]; // host hit + + if(tag_cmp_host_l[i]) begin + hitL = 1'b1; + hitL_host_idx = i; + end + +`ifdef EN_DDR + // tag cmp card + tag_cmp_card_l[i] = + (lTlb.data[i][TAG_L_BITS+PHY_L_BITS-1:PHY_L_BITS] == vaddr_C[VADDR_BITS-1:HASH_L_BITS+PG_L_BITS]) && // tag hit + lTlb.data[i][TLB_DATA_BITS-1] && // valid + lTlb.data[i][TLB_DATA_BITS-2]; // card hit + + if(tag_cmp_card_l[i]) begin + hitL = 1'b1; + hitL_card_idx = i; + end +`endif + + end +end + +// ILA ****************************************************************** +`ifdef DEBUG_TLB_FSM_WR +if(ID_REG == 0) begin + logic [15:0] cnt_req_in; + + always @( posedge aclk ) begin + if ( 
import lynxTypes::*;

/**
 * TLB IDMA request arbitration between the read and the write channel.
 *
 * Exactly one of the two slave-side channels is granted at any time, selected
 * by `mutex` (1: write channel, 0: read channel). The granted channel's
 * request is forwarded to `idma`; the other channel sees `ready` low.
 *
 * For every accepted request that has `ctl` set, a 2-bit tag is pushed into a
 * sequence queue:
 *   bit 0 - issuing channel (0: rd, 1: wr)
 *   bit 1 - value of `req.isr` of the issuing request
 * The tag at the head of the queue steers `idma.done` back to the channel
 * that issued the request and provides `isr_return` for both channels.
 *
 * @param RDWR  Not referenced inside this module.
 *
 * @port mutex    Channel select (1: wr_idma granted, 0: rd_idma granted).
 * @port rd_idma  Read channel requests (slave side).
 * @port wr_idma  Write channel requests (slave side).
 * @port idma     Arbitrated request output (master side).
 */
module tlb_idma_arb #(
    parameter integer RDWR = 0
) (
    input  logic        aclk,
    input  logic        aresetn,

    input  logic        mutex,

    dmaIsrIntf.s        rd_idma,
    dmaIsrIntf.s        wr_idma,
    dmaIsrIntf.m        idma
);

// IDMA
logic sync_seq_snk_ready;
logic sync_seq_snk_valid;
logic [1:0] sync_seq_snk_data; // 1: ISR return, 0: rd/wr
logic [1:0] sync_seq_src_data;

// Sequence queue IDMA
// NOTE(review): `idma.done` is wired to the queue's `val_src` port, presumably
// so that one tag is consumed per completion - confirm against the queue module.
queue #(
    .QTYPE(logic [1:0])
) inst_seq_que_idma (
    .aclk(aclk),
    .aresetn(aresetn),
    .val_snk(sync_seq_snk_valid),
    .rdy_snk(sync_seq_snk_ready),
    .data_snk(sync_seq_snk_data),
    .val_src(idma.done),
    .rdy_src(),
    .data_src(sync_seq_src_data)
);

always_comb begin
    // Completion routing: tag bit 0 tells which channel issued the request
    rd_idma.done = idma.done && ~sync_seq_src_data[0];
    wr_idma.done = idma.done && sync_seq_src_data[0];

    // ISR return flag is taken from tag bit 1 for both channels
    rd_idma.isr_return = sync_seq_src_data[1];
    wr_idma.isr_return = sync_seq_src_data[1];

    // Reserved request bits are tied off (previously left undriven)
    idma.req.rsrvd = 0;

    if(mutex) begin // mutex[1]: write channel granted
        // A request is only accepted when the tag queue can take an entry
        wr_idma.ready = idma.ready && sync_seq_snk_ready;
        rd_idma.ready = 1'b0;

        // Push a tag only for requests carrying the ctl flag
        sync_seq_snk_valid = wr_idma.valid && wr_idma.ready && wr_idma.req.ctl;
        sync_seq_snk_data = {wr_idma.req.isr, 1'b1};

        idma.valid = wr_idma.valid && wr_idma.ready;
        idma.req.paddr_host = wr_idma.req.paddr_host;
        idma.req.paddr_card = wr_idma.req.paddr_card;
        idma.req.len = wr_idma.req.len;
        idma.req.ctl = wr_idma.req.ctl;
        idma.req.dest = wr_idma.req.dest; // previously left undriven
        idma.req.isr = 1'b0; // ISR flag is carried by the tag queue, not the request
    end
    else begin // read channel granted
        rd_idma.ready = idma.ready && sync_seq_snk_ready;
        wr_idma.ready = 1'b0;

        sync_seq_snk_valid = rd_idma.valid && rd_idma.ready && rd_idma.req.ctl;
        sync_seq_snk_data = {rd_idma.req.isr, 1'b0};

        idma.valid = rd_idma.valid && rd_idma.ready;
        idma.req.paddr_host = rd_idma.req.paddr_host;
        idma.req.paddr_card = rd_idma.req.paddr_card;
        idma.req.len = rd_idma.req.len;
        idma.req.ctl = rd_idma.req.ctl;
        idma.req.dest = rd_idma.req.dest; // previously left undriven
        idma.req.isr = 1'b0; // ISR flag is carried by the tag queue, not the request
    end
end

endmodule
/**
 *  TLB region top
 *
 * Per-region TLB wrapper. It instantiates, for one region (ID_REG):
 *   - the large- and small-page TLBs (tlb_slave), shared by read and write,
 *   - the config slave (cnfg_slave / cnfg_slave_avx) that produces the
 *     rd/wr requests and the decouple / page-fault IRQ outputs,
 *   - one request parser + queue per direction,
 *   - the read and write TLB FSMs, which share the TLBs through a mutex,
 *   - queues and credit modules on the host (EN_STRM) and card (EN_DDR)
 *     DMA request paths.
 *
 * Request path per direction, as wired below:
 *   cnfg_slave -> tlb_parser -> req_queue -> tlb_fsm_{rd,wr}
 *              -> dma_req_queue -> tlb_credits_{rd,wr} -> dma_req_queue -> port
 */

import lynxTypes::*;

module tlb_region_top #(
    parameter integer                   ID_REG = 0
) (
    input logic                         aclk,
    input logic                         aresetn,

    // AXI tlb control
    AXI4L.s                             axi_ctrl_lTlb,
    AXI4L.s                             axi_ctrl_sTlb,

`ifdef EN_AVX
    // AXI config
    AXI4.s                              axim_ctrl_cnfg,
`else
    // AXIL Config
    AXI4L.s                             axi_ctrl_cnfg,
`endif

`ifdef EN_BPSS
    // Requests user
    reqIntf.s                           rd_req_user,
    reqIntf.s                           wr_req_user,
`endif

`ifdef EN_FV
    // FV request
    metaIntf.m                          rdma_req,
`endif

`ifdef EN_STRM
    // Stream DMAs
    dmaIntf.m                           rdHDMA,
    dmaIntf.m                           wrHDMA,

    // Credits
    input  logic                        rxfer_host,
    input  logic                        wxfer_host,
    output logic [3:0]                  rd_dest_host,
`endif

`ifdef EN_DDR
    // Card DMAs
    dmaIntf.m                           rdDDMA,
    dmaIntf.m                           wrDDMA,
    dmaIsrIntf.m                        IDMA,
    dmaIsrIntf.m                        SDMA,

    // Credits
    input  logic                        rxfer_card,
    input  logic                        wxfer_card,
    output logic [3:0]                  rd_dest_card,
`endif

    // Decoupling
    output logic                        decouple,

    // Page fault IRQ
    output logic                        pf_irq
);

// -- Decl -----------------------------------------------------------------------------------
// -------------------------------------------------------------------------------------------
// Tlb interfaces
// rd_* / wr_* are the per-FSM views; lTlb / sTlb are the shared physical TLBs.
tlbIntf #(.N_ASSOC(N_L_ASSOC)) rd_lTlb ();
tlbIntf #(.N_ASSOC(N_S_ASSOC)) rd_sTlb ();
tlbIntf #(.N_ASSOC(N_L_ASSOC)) wr_lTlb ();
tlbIntf #(.N_ASSOC(N_S_ASSOC)) wr_sTlb ();
tlbIntf #(.N_ASSOC(N_L_ASSOC)) lTlb ();
tlbIntf #(.N_ASSOC(N_S_ASSOC)) sTlb ();

// Config interfaces
cnfgIntf rd_cnfg ();
cnfgIntf wr_cnfg ();

// Request interfaces
reqIntf rd_req ();
reqIntf wr_req ();

// Mutex
// Encoding: mutex[0] = 1 -> free; mutex[0] = 0 -> locked, with mutex[1]
// identifying the owner (0: read FSM, 1: write FSM).
logic [1:0] mutex;
logic rd_lock, wr_lock;
logic rd_unlock, wr_unlock;

// ----------------------------------------------------------------------------------------
// Mutex
// ----------------------------------------------------------------------------------------
// Comes out of reset free (2'b01). When free, a read lock request takes
// priority over a write lock request issued in the same cycle.
always_ff @(posedge aclk or negedge aresetn) begin
    if(aresetn == 1'b0) begin
        mutex <= 2'b01;
    end else begin
        if(mutex[0] == 1'b1) begin // free
            if(rd_lock)
                mutex <= 2'b00;
            else if(wr_lock)
                mutex <= 2'b10;
        end
        else begin // locked
            // NOTE(review): the read unlock is qualified by ownership
            // (mutex[1] == 0) but the write unlock is not, so a wr_unlock
            // pulse releases the mutex even while the read FSM owns it.
            // Presumably wr_unlock is never asserted in that situation -
            // confirm against tlb_fsm_wr before relying on it.
            if((mutex[1] == 1'b0) && rd_unlock)
                mutex <= 2'b01;
            else if (wr_unlock)
                mutex <= 2'b01;
        end
    end
end

// ----------------------------------------------------------------------------------------
// TLB
// ----------------------------------------------------------------------------------------
// TLB read data is broadcast to both FSMs; the lookup address comes from the
// write FSM when mutex[1] is set and from the read FSM otherwise.
assign rd_lTlb.data = lTlb.data;
assign wr_lTlb.data = lTlb.data;
assign rd_sTlb.data = sTlb.data;
assign wr_sTlb.data = sTlb.data;
assign lTlb.addr = mutex[1] ? wr_lTlb.addr : rd_lTlb.addr;
assign sTlb.addr = mutex[1] ? wr_sTlb.addr : rd_sTlb.addr;

// TLB 2M
tlb_slave #(
    .TLB_ORDER(TLB_L_ORDER),
    .PG_BITS(PG_L_BITS),
    .N_ASSOC(N_L_ASSOC)
) inst_lTlb (
    .aclk(aclk),
    .aresetn(aresetn),
    .axi_ctrl(axi_ctrl_lTlb),
    .TLB(lTlb)
);

// TLB 4K
tlb_slave #(
    .TLB_ORDER(TLB_S_ORDER),
    .PG_BITS(PG_S_BITS),
    .N_ASSOC(N_S_ASSOC)
) inst_sTlb (
    .aclk(aclk),
    .aresetn(aresetn),
    .axi_ctrl(axi_ctrl_sTlb),
    .TLB(sTlb)
);

// ----------------------------------------------------------------------------------------
// Config slave
// ----------------------------------------------------------------------------------------
// The module type and its control port depend on EN_AVX; the remaining
// connections are common to both variants.
`ifdef EN_AVX
    cnfg_slave_avx #(.ID_REG(ID_REG)) inst_cnfg_slave (
`else
    cnfg_slave #(.ID_REG(ID_REG)) inst_cnfg_slave (
`endif
        .aclk(aclk),
        .aresetn(aresetn),
`ifdef EN_AVX
        .axim_ctrl(axim_ctrl_cnfg),
`else
        .axi_ctrl(axi_ctrl_cnfg),
`endif
`ifdef EN_BPSS
        .rd_req_user(rd_req_user),
        .wr_req_user(wr_req_user),
`endif
`ifdef EN_FV
        .rdma_req(rdma_req),
`endif
        .rd_cnfg(rd_cnfg),
        .wr_cnfg(wr_cnfg),
        .rd_req(rd_req),
        .wr_req(wr_req),
        .decouple(decouple),
        .pf_irq(pf_irq)
    );

// ----------------------------------------------------------------------------------------
// Parsing
// ----------------------------------------------------------------------------------------
reqIntf rd_req_parsed ();
reqIntf wr_req_parsed ();
reqIntf rd_req_parsed_q ();
reqIntf wr_req_parsed_q ();

tlb_parser inst_rd_parser (.aclk(aclk), .aresetn(aresetn), .req_in(rd_req), .req_out(rd_req_parsed));
tlb_parser inst_wr_parser (.aclk(aclk), .aresetn(aresetn), .req_in(wr_req), .req_out(wr_req_parsed));

// Queueing
req_queue inst_rd_q_parser (.aclk(aclk), .aresetn(aresetn), .req_in(rd_req_parsed), .req_out(rd_req_parsed_q));
req_queue inst_wr_q_parser (.aclk(aclk), .aresetn(aresetn), .req_in(wr_req_parsed), .req_out(wr_req_parsed_q));

// ----------------------------------------------------------------------------------------
// FSM
// ----------------------------------------------------------------------------------------
// Naming of the internal DMA interfaces: *_fsm is the FSM output, *_fsm_q the
// same after a queue, *_cred the output of the credit module.
`ifdef EN_STRM
    // FSM
    dmaIntf rdHDMA_fsm ();
    dmaIntf wrHDMA_fsm ();

    dmaIntf rdHDMA_fsm_q ();
    dmaIntf wrHDMA_fsm_q ();

    // Credits
    dmaIntf rdHDMA_cred ();
    dmaIntf wrHDMA_cred ();
`endif

`ifdef EN_DDR
    dmaIntf rdDDMA_fsm ();
    dmaIntf wrDDMA_fsm ();
    dmaIsrIntf rdIDMA_fsm ();
    dmaIsrIntf wrIDMA_fsm ();
    dmaIsrIntf IDMA_fsm ();
    dmaIsrIntf SDMA_fsm ();

    dmaIntf rdDDMA_fsm_q ();
    dmaIntf wrDDMA_fsm_q ();

    // Credits
    dmaIntf rdDDMA_cred ();
    dmaIntf wrDDMA_cred ();
`endif

// TLB rd FSM
tlb_fsm_rd #(
    .ID_REG(ID_REG)
) inst_fsm_rd (
    .aclk(aclk),
    .aresetn(aresetn),
    .lTlb(rd_lTlb),
    .sTlb(rd_sTlb),
    .cnfg(rd_cnfg),
    .req_in(rd_req_parsed_q),
`ifdef EN_STRM
    .HDMA(rdHDMA_fsm),
`endif
`ifdef EN_DDR
    .DDMA(rdDDMA_fsm),
    .IDMA(rdIDMA_fsm),
`endif
    .lock(rd_lock),
    .unlock(rd_unlock),
    .mutex(mutex)
);

// TLB wr FSM
// Unlike the read FSM, the write FSM additionally drives the SDMA interface.
tlb_fsm_wr #(
    .ID_REG(ID_REG)
) inst_fsm_wr (
    .aclk(aclk),
    .aresetn(aresetn),
    .lTlb(wr_lTlb),
    .sTlb(wr_sTlb),
    .cnfg(wr_cnfg),
    .req_in(wr_req_parsed_q),
`ifdef EN_STRM
    .HDMA(wrHDMA_fsm),
`endif
`ifdef EN_DDR
    .DDMA(wrDDMA_fsm),
    .IDMA(wrIDMA_fsm),
    .SDMA(SDMA_fsm),
`endif
    .lock(wr_lock),
    .unlock(wr_unlock),
    .mutex(mutex)
);

// Queueing
`ifdef EN_STRM
    // HDMA
    dma_req_queue inst_rd_q_fsm_hdma (.aclk(aclk), .aresetn(aresetn), .req_in(rdHDMA_fsm), .req_out(rdHDMA_fsm_q));
    dma_req_queue inst_wr_q_fsm_hdma (.aclk(aclk), .aresetn(aresetn), .req_in(wrHDMA_fsm), .req_out(wrHDMA_fsm_q));
`endif

`ifdef EN_DDR
    // IDMA arbitration
    // Read and write IDMA requests are merged into one stream, selected by the mutex owner bit.
    tlb_idma_arb inst_idma_arb (.aclk(aclk), .aresetn(aresetn), .mutex(mutex[1]), .rd_idma(rdIDMA_fsm), .wr_idma(wrIDMA_fsm), .idma(IDMA_fsm));

    // DDMA
    dma_req_queue inst_rd_q_fsm_ddma (.aclk(aclk), .aresetn(aresetn), .req_in(rdDDMA_fsm), .req_out(rdDDMA_fsm_q));
    dma_req_queue inst_wr_q_fsm_ddma (.aclk(aclk), .aresetn(aresetn), .req_in(wrDDMA_fsm), .req_out(wrDDMA_fsm_q));

    // IDMA
    dma_isr_req_queue inst_q_fsm_idma (.aclk(aclk), .aresetn(aresetn), .req_in(IDMA_fsm), .req_out(IDMA));

    // SDMA
    dma_isr_req_queue inst_q_fsm_sdma (.aclk(aclk), .aresetn(aresetn), .req_in(SDMA_fsm), .req_out(SDMA));
`endif

// ----------------------------------------------------------------------------------------
// Credits and output
// ----------------------------------------------------------------------------------------
`ifdef EN_STRM
    // HDMA
    tlb_credits_rd #(.ID_REG(ID_REG)) inst_rd_cred_hdma (.aclk(aclk), .aresetn(aresetn), .req_in(rdHDMA_fsm_q), .req_out(rdHDMA_cred), .rxfer(rxfer_host), .rd_dest(rd_dest_host));
    tlb_credits_wr #(.ID_REG(ID_REG)) inst_wr_cred_hdma (.aclk(aclk), .aresetn(aresetn), .req_in(wrHDMA_fsm_q), .req_out(wrHDMA_cred), .wxfer(wxfer_host));

    // Queueing
    dma_req_queue inst_rd_q_cred_hdma (.aclk(aclk), .aresetn(aresetn), .req_in(rdHDMA_cred), .req_out(rdHDMA));
    dma_req_queue inst_wr_q_cred_hdma (.aclk(aclk), .aresetn(aresetn), .req_in(wrHDMA_cred), .req_out(wrHDMA));
`endif

`ifdef EN_DDR
    // DDMA
    // The card-side credit modules are parameterized with the combined width of all DDR channels.
    tlb_credits_rd #(.ID_REG(ID_REG), .CRED_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) inst_rd_cred_ddma (.aclk(aclk), .aresetn(aresetn), .req_in(rdDDMA_fsm_q), .req_out(rdDDMA_cred), .rxfer(rxfer_card), .rd_dest(rd_dest_card));
    tlb_credits_wr #(.ID_REG(ID_REG), .CRED_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) inst_wr_cred_ddma (.aclk(aclk), .aresetn(aresetn), .req_in(wrDDMA_fsm_q), .req_out(wrDDMA_cred), .wxfer(wxfer_card));

    // Queueing
    dma_req_queue inst_rd_q_cred_ddma (.aclk(aclk), .aresetn(aresetn), .req_in(rdDDMA_cred), .req_out(rdDDMA));
    dma_req_queue inst_wr_q_cred_ddma (.aclk(aclk), .aresetn(aresetn), .req_in(wrDDMA_cred), .req_out(wrDDMA));
`endif

endmodule // tlb_region_top
muxUserIntf.s mux_card_rd_user, + muxUserIntf.s mux_card_wr_user, + `endif +`endif + + // Decoupling + output logic [N_REGIONS-1:0] decouple, + + // Page fault IRQ + output logic [N_REGIONS-1:0] pf_irq +); + +// +`ifdef EN_STRM + dmaIntf rdHDMA_arb [N_REGIONS] (); + dmaIntf wrHDMA_arb [N_REGIONS] (); +`endif + +`ifdef EN_DDR + dmaIntf rdDDMA_arb [N_REGIONS] (); + dmaIntf wrDDMA_arb [N_REGIONS] (); + + dmaIsrIntf IDMA_arb [N_REGIONS] (); + dmaIsrIntf SDMA_arb [N_REGIONS] (); +`endif + +// Instantiate region TLBs +for(genvar i = 0; i < N_REGIONS; i++) begin + + tlb_region_top #(.ID_REG(ID_DYN*N_REGIONS+i)) inst_reg_top ( + .aclk(aclk), + .aresetn(aresetn), + .axi_ctrl_sTlb(axi_ctrl_sTlb[i]), + .axi_ctrl_lTlb(axi_ctrl_lTlb[i]), + `ifdef EN_AVX + .axim_ctrl_cnfg(axim_ctrl_cnfg[i]), + `else + .axi_ctrl_cnfg(axi_ctrl_cnfg[i]), + `endif + `ifdef EN_BPSS + .rd_req_user(rd_req_user[i]), + .wr_req_user(wr_req_user[i]), + `endif + `ifdef EN_FV + .rdma_req(rdma_req[i]), + `endif + `ifdef EN_STRM + .rdHDMA(rdHDMA_arb[i]), + .wrHDMA(wrHDMA_arb[i]), + .rxfer_host(rxfer_host[i]), + .wxfer_host(wxfer_host[i]), + .rd_dest_host(rd_dest_host[i]), + `endif + `ifdef EN_DDR + .rdDDMA(rdDDMA_arb[i]), + .wrDDMA(wrDDMA_arb[i]), + .IDMA(IDMA_arb[i]), + .SDMA(SDMA_arb[i]), + .rxfer_card(rxfer_card[i]), + .wxfer_card(wxfer_card[i]), + .rd_dest_card(rd_dest_card[i]), + `endif + .decouple(decouple[i]), + .pf_irq(pf_irq[i]) + ); + +end + +// Instantiate arbitration +`ifdef MULT_REGIONS + + // Arbiters + `ifdef EN_STRM + tlb_arbiter inst_hdma_arb_rd (.aclk(aclk), .aresetn(aresetn), .req_snk(rdHDMA_arb), .req_src(rdXDMA_host), .mux_user(mux_host_rd_user)); + tlb_arbiter inst_hdma_arb_wr (.aclk(aclk), .aresetn(aresetn), .req_snk(wrHDMA_arb), .req_src(wrXDMA_host), .mux_user(mux_host_wr_user)); + `endif + + `ifdef EN_DDR + tlb_arbiter inst_ddma_arb_rd (.aclk(aclk), .aresetn(aresetn), .req_snk(rdDDMA_arb), .req_src(rdCDMA_card), .mux_user(mux_card_rd_user)); + tlb_arbiter inst_ddma_arb_wr 
import lynxTypes::*;

/**
 * CDMA multiplexer
 *
 * Fans one DMA request out to all N_DDR_CHAN channels. When more than one
 * channel is enabled, the physical address and the length of each per-channel
 * request are the original values shifted right by $clog2(N_DDR_CHAN) bits;
 * with a single channel they are passed through unchanged.
 *
 * A request is accepted only when every channel is ready, and is then issued
 * to all channels in the same cycle. Per-channel completions are collected in
 * counters; `CDMA.done` is asserted while every channel has at least one
 * outstanding completion recorded, consuming one from each.
 *
 * @port CDMA      Incoming (regular) request.
 * @port CDMA_adj  Outgoing per-channel (adjusted) requests.
 */
module axis_mux_cdma (
    input  logic                            aclk,
    input  logic                            aresetn,

    dmaIntf.s                               CDMA,                   // Regular
    dmaIntf.m                               CDMA_adj [N_DDR_CHAN]   // Adjusted
);

localparam integer N_DDR_CHAN_BITS = $clog2(N_DDR_CHAN);

logic [N_DDR_CHAN-1:0] cdma_ready_adj;
logic [N_DDR_CHAN-1:0] cdma_done_adj;

logic [N_DDR_CHAN-1:0][15:0] cdma_done_cnt;
logic cdma_done;

for(genvar i = 0; i < N_DDR_CHAN; i++) begin
    if(N_DDR_CHAN > 1) begin
        // Drop the low channel-select bits and zero-extend at the top.
        // Guarded by N_DDR_CHAN > 1 because a zero-width replication is illegal.
        assign CDMA_adj[i].req.paddr = {{N_DDR_CHAN_BITS{1'b0}}, CDMA.req.paddr[N_DDR_CHAN_BITS+:PADDR_BITS-N_DDR_CHAN_BITS]};
        assign CDMA_adj[i].req.len = {{N_DDR_CHAN_BITS{1'b0}}, CDMA.req.len[N_DDR_CHAN_BITS+:LEN_BITS-N_DDR_CHAN_BITS]};
    end
    else begin
        assign CDMA_adj[i].req.paddr = CDMA.req.paddr;
        assign CDMA_adj[i].req.len = CDMA.req.len;
    end

    assign CDMA_adj[i].req.ctl = CDMA.req.ctl;
    assign CDMA_adj[i].req.dest = CDMA.req.dest; // previously left undriven
    assign CDMA_adj[i].req.rsrvd = 0;
    // Issue to every channel only in the cycle the request is accepted
    assign CDMA_adj[i].valid = CDMA.valid & CDMA.ready;

    assign cdma_ready_adj[i] = CDMA_adj[i].ready;
    assign cdma_done_adj[i] = CDMA_adj[i].done;
end

// Ready
assign CDMA.ready = &cdma_ready_adj;

// Done signal
// Asserted while every per-channel counter is non-zero.
always_comb begin
    cdma_done = 1'b1;

    for(int i = 0; i < N_DDR_CHAN; i++) begin
        if(cdma_done_cnt[i] == 0) cdma_done = 1'b0;
    end
end

// Done counters
always_ff @(posedge aclk, negedge aresetn) begin
    if(~aresetn) begin
        cdma_done_cnt <= 0;
    end
    else begin
        for(int i = 0; i < N_DDR_CHAN; i++) begin
            // Counter: +1 on a channel completion, -1 when a joint completion
            // is reported upstream; both in the same cycle cancel out.
            case({cdma_done, cdma_done_adj[i]})
                2'b10:   cdma_done_cnt[i] <= cdma_done_cnt[i] - 1;
                2'b01:   cdma_done_cnt[i] <= cdma_done_cnt[i] + 1;
                default: cdma_done_cnt[i] <= cdma_done_cnt[i];
            endcase
        end
    end
end

assign CDMA.done = cdma_done;
/*
ila_mux_cdma inst_ila_cdma (
    .clk(aclk),
    .probe0(CDMA.valid),
    .probe1(CDMA.ready),
    .probe2(CDMA.req.paddr),
    .probe3(CDMA.req.len),
    .probe4(CDMA.req.ctl),
    .probe5(CDMA_adj[0].valid),
    .probe6(CDMA_adj[0].ready),
    .probe7(CDMA_adj[1].valid),
    .probe8(CDMA_adj[1].ready),
    .probe9(CDMA_adj[0].req.paddr),
    .probe10(CDMA_adj[0].req.len),
    .probe11(CDMA_adj[1].req.paddr),
    .probe12(CDMA_adj[1].req.len),
    .probe13(CDMA.done),
    .probe14(CDMA_adj[0].done),
    .probe15(CDMA_adj[1].done)
);
*/
endmodule
axis_fifo_sink_tvalid; +logic [N_DDR_CHAN-1:0] axis_fifo_sink_tready; +logic [N_DDR_CHAN-1:0][AXI_DATA_BITS-1:0] axis_fifo_sink_tdata; +logic [N_DDR_CHAN-1:0][AXI_DATA_BITS/8-1:0] axis_fifo_sink_tkeep; +logic [N_DDR_CHAN-1:0] axis_fifo_sink_tlast; + +logic [N_DDR_CHAN-1:0] axis_fifo_src_tvalid; +logic [N_DDR_CHAN-1:0] axis_fifo_src_tready; +logic [N_DDR_CHAN-1:0][AXI_DATA_BITS-1:0] axis_fifo_src_tdata; +logic [N_DDR_CHAN-1:0][AXI_DATA_BITS/8-1:0] axis_fifo_src_tkeep; +logic [N_DDR_CHAN-1:0] axis_fifo_src_tlast; + +// Assign +assign axis_in_host_tvalid = axis_in_host.tvalid; +assign axis_in_host_tdata = axis_in_host.tdata; +assign axis_in_host_tkeep = axis_in_host.tkeep; +assign axis_in_host_tlast = axis_in_host.tlast; +assign axis_in_host.tready = axis_in_host_tready; + +assign axis_out_host.tvalid = axis_out_host_tvalid; +assign axis_out_host.tdata = axis_out_host_tdata; +assign axis_out_host.tkeep = axis_out_host_tkeep; +assign axis_out_host.tlast = axis_out_host_tlast; +assign axis_out_host_tready = axis_out_host.tready; + +for(genvar i = 0; i < N_DDR_CHAN; i++) begin + axis_data_fifo_512 inst_fifo_ddr_sink_mux ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(axis_fifo_sink_tvalid[i]), + .s_axis_tready(axis_fifo_sink_tready[i]), + .s_axis_tdata(axis_fifo_sink_tdata[i]), + .s_axis_tkeep(axis_fifo_sink_tkeep[i]), + .s_axis_tlast(axis_fifo_sink_tlast[i]), + .m_axis_tvalid(axis_out_card[i].tvalid), + .m_axis_tready(axis_out_card[i].tready), + .m_axis_tdata(axis_out_card[i].tdata), + .m_axis_tkeep(axis_out_card[i].tkeep), + .m_axis_tlast(axis_out_card[i].tlast) + ); + + axis_data_fifo_512 inst_fifo_ddr_src_mux ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(axis_in_card[i].tvalid), + .s_axis_tready(axis_in_card[i].tready), + .s_axis_tdata(axis_in_card[i].tdata), + .s_axis_tkeep(axis_in_card[i].tkeep), + .s_axis_tlast(axis_in_card[i].tlast), + .m_axis_tvalid(axis_fifo_src_tvalid[i]), + .m_axis_tready(axis_fifo_src_tready[i]), + 
.m_axis_tdata(axis_fifo_src_tdata[i]), + .m_axis_tkeep(axis_fifo_src_tkeep[i]), + .m_axis_tlast(axis_fifo_src_tlast[i]) + ); +end + +// Mux +always_comb begin + // Sink + for(int i = 0; i < N_DDR_CHAN; i++) begin + axis_fifo_sink_tdata[i] = axis_in_host_tdata; + axis_fifo_sink_tkeep[i] = axis_in_host_tkeep; + axis_fifo_sink_tlast[i] = 1'b0; + axis_fifo_sink_tvalid[i] = (sel_sink_r == i) ? axis_in_host_tvalid : 1'b0; + end + axis_in_host_tready = axis_fifo_sink_tready[sel_sink_r]; + + // Src + for(int i = 0; i < N_DDR_CHAN; i++) begin + axis_fifo_src_tready[i] = (sel_src_r == i) ? axis_out_host_tready : 1'b0; + end + axis_out_host_tdata = axis_fifo_src_tdata[sel_src_r]; + axis_out_host_tkeep = axis_fifo_src_tkeep[sel_src_r]; + axis_out_host_tlast = 1'b0; + axis_out_host_tvalid = axis_fifo_src_tvalid[sel_src_r]; +end + +always_ff @(posedge aclk, negedge aresetn) begin + if (~aresetn) begin + sel_sink_r <= 0; + sel_src_r <= 0; + end + else begin + sel_sink_r <= (axis_in_host_tvalid & axis_in_host_tready) ? sel_sink_r + 1 : sel_sink_r; + sel_src_r <= (axis_out_host_tvalid & axis_out_host_tready) ? 
sel_src_r + 1 : sel_src_r; + end +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/mux/axis_mux_ddr_user.sv b/hw/hdl/mux/axis_mux_ddr_user.sv new file mode 100644 index 00000000..e454b68b --- /dev/null +++ b/hw/hdl/mux/axis_mux_ddr_user.sv @@ -0,0 +1,112 @@ +import lynxTypes::*; + +/** + * Multi channel data multiplexer - user signals + */ +module axis_mux_ddr_user ( + input logic aclk, + input logic aresetn, + + AXI4S.s axis_in_user, + AXI4S.m axis_out_user, + + AXI4S.m axis_out_card [N_DDR_CHAN], + AXI4S.s axis_in_card [N_DDR_CHAN] +); + +// ----------------------------------------------------------------------------------------------------------------------- +// interface loop issues => temp signals +// ----------------------------------------------------------------------------------------------------------------------- +logic axis_in_user_tvalid; +logic axis_in_user_tready; +logic [N_DDR_CHAN*AXI_DATA_BITS-1:0] axis_in_user_tdata; +logic [N_DDR_CHAN*AXI_DATA_BITS/8-1:0] axis_in_user_tkeep; +logic axis_in_user_tlast; + +logic axis_out_user_tvalid; +logic axis_out_user_tready; +logic [N_DDR_CHAN*AXI_DATA_BITS-1:0] axis_out_user_tdata; +logic [N_DDR_CHAN*AXI_DATA_BITS/8-1:0] axis_out_user_tkeep; +logic axis_out_user_tlast; + +logic [N_DDR_CHAN-1:0] axis_fifo_sink_tvalid; +logic [N_DDR_CHAN-1:0] axis_fifo_sink_tready; +logic [N_DDR_CHAN-1:0][AXI_DATA_BITS-1:0] axis_fifo_sink_tdata; +logic [N_DDR_CHAN-1:0][AXI_DATA_BITS/8-1:0] axis_fifo_sink_tkeep; +logic [N_DDR_CHAN-1:0] axis_fifo_sink_tlast; + +logic [N_DDR_CHAN-1:0] axis_fifo_src_tvalid; +logic [N_DDR_CHAN-1:0] axis_fifo_src_tready; +logic [N_DDR_CHAN-1:0][AXI_DATA_BITS-1:0] axis_fifo_src_tdata; +logic [N_DDR_CHAN-1:0][AXI_DATA_BITS/8-1:0] axis_fifo_src_tkeep; +logic [N_DDR_CHAN-1:0] axis_fifo_src_tlast; + +assign axis_in_user_tvalid = axis_in_user.tvalid; +assign axis_in_user_tkeep = axis_in_user.tkeep; +assign axis_in_user_tdata = axis_in_user.tdata; +assign axis_in_user_tlast = 
axis_in_user.tlast;
+assign axis_in_user.tready = axis_in_user_tready;
+
+assign axis_out_user.tvalid = axis_out_user_tvalid;
+assign axis_out_user.tdata = axis_out_user_tdata;
+assign axis_out_user.tkeep = axis_out_user_tkeep;
+assign axis_out_user.tlast = axis_out_user_tlast;
+assign axis_out_user_tready = axis_out_user.tready;
+
+// One pair of FIFOs per DDR channel:
+//  - inst_fifo_ddr_sink_mux buffers the slice of the user word that goes out to card channel i,
+//  - inst_fifo_ddr_src_mux buffers the data coming back from card channel i.
+// Both FIFOs run on aclk/aresetn; the card-side ports are wired straight to the interface arrays.
+for(genvar i = 0; i < N_DDR_CHAN; i++) begin
+    axis_data_fifo_512 inst_fifo_ddr_sink_mux (
+        .s_axis_aresetn(aresetn),
+        .s_axis_aclk(aclk),
+        .s_axis_tvalid(axis_fifo_sink_tvalid[i]),
+        .s_axis_tready(axis_fifo_sink_tready[i]),
+        .s_axis_tdata(axis_fifo_sink_tdata[i]),
+        .s_axis_tkeep(axis_fifo_sink_tkeep[i]),
+        .s_axis_tlast(axis_fifo_sink_tlast[i]),
+        .m_axis_tvalid(axis_out_card[i].tvalid),
+        .m_axis_tready(axis_out_card[i].tready),
+        .m_axis_tdata(axis_out_card[i].tdata),
+        .m_axis_tkeep(axis_out_card[i].tkeep),
+        .m_axis_tlast(axis_out_card[i].tlast)
+    );
+
+    axis_data_fifo_512 inst_fifo_ddr_src_mux (
+        .s_axis_aresetn(aresetn),
+        .s_axis_aclk(aclk),
+        .s_axis_tvalid(axis_in_card[i].tvalid),
+        .s_axis_tready(axis_in_card[i].tready),
+        .s_axis_tdata(axis_in_card[i].tdata),
+        .s_axis_tkeep(axis_in_card[i].tkeep),
+        .s_axis_tlast(axis_in_card[i].tlast),
+        .m_axis_tvalid(axis_fifo_src_tvalid[i]),
+        .m_axis_tready(axis_fifo_src_tready[i]),
+        .m_axis_tdata(axis_fifo_src_tdata[i]),
+        .m_axis_tkeep(axis_fifo_src_tkeep[i]),
+        .m_axis_tlast(axis_fifo_src_tlast[i])
+    );
+end
+
+// Mux
+// The wide user word (N_DDR_CHAN * AXI_DATA_BITS) is split into / assembled from
+// N_DDR_CHAN slices of AXI_DATA_BITS each; slice i belongs to channel i.
+always_comb begin
+    // Sink
+    // The user input is only ready when every per-channel sink FIFO can take a slice,
+    // so all FIFOs are written in the same cycle.
+    axis_in_user_tready = &axis_fifo_sink_tready;
+
+    for(int i = 0; i < N_DDR_CHAN; i++) begin
+        axis_fifo_sink_tdata[i] = axis_in_user_tdata[i*AXI_DATA_BITS+:AXI_DATA_BITS];
+        axis_fifo_sink_tkeep[i] = axis_in_user_tkeep[i*AXI_DATA_BITS/8+:AXI_DATA_BITS/8];
+        // tlast of the user stream is not forwarded to the card side; it is tied low here.
+        axis_fifo_sink_tlast[i] = 1'b0;
+        // Valid is gated with the combined ready so that either all FIFOs accept the beat or none does.
+        axis_fifo_sink_tvalid[i] = axis_in_user_tready & axis_in_user_tvalid;
+    end
+
+    // Src
+    // The output tlast is taken from the highest-numbered channel only, and the output
+    // is valid only when every per-channel source FIFO holds a slice.
+    axis_out_user_tlast = axis_fifo_src_tlast[N_DDR_CHAN-1];
+    axis_out_user_tvalid = &axis_fifo_src_tvalid;
+
+    for(int i = 0; i < N_DDR_CHAN; i++) begin
+        
axis_out_user_tdata[i*AXI_DATA_BITS+:AXI_DATA_BITS] = axis_fifo_src_tdata[i]; + axis_out_user_tkeep[i*AXI_DATA_BITS/8+:AXI_DATA_BITS/8] = axis_fifo_src_tkeep[i]; + + axis_fifo_src_tready[i] = axis_out_user_tvalid & axis_out_user_tready; + end +end + +endmodule diff --git a/hw/hdl/mux/axis_mux_user_sink.sv b/hw/hdl/mux/axis_mux_user_sink.sv new file mode 100644 index 00000000..3e7dd87a --- /dev/null +++ b/hw/hdl/mux/axis_mux_user_sink.sv @@ -0,0 +1,173 @@ +import lynxTypes::*; + +/** + * User multiplexer + */ +module axis_mux_user_sink #( + parameter integer MUX_DATA_BITS = AXI_DATA_BITS +) ( + input logic aclk, + input logic aresetn, + + muxUserIntf.m mux, + + AXI4S.s axis_in [N_REGIONS], + AXI4S.m axis_out +); + +// -- Constants +localparam integer BEAT_LOG_BITS = $clog2(MUX_DATA_BITS/8); +localparam integer N_ID = N_REGIONS; +localparam integer N_ID_BITS = N_REGIONS_BITS; + +// -- FSM +typedef enum logic[0:0] {ST_IDLE, ST_MUX} state_t; +logic [0:0] state_C, state_N; + +// -- Internal regs +logic [N_ID_BITS-1:0] id_C, id_N; +logic [LEN_BITS-BEAT_LOG_BITS:0] cnt_C, cnt_N; +logic [LEN_BITS-BEAT_LOG_BITS:0] n_beats_C, n_beats_N; + +// -- Internal signals +logic tr_done; + +// ----------------------------------------------------------------------------------------------------------------------- +// Mux +// ----------------------------------------------------------------------------------------------------------------------- +// -- interface loop issues => temp signals +logic [N_ID-1:0] axis_in_tvalid; +logic [N_ID-1:0] axis_in_tready; +logic [N_ID-1:0][MUX_DATA_BITS-1:0] axis_in_tdata; +logic [N_ID-1:0][MUX_DATA_BITS/8-1:0] axis_in_tkeep; +logic [N_ID-1:0] axis_in_tlast; + +logic axis_out_tvalid; +logic axis_out_tready; +logic [MUX_DATA_BITS-1:0] axis_out_tdata; +logic [MUX_DATA_BITS/8-1:0] axis_out_tkeep; +logic axis_out_tlast; + +for(genvar i = 0; i < N_ID; i++) begin + assign axis_in_tvalid[i] = axis_in[i].tvalid; + assign axis_in_tdata[i] = axis_in[i].tdata; + 
assign axis_in_tkeep[i] = axis_in[i].tkeep;
+    assign axis_in_tlast[i] = axis_in[i].tlast;
+    assign axis_in[i].tready = axis_in_tready[i];
+end
+
+assign axis_out.tvalid = axis_out_tvalid;
+assign axis_out.tdata = axis_out_tdata;
+assign axis_out.tkeep = axis_out_tkeep;
+assign axis_out.tlast = axis_out_tlast;
+assign axis_out_tready = axis_out.tready;
+
+// -- Mux
+// While a transfer is active (ST_MUX) the input selected by id_C is connected to the
+// single output; every other input is back-pressured. Outside ST_MUX nothing is
+// accepted and the output is held idle (valid low, data/keep/last zero).
+always_comb begin
+    for(int i = 0; i < N_ID; i++) begin
+        if(state_C == ST_MUX)
+            axis_in_tready[i] = (id_C == i) ? axis_out_tready : 1'b0;
+        else
+            axis_in_tready[i] = 1'b0;
+    end
+
+    // The id_C < N_ID guard keeps the array index in range if id_C can encode
+    // more values than there are inputs.
+    if(id_C < N_ID && state_C == ST_MUX) begin
+        axis_out_tdata = axis_in_tdata[id_C];
+        axis_out_tkeep = axis_in_tkeep[id_C];
+        axis_out_tlast = axis_in_tlast[id_C];
+        axis_out_tvalid = axis_in_tvalid[id_C];
+    end
+    else begin
+        axis_out_tdata = 0;
+        axis_out_tkeep = 0;
+        axis_out_tlast = 1'b0;
+        axis_out_tvalid = 1'b0;
+    end
+end
+
+// -----------------------------------------------------------------------------------------------------------------------
+// -- Memory subsystem
+// -----------------------------------------------------------------------------------------------------------------------
+// -- REG
+// State and datapath registers. All of them are cleared by the asynchronous,
+// active-low reset and otherwise take their *_N value on the rising clock edge.
+// The else branch is wrapped in begin/end on purpose: without it only state_C
+// would be covered by the else, and cnt_C/id_C/n_beats_C would also be
+// evaluated on the falling edge of aresetn.
+always_ff @(posedge aclk, negedge aresetn) begin: PROC_REG
+    if (aresetn == 1'b0) begin
+        state_C <= ST_IDLE;
+        cnt_C <= 0;
+        id_C <= 0;
+        n_beats_C <= 0;
+    end
+    else begin
+        state_C <= state_N;
+        cnt_C <= cnt_N;
+        id_C <= id_N;
+        n_beats_C <= n_beats_N;
+    end
+end
+
+// -- NSL
+// Next-state logic: leave ST_IDLE as soon as mux.ready is seen; in ST_MUX stay
+// until the current transfer is done, then either continue with the next
+// descriptor (mux.ready still high) or fall back to ST_IDLE.
+always_comb begin: NSL
+    state_N = state_C;
+
+    case(state_C)
+        ST_IDLE:
+            state_N = mux.ready ? ST_MUX : ST_IDLE;
+
+        ST_MUX:
+            state_N = tr_done ? (mux.ready ?
ST_MUX : ST_IDLE) : ST_MUX;
+
+    endcase // state_C
+end
+
+// -- DP
+// Datapath: latches the descriptor (id, length) offered on the mux interface and
+// counts the beats that are handed over on the output.
+// As used in this module, mux.ready is read as "a descriptor is available" and
+// mux.valid is driven high for one cycle when that descriptor is taken
+// (NOTE(review): the muxUserIntf definition is not visible here - confirm the naming).
+always_comb begin : DP
+    cnt_N = cnt_C;
+    id_N = id_C;
+    n_beats_N = n_beats_C;
+
+    // Transfer done
+    // Asserted on the accepted output beat for which cnt_C has reached n_beats_C.
+    // cnt_C starts at 0, so a descriptor with mux.len = L forwards L + 1 beats.
+    tr_done = (cnt_C == n_beats_C) && (axis_out_tvalid & axis_out_tready);
+
+    // Memory subsystem
+    mux.valid = 1'b0;
+
+    case(state_C)
+        ST_IDLE: begin
+            cnt_N = 0;
+            // Take the next descriptor, if one is offered.
+            if(mux.ready) begin
+                mux.valid = 1'b1;
+                id_N = mux.id;
+                n_beats_N = mux.len;
+            end
+        end
+
+        ST_MUX: begin
+            if(tr_done) begin
+                // Last beat of the current transfer: restart the counter and, if another
+                // descriptor is already waiting, take it without going through ST_IDLE.
+                cnt_N = 0;
+                if(mux.ready) begin
+                    mux.valid = 1'b1;
+                    id_N = mux.id;
+                    n_beats_N = mux.len;
+                end
+            end
+            else begin
+                // Count only beats that are actually transferred (valid and ready).
+                cnt_N = (axis_out_tvalid & axis_out_tready) ? cnt_C + 1 : cnt_C;
+            end
+        end
+
+    endcase
+end
+/*
+ila_2 inst_ila_2 (
+    .clk(aclk),
+    .probe0(mux.ready),
+    .probe1(mux.valid),
+    .probe2(mux.len),
+    .probe3(mux.id),
+    .probe4(cnt_C),
+    .probe5(state_C),
+    .probe6(n_beats_C),
+    .probe7(id_C),
+    .probe8(tr_done),
+    .probe9(axis_out_tvalid),
+    .probe10(axis_out_tready)
+);
+*/
+
+endmodule
\ No newline at end of file
diff --git a/hw/hdl/mux/axis_mux_user_src.sv b/hw/hdl/mux/axis_mux_user_src.sv
new file mode 100644
index 00000000..cc7d4486
--- /dev/null
+++ b/hw/hdl/mux/axis_mux_user_src.sv
@@ -0,0 +1,151 @@
+import lynxTypes::*;
+
+/**
+ * User demultiplexer
+ *
+ * Forwards the single input stream axis_in to one of the N_REGIONS output
+ * streams. The target output and the transfer length are taken from the
+ * descriptors offered on the mux interface.
+ */
+module axis_mux_user_src #(
+    parameter integer MUX_DATA_BITS = AXI_DATA_BITS
+) (
+    input  logic                            aclk,
+    input  logic                            aresetn,
+
+    muxUserIntf.m                           mux,
+
+    AXI4S.s                                 axis_in,
+    AXI4S.m                                 axis_out [N_REGIONS]
+);
+
+// -- Constants
+// log2 of the number of bytes per beat
+localparam integer BEAT_LOG_BITS = $clog2(MUX_DATA_BITS/8);
+localparam integer N_ID = N_REGIONS;
+localparam integer N_ID_BITS = N_REGIONS_BITS;
+
+// -- FSM
+typedef enum logic[0:0] {ST_IDLE, ST_MUX} state_t;
+logic [0:0] state_C, state_N;
+
+// -- Internal regs
+// id: selected output, cnt: beats transferred so far, n_beats: value latched from mux.len
+logic [N_ID_BITS-1:0] id_C, id_N;
+logic [LEN_BITS-BEAT_LOG_BITS:0] cnt_C, cnt_N;
+logic [LEN_BITS-BEAT_LOG_BITS:0] n_beats_C, n_beats_N;
+
+// -- Internal signals
+logic tr_done;
+
+// 
----------------------------------------------------------------------------------------------------------------------- +// -- Mux +// ----------------------------------------------------------------------------------------------------------------------- +// -- interface loop issues => temp signals +logic axis_in_tvalid; +logic axis_in_tready; +logic [MUX_DATA_BITS-1:0] axis_in_tdata; +logic [MUX_DATA_BITS/8-1:0] axis_in_tkeep; +logic axis_in_tlast; + +logic [N_ID-1:0] axis_out_tvalid; +logic [N_ID-1:0] axis_out_tready; +logic [N_ID-1:0][MUX_DATA_BITS-1:0] axis_out_tdata; +logic [N_ID-1:0][MUX_DATA_BITS/8-1:0] axis_out_tkeep; +logic [N_ID-1:0] axis_out_tlast; + +assign axis_in_tvalid = axis_in.tvalid; +assign axis_in_tdata = axis_in.tdata; +assign axis_in_tkeep = axis_in.tkeep; +assign axis_in_tlast = axis_in.tlast; +assign axis_in.tready = axis_in_tready; + +for(genvar i = 0; i < N_ID; i++) begin + assign axis_out[i].tvalid = axis_out_tvalid[i]; + assign axis_out[i].tdata = axis_out_tdata[i]; + assign axis_out[i].tkeep = axis_out_tkeep[i]; + assign axis_out[i].tlast = axis_out_tlast[i]; + assign axis_out_tready[i] = axis_out[i].tready; +end + +// -- Mux +always_comb begin + for(int i = 0; i < N_ID; i++) begin + axis_out_tdata[i] = axis_in_tdata; + axis_out_tkeep[i] = axis_in_tkeep; + axis_out_tlast[i] = axis_in_tlast; + if(state_C == ST_MUX) begin + axis_out_tvalid[i] = (id_C == i) ? 
axis_in_tvalid : 1'b0;
+        end
+        else begin
+            axis_out_tvalid[i] = 1'b0;
+        end
+    end
+
+    // The input is only ready while a transfer is active and the selected output
+    // is ready; the id_C < N_ID guard keeps the array index in range.
+    if(id_C < N_ID && state_C == ST_MUX)
+        axis_in_tready = axis_out_tready[id_C];
+    else
+        axis_in_tready = 1'b0;
+end
+
+// -----------------------------------------------------------------------------------------------------------------------
+// -- Memory subsystem
+// -----------------------------------------------------------------------------------------------------------------------
+// -- REG
+// State and datapath registers. All of them are cleared by the asynchronous,
+// active-low reset and otherwise take their *_N value on the rising clock edge.
+// The else branch is wrapped in begin/end on purpose: without it only state_C
+// would be covered by the else, and cnt_C/id_C/n_beats_C would also be
+// evaluated on the falling edge of aresetn.
+always_ff @(posedge aclk, negedge aresetn) begin: PROC_REG
+    if (aresetn == 1'b0) begin
+        state_C <= ST_IDLE;
+        cnt_C <= 0;
+        id_C <= 0;
+        n_beats_C <= 0;
+    end
+    else begin
+        state_C <= state_N;
+        cnt_C <= cnt_N;
+        id_C <= id_N;
+        n_beats_C <= n_beats_N;
+    end
+end
+
+// -- NSL
+// Next-state logic: leave ST_IDLE as soon as mux.ready is seen; in ST_MUX stay
+// until the current transfer is done, then either continue with the next
+// descriptor (mux.ready still high) or fall back to ST_IDLE.
+always_comb begin: NSL
+    state_N = state_C;
+
+    case(state_C)
+        ST_IDLE:
+            state_N = mux.ready ? ST_MUX : ST_IDLE;
+
+        ST_MUX:
+            state_N = tr_done ? (mux.ready ? ST_MUX : ST_IDLE) : ST_MUX;
+
+    endcase // state_C
+end
+
+// -- DP
+// Datapath: latches the descriptor (id, length) offered on the mux interface and
+// counts the beats accepted from the input stream.
+always_comb begin : DP
+    cnt_N = cnt_C;
+    id_N = id_C;
+    n_beats_N = n_beats_C;
+
+    // Transfer done
+    // Asserted on the accepted input beat for which cnt_C has reached n_beats_C.
+    // cnt_C starts at 0, so a descriptor with mux.len = L forwards L + 1 beats.
+    tr_done = (cnt_C == n_beats_C) && (axis_in_tvalid & axis_in_tready);
+
+    // Memory subsystem
+    mux.valid = 1'b0;
+
+    case(state_C)
+        ST_IDLE: begin
+            cnt_N = 0;
+            // Take the next descriptor, if one is offered.
+            if(mux.ready) begin
+                mux.valid = 1'b1;
+                id_N = mux.id;
+                n_beats_N = mux.len;
+            end
+        end
+
+        ST_MUX: begin
+            if(tr_done) begin
+                // Last beat of the current transfer: restart the counter and, if another
+                // descriptor is already waiting, take it without going through ST_IDLE.
+                cnt_N = 0;
+                if(mux.ready) begin
+                    mux.valid = 1'b1;
+                    id_N = mux.id;
+                    n_beats_N = mux.len;
+                end
+            end
+            else begin
+                // Count only beats that are actually transferred (valid and ready).
+                cnt_N = (axis_in_tvalid & axis_in_tready) ? cnt_C + 1 : cnt_C;
+            end
+        end
+
+    endcase
+end
+
+endmodule
\ No newline at end of file
diff --git a/hw/hdl/network/cmac_uplus_axis_wrapper.sv b/hw/hdl/network/cmac_uplus_axis_wrapper.sv
new file mode 100644
index 00000000..716a9c4a
--- /dev/null
+++ b/hw/hdl/network/cmac_uplus_axis_wrapper.sv
@@ -0,0 +1,658 @@
+/*
+ * Copyright (c) 2019, Systems Group, ETH Zurich
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +`timescale 1ns/1ps + +//`define DEBUG + +import lynxTypes::*; + +module cmac_axis_wrapper +( + input wire init_clk, + input wire gt_ref_clk_p, + input wire gt_ref_clk_n, + input wire [3:0] gt_rxp_in, + input wire [3:0] gt_rxn_in, + output wire [3:0] gt_txn_out, + output wire [3:0] gt_txp_out, + input wire sys_reset, + + AXI4S.m m_rx_axis, + AXI4S.s s_tx_axis, + + output logic rx_aligned, + output logic usr_tx_clk, + output logic tx_rst, + output logic rx_rst, + output logic [3:0] gt_rxrecclkout +); + +logic gt_txusrclk2; +logic gt_rxusrclk2; + +logic[11 :0] gt_loopback_in; + +// Resets +reg usr_rx_reset_r; +reg core_tx_reset_r; + +wire usr_rx_reset_w; +wire core_tx_reset_w; + +always @( posedge gt_rxusrclk2 ) begin //TODO check if this is correct + usr_rx_reset_r <= usr_rx_reset_w; +end + +always @( posedge gt_txusrclk2 ) begin + core_tx_reset_r <= core_tx_reset_w; +end + +assign rx_rst = usr_rx_reset_r; +assign tx_rst = core_tx_reset_r; + +// Clock out +assign usr_tx_clk = gt_txusrclk2; + +// Aligned status +wire stat_rx_aligned; +assign rx_aligned = stat_rx_aligned; + +//RX FSM states ---------------------------------------------------------------------- +localparam STATE_RX_IDLE = 0; +localparam STATE_GT_LOCKED = 1; +localparam STATE_WAIT_RX_ALIGNED = 2; +localparam STATE_PKT_TRANSFER_INIT = 3; +localparam STATE_WAIT_FOR_RESTART = 6; + +reg ctl_rx_enable_r, ctl_rx_force_resync_r; + +////State Registers for RX +reg [3:0] rx_prestate; +/* +ila_link inst_ila_link ( + .clk(gt_txusrclk2), + .probe0(stat_rx_aligned), + .probe1(rx_prestate) +); +*/ + +//rx reset handling +reg rx_reset_done; +reg stat_rx_aligned_1d; + +always @(posedge gt_txusrclk2) begin + if (usr_rx_reset_w) begin + rx_prestate <= STATE_RX_IDLE; + ctl_rx_enable_r <= 1'b0; + ctl_rx_force_resync_r <= 1'b0; + stat_rx_aligned_1d <= 1'b0; + rx_reset_done <= 1'b0; + end + else begin + rx_reset_done <= 1'b1; + stat_rx_aligned_1d <= stat_rx_aligned; + case (rx_prestate) + STATE_RX_IDLE: begin + 
ctl_rx_enable_r <= 1'b0; + ctl_rx_force_resync_r <= 1'b0; + if (rx_reset_done == 1'b1) begin + rx_prestate <= STATE_GT_LOCKED; + end + end + STATE_GT_LOCKED: begin + ctl_rx_enable_r <= 1'b1; + ctl_rx_force_resync_r <= 1'b0; + rx_prestate <= STATE_WAIT_RX_ALIGNED; + end + STATE_WAIT_RX_ALIGNED: begin + if (stat_rx_aligned_1d == 1'b1) begin + rx_prestate <= STATE_PKT_TRANSFER_INIT; + end + end + STATE_PKT_TRANSFER_INIT: begin + if (stat_rx_aligned_1d == 1'b0) begin + rx_prestate <= STATE_RX_IDLE; + end + end + endcase + end +end +wire ctl_rx_enable; +wire ctl_rx_force_resync; +assign ctl_rx_enable = ctl_rx_enable_r; +assign ctl_rx_force_resync = ctl_rx_force_resync_r; + +// TX FSM States +localparam STATE_TX_IDLE = 0; +//localparam STATE_GT_LOCKED = 1; +//localparam STATE_WAIT_RX_ALIGNED = 2; +//localparam STATE_PKT_TRANSFER_INIT = 3; +//localparam STATE_LBUS_TX_ENABLE = 4; +//localparam STATE_LBUS_TX_HALT = 5; +//localparam STATE_LBUS_TX_DONE = 6; +//localparam STATE_WAIT_FOR_RESTART = 7; +reg [3:0] tx_prestate; +reg tx_reset_done; +reg ctl_tx_enable_r, ctl_tx_send_idle_r, ctl_tx_send_rfi_r, ctl_tx_test_pattern_r; +reg ctl_tx_send_lfi_r; +always @(posedge gt_txusrclk2) begin + if (core_tx_reset_w) begin + tx_prestate <= STATE_TX_IDLE; + ctl_tx_enable_r <= 1'b0; + ctl_tx_send_idle_r <= 1'b0; + ctl_tx_send_lfi_r <= 1'b0; + ctl_tx_send_rfi_r <= 1'b0; + ctl_tx_test_pattern_r <= 1'b0; + tx_reset_done <= 1'b0; + end + else begin + tx_reset_done <= 1'b1; + //stat_rx_aligned_1d <= cmac_stat.stat_rx_aligned; + case (tx_prestate) + STATE_TX_IDLE: begin + ctl_tx_enable_r <= 1'b0; + ctl_tx_send_idle_r <= 1'b0; + ctl_tx_send_lfi_r <= 1'b0; + ctl_tx_send_rfi_r <= 1'b0; + ctl_tx_test_pattern_r <= 1'b0; + if (tx_reset_done == 1'b1) begin + tx_prestate <= STATE_GT_LOCKED; + end + /*else begin + rx_prestate <= STATE_RX_IDLE; + end*/ + end + STATE_GT_LOCKED: begin + ctl_tx_enable_r <= 1'b0; + ctl_tx_send_idle_r <= 1'b0; + ctl_tx_send_lfi_r <= 1'b1; + ctl_tx_send_rfi_r <= 1'b1; + 
tx_prestate <= STATE_WAIT_RX_ALIGNED; + end + STATE_WAIT_RX_ALIGNED: begin //TODO rename? + if (stat_rx_aligned_1d == 1'b1) begin + tx_prestate <= STATE_PKT_TRANSFER_INIT; + end + end + STATE_PKT_TRANSFER_INIT: begin + ctl_tx_send_idle_r <= 1'b0; + ctl_tx_send_lfi_r <= 1'b0; + ctl_tx_send_rfi_r <= 1'b0; + ctl_tx_enable_r <= 1'b1; + if (stat_rx_aligned_1d == 1'b0) begin + tx_prestate <= STATE_TX_IDLE; + end + end + endcase + end +end +wire ctl_tx_enable; +wire ctl_tx_send_idle; +wire ctl_tx_send_lfi; +wire ctl_tx_send_rfi; +wire ctl_tx_test_pattern; +assign ctl_tx_enable = ctl_tx_enable_r; +assign ctl_tx_send_idle = ctl_tx_send_idle_r; +assign ctl_tx_send_lfi = ctl_tx_send_lfi_r; +assign ctl_tx_send_rfi = ctl_tx_send_rfi_r; +assign ctl_tx_test_pattern = ctl_tx_test_pattern_r; + +wire stat_rx_aligned_err; +wire [2:0] stat_rx_bad_code; +wire [2:0] stat_rx_bad_fcs; +wire stat_rx_bad_preamble; +wire stat_rx_bad_sfd; + +wire stat_rx_got_signal_os; +wire stat_rx_hi_ber; +wire stat_rx_inrangeerr; +wire stat_rx_internal_local_fault; +wire stat_rx_jabber; +wire stat_rx_local_fault; +wire [19:0] stat_rx_mf_err; +wire [19:0] stat_rx_mf_len_err; +wire [19:0] stat_rx_mf_repeat_err; +wire stat_rx_misaligned; + +wire stat_rx_received_local_fault; +wire stat_rx_remote_fault; +wire stat_rx_status; +wire [2:0] stat_rx_stomped_fcs; +wire [19:0] stat_rx_synced; +wire [19:0] stat_rx_synced_err; + +//For debug +logic[6:0] stat_rx_total_bytes; +logic[13:0] stat_rx_good_bytes; +logic stat_rx_good_packets; +logic[2:0] stat_rx_total_packets; + +logic[5:0] stat_tx_total_bytes; +logic[13:0] stat_tx_good_bytes; +logic stat_tx_good_packets; +logic stat_tx_total_packets; + +logic tx_ovf;//TODO use for debug +logic tx_unf;//TODO use for debug + +wire tx_user_rst_i; +assign tx_user_rst_i = sys_reset; //TODO why not 1'b0?? 
+ +cmac_usplus_axis cmac_axis_inst ( + .gt0_rxp_in (gt_rxp_in[0]), + .gt1_rxp_in (gt_rxp_in[1]), + .gt2_rxp_in (gt_rxp_in[2]), + .gt3_rxp_in (gt_rxp_in[3]), + + .gt0_rxn_in (gt_rxn_in[0]), + .gt1_rxn_in (gt_rxn_in[1]), + .gt2_rxn_in (gt_rxn_in[2]), + .gt3_rxn_in (gt_rxn_in[3]), + + .gt0_txp_out (gt_txp_out[0]), + .gt1_txp_out (gt_txp_out[1]), + .gt2_txp_out (gt_txp_out[2]), + .gt3_txp_out (gt_txp_out[3]), + + .gt0_txn_out (gt_txn_out[0]), + .gt1_txn_out (gt_txn_out[1]), + .gt2_txn_out (gt_txn_out[2]), + .gt3_txn_out (gt_txn_out[3]), + + /*.gt_rxp_in(gt_rxp_in), + .gt_rxn_in(gt_rxn_in), + .gt_txp_out(gt_txp_out), + .gt_txn_out(gt_txn_out),*/ + + + .gt_txusrclk2 (gt_txusrclk2), + .gt_loopback_in (gt_loopback_in), + .gt_rxrecclkout (gt_rxrecclkout), + .gt_powergoodout (), + + .sys_reset (sys_reset), + .gtwiz_reset_tx_datapath (1'b0), + .gtwiz_reset_rx_datapath (1'b0), + + .gt_ref_clk_p (gt_ref_clk_p), + .gt_ref_clk_n (gt_ref_clk_n), + .init_clk (init_clk), + .gt_ref_clk_out (), + + + .rx_axis_tvalid (m_rx_axis.tvalid), + .rx_axis_tdata (m_rx_axis.tdata), + .rx_axis_tkeep (m_rx_axis.tkeep), + .rx_axis_tlast (m_rx_axis.tlast), + .rx_axis_tuser (), + .rx_otn_bip8_0 (), + .rx_otn_bip8_1 (), + .rx_otn_bip8_2 (), + .rx_otn_bip8_3 (), + .rx_otn_bip8_4 (), + .rx_otn_data_0 (), + .rx_otn_data_1 (), + .rx_otn_data_2 (), + .rx_otn_data_3 (), + .rx_otn_data_4 (), + .rx_otn_ena (), // + .rx_otn_lane0 (), // + .rx_otn_vlmarker (), // + + .rx_preambleout (), + .usr_rx_reset (usr_rx_reset_w), + + .gt_rxusrclk2 (gt_rxusrclk2), + + + .stat_rx_aligned (stat_rx_aligned), // + .stat_rx_aligned_err (stat_rx_aligned_err), // + .stat_rx_bad_code (stat_rx_bad_code), + .stat_rx_bad_fcs (stat_rx_bad_fcs), + .stat_rx_bad_preamble (stat_rx_bad_preamble), // + .stat_rx_bad_sfd (stat_rx_bad_sfd), // + .stat_rx_bip_err_0 (), // + .stat_rx_bip_err_1 (), // + .stat_rx_bip_err_10 (), // + .stat_rx_bip_err_11 (), // + .stat_rx_bip_err_12 (), // + .stat_rx_bip_err_13 (), // + .stat_rx_bip_err_14 (), // + 
.stat_rx_bip_err_15 (), // + .stat_rx_bip_err_16 (), // + .stat_rx_bip_err_17 (), // + .stat_rx_bip_err_18 (), // + .stat_rx_bip_err_19 (), // + .stat_rx_bip_err_2 (), // + .stat_rx_bip_err_3 (), // + .stat_rx_bip_err_4 (), // + .stat_rx_bip_err_5 (), // + .stat_rx_bip_err_6 (), // + .stat_rx_bip_err_7 (), // + .stat_rx_bip_err_8 (), // + .stat_rx_bip_err_9 (), // + .stat_rx_block_lock (), + .stat_rx_broadcast (), // + .stat_rx_fragment (), + .stat_rx_framing_err_0 (), + .stat_rx_framing_err_1 (), + .stat_rx_framing_err_10 (), + .stat_rx_framing_err_11 (), + .stat_rx_framing_err_12 (), + .stat_rx_framing_err_13 (), + .stat_rx_framing_err_14 (), + .stat_rx_framing_err_15 (), + .stat_rx_framing_err_16 (), + .stat_rx_framing_err_17 (), + .stat_rx_framing_err_18 (), + .stat_rx_framing_err_19 (), + .stat_rx_framing_err_2 (), + .stat_rx_framing_err_3 (), + .stat_rx_framing_err_4 (), + .stat_rx_framing_err_5 (), + .stat_rx_framing_err_6 (), + .stat_rx_framing_err_7 (), + .stat_rx_framing_err_8 (), + .stat_rx_framing_err_9 (), + .stat_rx_framing_err_valid_0 (), // + .stat_rx_framing_err_valid_1 (), // + .stat_rx_framing_err_valid_10 (), // + .stat_rx_framing_err_valid_11 (), // + .stat_rx_framing_err_valid_12 (), // + .stat_rx_framing_err_valid_13 (), // + .stat_rx_framing_err_valid_14 (), // + .stat_rx_framing_err_valid_15 (), // + .stat_rx_framing_err_valid_16 (), // + .stat_rx_framing_err_valid_17 (), // + .stat_rx_framing_err_valid_18 (), // + .stat_rx_framing_err_valid_19 (), // + .stat_rx_framing_err_valid_2 (), // + .stat_rx_framing_err_valid_3 (), // + .stat_rx_framing_err_valid_4 (), // + .stat_rx_framing_err_valid_5 (), // + .stat_rx_framing_err_valid_6 (), // + .stat_rx_framing_err_valid_7 (), // + .stat_rx_framing_err_valid_8 (), // + .stat_rx_framing_err_valid_9 (), // + .stat_rx_got_signal_os (stat_rx_got_signal_os), // + .stat_rx_hi_ber (stat_rx_hi_ber), // + .stat_rx_inrangeerr (stat_rx_inrangeerr), // + .stat_rx_internal_local_fault 
(stat_rx_internal_local_fault), // + .stat_rx_jabber (stat_rx_jabber), // + .stat_rx_local_fault (stat_rx_local_fault), // + .stat_rx_mf_err (stat_rx_mf_err), + .stat_rx_mf_len_err (stat_rx_mf_len_err), + .stat_rx_mf_repeat_err (stat_rx_mf_repeat_err), + .stat_rx_misaligned (stat_rx_misaligned), // + .stat_rx_multicast (), // + .stat_rx_oversize (), // + .stat_rx_packet_1024_1518_bytes(), // + .stat_rx_packet_128_255_bytes (), // + .stat_rx_packet_1519_1522_bytes(), // + .stat_rx_packet_1523_1548_bytes(), // + .stat_rx_packet_1549_2047_bytes(), // + .stat_rx_packet_2048_4095_bytes(), // + .stat_rx_packet_256_511_bytes (), // + .stat_rx_packet_4096_8191_bytes(), // + .stat_rx_packet_512_1023_bytes (), // + .stat_rx_packet_64_bytes (), // + .stat_rx_packet_65_127_bytes (), // + .stat_rx_packet_8192_9215_bytes(), // + .stat_rx_packet_bad_fcs (), // + .stat_rx_packet_large (), // + .stat_rx_packet_small (), + + .ctl_rx_enable (ctl_rx_enable), + .ctl_rx_force_resync (ctl_rx_force_resync), + .ctl_rx_test_pattern (1'b0), + .core_rx_reset (1'b0), //TODO 1'b0 in example design + .rx_clk (gt_txusrclk2), + + .stat_rx_received_local_fault (stat_rx_received_local_fault), // + .stat_rx_remote_fault (stat_rx_remote_fault), // + .stat_rx_status (stat_rx_status), // + .stat_rx_stomped_fcs (stat_rx_stomped_fcs), + .stat_rx_synced (stat_rx_synced), + .stat_rx_synced_err (stat_rx_synced_err), + .stat_rx_test_pattern_mismatch (), + .stat_rx_toolong (), // + .stat_rx_total_bytes (stat_rx_total_bytes), + .stat_rx_total_good_bytes (stat_rx_good_bytes), + .stat_rx_total_good_packets (stat_rx_good_packets), // + .stat_rx_total_packets (stat_rx_total_packets), + .stat_rx_truncated (), // + .stat_rx_undersize (), + .stat_rx_unicast (), // + .stat_rx_vlan (), // + .stat_rx_pcsl_demuxed (), + .stat_rx_pcsl_number_0 (), + .stat_rx_pcsl_number_1 (), + .stat_rx_pcsl_number_10 (), + .stat_rx_pcsl_number_11 (), + .stat_rx_pcsl_number_12 (), + .stat_rx_pcsl_number_13 (), + .stat_rx_pcsl_number_14 (), 
+ .stat_rx_pcsl_number_15 (), + .stat_rx_pcsl_number_16 (), + .stat_rx_pcsl_number_17 (), + .stat_rx_pcsl_number_18 (), + .stat_rx_pcsl_number_19 (), + .stat_rx_pcsl_number_2 (), + .stat_rx_pcsl_number_3 (), + .stat_rx_pcsl_number_4 (), + .stat_rx_pcsl_number_5 (), + .stat_rx_pcsl_number_6 (), + .stat_rx_pcsl_number_7 (), + .stat_rx_pcsl_number_8 (), + .stat_rx_pcsl_number_9 (), + + + .stat_tx_bad_fcs (), + .stat_tx_broadcast (), + .stat_tx_frame_error (), + .stat_tx_local_fault (), + .stat_tx_multicast (), + .stat_tx_packet_1024_1518_bytes(), + .stat_tx_packet_128_255_bytes (), + .stat_tx_packet_1519_1522_bytes(), + .stat_tx_packet_1523_1548_bytes(), + .stat_tx_packet_1549_2047_bytes(), + .stat_tx_packet_2048_4095_bytes(), + .stat_tx_packet_256_511_bytes (), + .stat_tx_packet_4096_8191_bytes(), + .stat_tx_packet_512_1023_bytes (), + .stat_tx_packet_64_bytes (), + .stat_tx_packet_65_127_bytes (), + .stat_tx_packet_8192_9215_bytes(), + .stat_tx_packet_large (), + .stat_tx_packet_small (), + .stat_tx_total_bytes (stat_tx_total_bytes), + .stat_tx_total_good_bytes (stat_tx_good_bytes), + .stat_tx_total_good_packets (stat_tx_good_packets), + .stat_tx_total_packets (stat_tx_total_packets), + .stat_tx_unicast (), + .stat_tx_vlan (), + + .ctl_tx_enable (ctl_tx_enable), + .ctl_tx_send_idle (ctl_tx_send_idle), + .ctl_tx_send_rfi (ctl_tx_send_rfi), + .ctl_tx_send_lfi (ctl_tx_send_lfi), + .ctl_tx_test_pattern (ctl_tx_test_pattern), + .core_tx_reset (tx_user_rst_i), + + .tx_axis_tready (s_tx_axis.tready), + .tx_axis_tvalid (s_tx_axis.tvalid), + .tx_axis_tdata (s_tx_axis.tdata), + .tx_axis_tkeep (s_tx_axis.tkeep), + .tx_axis_tlast (s_tx_axis.tlast), + .tx_axis_tuser (0), + + .tx_ovfout (tx_ovf), + .tx_unfout (tx_unf), + .tx_preamblein ({55{1'b0}}), + + .usr_tx_reset (core_tx_reset_w), + + .core_drp_reset (1'b0), + .drp_clk (1'b0), + .drp_addr (10'b0), + .drp_di (16'b0), + .drp_en (1'b0), + .drp_do (), + .drp_rdy (), + .drp_we (1'b0) +); + + + +`ifdef DEBUG + +// Counting packets 
+logic[31:0] rx_good_packets_count; +logic[31:0] rx_total_packets_count; +logic[31:0] rx_good_bytes_count; +logic[31:0] rx_total_bytes_count; + +// Counting errors +logic[15:0] align_errors; +logic[15:0] code_errors; +logic[15:0] fcs_errors; +logic[15:0] preamble_errors; +logic[15:0] sfd_errors; + +ila_cmac_rx ila_rx ( + .clk(gt_txusrclk2), // input wire clk + + .probe0(ctl_rx_enable), // 1 + .probe1(ctl_rx_force_resync), // 1 + .probe2(stat_rx_aligned_1d), // 1 + .probe3(stat_rx_aligned), // 1 + .probe4(rx_reset_done), // 1 + .probe5(rx_prestate), // 4 + .probe6(m_rx_axis.tvalid), // 1 + .probe7(m_rx_axis.tready), // 1 + .probe8(stat_rx_aligned_err), // 1 + .probe9(stat_rx_bad_code), // 3 + .probe10({sfd_errors, preamble_errors, fcs_errors, code_errors, align_errors}), // 80 + .probe11(rx_good_packets_count), // 32 + .probe12(rx_total_packets_count), // 32 + .probe13(rx_good_bytes_count), // 32 + .probe14(rx_total_bytes_count), // 32 + .probe15(stat_rx_synced_err), // 20 + .probe16({stat_rx_got_signal_os, stat_rx_hi_ber, stat_rx_inrangeerr, stat_rx_internal_local_fault, stat_rx_jabber, stat_rx_local_fault, stat_rx_misaligned}), // 7 + .probe17({stat_rx_received_local_fault, stat_rx_remote_fault, stat_rx_status, stat_rx_stomped_fcs}), // 6 + .probe18(usr_rx_reset_w) // 1 +); + +always @(posedge gt_txusrclk2) begin + if (usr_rx_reset_w) begin + rx_good_packets_count <= '0; + rx_total_packets_count <= '0; + rx_good_bytes_count <= '0; + rx_total_bytes_count <= '0; + end + else begin + rx_good_packets_count <= rx_good_packets_count + stat_rx_good_packets; + rx_total_packets_count <= rx_total_packets_count + stat_rx_total_packets; + rx_good_bytes_count <= rx_good_bytes_count + stat_rx_good_bytes; + rx_total_bytes_count <= rx_total_bytes_count + stat_rx_total_bytes; + end +end + +always @(posedge gt_txusrclk2) begin + if (usr_rx_reset_w) begin + align_errors <= '0; + code_errors <= '0; + fcs_errors <= '0; + preamble_errors <= '0; + sfd_errors <= '0; + end + else begin + 
if (stat_rx_aligned_err != 0) begin + align_errors <= align_errors + 1; + end + if (stat_rx_bad_code != 0) begin + code_errors <= code_errors + 1; + end + if (stat_rx_bad_fcs != 0) begin + fcs_errors <= fcs_errors + 1; + end + if (stat_rx_bad_preamble != 0) begin + preamble_errors <= preamble_errors + 1; + end + if (stat_rx_bad_sfd != 0) begin + sfd_errors <= sfd_errors + 1; + end + end +end + + +logic[31:0] tx_good_packets_count; +logic[31:0] tx_total_packets_count; +logic[31:0] tx_good_bytes_count; +logic[31:0] tx_total_bytes_count; + +always @(posedge gt_txusrclk2) begin + if (core_tx_reset_w) begin + tx_good_packets_count <= '0; + tx_total_packets_count <= '0; + tx_good_bytes_count <= '0; + tx_total_bytes_count <= '0; + end + else begin + tx_good_packets_count <= tx_good_packets_count + stat_tx_good_packets; + tx_total_packets_count <= tx_total_packets_count + stat_tx_total_packets; + tx_good_bytes_count <= tx_good_bytes_count + stat_tx_good_bytes; + tx_total_bytes_count <= tx_total_bytes_count + stat_tx_total_bytes; + end +end + +ila_cmac_tx ila_tx ( + .clk(gt_txusrclk2), // input wire clk + + .probe0(ctl_tx_enable), // 1 + .probe1(ctl_tx_send_idle), // 1 + .probe2(ctl_tx_send_lfi), // 1 + .probe3(ctl_tx_send_rfi), // 1 + .probe4(ctl_tx_test_pattern), // 1 + .probe5(tx_prestate), // 4 + .probe6(tx_reset_done), // 1 + .probe7(tx_good_packets_count), // 32 + .probe8(tx_total_packets_count), // 32 + .probe9(tx_good_bytes_count), // 32 + .probe10(tx_total_bytes_count), // 32 + .probe11(core_tx_reset_w), // 1 + .probe12(s_tx_axis.tlast), // 1 + .probe13(s_tx_axis.tready), // 1 + .probe14(s_tx_axis.tvalid) // 1 +); + +`endif + +endmodule + +`default_nettype wire diff --git a/hw/hdl/network/network_clk_cross.sv b/hw/hdl/network/network_clk_cross.sv new file mode 100644 index 00000000..67796f01 --- /dev/null +++ b/hw/hdl/network/network_clk_cross.sv @@ -0,0 +1,382 @@ +import lynxTypes::*; + +`include "axi_macros.svh" +`include "lynx_macros.svh" + +module 
/**
 * Network clock crossing
 *
 * Moves every control, request, command and data stream of the network
 * stack between the user clock domain (aclk / aresetn) and the network
 * clock domain (net_clk / net_aresetn).
 *
 *  - Narrow control streams use axis_clock_converter_* instances.
 *  - RDMA requests, commands and data use independent-clock
 *    axis_data_fifo_* instances.
 *  - With MULT_REGIONS defined the per-region arrays are arbitrated /
 *    demultiplexed on the aclk side; otherwise region 0 is wired straight
 *    through.
 */
module network_clk_cross (
    input  wire             aclk,
    input  wire             aresetn,
    input  wire             net_clk,
    input  wire             net_aresetn,

    // ------------------------------------------------------------------
    // User clock domain (aclk)
    // ------------------------------------------------------------------
    metaIntf.s              arp_lookup_request_aclk,
    metaIntf.m              arp_lookup_reply_aclk,
    metaIntf.s              set_ip_addr_aclk,
    metaIntf.s              set_board_number_aclk,
    metaIntf.s              qp_interface_aclk,
    metaIntf.s              conn_interface_aclk,

    metaIntf.s              rdma_req_host_aclk [N_REGIONS],
`ifdef EN_FVV
    metaIntf.s              rdma_req_card_aclk [N_REGIONS],
    metaIntf.m              rdma_req_fv_aclk [N_REGIONS],
`endif

    reqIntf.m               rdma_rd_cmd_aclk [N_REGIONS],
    reqIntf.m               rdma_wr_cmd_aclk [N_REGIONS],
    AXI4S.s                 axis_rdma_rd_data_aclk [N_REGIONS],
    AXI4S.m                 axis_rdma_wr_data_aclk [N_REGIONS],

    // ------------------------------------------------------------------
    // Network clock domain (net_clk)
    // ------------------------------------------------------------------
    metaIntf.m              arp_lookup_request_nclk,
    metaIntf.s              arp_lookup_reply_nclk,
    metaIntf.m              set_ip_addr_nclk,
    metaIntf.m              set_board_number_nclk,
    metaIntf.m              qp_interface_nclk,
    metaIntf.m              conn_interface_nclk,

    metaIntf.m              rdma_req_host_nclk,
`ifdef EN_FVV
    metaIntf.m              rdma_req_card_nclk,
    metaIntf.s              rdma_req_fv_nclk,
`endif

    rdmaIntf.s              rdma_rd_cmd_nclk,
    rdmaIntf.s              rdma_wr_cmd_nclk,
    AXI4S.m                 axis_rdma_rd_data_nclk,
    AXI4S.s                 axis_rdma_wr_data_nclk
);

// ----------------------------------------------------------------------
// Control streams
// ----------------------------------------------------------------------

// ARP lookup request: aclk -> net_clk
axis_clock_converter_32_0 inst_clk_cnvrt_arp_request (
    .s_axis_aclk    (aclk),
    .s_axis_aresetn (aresetn),
    .s_axis_tvalid  (arp_lookup_request_aclk.valid),
    .s_axis_tready  (arp_lookup_request_aclk.ready),
    .s_axis_tdata   (arp_lookup_request_aclk.data),
    .m_axis_aclk    (net_clk),
    .m_axis_aresetn (net_aresetn),
    .m_axis_tvalid  (arp_lookup_request_nclk.valid),
    .m_axis_tready  (arp_lookup_request_nclk.ready),
    .m_axis_tdata   (arp_lookup_request_nclk.data)
);

// ARP lookup reply: net_clk -> aclk (the only control stream in this direction)
axis_clock_converter_56_0 inst_clk_cnvrt_arp_reply (
    .s_axis_aclk    (net_clk),
    .s_axis_aresetn (net_aresetn),
    .s_axis_tvalid  (arp_lookup_reply_nclk.valid),
    .s_axis_tready  (arp_lookup_reply_nclk.ready),
    .s_axis_tdata   (arp_lookup_reply_nclk.data),
    .m_axis_aclk    (aclk),
    .m_axis_aresetn (aresetn),
    .m_axis_tvalid  (arp_lookup_reply_aclk.valid),
    .m_axis_tready  (arp_lookup_reply_aclk.ready),
    .m_axis_tdata   (arp_lookup_reply_aclk.data)
);

// Local IP address: aclk -> net_clk
axis_clock_converter_32_0 inst_clk_cnvrt_set_ip_addr (
    .s_axis_aclk    (aclk),
    .s_axis_aresetn (aresetn),
    .s_axis_tvalid  (set_ip_addr_aclk.valid),
    .s_axis_tready  (set_ip_addr_aclk.ready),
    .s_axis_tdata   (set_ip_addr_aclk.data),
    .m_axis_aclk    (net_clk),
    .m_axis_aresetn (net_aresetn),
    .m_axis_tvalid  (set_ip_addr_nclk.valid),
    .m_axis_tready  (set_ip_addr_nclk.ready),
    .m_axis_tdata   (set_ip_addr_nclk.data)
);

// Board number: aclk -> net_clk
axis_clock_converter_8_0 inst_clk_cnvrt_set_board_number (
    .s_axis_aclk    (aclk),
    .s_axis_aresetn (aresetn),
    .s_axis_tvalid  (set_board_number_aclk.valid),
    .s_axis_tready  (set_board_number_aclk.ready),
    .s_axis_tdata   (set_board_number_aclk.data),
    .m_axis_aclk    (net_clk),
    .m_axis_aresetn (net_aresetn),
    .m_axis_tvalid  (set_board_number_nclk.valid),
    .m_axis_tready  (set_board_number_nclk.ready),
    .m_axis_tdata   (set_board_number_nclk.data)
);

// Queue pair interface: aclk -> net_clk
axis_clock_converter_144_0 inst_clk_cnvrt_qp_interface (
    .s_axis_aclk    (aclk),
    .s_axis_aresetn (aresetn),
    .s_axis_tvalid  (qp_interface_aclk.valid),
    .s_axis_tready  (qp_interface_aclk.ready),
    .s_axis_tdata   (qp_interface_aclk.data),
    .m_axis_aclk    (net_clk),
    .m_axis_aresetn (net_aresetn),
    .m_axis_tvalid  (qp_interface_nclk.valid),
    .m_axis_tready  (qp_interface_nclk.ready),
    .m_axis_tdata   (qp_interface_nclk.data)
);

// Connection interface: aclk -> net_clk
axis_clock_converter_184_0 inst_clk_cnvrt_conn_interface (
    .s_axis_aclk    (aclk),
    .s_axis_aresetn (aresetn),
    .s_axis_tvalid  (conn_interface_aclk.valid),
    .s_axis_tready  (conn_interface_aclk.ready),
    .s_axis_tdata   (conn_interface_aclk.data),
    .m_axis_aclk    (net_clk),
    .m_axis_aresetn (net_aresetn),
    .m_axis_tvalid  (conn_interface_nclk.valid),
    .m_axis_tready  (conn_interface_nclk.ready),
    .m_axis_tdata   (conn_interface_nclk.data)
);

// ----------------------------------------------------------------------
// RDMA requests
// ----------------------------------------------------------------------

// Host requests: per-region inputs are merged in the aclk domain, then
// the single merged stream crosses to net_clk.
metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_host_arb ();

`ifdef MULT_REGIONS
    network_meta_tx_arbiter #(
        .DATA_BITS(FV_REQ_BITS)
    ) inst_rdma_req_host_arbiter (
        .aclk     (aclk),
        .aresetn  (aresetn),
        .meta_snk (rdma_req_host_aclk),
        .meta_src (rdma_req_host_arb),
        .id       ()                      // granted region index is not used here
    );
`else
    `META_ASSIGN(rdma_req_host_aclk[0], rdma_req_host_arb)
`endif

axis_data_fifo_req_rdma_256 inst_rdma_req_host_cross (
    .s_axis_aclk    (aclk),
    .s_axis_aresetn (aresetn),
    .s_axis_tvalid  (rdma_req_host_arb.valid),
    .s_axis_tready  (rdma_req_host_arb.ready),
    .s_axis_tdata   (rdma_req_host_arb.data),
    .m_axis_aclk    (net_clk),
    .m_axis_tvalid  (rdma_req_host_nclk.valid),
    .m_axis_tready  (rdma_req_host_nclk.ready),
    .m_axis_tdata   (rdma_req_host_nclk.data)
);

`ifdef EN_FVV

// Card requests: same structure as the host request path.
metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_card_arb ();

`ifdef MULT_REGIONS
    network_meta_tx_arbiter #(
        .DATA_BITS(FV_REQ_BITS)
    ) inst_rdma_req_card_arbiter (
        .aclk     (aclk),
        .aresetn  (aresetn),
        .meta_snk (rdma_req_card_aclk),
        .meta_src (rdma_req_card_arb),
        .id       ()
    );
`else
    `META_ASSIGN(rdma_req_card_aclk[0], rdma_req_card_arb)
`endif

axis_data_fifo_req_rdma_256 inst_rdma_req_card_cross (
    .s_axis_aclk    (aclk),
    .s_axis_aresetn (aresetn),
    .s_axis_tvalid  (rdma_req_card_arb.valid),
    .s_axis_tready  (rdma_req_card_arb.ready),
    .s_axis_tdata   (rdma_req_card_arb.data),
    .m_axis_aclk    (net_clk),
    .m_axis_tvalid  (rdma_req_card_nclk.valid),
    .m_axis_tready  (rdma_req_card_nclk.ready),
    .m_axis_tdata   (rdma_req_card_nclk.data)
);

// Farview requests travel the other way: they cross net_clk -> aclk first
// and are then distributed to the regions.
metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_fv_arb ();

axis_data_fifo_req_rdma_256 inst_rdma_req_fv_cross (
    .s_axis_aclk    (net_clk),
    .s_axis_aresetn (net_aresetn),
    .s_axis_tvalid  (rdma_req_fv_nclk.valid),
    .s_axis_tready  (rdma_req_fv_nclk.ready),
    .s_axis_tdata   (rdma_req_fv_nclk.data),
    .m_axis_aclk    (aclk),
    .m_axis_tvalid  (rdma_req_fv_arb.valid),
    .m_axis_tready  (rdma_req_fv_arb.ready),
    .m_axis_tdata   (rdma_req_fv_arb.data)
);

`ifdef MULT_REGIONS
    network_meta_fv_arbiter #(
        .DATA_BITS(FV_REQ_BITS)
    ) inst_rdma_req_fv_arbiter (
        .aclk     (aclk),
        .aresetn  (aresetn),
        .meta_snk (rdma_req_fv_arb),
        .meta_src (rdma_req_fv_aclk)
    );
`else
    `META_ASSIGN(rdma_req_fv_arb, rdma_req_fv_aclk[0])
`endif

`endif

// ----------------------------------------------------------------------
// Memory read path: command net_clk -> aclk, data aclk -> net_clk
// ----------------------------------------------------------------------
rdmaIntf rdma_rd_cmd_arb ();
AXI4S axis_rdma_rd_data_arb ();

axis_data_fifo_cmd_rdma_96 inst_rdma_cmd_rd (
    .s_axis_aclk    (net_clk),
    .s_axis_aresetn (net_aresetn),
    .s_axis_tvalid  (rdma_rd_cmd_nclk.valid),
    .s_axis_tready  (rdma_rd_cmd_nclk.ready),
    .s_axis_tdata   (rdma_rd_cmd_nclk.req),
    .m_axis_aclk    (aclk),
    .m_axis_tvalid  (rdma_rd_cmd_arb.valid),
    .m_axis_tready  (rdma_rd_cmd_arb.ready),
    .m_axis_tdata   (rdma_rd_cmd_arb.req)
);

axis_data_fifo_rdma_512 inst_rdma_data_rd (
    .s_axis_aclk    (aclk),
    .s_axis_aresetn (aresetn),
    .s_axis_tvalid  (axis_rdma_rd_data_arb.tvalid),
    .s_axis_tready  (axis_rdma_rd_data_arb.tready),
    .s_axis_tdata   (axis_rdma_rd_data_arb.tdata),
    .s_axis_tkeep   (axis_rdma_rd_data_arb.tkeep),
    .s_axis_tlast   (axis_rdma_rd_data_arb.tlast),
    .m_axis_aclk    (net_clk),
    .m_axis_tvalid  (axis_rdma_rd_data_nclk.tvalid),
    .m_axis_tready  (axis_rdma_rd_data_nclk.tready),
    .m_axis_tdata   (axis_rdma_rd_data_nclk.tdata),
    .m_axis_tkeep   (axis_rdma_rd_data_nclk.tkeep),
    .m_axis_tlast   (axis_rdma_rd_data_nclk.tlast)
);

// Distribute read commands to the regions and collect their data.
`ifdef MULT_REGIONS
    network_mux_cmd_rd inst_mux_cmd_rd (
        .aclk             (aclk),
        .aresetn          (aresetn),
        .req_snk          (rdma_rd_cmd_arb),
        .req_src          (rdma_rd_cmd_aclk),
        .axis_rd_data_snk (axis_rdma_rd_data_aclk),
        .axis_rd_data_src (axis_rdma_rd_data_arb)
    );
`else
    // NOTE(review): .req is copied as a whole from an rdmaIntf to a reqIntf
    // here, whereas network_mux_cmd_rd copies vaddr/len/sync/ctl field by
    // field - confirm both request types share the same packed layout.
    assign rdma_rd_cmd_aclk[0].req   = rdma_rd_cmd_arb.req;
    assign rdma_rd_cmd_aclk[0].valid = rdma_rd_cmd_arb.valid;
    assign rdma_rd_cmd_arb.ready     = rdma_rd_cmd_aclk[0].ready;

    `AXIS_ASSIGN(axis_rdma_rd_data_aclk[0], axis_rdma_rd_data_arb)
`endif

// ----------------------------------------------------------------------
// Memory write path: command and data both net_clk -> aclk
// ----------------------------------------------------------------------
rdmaIntf rdma_wr_cmd_arb ();
AXI4S axis_rdma_wr_data_arb ();

axis_data_fifo_cmd_rdma_96 inst_rdma_cmd_wr (
    .s_axis_aclk    (net_clk),
    .s_axis_aresetn (net_aresetn),
    .s_axis_tvalid  (rdma_wr_cmd_nclk.valid),
    .s_axis_tready  (rdma_wr_cmd_nclk.ready),
    .s_axis_tdata   (rdma_wr_cmd_nclk.req),
    .m_axis_aclk    (aclk),
    .m_axis_tvalid  (rdma_wr_cmd_arb.valid),
    .m_axis_tready  (rdma_wr_cmd_arb.ready),
    .m_axis_tdata   (rdma_wr_cmd_arb.req)
);

axis_data_fifo_rdma_512 inst_rdma_data_wr (
    .s_axis_aclk    (net_clk),
    .s_axis_aresetn (net_aresetn),
    .s_axis_tvalid  (axis_rdma_wr_data_nclk.tvalid),
    .s_axis_tready  (axis_rdma_wr_data_nclk.tready),
    .s_axis_tdata   (axis_rdma_wr_data_nclk.tdata),
    .s_axis_tkeep   (axis_rdma_wr_data_nclk.tkeep),
    .s_axis_tlast   (axis_rdma_wr_data_nclk.tlast),
    .m_axis_aclk    (aclk),
    .m_axis_tvalid  (axis_rdma_wr_data_arb.tvalid),
    .m_axis_tready  (axis_rdma_wr_data_arb.tready),
    .m_axis_tdata   (axis_rdma_wr_data_arb.tdata),
    .m_axis_tkeep   (axis_rdma_wr_data_arb.tkeep),
    .m_axis_tlast   (axis_rdma_wr_data_arb.tlast)
);

// Distribute write commands and their data to the regions.
`ifdef MULT_REGIONS
    network_mux_cmd_wr inst_mux_cmd_wr (
        .aclk             (aclk),
        .aresetn          (aresetn),
        .req_snk          (rdma_wr_cmd_arb),
        .req_src          (rdma_wr_cmd_aclk),
        .axis_wr_data_snk (axis_rdma_wr_data_arb),
        .axis_wr_data_src (axis_rdma_wr_data_aclk)
    );
`else
    // NOTE(review): same whole-struct copy as on the read path - confirm
    // the rdmaIntf and reqIntf request layouts match.
    assign rdma_wr_cmd_aclk[0].req   = rdma_wr_cmd_arb.req;
    assign rdma_wr_cmd_aclk[0].valid = rdma_wr_cmd_arb.valid;
    assign rdma_wr_cmd_arb.ready     = rdma_wr_cmd_aclk[0].ready;

    `AXIS_ASSIGN(axis_rdma_wr_data_arb, axis_rdma_wr_data_aclk[0])
`endif

endmodule
import lynxTypes::*;

/**
 * Network meta Farview request demultiplexer
 *
 * Steers each request arriving on meta_snk to exactly one of the
 * N_REGIONS outputs, selected by the region id carried in the request
 * word. Every output is decoupled from the steering logic by its own FIFO.
 *
 * Parameters:
 *   DATA_BITS - width of the request word. Must cover the region id field
 *               (bits [REGION_ID_OFFS +: REGION_ID_BITS]).
 *
 * Ports:
 *   aclk, aresetn - clock and active-low reset, used by the output FIFOs
 *   meta_snk      - incoming request stream
 *   meta_src      - one outgoing request stream per region
 */
module network_meta_fv_arbiter #(
    parameter integer DATA_BITS = 32
) (
    input  logic    aclk,
    input  logic    aresetn,

    // User logic
    metaIntf.s      meta_snk,
    metaIntf.m      meta_src [N_REGIONS]
);

// Position of the region id inside the request word. The request layout
// documents it as "Region ID [32:29]"; it was previously written inline as
// the literal slice [29+:4].
localparam integer REGION_ID_OFFS = 29;
localparam integer REGION_ID_BITS = 4;

logic ready_snk;
logic valid_snk;
logic [DATA_BITS-1:0] data_snk;
logic [N_REGIONS-1:0] ready_src;
logic [N_REGIONS-1:0] valid_src;
logic [N_REGIONS-1:0][DATA_BITS-1:0] data_src;

logic [N_REQUEST_BITS-1:0] id;

// Sized with the module parameter so that it always matches data_src.
// (It used to be fixed to FV_REQ_BITS regardless of DATA_BITS.)
metaIntf #(.DATA_BITS(DATA_BITS)) meta_que [N_REGIONS] ();

// --------------------------------------------------------------------------------
// -- I/O !!! interface
// --------------------------------------------------------------------------------
for(genvar i = 0; i < N_REGIONS; i++) begin
    assign meta_que[i].valid = valid_src[i];
    assign ready_src[i] = meta_que[i].ready;
    assign meta_que[i].data = data_src[i];
end

assign valid_snk = meta_snk.valid;
assign meta_snk.ready = ready_snk;
assign data_snk = meta_snk.data;

// --------------------------------------------------------------------------------
// -- Demux
// --------------------------------------------------------------------------------
always_comb begin
    // The slice is implicitly resized to N_REQUEST_BITS, exactly as before.
    id = data_snk[REGION_ID_OFFS +: REGION_ID_BITS];

    // The request word is broadcast; only the addressed region sees valid.
    for(int i = 0; i < N_REGIONS; i++) begin
        valid_src[i] = (id == i) ? valid_snk : 1'b0;
        data_src[i] = data_snk;
    end

    // Back-pressure comes from the addressed region only.
    // NOTE(review): an id >= N_REGIONS selects no region and indexes
    // ready_src out of range - confirm the sender can never produce one.
    ready_snk = ready_src[id];
end

// One FIFO per region between the demux and the region's output.
for(genvar i = 0; i < N_REGIONS; i++) begin
    axis_data_fifo_cnfg_rdma_256 inst_fv_queue_in (
        .s_axis_aresetn(aresetn),
        .s_axis_aclk(aclk),
        .s_axis_tvalid(meta_que[i].valid),
        .s_axis_tready(meta_que[i].ready),
        .s_axis_tdata(meta_que[i].data),
        .m_axis_tvalid(meta_src[i].valid),
        .m_axis_tready(meta_src[i].ready),
        .m_axis_tdata(meta_src[i].data),
        .axis_wr_data_count()
    );
end

endmodule
/**
 * Network meta TX arbiter - Round Robin
 *
 * Merges N_REGIONS request streams into one. The scan for a valid input
 * starts at region rr_reg and wraps around; rr_reg advances by one (modulo
 * N_REGIONS) on every accepted transfer.
 *
 * Ports:
 *   meta_snk - one incoming stream per region
 *   meta_src - merged outgoing stream
 *   id       - index of the region currently selected (0 when none is valid)
 */

import lynxTypes::*;

module network_meta_tx_arbiter #(
    parameter integer DATA_BITS = 32
) (
    input  logic                        aclk,
    input  logic                        aresetn,

    // User logic
    metaIntf.s                          meta_snk [N_REGIONS],
    metaIntf.m                          meta_src,

    // ID
    output logic [N_REQUEST_BITS-1:0]   id
);

// Flattened copies of the interface signals so they can be indexed with a
// run-time value.
logic [N_REGIONS-1:0]                   valid_snk;
logic [N_REGIONS-1:0]                   ready_snk;
logic [N_REGIONS-1:0][DATA_BITS-1:0]    data_snk;
logic                                   valid_src;
logic                                   ready_src;
logic [DATA_BITS-1:0]                   data_src;

// Region at which the next scan starts.
logic [N_REQUEST_BITS-1:0]              rr_reg;

// --------------------------------------------------------------------------------
// I/O !!! interface
// --------------------------------------------------------------------------------
for(genvar i = 0; i < N_REGIONS; i++) begin
    assign data_snk[i]       = meta_snk[i].data;
    assign valid_snk[i]      = meta_snk[i].valid;
    assign meta_snk[i].ready = ready_snk[i];
end

assign meta_src.data  = data_src;
assign meta_src.valid = valid_src;
assign ready_src      = meta_src.ready;

// --------------------------------------------------------------------------------
// RR
// --------------------------------------------------------------------------------
always_ff @(posedge aclk or negedge aresetn) begin
    if(aresetn == 1'b0) begin
        rr_reg <= 0;
    end
    else if(valid_src & ready_src) begin
        // Advance after each accepted transfer, wrapping at the last region.
        rr_reg <= (rr_reg >= N_REGIONS-1) ? 0 : rr_reg + 1;
    end
end

// DP
always_comb begin
    int sel;

    ready_snk = 0;
    valid_src = 1'b0;
    id = 0;

    // Walk the scan order backwards so that the assignment made for the
    // smallest offset is the last one and therefore wins - the same
    // priority as a forward scan that stops at the first valid region.
    for(int offs = N_REGIONS-1; offs >= 0; offs--) begin
        sel = offs + rr_reg;
        if(sel >= N_REGIONS)
            sel = sel - N_REGIONS;

        if(valid_snk[sel]) begin
            valid_src = 1'b1;
            id = sel;
        end
    end

    // Only the selected region is back-pressured by the output; with no
    // valid region, id is 0 and region 0 simply sees ready_src.
    ready_snk[id] = ready_src;
    data_src = data_snk[id];
end

endmodule
Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +`timescale 1ns / 1ps + + +module network_module +( + input wire dclk, + output wire net_clk, + input wire sys_reset, + input wire aresetn, + output wire network_init_done, + + input wire gt_refclk_p, + input wire gt_refclk_n, + + input wire [3:0] gt_rxp_in, + input wire [3:0] gt_rxn_in, + output wire [3:0] gt_txp_out, + output wire [3:0] gt_txn_out, + + output wire user_rx_reset, + output wire user_tx_reset, + output wire gtpowergood_out, + + //Axi Stream Interface + AXI4S.m m_axis_net_rx, + AXI4S.s s_axis_net_tx +); + +reg core_reset_tmp = 1'b0; +reg core_reset = 1'b0; + +always @(posedge sys_reset or posedge net_clk) begin + if (sys_reset) begin + core_reset_tmp <= 1'b0; + core_reset <= 1'b0; + end + else begin + //Hold core in reset until everything is ready + //core_reset_tmp <= !(sys_reset | user_tx_reset | user_rx_reset); + core_reset_tmp <= !(sys_reset | user_tx_reset); + core_reset <= core_reset_tmp; + end +end +assign network_init_done = core_reset; + +/* + * RX + */ +AXI4S rx_axis(); + +/* + * TX + */ +AXI4S tx_axis(); +AXI4S axis_tx_pkg_to_fifo(); +AXI4S axis_tx_padding_to_fifo(); + +cmac_axis_wrapper cmac_wrapper_inst +( + .gt_rxp_in(gt_rxp_in), + .gt_rxn_in(gt_rxn_in), + .gt_txp_out(gt_txp_out), + .gt_txn_out(gt_txn_out), + .gt_ref_clk_p(gt_refclk_p), + .gt_ref_clk_n(gt_refclk_n), + .init_clk(dclk), + .sys_reset(sys_reset), + + .m_rx_axis(rx_axis), + .s_tx_axis(tx_axis), + + .rx_aligned(gtpowergood_out), //Todo REmove/rename + .usr_tx_clk(net_clk), + .tx_rst(user_tx_reset), + .rx_rst(user_rx_reset), + .gt_rxrecclkout() //not used +); + + +//RX Clock crossing (same clock) +axis_data_fifo_512_cc rx_crossing ( + //.s_axis_aresetn(~(sys_reset | user_rx_reset)), + .s_axis_aresetn(aresetn), + .s_axis_aclk(net_clk), + .s_axis_tvalid(rx_axis.tvalid), + .s_axis_tready(rx_axis.tready), + .s_axis_tdata(rx_axis.tdata), + .s_axis_tkeep(rx_axis.tkeep), + .s_axis_tlast(rx_axis.tlast), + .m_axis_aclk(net_clk), + .m_axis_tvalid(m_axis_net_rx.tvalid), + 
.m_axis_tready(m_axis_net_rx.tready), + .m_axis_tdata(m_axis_net_rx.tdata), + .m_axis_tkeep(m_axis_net_rx.tkeep), + .m_axis_tlast(m_axis_net_rx.tlast) +); + +// TX +// Pad Ethernet frames to at least 64B +// Packet FIFO, makes sure that whole packet is passed in a single burst to the CMAC +axis_data_fifo_512_cc tx_crossing ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(net_clk), + .s_axis_tvalid(axis_tx_pkg_to_fifo.tvalid), + .s_axis_tready(axis_tx_pkg_to_fifo.tready), + .s_axis_tdata(axis_tx_pkg_to_fifo.tdata), + .s_axis_tkeep(axis_tx_pkg_to_fifo.tkeep), + .s_axis_tlast(axis_tx_pkg_to_fifo.tlast), + .m_axis_aclk(net_clk), + .m_axis_tvalid(tx_axis.tvalid), + .m_axis_tready(tx_axis.tready), + .m_axis_tdata(tx_axis.tdata), + .m_axis_tkeep(tx_axis.tkeep), + .m_axis_tlast(tx_axis.tlast) +); + +axis_pkg_fifo_512 axis_pkg_fifo_512 ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(net_clk), + .s_axis_tvalid(axis_tx_padding_to_fifo.tvalid), + .s_axis_tready(axis_tx_padding_to_fifo.tready), + .s_axis_tdata(axis_tx_padding_to_fifo.tdata), + .s_axis_tkeep(axis_tx_padding_to_fifo.tkeep), + .s_axis_tlast(axis_tx_padding_to_fifo.tlast), + .m_axis_tvalid(axis_tx_pkg_to_fifo.tvalid), + .m_axis_tready(axis_tx_pkg_to_fifo.tready), + .m_axis_tdata(axis_tx_pkg_to_fifo.tdata), + .m_axis_tkeep(axis_tx_pkg_to_fifo.tkeep), + .m_axis_tlast(axis_tx_pkg_to_fifo.tlast) +); + +ethernet_frame_padding_512_ip ethernet_frame_padding_inst ( + .m_axis_TVALID(axis_tx_padding_to_fifo.tvalid), + .m_axis_TREADY(axis_tx_padding_to_fifo.tready), + .m_axis_TDATA(axis_tx_padding_to_fifo.tdata), + .m_axis_TKEEP(axis_tx_padding_to_fifo.tkeep), + .m_axis_TLAST(axis_tx_padding_to_fifo.tlast), + .s_axis_TVALID(s_axis_net_tx.tvalid), + .s_axis_TREADY(s_axis_net_tx.tready), + .s_axis_TDATA(s_axis_net_tx.tdata), + .s_axis_TKEEP(s_axis_net_tx.tkeep), + .s_axis_TLAST(s_axis_net_tx.tlast), + .ap_clk(net_clk), + .ap_rst_n(aresetn) +); + +endmodule + +`default_nettype wire diff --git a/hw/hdl/network/network_mux_cmd_rd.sv 
import lynxTypes::*;

/**
 * Network RDMA read command mux
 *
 * Command side: an incoming request (req_snk) whose `host` flag is set is
 * forwarded, through a per-region req_queue, to the region named by its
 * `id` field. For every accepted request - host or not - {id, len} is
 * pushed into a sequence queue.
 *
 * Data side: the sequence queue is drained in order. For each entry the
 * FSM forwards the data of region `id` from its input FIFO to
 * axis_rd_data_src until the number of beats implied by `len` has been
 * transferred, so the returned data follows the order of the requests.
 *
 * Ports:
 *   req_snk          - incoming read requests
 *   req_src          - per-region outgoing requests
 *   axis_rd_data_snk - per-region read data
 *   axis_rd_data_src - merged read data
 */
module network_mux_cmd_rd (
    input  logic                                aclk,
    input  logic                                aresetn,

    rdmaIntf.s                                  req_snk,
    reqIntf.m                                   req_src [N_REGIONS],
    AXI4S.s                                     axis_rd_data_snk [N_REGIONS],
    AXI4S.m                                     axis_rd_data_src
);

logic [N_REGIONS-1:0] ready_src;
logic [N_REGIONS-1:0] valid_src;
logic ready_snk;
logic valid_snk;
req_t [N_REGIONS-1:0] request_src;
rdma_req_t request_snk;

// Sequence queue handshake. NOTE(review): on the output side the FSM reads
// seq_src_ready as "entry available" and drives seq_src_valid to take it -
// presumably the convention of `queue`; confirm against that module.
logic seq_snk_valid;
logic seq_snk_ready;
logic seq_src_valid;
logic seq_src_ready;

logic [N_REQUEST_BITS-1:0] id_snk;
logic [N_REQUEST_BITS-1:0] id_next;
logic [LEN_BITS-1:0] len_snk;
logic [LEN_BITS-1:0] len_next;
logic host_snk;

reqIntf req_que [N_REGIONS] ();

// --------------------------------------------------------------------------------
// -- I/O !!! interface
// --------------------------------------------------------------------------------
for(genvar i = 0; i < N_REGIONS; i++) begin
    assign req_que[i].valid = valid_src[i];
    assign ready_src[i] = req_que[i].ready;
    assign req_que[i].req = request_src[i];

    req_queue inst_req_que (.aclk(aclk), .aresetn(aresetn), .req_in(req_que[i]), .req_out(req_src[i]));
end

assign valid_snk = req_snk.valid;
assign req_snk.ready = ready_snk;
assign request_snk = req_snk.req;
assign id_snk = req_snk.req.id;
assign len_snk = req_snk.req.len[LEN_BITS-1:0];
assign host_snk = req_snk.req.host;

// --------------------------------------------------------------------------------
// -- Mux command
// --------------------------------------------------------------------------------
always_comb begin
    if(host_snk) begin
        // Host request: needs room in the sequence queue AND in the
        // request queue of the addressed region.
        seq_snk_valid = seq_snk_ready & ready_src[id_snk] & valid_snk;
        ready_snk = seq_snk_ready & ready_src[id_snk];
    end
    else begin
        // Non-host request: only recorded in the sequence queue.
        seq_snk_valid = seq_snk_ready & valid_snk;
        ready_snk = seq_snk_ready;
    end
end

for(genvar i = 0; i < N_REGIONS; i++) begin
    assign valid_src[i] = ((id_snk == i) && host_snk) ? seq_snk_valid : 1'b0;

    assign request_src[i].vaddr = request_snk.vaddr;
    assign request_src[i].len = request_snk.len;
    assign request_src[i].sync = request_snk.sync;
    assign request_src[i].ctl = request_snk.ctl;
end

queue #(
    .QTYPE(logic [N_REQUEST_BITS+LEN_BITS-1:0])
) inst_seq_que_snk (
    .aclk(aclk),
    .aresetn(aresetn),
    .val_snk(seq_snk_valid),
    .rdy_snk(seq_snk_ready),
    .data_snk({id_snk, len_snk}),
    .val_src(seq_src_valid),
    .rdy_src(seq_src_ready),
    .data_src({id_next, len_next})
);

// --------------------------------------------------------------------------------
// -- Mux data
// --------------------------------------------------------------------------------
localparam integer BEAT_LOG_BITS = $clog2(AXI_DATA_BITS/8);

// -- FSM
typedef enum logic[0:0] {ST_IDLE, ST_MUX} state_t;
state_t state_C, state_N;

logic [N_REQUEST_BITS-1:0] id_C, id_N;
logic [LEN_BITS-BEAT_LOG_BITS:0] cnt_C, cnt_N;
logic [LEN_BITS-BEAT_LOG_BITS:0] n_beats_C, n_beats_N;

// Index of the last beat of the next transfer (number of beats minus one):
// len / bytes-per-beat when len is not a multiple of the beat size,
// len / bytes-per-beat - 1 when it is.
logic [LEN_BITS-BEAT_LOG_BITS:0] n_beats_next;

logic tr_done;

logic [AXI_DATA_BITS-1:0] axis_rd_data_src_tdata;
logic [AXI_DATA_BITS/8-1:0] axis_rd_data_src_tkeep;
logic axis_rd_data_src_tlast;
logic axis_rd_data_src_tvalid;
logic axis_rd_data_src_tready;

logic [N_REGIONS-1:0][AXI_DATA_BITS-1:0] axis_rd_data_snk_tdata;
logic [N_REGIONS-1:0][AXI_DATA_BITS/8-1:0] axis_rd_data_snk_tkeep;
logic [N_REGIONS-1:0] axis_rd_data_snk_tlast;
logic [N_REGIONS-1:0] axis_rd_data_snk_tvalid;
logic [N_REGIONS-1:0] axis_rd_data_snk_tready;

// --------------------------------------------------------------------------------
// -- I/O !!! interface
// --------------------------------------------------------------------------------
// One input FIFO per region in front of the data mux.
for(genvar i = 0; i < N_REGIONS; i++) begin
    axis_data_fifo_512 inst_data_que (
        .s_axis_aresetn(aresetn),
        .s_axis_aclk(aclk),
        .s_axis_tvalid(axis_rd_data_snk[i].tvalid),
        .s_axis_tready(axis_rd_data_snk[i].tready),
        .s_axis_tdata(axis_rd_data_snk[i].tdata),
        .s_axis_tkeep(axis_rd_data_snk[i].tkeep),
        .s_axis_tlast(axis_rd_data_snk[i].tlast),
        .m_axis_tvalid(axis_rd_data_snk_tvalid[i]),
        .m_axis_tready(axis_rd_data_snk_tready[i]),
        .m_axis_tdata(axis_rd_data_snk_tdata[i]),
        .m_axis_tkeep(axis_rd_data_snk_tkeep[i]),
        .m_axis_tlast(axis_rd_data_snk_tlast[i])
    );
end

assign axis_rd_data_src.tvalid = axis_rd_data_src_tvalid;
assign axis_rd_data_src.tdata = axis_rd_data_src_tdata;
assign axis_rd_data_src.tkeep = axis_rd_data_src_tkeep;
assign axis_rd_data_src.tlast = axis_rd_data_src_tlast;
assign axis_rd_data_src_tready = axis_rd_data_src.tready;

// REG
// All state registers sit inside the reset if/else. Previously the `else`
// had no begin/end, so cnt_C, id_C and n_beats_C were assigned outside the
// reset structure: they were never reset and were also updated on the
// falling edge of aresetn.
always_ff @(posedge aclk, negedge aresetn) begin: PROC_REG
    if (aresetn == 1'b0) begin
        state_C <= ST_IDLE;
        cnt_C <= '0;
        id_C <= '0;
        n_beats_C <= '0;
    end
    else begin
        state_C <= state_N;
        cnt_C <= cnt_N;
        id_C <= id_N;
        n_beats_C <= n_beats_N;
    end
end

// NSL
always_comb begin: NSL
    state_N = state_C;

    case(state_C)
        ST_IDLE:
            state_N = (seq_src_ready) ? ST_MUX : ST_IDLE;

        ST_MUX:
            // Chain directly into the next transfer when one is pending.
            state_N = tr_done ? (seq_src_ready ? ST_MUX : ST_IDLE) : ST_MUX;

    endcase // state_C
end

assign n_beats_next = (len_next[BEAT_LOG_BITS-1:0] != 0) ?
                      len_next[LEN_BITS-1:BEAT_LOG_BITS] :
                      len_next[LEN_BITS-1:BEAT_LOG_BITS] - 1;

// DP
always_comb begin: DP
    cnt_N = cnt_C;
    id_N = id_C;
    n_beats_N = n_beats_C;

    // Transfer done: the last beat of the current transfer is accepted
    tr_done = (cnt_C == n_beats_C) && (axis_rd_data_src_tvalid & axis_rd_data_src_tready);

    seq_src_valid = 1'b0;

    case(state_C)
        ST_IDLE: begin
            cnt_N = 0;
            if(seq_src_ready) begin
                // Take the next sequence entry
                seq_src_valid = 1'b1;
                id_N = id_next;
                n_beats_N = n_beats_next;
            end
        end

        ST_MUX: begin
            if(tr_done) begin
                cnt_N = 0;
                if(seq_src_ready) begin
                    seq_src_valid = 1'b1;
                    id_N = id_next;
                    n_beats_N = n_beats_next;
                end
            end
            else begin
                // Count accepted beats
                cnt_N = (axis_rd_data_src_tvalid & axis_rd_data_src_tready) ? cnt_C + 1 : cnt_C;
            end
        end

    endcase
end

// Mux
for(genvar i = 0; i < N_REGIONS; i++) begin
    assign axis_rd_data_snk_tready[i] = (state_C == ST_MUX) ? ((i == id_C) ? axis_rd_data_src_tready : 1'b0) : 1'b0;
end

// tlast is passed through from the selected region, it is not regenerated.
assign axis_rd_data_src_tvalid = (state_C == ST_MUX) ? axis_rd_data_snk_tvalid[id_C] : 1'b0;
assign axis_rd_data_src_tdata = axis_rd_data_snk_tdata[id_C];
assign axis_rd_data_src_tkeep = axis_rd_data_snk_tkeep[id_C];
assign axis_rd_data_src_tlast = axis_rd_data_snk_tlast[id_C];

/*
// Debug counters and ILA. The unused module-level `cnt_req` declaration was
// removed, so the declaration below no longer collides when enabled.
logic [31:0] cnt_data, cnt_last, cnt_req;

always_ff @(posedge aclk, negedge aresetn) begin
if (aresetn == 1'b0) begin
    cnt_data <= 0;
    cnt_last <= 0;
    cnt_req <= 0;
end
else begin
    cnt_data <= (axis_rd_data_src.tvalid & axis_rd_data_src.tready) ? cnt_data + 1 : cnt_data;
    cnt_last <= (axis_rd_data_src.tvalid & axis_rd_data_src.tready & axis_rd_data_src.tlast) ? cnt_last + 1 : cnt_last;
    cnt_req <= (req_snk.valid & req_snk.ready) ? cnt_req + 1 : cnt_req;
end
end

ila_cmd_rd inst_ila_rd (
    .clk(aclk),
    .probe0(state_C),
    .probe1(id_C),
    .probe2(cnt_C),
    .probe3(n_beats_C),
    .probe4(cnt_data),
    .probe5(cnt_last),
    .probe6(cnt_req),
    .probe7(tr_done),
    .probe8(req_snk.valid),
    .probe9(req_snk.ready),
    .probe10(id_snk),
    .probe11(host_snk),
    .probe12(axis_rd_data_snk_tvalid[0]),
    .probe13(axis_rd_data_snk_tready[0]),
    .probe14(axis_rd_data_snk_tlast[0]),
    .probe15(axis_rd_data_snk_tvalid[1]),
    .probe16(axis_rd_data_snk_tready[1]),
    .probe17(axis_rd_data_snk_tlast[1]),
    .probe18(axis_rd_data_snk_tvalid[2]),
    .probe19(axis_rd_data_snk_tready[2]),
    .probe20(axis_rd_data_snk_tlast[2]),
    .probe21(axis_rd_data_src_tvalid),
    .probe22(axis_rd_data_src_tready),
    .probe23(axis_rd_data_src_tlast)
);
*/

endmodule
import lynxTypes::*;

/**
 * Network RDMA write command mux
 *
 * Command side: an incoming request (req_snk) whose `host` flag is set is
 * forwarded, through a per-region req_queue, to the region named by its
 * `id` field. For every accepted request - host or not - {ctl, id, len}
 * is pushed into a sequence queue.
 *
 * Data side: the sequence queue is drained in order. For each entry the
 * FSM routes the incoming write data to the output FIFO of region `id`
 * until the number of beats implied by `len` has been transferred. tlast
 * is regenerated: it is set on the final beat only when the entry's `ctl`
 * bit is set.
 *
 * Ports:
 *   req_snk          - incoming write requests
 *   req_src          - per-region outgoing requests
 *   axis_wr_data_snk - incoming write data
 *   axis_wr_data_src - per-region write data
 */
module network_mux_cmd_wr (
    input  logic                                aclk,
    input  logic                                aresetn,

    rdmaIntf.s                                  req_snk,
    reqIntf.m                                   req_src [N_REGIONS],
    AXI4S.s                                     axis_wr_data_snk,
    AXI4S.m                                     axis_wr_data_src [N_REGIONS]
);

logic [N_REGIONS-1:0] ready_src;
logic [N_REGIONS-1:0] valid_src;
logic ready_snk;
logic valid_snk;
req_t [N_REGIONS-1:0] request_src;
rdma_req_t request_snk;

// Sequence queue handshake. NOTE(review): on the output side the FSM reads
// seq_src_ready as "entry available" and drives seq_src_valid to take it -
// presumably the convention of `queue`; confirm against that module.
logic seq_snk_valid;
logic seq_snk_ready;
logic seq_src_valid;
logic seq_src_ready;

logic [N_REQUEST_BITS-1:0] id_snk;
logic [N_REQUEST_BITS-1:0] id_next;
logic [LEN_BITS-1:0] len_snk;
logic [LEN_BITS-1:0] len_next;
logic host_snk;
logic ctl_snk;
logic ctl_next;

reqIntf req_que [N_REGIONS] ();

// --------------------------------------------------------------------------------
// -- I/O !!! interface
// --------------------------------------------------------------------------------
for(genvar i = 0; i < N_REGIONS; i++) begin
    assign req_que[i].valid = valid_src[i];
    assign ready_src[i] = req_que[i].ready;
    assign req_que[i].req = request_src[i];

    req_queue inst_req_que (.aclk(aclk), .aresetn(aresetn), .req_in(req_que[i]), .req_out(req_src[i]));
end

assign valid_snk = req_snk.valid;
assign req_snk.ready = ready_snk;
assign request_snk = req_snk.req;
assign id_snk = req_snk.req.id;
assign len_snk = req_snk.req.len[LEN_BITS-1:0];
assign host_snk = req_snk.req.host;
assign ctl_snk = req_snk.req.ctl;

// --------------------------------------------------------------------------------
// -- Mux command
// --------------------------------------------------------------------------------
always_comb begin
    if(host_snk) begin
        // Host request: needs room in the sequence queue AND in the
        // request queue of the addressed region.
        seq_snk_valid = seq_snk_ready & ready_src[id_snk] & valid_snk;
        ready_snk = seq_snk_ready & ready_src[id_snk];
    end
    else begin
        // Non-host request: only recorded in the sequence queue.
        seq_snk_valid = seq_snk_ready & valid_snk;
        ready_snk = seq_snk_ready;
    end
end

for(genvar i = 0; i < N_REGIONS; i++) begin
    assign valid_src[i] = ((id_snk == i) && host_snk) ? seq_snk_valid : 1'b0;

    assign request_src[i].vaddr = request_snk.vaddr;
    assign request_src[i].len = request_snk.len;
    assign request_src[i].sync = request_snk.sync;
    assign request_src[i].ctl = request_snk.ctl;
end

queue #(
    .QTYPE(logic [1+N_REQUEST_BITS+LEN_BITS-1:0])
) inst_seq_que_snk (
    .aclk(aclk),
    .aresetn(aresetn),
    .val_snk(seq_snk_valid),
    .rdy_snk(seq_snk_ready),
    .data_snk({ctl_snk, id_snk, len_snk}),
    .val_src(seq_src_valid),
    .rdy_src(seq_src_ready),
    .data_src({ctl_next, id_next, len_next})
);

// --------------------------------------------------------------------------------
// -- Mux data
// --------------------------------------------------------------------------------
localparam integer BEAT_LOG_BITS = $clog2(AXI_DATA_BITS/8);

// -- FSM
typedef enum logic[0:0] {ST_IDLE, ST_MUX} state_t;
state_t state_C, state_N;

logic [N_REQUEST_BITS-1:0] id_C, id_N;
logic [LEN_BITS-BEAT_LOG_BITS:0] cnt_C, cnt_N;
logic [LEN_BITS-BEAT_LOG_BITS:0] n_beats_C, n_beats_N;
logic ctl_C, ctl_N;

// Index of the last beat of the next transfer (number of beats minus one):
// len / bytes-per-beat when len is not a multiple of the beat size,
// len / bytes-per-beat - 1 when it is.
logic [LEN_BITS-BEAT_LOG_BITS:0] n_beats_next;

logic tr_done;
logic tmp_tlast;

logic [AXI_DATA_BITS-1:0] axis_wr_data_snk_tdata;
logic [AXI_DATA_BITS/8-1:0] axis_wr_data_snk_tkeep;
logic axis_wr_data_snk_tlast;
logic axis_wr_data_snk_tvalid;
logic axis_wr_data_snk_tready;

logic [N_REGIONS-1:0][AXI_DATA_BITS-1:0] axis_wr_data_src_tdata;
logic [N_REGIONS-1:0][AXI_DATA_BITS/8-1:0] axis_wr_data_src_tkeep;
logic [N_REGIONS-1:0] axis_wr_data_src_tlast;
logic [N_REGIONS-1:0] axis_wr_data_src_tvalid;
logic [N_REGIONS-1:0] axis_wr_data_src_tready;

// --------------------------------------------------------------------------------
// -- I/O !!! interface
// --------------------------------------------------------------------------------

// One output FIFO per region behind the data demux.
for(genvar i = 0; i < N_REGIONS; i++) begin
    axis_data_fifo_512 inst_data_que (
        .s_axis_aresetn(aresetn),
        .s_axis_aclk(aclk),
        .s_axis_tvalid(axis_wr_data_src_tvalid[i]),
        .s_axis_tready(axis_wr_data_src_tready[i]),
        .s_axis_tdata(axis_wr_data_src_tdata[i]),
        .s_axis_tkeep(axis_wr_data_src_tkeep[i]),
        .s_axis_tlast(axis_wr_data_src_tlast[i]),
        .m_axis_tvalid(axis_wr_data_src[i].tvalid),
        .m_axis_tready(axis_wr_data_src[i].tready),
        .m_axis_tdata(axis_wr_data_src[i].tdata),
        .m_axis_tkeep(axis_wr_data_src[i].tkeep),
        .m_axis_tlast(axis_wr_data_src[i].tlast)
    );
end

// The incoming tlast is captured here but the outputs use tmp_tlast.
assign axis_wr_data_snk_tvalid = axis_wr_data_snk.tvalid;
assign axis_wr_data_snk_tdata = axis_wr_data_snk.tdata;
assign axis_wr_data_snk_tkeep = axis_wr_data_snk.tkeep;
assign axis_wr_data_snk_tlast = axis_wr_data_snk.tlast;
assign axis_wr_data_snk.tready = axis_wr_data_snk_tready;

// REG
// All state registers sit inside the reset if/else. Previously the `else`
// had no begin/end, so cnt_C, id_C, n_beats_C and ctl_C were assigned
// outside the reset structure: they were never reset and were also updated
// on the falling edge of aresetn.
always_ff @(posedge aclk, negedge aresetn) begin: PROC_REG
    if (aresetn == 1'b0) begin
        state_C <= ST_IDLE;
        cnt_C <= '0;
        id_C <= '0;
        n_beats_C <= '0;
        ctl_C <= 1'b0;
    end
    else begin
        state_C <= state_N;
        cnt_C <= cnt_N;
        id_C <= id_N;
        n_beats_C <= n_beats_N;
        ctl_C <= ctl_N;
    end
end

// NSL
always_comb begin: NSL
    state_N = state_C;

    case(state_C)
        ST_IDLE:
            state_N = (seq_src_ready) ? ST_MUX : ST_IDLE;

        ST_MUX:
            // Chain directly into the next transfer when one is pending.
            state_N = tr_done ? (seq_src_ready ? ST_MUX : ST_IDLE) : ST_MUX;

    endcase // state_C
end

assign n_beats_next = (len_next[BEAT_LOG_BITS-1:0] != 0) ?
                      len_next[LEN_BITS-1:BEAT_LOG_BITS] :
                      len_next[LEN_BITS-1:BEAT_LOG_BITS] - 1;

// DP
always_comb begin: DP
    cnt_N = cnt_C;
    id_N = id_C;
    n_beats_N = n_beats_C;
    ctl_N = ctl_C;

    // Transfer done: the last beat of the current transfer is accepted
    tr_done = (cnt_C == n_beats_C) && (axis_wr_data_snk_tvalid & axis_wr_data_snk_tready);

    seq_src_valid = 1'b0;

    // Last gen
    tmp_tlast = 1'b0;

    case(state_C)
        ST_IDLE: begin
            cnt_N = 0;
            if(seq_src_ready) begin
                // Take the next sequence entry
                seq_src_valid = 1'b1;
                id_N = id_next;
                n_beats_N = n_beats_next;
                ctl_N = ctl_next;
            end
        end

        ST_MUX: begin
            if(tr_done) begin
                cnt_N = 0;
                if(seq_src_ready) begin
                    seq_src_valid = 1'b1;
                    id_N = id_next;
                    n_beats_N = n_beats_next;
                    ctl_N = ctl_next;
                end
            end
            else begin
                // Count accepted beats
                cnt_N = (axis_wr_data_snk_tvalid & axis_wr_data_snk_tready) ? cnt_C + 1 : cnt_C;
            end

            // tlast only for entries with ctl set, on their final beat
            if(ctl_C) begin
                tmp_tlast = (cnt_C == n_beats_C) ? 1'b1 : 1'b0;
            end
        end

    endcase
end

// Mux
// Data, keep and the generated tlast are broadcast; only the selected
// region sees valid.
for(genvar i = 0; i < N_REGIONS; i++) begin
    assign axis_wr_data_src_tvalid[i] = (state_C == ST_MUX) ? ((i == id_C) ? axis_wr_data_snk_tvalid : 1'b0) : 1'b0;
    assign axis_wr_data_src_tdata[i] = axis_wr_data_snk_tdata;
    assign axis_wr_data_src_tkeep[i] = axis_wr_data_snk_tkeep;
    assign axis_wr_data_src_tlast[i] = tmp_tlast;
end

assign axis_wr_data_snk_tready = (state_C == ST_MUX) ? axis_wr_data_src_tready[id_C] : 1'b0;


endmodule
[159:112] - Remote buffer virtual address
+ * - Size [191:160] - Size of the transfer
+ * - Parameters [255:192] - Optional Farview parameters
+ * Parsed requests are emitted with the same field layout; bits [63:34] are always written as zero.
+ */
+module network_req_parser #(
+    parameter integer ID_REG = 0, // written to the region ID field [32:29] of every emitted request
+    parameter integer HOST = 0 // written to the host access bit [33] of every emitted request
+) (
+    input  logic                aclk,
+    input  logic                aresetn,
+
+    metaIntf.s                  req_in,
+    metaIntf.m                  req_out,
+
+    output logic [31:0]         used // axis_wr_data_count of the input command FIFO
+);
+
+// Opcodes
+localparam integer APP_READ = 0;
+localparam integer APP_WRITE = 1;
+localparam integer APP_RPC = 2;
+
+localparam integer RC_RDMA_WRITE_FIRST = 5'h6;
+localparam integer RC_RDMA_WRITE_MIDDLE = 5'h7;
+localparam integer RC_RDMA_WRITE_LAST = 5'h8;
+localparam integer RC_RDMA_WRITE_LAST_WITH_IMD = 5'h9;
+localparam integer RC_RDMA_WRITE_ONLY = 5'hA;
+localparam integer RC_RDMA_WRITE_ONLY_WIT_IMD = 5'hB;
+localparam integer RC_RDMA_READ_REQUEST = 5'hC;
+localparam integer RC_RDMA_READ_RESP_FIRST = 5'hD;
+localparam integer RC_RDMA_READ_RESP_MIDDLE = 5'hE;
+localparam integer RC_RDMA_READ_RESP_LAST = 5'hF;
+localparam integer RC_RDMA_READ_RESP_ONLY = 5'h10;
+localparam integer RC_ACK = 5'h11;
+localparam integer RC_RDMA_RPC_REQUEST = 5'h18;
+
+// -- FSM
+typedef enum logic[2:0] {ST_IDLE, ST_PARSE_READ, ST_PARSE_WRITE_INIT, ST_PARSE_WRITE, ST_PARSE_RPC, ST_SEND_READ, ST_SEND_WRITE, ST_SEND_BASE} state_t;
+logic [2:0] state_C, state_N;
+
+// TODO: Needs interfaces, cleaning necessary
+
+// Cmd 64
+logic [4:0] op_C, op_N;
+logic [23:0] qp_C, qp_N;
+logic [3:0] lreg_C, lreg_N;
+logic [0:0] host_C, host_N;
+logic [29:0] rsrvd_C, rsrvd_N;
+// Params 192
+logic [47:0] lvaddr_C, lvaddr_N;
+logic [47:0] rvaddr_C, rvaddr_N;
+logic [31:0] len_C, len_N;
+logic [63:0] params_C, params_N;
+
+// Send (fields of the request currently presented on req_parsed)
+logic [4:0] pop_C, pop_N;
+logic [31:0] plen_C, plen_N;
+logic [47:0] plvaddr_C, plvaddr_N;
+logic [47:0] prvaddr_C, prvaddr_N;
+
+// Requests internal
+metaIntf #(.DATA_BITS(FV_REQ_BITS)) req_pre_parsed ();
+metaIntf #(.DATA_BITS(FV_REQ_BITS)) req_parsed ();
+
+// Decoupling
+axis_data_fifo_cnfg_rdma_256 inst_cmd_queue_in (
+    .s_axis_aresetn(aresetn),
+    .s_axis_aclk(aclk),
+    .s_axis_tvalid(req_in.valid),
+    .s_axis_tready(req_in.ready),
+    .s_axis_tdata(req_in.data),
+    .m_axis_tvalid(req_pre_parsed.valid),
+    .m_axis_tready(req_pre_parsed.ready),
+    .m_axis_tdata(req_pre_parsed.data),
+    .axis_wr_data_count(used)
+);
+
+logic [31:0] queue_used_out;
+
+axis_data_fifo_cnfg_rdma_256 inst_cmd_queue_out (
+    .s_axis_aresetn(aresetn),
+    .s_axis_aclk(aclk),
+    .s_axis_tvalid(req_parsed.valid),
+    .s_axis_tready(req_parsed.ready),
+    .s_axis_tdata(req_parsed.data),
+    .m_axis_tvalid(req_out.valid),
+    .m_axis_tready(req_out.ready),
+    .m_axis_tdata(req_out.data),
+    .axis_wr_data_count(queue_used_out)
+);
+
+// REG -- NOTE(review): the `else` below has no begin/end, so only state_C is guarded by the reset; all other registers load their _N value on every trigger of this block
+always_ff @(posedge aclk, negedge aresetn) begin: PROC_REG
+if (aresetn == 1'b0) begin
+    state_C <= ST_IDLE;
+end
+else
+    state_C <= state_N;
+
+    op_C <= op_N;
+    qp_C <= qp_N;
+    lreg_C <= lreg_N;
+    host_C <= host_N;
+    rsrvd_C <= rsrvd_N;
+
+    lvaddr_C <= lvaddr_N;
+    rvaddr_C <= rvaddr_N;
+    len_C <= len_N;
+    params_C <= params_N;
+
+    pop_C <= pop_N;
+    plen_C <= plen_N;
+    plvaddr_C <= plvaddr_N;
+    prvaddr_C <= prvaddr_N;
+end
+
+// NSL
+always_comb begin: NSL
+    state_N = state_C;
+
+    case(state_C)
+        ST_IDLE:
+            if(req_pre_parsed.valid) begin
+                if(req_pre_parsed.data[34]) begin
+                    state_N = ST_SEND_BASE;
+                end
+                else begin
+                    case(req_pre_parsed.data[4:0])
+                        APP_READ:
+                            state_N = ST_PARSE_READ;
+                        APP_WRITE:
+                            state_N = ST_PARSE_WRITE_INIT;
+                        APP_RPC:
+                            state_N = ST_PARSE_RPC;
+
+                        default: state_N = ST_IDLE; // unknown op code: request is still popped (ready is high in ST_IDLE) and nothing is emitted
+                    endcase
+                end
+            end
+
+        ST_PARSE_READ:
+            state_N = ST_SEND_READ;
+
+        ST_PARSE_WRITE_INIT:
+            state_N = ST_SEND_WRITE;
+
+        ST_PARSE_WRITE:
+            state_N = ST_SEND_WRITE;
+
+        ST_PARSE_RPC:
+            state_N = ST_SEND_READ; // RPC requests share the read send state
+
+        ST_SEND_READ:
+            if(req_parsed.ready) begin
+                state_N = ST_IDLE;
+            end
+
+        ST_SEND_WRITE:
+            if(req_parsed.ready) begin
+                state_N = len_C ? ST_PARSE_WRITE : ST_IDLE; // non-zero remaining length -> build the next packet
+            end
+
+        ST_SEND_BASE:
+            if(req_parsed.ready) begin
+                state_N = ST_IDLE;
+            end
+
+    endcase // state_C
+end
+
+// DP
+always_comb begin: DP
+    op_N = op_C;
+    qp_N = qp_C;
+    lreg_N = lreg_C;
+    host_N = host_C;
+    rsrvd_N = rsrvd_C;
+
+    len_N = len_C;
+    lvaddr_N = lvaddr_C;
+    rvaddr_N = rvaddr_C;
+    params_N = params_C;
+
+    pop_N = pop_C;
+    plen_N = plen_C;
+    plvaddr_N = plvaddr_C;
+    prvaddr_N = prvaddr_C;
+
+    // Flow
+    req_pre_parsed.ready = 1'b0;
+    req_parsed.valid = 1'b0;
+
+    // Data
+    req_parsed.data[255:0] = {params_C, plen_C, prvaddr_C, plvaddr_C, rsrvd_C, host_C, lreg_C, qp_C, pop_C};
+
+    case(state_C)
+        ST_IDLE: begin
+            req_pre_parsed.ready = 1'b1;
+
+            qp_N = req_pre_parsed.data[28:5]; // qp number
+            lreg_N = ID_REG;//req_pre_parsed.data[32:29]; // local region
+            host_N = HOST;//req_pre_parsed.data[33:33]; // host
+            rsrvd_N = 0;//req_pre_parsed.data[63:34]; // reserved
+            params_N = req_pre_parsed.data[255:192]; // params
+
+            if(req_pre_parsed.valid) begin
+                if(req_pre_parsed.data[34]) begin
+                    pop_N = req_pre_parsed.data[4:0]; // op code
+                    plvaddr_N = req_pre_parsed.data[111:64]; // local vaddr
+                    prvaddr_N = req_pre_parsed.data[159:112]; // remote vaddr
+                    plen_N = req_pre_parsed.data[191:160]; // length
+
+                end
+                else begin
+                    op_N = req_pre_parsed.data[4:0]; // op code
+                    lvaddr_N = req_pre_parsed.data[111:64]; // local vaddr
+                    rvaddr_N = req_pre_parsed.data[159:112]; // remote vaddr
+                    len_N = req_pre_parsed.data[191:160]; // length
+                end
+            end
+        end
+
+        ST_PARSE_READ: begin
+            pop_N = RC_RDMA_READ_REQUEST;
+            plen_N = len_C;
+            plvaddr_N = lvaddr_C;
+            prvaddr_N = rvaddr_C;
+        end
+
+        ST_PARSE_WRITE_INIT: begin // first packet of a write: WRITE_FIRST if more than PMTU_BITS remain, else WRITE_ONLY
+            plvaddr_N = lvaddr_C;
+            prvaddr_N = rvaddr_C;
+
+            if(len_C > PMTU_BITS) begin
+                lvaddr_N = lvaddr_C + PMTU_BITS;
+                rvaddr_N = rvaddr_C + PMTU_BITS;
+                len_N = len_C - PMTU_BITS;
+
+                pop_N = RC_RDMA_WRITE_FIRST;
+                plen_N = PMTU_BITS;
+            end
+            else begin
+                len_N = 0;
+
+                pop_N = RC_RDMA_WRITE_ONLY;
+                plen_N = len_C;
+            end
+        end
+
+        ST_PARSE_WRITE: begin // follow-up packets: WRITE_MIDDLE while more than PMTU_BITS remain, else WRITE_LAST
+            plvaddr_N = lvaddr_C;
+            prvaddr_N = rvaddr_C;
+
+            if(len_C > PMTU_BITS) begin
+                lvaddr_N = lvaddr_C + PMTU_BITS;
+                rvaddr_N = rvaddr_C + PMTU_BITS;
+                len_N = len_C - PMTU_BITS;
+
+                pop_N = RC_RDMA_WRITE_MIDDLE;
+                plen_N = PMTU_BITS;
+            end
+            else begin
+                len_N = 0;
+
+                pop_N = RC_RDMA_WRITE_LAST;
+                plen_N = len_C;
+            end
+        end
+
+        ST_PARSE_RPC: begin
+            pop_N = RC_RDMA_RPC_REQUEST;
+            plen_N = len_C;
+            plvaddr_N = lvaddr_C;
+            prvaddr_N = rvaddr_C;
+        end
+
+        ST_SEND_READ: begin
+            // Present the request until it is accepted; valid must not wait for ready (NSL leaves this state on ready)
+            req_parsed.valid = 1'b1;
+        end
+
+        ST_SEND_WRITE: begin
+            // Present the request until it is accepted; valid must not wait for ready (NSL leaves this state on ready)
+            req_parsed.valid = 1'b1;
+        end
+
+        ST_SEND_BASE: begin
+            // Present the request until it is accepted; valid must not wait for ready (NSL leaves this state on ready)
+            req_parsed.valid = 1'b1;
+        end
+
+    endcase
+end
+/*
+// DEBUG ila --------------------------------------------------------------------------------
+
+logic [31:0] cnt_in, cnt_out;
+
+always_ff @(posedge aclk, negedge aresetn) begin
+if (aresetn == 1'b0) begin
+    cnt_in <= 0;
+    cnt_out <= 0;
+end
+else
+    cnt_in <= (req_pre_parsed.valid & req_pre_parsed.ready) ? cnt_in + 1 : cnt_in;
+    cnt_out <= (req_parsed.valid & req_parsed.ready) ? cnt_out + 1 : cnt_out;
+end
+*/
+
+/*
+ila_parser inst_ila_parser (
+    .clk(aclk),
+    .probe0(state_C),
+    .probe1(op_C),
+    .probe2(qp_C),
+    .probe3(lreg_C),
+    .probe4(host_C),
+    .probe5(lvaddr_C),
+    .probe6(rvaddr_C),
+    .probe7(len_C),
+    .probe8(pop_C),
+    .probe9(plen_C),
+    .probe10(plvaddr_C),
+    .probe11(prvaddr_C),
+    .probe12(req_parsed.valid),
+    .probe13(req_parsed.ready),
+    .probe14(req_pre_parsed.valid),
+    .probe15(req_pre_parsed.ready),
+    .probe16(cnt_in),
+    .probe17(cnt_out)
+);
+*/
+
+endmodule
\ No newline at end of file
diff --git a/hw/hdl/network/network_stack.sv b/hw/hdl/network/network_stack.sv
new file mode 100644
index 00000000..c88138f4
--- /dev/null
+++ b/hw/hdl/network/network_stack.sv
@@ -0,0 +1,836 @@
+`timescale 1ns / 1ps
+
+import lynxTypes::*;
+
+`define IP_VERSION4
+
+module network_stack #(
+    parameter MAC_ADDRESS = 48'hE59D02350A00, // LSB first, 00:0A:35:02:9D:E5
+    parameter IPV6_ADDRESS= 128'hE59D_02FF_FF35_0A02_0000_0000_0000_80FE, //LSB first: FE80_0000_0000_0000_020A_35FF_FF02_9DE5,
+    parameter IP_SUBNET_MASK = 32'h00FFFFFF,
+    parameter IP_DEFAULT_GATEWAY = 32'h00000000,
+    parameter DHCP_EN = 0
+)(
+    input wire net_clk,
+    input wire net_aresetn,
+
+    /* Network streams */
+    AXI4S.s s_axis_net,
+    AXI4S.m m_axis_net,
+
+    /* Init */
+    metaIntf.s arp_lookup_request,
+    metaIntf.m arp_lookup_reply,
+    metaIntf.s set_ip_addr,
+    metaIntf.s set_board_number,
+    metaIntf.s qp_interface,
+    metaIntf.s conn_interface,
+
+    /* Commands */
+    metaIntf.s s_axis_host_meta,
+    metaIntf.s s_axis_card_meta,
+    metaIntf.m m_axis_rpc_meta,
+
+    /* Roce */
+    rdmaIntf.m m_axis_roce_read_cmd,
+    rdmaIntf.m m_axis_roce_write_cmd,
+    AXI4S.s s_axis_roce_read_data,
+    AXI4S.m m_axis_roce_write_data
+);
+
+// Sync the reset (timing)
+(* DONT_TOUCH = "yes" *)
+logic net_aresetn_r = 1'b1;
+
+always_ff @(posedge net_clk) begin
+    net_aresetn_r <= net_aresetn;
+end
+
+
+// Ip handler
+// 
+AXI4S axis_slice_to_ibh(); + +AXI4S axis_iph_to_arp_slice(); +AXI4S axis_iph_to_icmp_slice(); +AXI4S axis_iph_to_icmpv6_slice(); +AXI4S axis_iph_to_rocev6_slice(); +AXI4S axis_iph_to_toe_slice(); +AXI4S axis_iph_to_udp_slice(); +AXI4S axis_iph_to_roce_slice(); + +//Slice connections +AXI4S axis_arp_slice_to_arp(); +AXI4S axis_arp_to_arp_slice(); + +AXI4S #(.AXI4S_DATA_BITS(64)) axis_icmp_slice_to_icmp(); +AXI4S #(.AXI4S_DATA_BITS(64)) axis_icmp_to_icmp_slice(); +AXI4S axis_icmp_slice_to_merge(); + +AXI4S axis_udp_to_udp_slice(); +AXI4S axis_udp_slice_to_udp(); +AXI4S axis_udp_slice_to_merge(); + +AXI4S axis_toe_slice_to_toe(); +AXI4S axis_toe_to_toe_slice(); +AXI4S axis_toe_slice_to_merge(); + +AXI4S axis_roce_to_roce_slice(); +AXI4S axis_roce_slice_to_roce(); +AXI4S axis_roce_slice_to_merge(); + +// ARP lookup +// --------------------------------------------------------------------------------------------- +metaIntf #(.DATA_BITS(56)) axis_arp_lookup_reply (); +metaIntf #(.DATA_BITS(32)) axis_arp_lookup_request (); + +metaIntf #(.DATA_BITS(56)) axis_arp_lookup_reply_r (); +metaIntf #(.DATA_BITS(32)) axis_arp_lookup_request_r (); + + +// IP and MAC +// --------------------------------------------------------------------------------------------- +AXI4S axis_intercon_to_mie(); +AXI4S axis_mie_to_intercon(); + +// Register and distribute ip address +wire[31:0] dhcp_ip_address; +wire dhcp_ip_address_en; +reg[47:0] mie_mac_address; +reg[47:0] arp_mac_address; +reg[47:0] ipv6_mac_address; +reg[31:0] iph_ip_address; +reg[31:0] arp_ip_address; +reg[31:0] toe_ip_address; +reg[31:0] ip_subnet_mask; +reg[31:0] ip_default_gateway; +reg[127:0] link_local_ipv6_address; + +// Network controller +// --------------------------------------------------------------------------------------------- +// TX meta +metaIntf #(.DATA_BITS(FV_REQ_BITS)) axis_tx_metadata(); + +reg [31:0] local_ip_address; +reg[3:0] board_number; + +// Statistics +// 
--------------------------------------------------------------------------------------------- +logic[15:0] arp_request_pkg_counter; +logic[15:0] arp_reply_pkg_counter; + +logic[31:0] regCrcDropPkgCount; +logic regCrcDropPkgCount_valid; +logic[31:0] regInvalidPsnDropCount; +logic regInvalidPsnDropCount_valid; + +logic[31:0] rx_word_counter; +logic[31:0] rx_pkg_counter; +logic[31:0] tx_word_counter; +logic[31:0] tx_pkg_counter; + +logic[31:0] tcp_rx_pkg_counter; +logic[31:0] tcp_tx_pkg_counter; +logic[31:0] udp_rx_pkg_counter; +logic[31:0] udp_tx_pkg_counter; +logic[31:0] roce_rx_pkg_counter; +logic[31:0] roce_tx_pkg_counter; + +logic[31:0] roce_data_rx_word_counter; +logic[31:0] roce_data_rx_pkg_counter; +logic[31:0] roce_data_tx_role_word_counter; +logic[31:0] roce_data_tx_role_pkg_counter; +logic[31:0] roce_data_tx_host_word_counter; +logic[31:0] roce_data_tx_host_pkg_counter; + +logic[31:0] arp_rx_pkg_counter; +logic[31:0] arp_tx_pkg_counter; +logic[31:0] icmp_rx_pkg_counter; +logic[31:0] icmp_tx_pkg_counter; + +reg[7:0] axis_stream_down_counter; +reg axis_stream_down; +reg[7:0] output_stream_down_counter; +reg output_stream_down; + + +// --------------------------------------------------------------------------------------------- +// --------------------------------------------------------------------------------------------- + +/** + * Addresses + */ + +//assign dhcp_ip_address_en = 1'b1; +//assign dhcp_ip_address = 32'hD1D4010A; + +always @(posedge net_clk) +begin + if (net_aresetn_r == 0) begin + mie_mac_address <= 48'h000000000000; + arp_mac_address <= 48'h000000000000; + ipv6_mac_address <= 48'h000000000000; + iph_ip_address <= 32'h00000000; + arp_ip_address <= 32'h00000000; + toe_ip_address <= 32'h00000000; + ip_subnet_mask <= 32'h00000000; + ip_default_gateway <= 32'h00000000; + link_local_ipv6_address <= 0; + end + else begin + mie_mac_address <= {MAC_ADDRESS[47:44], (MAC_ADDRESS[43:40]+board_number), MAC_ADDRESS[39:0]}; + arp_mac_address <= 
{MAC_ADDRESS[47:44], (MAC_ADDRESS[43:40]+board_number), MAC_ADDRESS[39:0]}; + ipv6_mac_address <= {MAC_ADDRESS[47:44], (MAC_ADDRESS[43:40]+board_number), MAC_ADDRESS[39:0]}; + //link_local_ipv6_address[127:80] <= ipv6_mac_address; + //link_local_ipv6_address[15:0] <= 16'h80fe; // fe80 + //link_local_ipv6_address[79:16] <= 64'h0000_0000_0000_0000; + link_local_ipv6_address <= {IPV6_ADDRESS[127:120]+board_number, IPV6_ADDRESS[119:0]}; + if (DHCP_EN == 1) begin + if (dhcp_ip_address_en == 1'b1) begin + iph_ip_address <= dhcp_ip_address; + arp_ip_address <= dhcp_ip_address; + toe_ip_address <= dhcp_ip_address; + end + end + else begin + iph_ip_address <= local_ip_address; + arp_ip_address <= local_ip_address; + toe_ip_address <= local_ip_address; + ip_subnet_mask <= IP_SUBNET_MASK; + ip_default_gateway <= {local_ip_address[31:28], 8'h01, local_ip_address[23:0]}; + end + end +end + +/** + * IP handler + */ + +// In slice +axis_reg inst_slice_in (.aclk(net_clk), .aresetn(net_aresetn_r), .axis_in(s_axis_net), .axis_out(axis_slice_to_ibh)); + +// IP handler +ip_handler_ip ip_handler_inst ( + .m_axis_arp_TVALID(axis_iph_to_arp_slice.tvalid), // output AXI4Stream_M_TVALID + .m_axis_arp_TREADY(axis_iph_to_arp_slice.tready), // input AXI4Stream_M_TREADY + .m_axis_arp_TDATA(axis_iph_to_arp_slice.tdata), // output [63 : 0] AXI4Stream_M_TDATA + .m_axis_arp_TKEEP(axis_iph_to_arp_slice.tkeep), // output [7 : 0] AXI4Stream_M_TSTRB + .m_axis_arp_TLAST(axis_iph_to_arp_slice.tlast), // output [0 : 0] AXI4Stream_M_TLAST + + .m_axis_icmp_TVALID(axis_iph_to_icmp_slice.tvalid), // output AXI4Stream_M_TVALID + .m_axis_icmp_TREADY(axis_iph_to_icmp_slice.tready), // input AXI4Stream_M_TREADY + .m_axis_icmp_TDATA(axis_iph_to_icmp_slice.tdata), // output [63 : 0] AXI4Stream_M_TDATA + .m_axis_icmp_TKEEP(axis_iph_to_icmp_slice.tkeep), // output [7 : 0] AXI4Stream_M_TSTRB + .m_axis_icmp_TLAST(axis_iph_to_icmp_slice.tlast), // output [0 : 0] AXI4Stream_M_TLAST + + 
.m_axis_icmpv6_TVALID(axis_iph_to_icmpv6_slice.tvalid), + .m_axis_icmpv6_TREADY(axis_iph_to_icmpv6_slice.tready), + .m_axis_icmpv6_TDATA(axis_iph_to_icmpv6_slice.tdata), + .m_axis_icmpv6_TKEEP(axis_iph_to_icmpv6_slice.tkeep), + .m_axis_icmpv6_TLAST(axis_iph_to_icmpv6_slice.tlast), + + .m_axis_ipv6udp_TVALID(axis_iph_to_rocev6_slice.tvalid), + .m_axis_ipv6udp_TREADY(axis_iph_to_rocev6_slice.tready), + .m_axis_ipv6udp_TDATA(axis_iph_to_rocev6_slice.tdata), + .m_axis_ipv6udp_TKEEP(axis_iph_to_rocev6_slice.tkeep), + .m_axis_ipv6udp_TLAST(axis_iph_to_rocev6_slice.tlast), + + .m_axis_udp_TVALID(axis_iph_to_udp_slice.tvalid), + .m_axis_udp_TREADY(axis_iph_to_udp_slice.tready), + .m_axis_udp_TDATA(axis_iph_to_udp_slice.tdata), + .m_axis_udp_TKEEP(axis_iph_to_udp_slice.tkeep), + .m_axis_udp_TLAST(axis_iph_to_udp_slice.tlast), + + .m_axis_tcp_TVALID(axis_iph_to_toe_slice.tvalid), + .m_axis_tcp_TREADY(axis_iph_to_toe_slice.tready), + .m_axis_tcp_TDATA(axis_iph_to_toe_slice.tdata), + .m_axis_tcp_TKEEP(axis_iph_to_toe_slice.tkeep), + .m_axis_tcp_TLAST(axis_iph_to_toe_slice.tlast), + + .m_axis_roce_TVALID(axis_iph_to_roce_slice.tvalid), + .m_axis_roce_TREADY(axis_iph_to_roce_slice.tready), + .m_axis_roce_TDATA(axis_iph_to_roce_slice.tdata), + .m_axis_roce_TKEEP(axis_iph_to_roce_slice.tkeep), + .m_axis_roce_TLAST(axis_iph_to_roce_slice.tlast), + + .s_axis_raw_TVALID(axis_slice_to_ibh.tvalid), + .s_axis_raw_TREADY(axis_slice_to_ibh.tready), + .s_axis_raw_TDATA(axis_slice_to_ibh.tdata), + .s_axis_raw_TKEEP(axis_slice_to_ibh.tkeep), + .s_axis_raw_TLAST(axis_slice_to_ibh.tlast), + + .myIpAddress_V(iph_ip_address), + + .ap_clk(net_clk), // input aclk + .ap_rst_n(net_aresetn_r) // input aresetn +); + +// Tie-off +assign axis_iph_to_icmpv6_slice.tready = 1'b1; +assign axis_iph_to_rocev6_slice.tready = 1'b1; + +// IP handler -> out slices +// ARP +axis_reg inst_slice_out_0 (.aclk(net_clk), .aresetn(net_aresetn_r), .axis_in(axis_iph_to_arp_slice), .axis_out(axis_arp_slice_to_arp)); + +// 
ICMP +axis_512_to_64_converter icmp_in_data_converter ( + .aclk(net_clk), + .aresetn(net_aresetn_r), + .s_axis_tvalid(axis_iph_to_icmp_slice.tvalid), + .s_axis_tready(axis_iph_to_icmp_slice.tready), + .s_axis_tdata(axis_iph_to_icmp_slice.tdata), + .s_axis_tkeep(axis_iph_to_icmp_slice.tkeep), + .s_axis_tlast(axis_iph_to_icmp_slice.tlast), + .m_axis_tvalid(axis_icmp_slice_to_icmp.tvalid), + .m_axis_tready(axis_icmp_slice_to_icmp.tready), + .m_axis_tdata(axis_icmp_slice_to_icmp.tdata), + .m_axis_tkeep(axis_icmp_slice_to_icmp.tkeep), + .m_axis_tlast(axis_icmp_slice_to_icmp.tlast) +); + +icmp_server_ip icmp_server_inst ( + .s_axis_TVALID(axis_icmp_slice_to_icmp.tvalid), // input wire dataIn_TVALID + .s_axis_TREADY(axis_icmp_slice_to_icmp.tready), // output wire dataIn_TREADY + .s_axis_TDATA(axis_icmp_slice_to_icmp.tdata), // input wire [63 : 0] dataIn_TDATA + .s_axis_TKEEP(axis_icmp_slice_to_icmp.tkeep), // input wire [7 : 0] dataIn_TKEEP + .s_axis_TLAST(axis_icmp_slice_to_icmp.tlast), // input wire [0 : 0] dataIn_TLAST + .udpIn_TVALID(1'b0),//(axis_udp_to_icmp_tvalid), // input wire udpIn_TVALID + .udpIn_TREADY(), // output wire udpIn_TREADY + .udpIn_TDATA(0),//(axis_udp_to_icmp_tdata), // input wire [63 : 0] udpIn_TDATA + .udpIn_TKEEP(0),//(axis_udp_to_icmp_tkeep), // input wire [7 : 0] udpIn_TKEEP + .udpIn_TLAST(0),//(axis_udp_to_icmp_tlast), // input wire [0 : 0] udpIn_TLAST + .ttlIn_TVALID(1'b0),//(axis_ttl_to_icmp_tvalid), // input wire ttlIn_TVALID + .ttlIn_TREADY(), // output wire ttlIn_TREADY + .ttlIn_TDATA(0),//(axis_ttl_to_icmp_tdata), // input wire [63 : 0] ttlIn_TDATA + .ttlIn_TKEEP(0),//(axis_ttl_to_icmp_tkeep), // input wire [7 : 0] ttlIn_TKEEP + .ttlIn_TLAST(0),//(axis_ttl_to_icmp_tlast), // input wire [0 : 0] ttlIn_TLAST + .m_axis_TVALID(axis_icmp_to_icmp_slice.tvalid), // output wire dataOut_TVALID + .m_axis_TREADY(axis_icmp_to_icmp_slice.tready), // input wire dataOut_TREADY + .m_axis_TDATA(axis_icmp_to_icmp_slice.tdata), // output wire [63 : 0] 
dataOut_TDATA + .m_axis_TKEEP(axis_icmp_to_icmp_slice.tkeep), // output wire [7 : 0] dataOut_TKEEP + .m_axis_TLAST(axis_icmp_to_icmp_slice.tlast), // output wire [0 : 0] dataOut_TLAST + .ap_clk(net_clk), // input wire ap_clk + .ap_rst_n(net_aresetn_r) // input wire ap_rst_n +); + +axis_64_to_512_converter icmp_out_data_converter ( + .aclk(net_clk), + .aresetn(net_aresetn_r), + .s_axis_tvalid(axis_icmp_to_icmp_slice.tvalid), + .s_axis_tready(axis_icmp_to_icmp_slice.tready), + .s_axis_tdata(axis_icmp_to_icmp_slice.tdata), + .s_axis_tkeep(axis_icmp_to_icmp_slice.tkeep), + .s_axis_tlast(axis_icmp_to_icmp_slice.tlast), + .s_axis_tdest(0), + .m_axis_tvalid(axis_icmp_slice_to_merge.tvalid), + .m_axis_tready(axis_icmp_slice_to_merge.tready), + .m_axis_tdata(axis_icmp_slice_to_merge.tdata), + .m_axis_tkeep(axis_icmp_slice_to_merge.tkeep), + .m_axis_tlast(axis_icmp_slice_to_merge.tlast), + .m_axis_tdest() +); + +// UDP +axis_reg inst_slice_out_1 (.aclk(net_clk), .aresetn(net_aresetn_r), .axis_in(axis_iph_to_udp_slice), .axis_out(axis_udp_slice_to_udp)); +assign axis_udp_slice_to_udp.tready = 1'b1; + +// TCP +axis_reg inst_slice_out_2 (.aclk(net_clk), .aresetn(net_aresetn_r), .axis_in(axis_iph_to_toe_slice), .axis_out(axis_toe_slice_to_toe)); +assign axis_toe_slice_to_toe.tready = 1'b1; + +// Roce +axis_reg inst_slice_out_3 (.aclk(net_clk), .aresetn(net_aresetn_r), .axis_in(axis_iph_to_roce_slice), .axis_out(axis_roce_slice_to_roce)); + +/** + * Merge TX + */ + +// UDP +axis_reg inst_slice_out_4 (.aclk(net_clk), .aresetn(net_aresetn_r), .axis_in(axis_udp_to_udp_slice), .axis_out(axis_udp_slice_to_merge)); +assign axis_udp_to_udp_slice.tvalid = 1'b0; + +// TCP +axis_reg inst_slice_out_5 (.aclk(net_clk), .aresetn(net_aresetn_r), .axis_in(axis_toe_to_toe_slice), .axis_out(axis_toe_slice_to_merge)); +assign axis_toe_to_toe_slice.tvalid = 1'b0; + +// Roce +axis_reg inst_slice_out_6 (.aclk(net_clk), .aresetn(net_aresetn_r), .axis_in(axis_roce_to_roce_slice), 
.axis_out(axis_roce_slice_to_merge)); + +axis_interconnect_512_4to1 ip_merger ( + .ACLK(net_clk), // input wire ACLK + .ARESETN(net_aresetn_r), // input wire ARESETN + .S00_AXIS_ACLK(net_clk), // input wire S00_AXIS_ACLK + .S01_AXIS_ACLK(net_clk), // input wire S01_AXIS_ACLK + .S02_AXIS_ACLK(net_clk), // input wire S02_AXIS_ACLK + .S03_AXIS_ACLK(net_clk), // input wire S03_AXIS_ACLK + .S00_AXIS_ARESETN(net_aresetn_r), // input wire S00_AXIS_ARESETN + .S01_AXIS_ARESETN(net_aresetn_r), // input wire S01_AXIS_ARESETN + .S02_AXIS_ARESETN(net_aresetn_r), // input wire S02_AXIS_ARESETN + .S03_AXIS_ARESETN(net_aresetn_r), // input wire S03_AXIS_ARESETN + + .S00_AXIS_TVALID(axis_icmp_slice_to_merge.tvalid), // input wire S00_AXIS_TVALID + .S00_AXIS_TREADY(axis_icmp_slice_to_merge.tready), // output wire S00_AXIS_TREADY + .S00_AXIS_TDATA(axis_icmp_slice_to_merge.tdata), // input wire [63 : 0] S00_AXIS_TDATA + .S00_AXIS_TKEEP(axis_icmp_slice_to_merge.tkeep), // input wire [7 : 0] S00_AXIS_TKEEP + .S00_AXIS_TLAST(axis_icmp_slice_to_merge.tlast), // input wire S00_AXIS_TLAST + + .S01_AXIS_TVALID(axis_udp_slice_to_merge.tvalid), // input wire S01_AXIS_TVALID + .S01_AXIS_TREADY(axis_udp_slice_to_merge.tready), // output wire S01_AXIS_TREADY + .S01_AXIS_TDATA(axis_udp_slice_to_merge.tdata), // input wire [63 : 0] S01_AXIS_TDATA + .S01_AXIS_TKEEP(axis_udp_slice_to_merge.tkeep), // input wire [7 : 0] S01_AXIS_TKEEP + .S01_AXIS_TLAST(axis_udp_slice_to_merge.tlast), // input wire S01_AXIS_TLAST + + .S02_AXIS_TVALID(axis_toe_slice_to_merge.tvalid), // input wire S02_AXIS_TVALID + .S02_AXIS_TREADY(axis_toe_slice_to_merge.tready), // output wire S02_AXIS_TREADY + .S02_AXIS_TDATA(axis_toe_slice_to_merge.tdata), // input wire [63 : 0] S02_AXIS_TDATA + .S02_AXIS_TKEEP(axis_toe_slice_to_merge.tkeep), // input wire [7 : 0] S02_AXIS_TKEEP + .S02_AXIS_TLAST(axis_toe_slice_to_merge.tlast), // input wire S02_AXIS_TLAST + + .S03_AXIS_TVALID(axis_roce_slice_to_merge.tvalid), // input wire 
S01_AXIS_TVALID + .S03_AXIS_TREADY(axis_roce_slice_to_merge.tready), // output wire S01_AXIS_TREADY + .S03_AXIS_TDATA(axis_roce_slice_to_merge.tdata), // input wire [63 : 0] S01_AXIS_TDATA + .S03_AXIS_TKEEP(axis_roce_slice_to_merge.tkeep), // input wire [7 : 0] S01_AXIS_TKEEP + .S03_AXIS_TLAST(axis_roce_slice_to_merge.tlast), // input wire S01_AXIS_TLAST + + .M00_AXIS_ACLK(net_clk), // input wire M00_AXIS_ACLK + .M00_AXIS_ARESETN(net_aresetn_r), // input wire M00_AXIS_ARESETN + .M00_AXIS_TVALID(axis_intercon_to_mie.tvalid), // output wire M00_AXIS_TVALID + .M00_AXIS_TREADY(axis_intercon_to_mie.tready), // input wire M00_AXIS_TREADY + .M00_AXIS_TDATA(axis_intercon_to_mie.tdata), // output wire [63 : 0] M00_AXIS_TDATA + .M00_AXIS_TKEEP(axis_intercon_to_mie.tkeep), // output wire [7 : 0] M00_AXIS_TKEEP + .M00_AXIS_TLAST(axis_intercon_to_mie.tlast), // output wire M00_AXIS_TLAST + .S00_ARB_REQ_SUPPRESS(1'b0), // input wire S00_ARB_REQ_SUPPRESS + .S01_ARB_REQ_SUPPRESS(1'b0), // input wire S01_ARB_REQ_SUPPRESS + .S02_ARB_REQ_SUPPRESS(1'b0), // input wire S02_ARB_REQ_SUPPRESS + .S03_ARB_REQ_SUPPRESS(1'b0) // input wire S02_ARB_REQ_SUPPRESS +); + +/** + * ARP lookup + */ + +meta_reg #(.DATA_BITS(32)) inst_meta_slice_0 (.aclk(net_clk), .aresetn(net_aresetn_r), .meta_in(axis_arp_lookup_request), .meta_out(axis_arp_lookup_request_r)); +meta_reg #(.DATA_BITS(56)) inst_meta_slice_1 (.aclk(net_clk), .aresetn(net_aresetn_r), .meta_in(axis_arp_lookup_reply), .meta_out(axis_arp_lookup_reply_r)); + +mac_ip_encode_ip mac_ip_encode_inst ( + .m_axis_ip_TVALID(axis_mie_to_intercon.tvalid), + .m_axis_ip_TREADY(axis_mie_to_intercon.tready), + .m_axis_ip_TDATA(axis_mie_to_intercon.tdata), + .m_axis_ip_TKEEP(axis_mie_to_intercon.tkeep), + .m_axis_ip_TLAST(axis_mie_to_intercon.tlast), + .m_axis_arp_lookup_request_V_V_TVALID(axis_arp_lookup_request.valid), + .m_axis_arp_lookup_request_V_V_TREADY(axis_arp_lookup_request.ready), + 
.m_axis_arp_lookup_request_V_V_TDATA(axis_arp_lookup_request.data), + .s_axis_ip_TVALID(axis_intercon_to_mie.tvalid), + .s_axis_ip_TREADY(axis_intercon_to_mie.tready), + .s_axis_ip_TDATA(axis_intercon_to_mie.tdata), + .s_axis_ip_TKEEP(axis_intercon_to_mie.tkeep), + .s_axis_ip_TLAST(axis_intercon_to_mie.tlast), + .s_axis_arp_lookup_reply_V_TVALID(axis_arp_lookup_reply_r.valid), + .s_axis_arp_lookup_reply_V_TREADY(axis_arp_lookup_reply_r.ready), + .s_axis_arp_lookup_reply_V_TDATA(axis_arp_lookup_reply_r.data), + + .myMacAddress_V(mie_mac_address), // input wire [47 : 0] regMacAddress_V + .regSubNetMask_V(ip_subnet_mask), // input wire [31 : 0] regSubNetMask_V + .regDefaultGateway_V(ip_default_gateway), // input wire [31 : 0] regDefaultGateway_V + + .ap_clk(net_clk), // input aclk + .ap_rst_n(net_aresetn_r) // input aresetn +); + +/** + * Merges IP and ARP + */ + +axis_interconnect_512_2to1 mac_merger ( + .ACLK(net_clk), // input ACLK + .ARESETN(net_aresetn_r), // input ARESETN + .S00_AXIS_ACLK(net_clk), // input S00_AXIS_ACLK + .S01_AXIS_ACLK(net_clk), // input S01_AXIS_ACLK + //.S02_AXIS_ACLK(net_clk), // input S01_AXIS_ACLK + .S00_AXIS_ARESETN(net_aresetn_r), // input S00_AXIS_ARESETN + .S01_AXIS_ARESETN(net_aresetn_r), // input S01_AXIS_ARESETN + //.S02_AXIS_ARESETN(net_aresetn_r), // input S01_AXIS_ARESETN + .S00_AXIS_TVALID(axis_arp_to_arp_slice.tvalid), // input S00_AXIS_TVALID + .S00_AXIS_TREADY(axis_arp_to_arp_slice.tready), // output S00_AXIS_TREADY + .S00_AXIS_TDATA(axis_arp_to_arp_slice.tdata), // input [63 : 0] S00_AXIS_TDATA + .S00_AXIS_TKEEP(axis_arp_to_arp_slice.tkeep), // input [7 : 0] S00_AXIS_TKEEP + .S00_AXIS_TLAST(axis_arp_to_arp_slice.tlast), // input S00_AXIS_TLAST + + .S01_AXIS_TVALID(axis_mie_to_intercon.tvalid), // input S01_AXIS_TVALID + .S01_AXIS_TREADY(axis_mie_to_intercon.tready), // output S01_AXIS_TREADY + .S01_AXIS_TDATA(axis_mie_to_intercon.tdata), // input [63 : 0] S01_AXIS_TDATA + .S01_AXIS_TKEEP(axis_mie_to_intercon.tkeep), // 
input [7 : 0] S01_AXIS_TKEEP + .S01_AXIS_TLAST(axis_mie_to_intercon.tlast), // input S01_AXIS_TLAST + + /*.S02_AXIS_TVALID(axis_ethencode_to_intercon.valid), // input S01_AXIS_TVALID + .S02_AXIS_TREADY(axis_ethencode_to_intercon.ready), // output S01_AXIS_TREADY + .S02_AXIS_TDATA(axis_ethencode_to_intercon.data), // input [63 : 0] S01_AXIS_TDATA + .S02_AXIS_TKEEP(axis_ethencode_to_intercon.keep), // input [7 : 0] S01_AXIS_TKEEP + .S02_AXIS_TLAST(axis_ethencode_to_intercon.last), // input S01_AXIS_TLAST*/ + + .M00_AXIS_ACLK(net_clk), // input M00_AXIS_ACLK + .M00_AXIS_ARESETN(net_aresetn_r), // input M00_AXIS_ARESETN + .M00_AXIS_TVALID(m_axis_net.tvalid), // output M00_AXIS_TVALID + .M00_AXIS_TREADY(m_axis_net.tready), // input M00_AXIS_TREADY + .M00_AXIS_TDATA(m_axis_net.tdata), // output [63 : 0] M00_AXIS_TDATA + .M00_AXIS_TKEEP(m_axis_net.tkeep), // output [7 : 0] M00_AXIS_TKEEP + .M00_AXIS_TLAST(m_axis_net.tlast), // output M00_AXIS_TLAST + .S00_ARB_REQ_SUPPRESS(1'b0), // input S00_ARB_REQ_SUPPRESS + .S01_ARB_REQ_SUPPRESS(1'b0) // input S01_ARB_REQ_SUPPRESS + //.S02_ARB_REQ_SUPPRESS(1'b0) // input S01_ARB_REQ_SUPPRESS +); + +arp_server_subnet_ip arp_server_inst( + .m_axis_TVALID(axis_arp_to_arp_slice.tvalid), + .m_axis_TREADY(axis_arp_to_arp_slice.tready), + .m_axis_TDATA(axis_arp_to_arp_slice.tdata), + .m_axis_TKEEP(axis_arp_to_arp_slice.tkeep), + .m_axis_TLAST(axis_arp_to_arp_slice.tlast), + .m_axis_arp_lookup_reply_V_TVALID(axis_arp_lookup_reply.valid), + .m_axis_arp_lookup_reply_V_TREADY(axis_arp_lookup_reply.ready), + .m_axis_arp_lookup_reply_V_TDATA(axis_arp_lookup_reply.data), + .m_axis_host_arp_lookup_reply_V_TVALID(arp_lookup_reply.valid), //axis_host_arp_lookup_reply_TVALID), + .m_axis_host_arp_lookup_reply_V_TREADY(arp_lookup_reply.ready), //axis_host_arp_lookup_reply_TREADY), + .m_axis_host_arp_lookup_reply_V_TDATA(arp_lookup_reply.data), //axis_host_arp_lookup_reply_TDATA), + .s_axis_TVALID(axis_arp_slice_to_arp.tvalid), + 
.s_axis_TREADY(axis_arp_slice_to_arp.tready), + .s_axis_TDATA(axis_arp_slice_to_arp.tdata), + .s_axis_TKEEP(axis_arp_slice_to_arp.tkeep), + .s_axis_TLAST(axis_arp_slice_to_arp.tlast), + .s_axis_arp_lookup_request_V_V_TVALID(axis_arp_lookup_request_r.valid), + .s_axis_arp_lookup_request_V_V_TREADY(axis_arp_lookup_request_r.ready), + .s_axis_arp_lookup_request_V_V_TDATA(axis_arp_lookup_request_r.data), + .s_axis_host_arp_lookup_request_V_V_TVALID(arp_lookup_request.valid), //axis_host_arp_lookup_request_TVALID), + .s_axis_host_arp_lookup_request_V_V_TREADY(arp_lookup_request.ready), //axis_host_arp_lookup_request_TREADY), + .s_axis_host_arp_lookup_request_V_V_TDATA(arp_lookup_request.data), //axis_host_arp_lookup_request_TDATA), + + .myMacAddress_V(arp_mac_address), + .myIpAddress_V(arp_ip_address), + .regRequestCount_V(arp_request_pkg_counter), + .regRequestCount_V_ap_vld(), + .regReplyCount_V(arp_reply_pkg_counter), + .regReplyCount_V_ap_vld(), + + .ap_clk(net_clk), // input aclk + .ap_rst_n(net_aresetn_r) // input aresetn +); + +// Local IP and board number registers. Reset values are 32'hD1D4010B and 0. A valid set_ip_addr word is stored with its four bytes in reversed order; both set_* channels are always ready (see the assigns below). +always @(posedge net_clk) begin + if (~net_aresetn_r) begin + local_ip_address <= 32'hD1D4010B; + board_number <= 0; + end + else begin + if (set_ip_addr.valid) begin + local_ip_address[7:0] <= set_ip_addr.data[31:24]; + local_ip_address[15:8] <= set_ip_addr.data[23:16]; + local_ip_address[23:16] <= set_ip_addr.data[15:8]; + local_ip_address[31:24] <= set_ip_addr.data[7:0]; + end + if (set_board_number.valid) begin + board_number <= set_board_number.data; + end + end +end + +assign set_ip_addr.ready = 1'b1; +assign set_board_number.ready = 1'b1; + +// Merge host (S00, s_axis_host_meta) and card (S01, s_axis_card_meta) commands into axis_tx_metadata +axis_interconnect_merger_256 tx_metadata_merger ( + .ACLK(net_clk), + .ARESETN(net_aresetn_r), + .S00_AXIS_ACLK(net_clk), + .S00_AXIS_ARESETN(net_aresetn_r), + .S00_AXIS_TVALID(s_axis_host_meta.valid), + .S00_AXIS_TREADY(s_axis_host_meta.ready), + .S00_AXIS_TDATA(s_axis_host_meta.data), + .S01_AXIS_ACLK(net_clk), + .S01_AXIS_ARESETN(net_aresetn_r), + 
.S01_AXIS_TVALID(s_axis_card_meta.valid), + .S01_AXIS_TREADY(s_axis_card_meta.ready), + .S01_AXIS_TDATA(s_axis_card_meta.data), + .M00_AXIS_ACLK(net_clk), + .M00_AXIS_ARESETN(net_aresetn_r), + .M00_AXIS_TVALID(axis_tx_metadata.valid), + .M00_AXIS_TREADY(axis_tx_metadata.ready), + .M00_AXIS_TDATA(axis_tx_metadata.data), + .S00_ARB_REQ_SUPPRESS(1'b0), + .S01_ARB_REQ_SUPPRESS(1'b0) +); + +/** + * Roce stack: receives on axis_roce_slice_to_roce, transmits on axis_roce_to_roce_slice, takes the merged axis_tx_metadata commands and drives the memory command/data ports of this module. + */ + +roce_stack inst_roce_stack( + .net_clk(net_clk), // input aclk + .net_aresetn(net_aresetn_r), // input aresetn + + // IPv4 + .s_axis_rx_data(axis_roce_slice_to_roce), + .m_axis_tx_data(axis_roce_to_roce_slice), + + // User + .s_axis_tx_meta(axis_tx_metadata), + .m_axis_rx_rpc_params(m_axis_rpc_meta), + + // Memory + .m_axis_mem_write_cmd(m_axis_roce_write_cmd), + .m_axis_mem_read_cmd(m_axis_roce_read_cmd), + .m_axis_mem_write_data(m_axis_roce_write_data), + .s_axis_mem_read_data(s_axis_roce_read_data), + + // Control + .s_axis_qp_interface(qp_interface), + .s_axis_qp_conn_interface(conn_interface), + + //.local_ip_address_V(link_local_ipv6_address), // Use IPv6 addr + .local_ip_address(iph_ip_address), //Use IPv4 addr + + // Debug + .crc_drop_pkg_count_valid(regCrcDropPkgCount_valid), + .crc_drop_pkg_count_data(regCrcDropPkgCount), + .psn_drop_pkg_count_valid(regInvalidPsnDropCount_valid), + .psn_drop_pkg_count_data(regInvalidPsnDropCount) +); + +/** + * Statistics (currently disabled: the whole section below is commented out) + */ + +/* +assign rdma_debug.roce_crc_pkg_drop_count = regCrcDropPkgCount; +assign rdma_debug.roce_psn_pkg_drop_count = regInvalidPsnDropCount; +assign rdma_debug.rx_word_counter = rx_word_counter; +assign rdma_debug.rx_pkg_counter = rx_pkg_counter; +assign rdma_debug.tx_word_counter = tx_word_counter; +assign rdma_debug.tx_pkg_counter = tx_pkg_counter; +assign rdma_debug.arp_rx_pkg_counter = arp_rx_pkg_counter; +assign rdma_debug.arp_tx_pkg_counter = arp_tx_pkg_counter; +assign rdma_debug.arp_request_pkg_counter = arp_request_pkg_counter; +assign rdma_debug.arp_reply_pkg_counter = 
arp_reply_pkg_counter; +assign rdma_debug.icmp_rx_pkg_counter = icmp_rx_pkg_counter; +assign rdma_debug.icmp_tx_pkg_counter = icmp_tx_pkg_counter; +assign rdma_debug.tcp_rx_pkg_counter = tcp_rx_pkg_counter; +assign rdma_debug.tcp_tx_pkg_counter = tcp_tx_pkg_counter; +assign rdma_debug.roce_rx_pkg_counter = roce_rx_pkg_counter; +assign rdma_debug.roce_tx_pkg_counter = roce_tx_pkg_counter; +assign rdma_debug.roce_data_rx_word_counter = roce_data_rx_word_counter; +assign rdma_debug.roce_data_rx_pkg_counter = roce_data_rx_pkg_counter; +assign rdma_debug.roce_data_tx_role_word_counter = roce_data_tx_role_word_counter; +assign rdma_debug.roce_data_tx_role_pkg_counter = roce_data_tx_role_pkg_counter; +assign rdma_debug.roce_data_tx_host_word_counter = roce_data_tx_host_word_counter; +assign rdma_debug.roce_data_tx_host_pkg_counter = roce_data_tx_host_pkg_counter; +assign rdma_debug.axis_stream_down = axis_stream_down; + +always @(posedge net_clk) begin + if (set_ip_addr.valid) begin + rx_word_counter <= '0; + rx_pkg_counter <= '0; + tx_word_counter <= '0; + tx_pkg_counter <= '0; + + tcp_rx_pkg_counter <= '0; + tcp_tx_pkg_counter <= '0; + + roce_data_rx_word_counter <= '0; + roce_data_rx_pkg_counter <= '0; + roce_data_tx_role_word_counter <= '0; + roce_data_tx_role_pkg_counter <= '0; + roce_data_tx_host_word_counter <= '0; + roce_data_tx_host_pkg_counter <= '0; + + arp_rx_pkg_counter <= '0; + arp_tx_pkg_counter <= '0; + + udp_rx_pkg_counter <= '0; + udp_tx_pkg_counter <= '0; + + roce_rx_pkg_counter <= '0; + roce_tx_pkg_counter <= '0; + + axis_stream_down_counter <= '0; + axis_stream_down <= 1'b0; + end + + if (s_axis_net.tready) begin + axis_stream_down_counter <= '0; + end + if (s_axis_net.tvalid && ~s_axis_net.tready) begin + axis_stream_down_counter <= axis_stream_down_counter + 1; + end + if (axis_stream_down_counter > 2) begin + axis_stream_down <= 1'b1; + end + if (s_axis_net.tvalid && s_axis_net.tready) begin + rx_word_counter <= rx_word_counter + 1; + if 
(s_axis_net.tlast) begin + rx_pkg_counter <= rx_pkg_counter + 1; + end + end + if (m_axis_net.tvalid && m_axis_net.tready) begin + tx_word_counter <= tx_word_counter + 1; + if (m_axis_net.tlast) begin + tx_pkg_counter <= tx_pkg_counter + 1; + end + end + //arp + if (axis_arp_slice_to_arp.tvalid && axis_arp_slice_to_arp.tready) begin + if (axis_arp_slice_to_arp.tlast) begin + arp_rx_pkg_counter <= arp_rx_pkg_counter + 1; + end + end + if (axis_arp_to_arp_slice.tvalid && axis_arp_to_arp_slice.tready) begin + if (axis_arp_to_arp_slice.tlast) begin + arp_tx_pkg_counter <= arp_tx_pkg_counter + 1; + end + end + //icmp + if (axis_icmp_slice_to_icmp.tvalid && axis_icmp_slice_to_icmp.tready) begin + if (axis_icmp_slice_to_icmp.tlast) begin + icmp_rx_pkg_counter <= icmp_rx_pkg_counter + 1; + end + end + if (axis_icmp_to_icmp_slice.tvalid && axis_icmp_to_icmp_slice.tready) begin + if (axis_icmp_to_icmp_slice.tlast) begin + icmp_tx_pkg_counter <= icmp_tx_pkg_counter + 1; + end + end + //tcp + if (axis_toe_slice_to_toe.tvalid && axis_toe_slice_to_toe.tready) begin + if (axis_toe_slice_to_toe.tlast) begin + tcp_rx_pkg_counter <= tcp_rx_pkg_counter + 1; + end + end + if (axis_toe_to_toe_slice.tvalid && axis_toe_to_toe_slice.tready) begin + if (axis_toe_to_toe_slice.tlast) begin + tcp_tx_pkg_counter <= tcp_tx_pkg_counter + 1; + end + end + //udp + if (axis_udp_slice_to_udp.tvalid && axis_udp_slice_to_udp.tready) begin + if (axis_udp_slice_to_udp.tlast) begin + udp_rx_pkg_counter <= udp_rx_pkg_counter + 1; + end + end + if (axis_udp_to_udp_slice.tvalid && axis_udp_to_udp_slice.tready) begin + if (axis_udp_to_udp_slice.tlast) begin + udp_tx_pkg_counter <= udp_tx_pkg_counter + 1; + end + end + //roce + if (axis_roce_slice_to_roce.tvalid && axis_roce_slice_to_roce.tready) begin + if (axis_roce_slice_to_roce.tlast) begin + roce_rx_pkg_counter <= roce_rx_pkg_counter + 1; + end + end + if (axis_roce_to_roce_slice.tvalid && axis_roce_to_roce_slice.tready) begin + if 
(axis_roce_to_roce_slice.tlast) begin + roce_tx_pkg_counter <= roce_tx_pkg_counter + 1; + end + end + //roce data + if (m_axis_roce_write_data.tvalid && m_axis_roce_write_data.tready) begin + roce_data_rx_word_counter <= roce_data_rx_word_counter + 1; + if (m_axis_roce_write_data.tlast) begin + roce_data_rx_pkg_counter <= roce_data_rx_pkg_counter + 1; + end + end + if (s_axis_roce_read_data.tvalid && s_axis_roce_read_data.tready) begin + roce_data_tx_host_word_counter <= roce_data_tx_host_word_counter + 1; + if (s_axis_roce_read_data.tlast) begin + roce_data_tx_host_pkg_counter <= roce_data_tx_host_pkg_counter + 1; + end + end + if (s_axis_roce_role_tx_data.tvalid && s_axis_roce_role_tx_data.tready) begin + roce_data_tx_role_word_counter <= roce_data_tx_role_word_counter + 1; + if (s_axis_roce_role_tx_data.tlast) begin + roce_data_tx_role_pkg_counter <= roce_data_tx_role_pkg_counter + 1; + end + end +end +*/ + +// DEBUG ila -------------------------------------------------------------------------------- +/* +ila_conn inst_ila_conn ( + .clk(net_clk), + .probe0(qp_interface.valid), + .probe1(qp_interface.ready), + .probe2(qp_interface.data), + .probe3(conn_interface.valid), + .probe4(conn_interface.ready), + .probe5(conn_interface.data), + .probe6(local_ip_address), + .probe7(board_number), + .probe8(arp_lookup_request.valid), + .probe9(arp_lookup_request.ready), + .probe10(arp_lookup_request.data), + .probe11(arp_lookup_reply.valid), + .probe12(arp_lookup_reply.ready), + .probe13(arp_lookup_reply.data) +); +*/ +/* +ila_network_stack inst_ila_network_stack ( + .clk(net_clk), + .probe0(s_axis_net.tvalid), + .probe1(s_axis_net.tready), + .probe2(s_axis_net.tlast), + .probe3(m_axis_net.tvalid), + .probe4(m_axis_net.tready), + .probe5(m_axis_net.tlast), + .probe6(axis_roce_slice_to_merge.tvalid), + .probe7(axis_roce_slice_to_merge.tready), + .probe8(axis_roce_slice_to_merge.tlast), + .probe9(m_axis_roce_read_cmd.valid), + .probe10(m_axis_roce_read_cmd.ready), + 
.probe11(m_axis_roce_read_cmd.req.vaddr), //48 + .probe12(m_axis_roce_read_cmd.req.len), //28 + .probe13(m_axis_roce_read_cmd.req.ctl), + .probe14(m_axis_roce_read_cmd.req.id), //4 + .probe15(m_axis_roce_read_cmd.req.host), + .probe16(m_axis_roce_write_cmd.valid), + .probe17(m_axis_roce_write_cmd.ready), + .probe18(m_axis_roce_write_cmd.req.vaddr), //48 + .probe19(m_axis_roce_write_cmd.req.len), //28 + .probe20(m_axis_roce_write_cmd.req.ctl), + .probe21(m_axis_roce_write_cmd.req.id), //4 + .probe22(m_axis_roce_write_cmd.req.host), + .probe23(s_axis_roce_read_data.tvalid), + .probe24(s_axis_roce_read_data.tready), + .probe25(s_axis_roce_read_data.tlast), + .probe26(m_axis_roce_write_data.tvalid), + .probe27(m_axis_roce_write_data.tready), + .probe28(m_axis_roce_write_data.tlast), + .probe29(axis_roce_slice_to_merge.tdata), //512 + .probe30(m_axis_rpc_meta.data), //256 + .probe31(m_axis_rpc_meta.valid), + .probe32(m_axis_rpc_meta.ready), + .probe33(axis_tx_metadata.valid), + .probe34(axis_tx_metadata.ready), + .probe35(axis_tx_metadata.data) //256 +); +*/ + +/* +ila_network_stack_rpc inst_ila_rpc ( + .clk(net_clk), + .probe0(m_axis_rpc_meta.valid), + .probe1(m_axis_rpc_meta.ready), + .probe2(m_axis_rpc_meta.data[199:192]), + .probe3(cnt_rpc), + .probe4(axis_tx_metadata.valid), + .probe5(axis_tx_metadata.ready), + .probe6(axis_tx_metadata.data[63:0]), + .probe7(cnt_meta), + .probe8(s_axis_roce_read_data.tvalid), + .probe9(s_axis_roce_read_data.tready), + .probe10(s_axis_roce_read_data.tlast) +); +*/ +endmodule diff --git a/hw/hdl/network/network_top.sv b/hw/hdl/network/network_top.sv new file mode 100644 index 00000000..0020bf8f --- /dev/null +++ b/hw/hdl/network/network_top.sv @@ -0,0 +1,255 @@ +`timescale 1ns / 1ps + +import lynxTypes::*; + +/* network_top: wraps network_module (GT pins in, net_clk and network_init_done out), the optional 64<->512 width converters (EN_RDMA_10G), one axis_reg slice per direction, network_stack, and network_clk_cross, which bridges the aclk-domain ports of this module to the *_nclk interfaces of the stack. NOTE(review): the command/data ports are arrays of N_REGIONS interfaces while each *_nclk counterpart is a single interface; the fan-out presumably happens inside network_clk_cross - confirm. */ +module network_top ( + // Pcie + input wire aclk, + input wire aresetn, + + // Net + input wire sys_reset, + input wire dclk, + input wire gt_refclk_p, + input wire gt_refclk_n, + + // Phys. 
+`ifdef EN_RDMA_10G + input wire [0:0] gt_rxp_in, + input wire [0:0] gt_rxn_in, + output wire [0:0] gt_txp_out, + output wire [0:0] gt_txn_out, +`else + input wire [3:0] gt_rxp_in, + input wire [3:0] gt_rxn_in, + output wire [3:0] gt_txp_out, + output wire [3:0] gt_txn_out, +`endif + + // Init + metaIntf.s arp_lookup_request, + metaIntf.m arp_lookup_reply, + metaIntf.s set_ip_addr, + metaIntf.s set_board_number, + metaIntf.s qp_interface, + metaIntf.s conn_interface, + + // Commands + metaIntf.s rdma_req_host [N_REGIONS], +`ifdef EN_FVV + metaIntf.s rdma_req_card [N_REGIONS], + metaIntf.m rdma_req_fv [N_REGIONS], +`endif + + // RDMA ctrl + data + reqIntf.m rdma_rd_cmd [N_REGIONS], + reqIntf.m rdma_wr_cmd [N_REGIONS], + AXI4S.s axis_rdma_rd_data [N_REGIONS], + AXI4S.m axis_rdma_wr_data [N_REGIONS] +); + +/** + * Clock Generation + */ +logic network_init; + +// Network clock +logic net_aresetn; +logic net_clk; + +// Network reset: net_aresetn is network_init (the network_init_done output of network_module) routed through a BUFG +BUFG bufg_aresetn( + .I(network_init), + .O(net_aresetn) +); + +/** + * Network module + */ +`ifdef EN_RDMA_10G + AXI4S #(.AXI4S_DATA_BITS(64)) axis_net_rx_data_na(); + AXI4S #(.AXI4S_DATA_BITS(64)) axis_net_tx_data_na(); +`else + AXI4S axis_net_rx_data_na(); + AXI4S axis_net_tx_data_na(); +`endif + +AXI4S axis_net_rx_data(); +AXI4S axis_net_tx_data(); + +network_module inst_network_module +( + .dclk (dclk), + .net_clk(net_clk), + .sys_reset (sys_reset), + .aresetn(net_aresetn), + .network_init_done(network_init), + + .gt_refclk_p(gt_refclk_p), + .gt_refclk_n(gt_refclk_n), + + .gt_rxp_in(gt_rxp_in), + .gt_rxn_in(gt_rxn_in), + .gt_txp_out(gt_txp_out), + .gt_txn_out(gt_txn_out), + + .user_rx_reset(), + .user_tx_reset(), + .gtpowergood_out(), + + //master 0 + .m_axis_net_rx(axis_net_rx_data_na), + .s_axis_net_tx(axis_net_tx_data_na) +); + +/** + * Width adjustments: with EN_RDMA_10G the 64-bit *_na streams go through the 64->512 / 512->64 converters (widths per the converter names); otherwise the *_na streams are wired straight through. + */ +`ifdef EN_RDMA_10G + axis_64_to_512_converter net_rx_converter ( + .aclk(net_clk), + .aresetn(net_aresetn), + .s_axis_tvalid(axis_net_rx_data_na.tvalid), + 
.s_axis_tready(axis_net_rx_data_na.tready), + .s_axis_tdata(axis_net_rx_data_na.tdata), + .s_axis_tkeep(axis_net_rx_data_na.tkeep), + .s_axis_tlast(axis_net_rx_data_na.tlast), + .s_axis_tdest(0), /* tdest is not used on this path: the input is tied to 0 and the output is left open */ + .m_axis_tvalid(axis_net_rx_data.tvalid), + .m_axis_tready(axis_net_rx_data.tready), + .m_axis_tdata(axis_net_rx_data.tdata), + .m_axis_tkeep(axis_net_rx_data.tkeep), + .m_axis_tlast(axis_net_rx_data.tlast), + .m_axis_tdest() + ); + axis_512_to_64_converter net_tx_converter ( + .aclk(net_clk), + .aresetn(net_aresetn), + .s_axis_tvalid(axis_net_tx_data.tvalid), + .s_axis_tready(axis_net_tx_data.tready), + .s_axis_tdata(axis_net_tx_data.tdata), + .s_axis_tkeep(axis_net_tx_data.tkeep), + .s_axis_tlast(axis_net_tx_data.tlast), + .m_axis_tvalid(axis_net_tx_data_na.tvalid), + .m_axis_tready(axis_net_tx_data_na.tready), + .m_axis_tdata(axis_net_tx_data_na.tdata), + .m_axis_tkeep(axis_net_tx_data_na.tkeep), + .m_axis_tlast(axis_net_tx_data_na.tlast) + ); +`else + assign axis_net_rx_data.tvalid = axis_net_rx_data_na.tvalid; + assign axis_net_rx_data_na.tready = axis_net_rx_data.tready; + assign axis_net_rx_data.tdata = axis_net_rx_data_na.tdata; + assign axis_net_rx_data.tkeep = axis_net_rx_data_na.tkeep; + assign axis_net_rx_data.tlast = axis_net_rx_data_na.tlast; + + assign axis_net_tx_data_na.tvalid = axis_net_tx_data.tvalid; + assign axis_net_tx_data.tready = axis_net_tx_data_na.tready; + assign axis_net_tx_data_na.tdata = axis_net_tx_data.tdata; + assign axis_net_tx_data_na.tkeep = axis_net_tx_data.tkeep; + assign axis_net_tx_data_na.tlast = axis_net_tx_data.tlast; +`endif + +// Slices: one axis_reg stage per direction between the width-adjusted network streams and the stack +AXI4S axis_net_rx_data_r (); +AXI4S axis_net_tx_data_r (); +axis_reg inst_slice_rx (.aclk(net_clk), .aresetn(net_aresetn), .axis_in(axis_net_rx_data), .axis_out(axis_net_rx_data_r)); +axis_reg inst_slice_tx (.aclk(net_clk), .aresetn(net_aresetn), .axis_in(axis_net_tx_data_r), .axis_out(axis_net_tx_data)); + +/** + * Network stack + */ + +// Decl. 
+metaIntf #(.DATA_BITS(32)) arp_lookup_request_nclk(); +metaIntf #(.DATA_BITS(56)) arp_lookup_reply_nclk(); +metaIntf #(.DATA_BITS(32)) set_ip_addr_nclk(); +metaIntf #(.DATA_BITS(4)) set_board_number_nclk(); +metaIntf #(.DATA_BITS(144)) qp_interface_nclk(); +metaIntf #(.DATA_BITS(184)) conn_interface_nclk(); + +metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_host_nclk(); +metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_card_nclk(); +metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_fv_nclk(); + +rdmaIntf rdma_rd_cmd_nclk (); +rdmaIntf rdma_wr_cmd_nclk (); +AXI4S axis_rdma_rd_data_nclk (); +AXI4S axis_rdma_wr_data_nclk (); + +network_stack inst_network_stack ( + .net_clk(net_clk), + .net_aresetn(net_aresetn), + + .s_axis_net(axis_net_rx_data_r), + .m_axis_net(axis_net_tx_data_r), + + .arp_lookup_request(arp_lookup_request_nclk), + .arp_lookup_reply(arp_lookup_reply_nclk), + .set_ip_addr(set_ip_addr_nclk), + .set_board_number(set_board_number_nclk), + .qp_interface(qp_interface_nclk), + .conn_interface(conn_interface_nclk), + + .s_axis_host_meta(rdma_req_host_nclk), + .s_axis_card_meta(rdma_req_card_nclk), + .m_axis_rpc_meta(rdma_req_fv_nclk), + + .m_axis_roce_read_cmd(rdma_rd_cmd_nclk), + .m_axis_roce_write_cmd(rdma_wr_cmd_nclk), + .s_axis_roce_read_data(axis_rdma_rd_data_nclk), + .m_axis_roce_write_data(axis_rdma_wr_data_nclk) +); + +/* Bridges the aclk-domain ports of this module (*_aclk connections) to the *_nclk interfaces used by the network stack above. */ +network_clk_cross inst_network_clk_cross ( + .aclk(aclk), + .aresetn(aresetn), + .net_clk(net_clk), + .net_aresetn(net_aresetn), + + // ACLK + .arp_lookup_request_aclk(arp_lookup_request), + .arp_lookup_reply_aclk(arp_lookup_reply), + .set_ip_addr_aclk(set_ip_addr), + .set_board_number_aclk(set_board_number), + .qp_interface_aclk(qp_interface), + .conn_interface_aclk(conn_interface), + + .rdma_req_host_aclk(rdma_req_host), +`ifdef EN_FVV + .rdma_req_card_aclk(rdma_req_card), + .rdma_req_fv_aclk(rdma_req_fv), +`endif + + .rdma_rd_cmd_aclk(rdma_rd_cmd), + .rdma_wr_cmd_aclk(rdma_wr_cmd), + .axis_rdma_rd_data_aclk(axis_rdma_rd_data), + 
.axis_rdma_wr_data_aclk(axis_rdma_wr_data), + + // NCLK + .arp_lookup_request_nclk(arp_lookup_request_nclk), + .arp_lookup_reply_nclk(arp_lookup_reply_nclk), + .set_ip_addr_nclk(set_ip_addr_nclk), + .set_board_number_nclk(set_board_number_nclk), + .qp_interface_nclk(qp_interface_nclk), + .conn_interface_nclk(conn_interface_nclk), + + .rdma_req_host_nclk(rdma_req_host_nclk), +`ifdef EN_FVV + .rdma_req_card_nclk(rdma_req_card_nclk), + .rdma_req_fv_nclk(rdma_req_fv_nclk), +`endif + + .rdma_rd_cmd_nclk(rdma_rd_cmd_nclk), + .rdma_wr_cmd_nclk(rdma_wr_cmd_nclk), + .axis_rdma_rd_data_nclk(axis_rdma_rd_data_nclk), + .axis_rdma_wr_data_nclk(axis_rdma_wr_data_nclk) +); + +/* Without EN_FVV the clock-cross connections for the card/fv channels are compiled out: the card request input of the stack is never valid and RPC (fv) requests from the stack are always accepted (ready tied high). NOTE(review): only .valid of rdma_req_card_nclk is driven here; its .data is left undriven in this module. */ +`ifndef EN_FVV +assign rdma_req_card_nclk.valid = 1'b0; +assign rdma_req_fv_nclk.ready = 1'b1; +`endif + + +endmodule \ No newline at end of file diff --git a/hw/hdl/network/roce_stack.sv b/hw/hdl/network/roce_stack.sv new file mode 100644 index 00000000..3bd58d9f --- /dev/null +++ b/hw/hdl/network/roce_stack.sv @@ -0,0 +1,122 @@ +import lynxTypes::*; + +/* roce_stack: wrapper that connects the rocev2_ip core to the lynxTypes interfaces and unpacks its 104-bit memory command words into rdmaIntf request fields. */ +module roce_stack ( + input logic net_clk, + input logic net_aresetn, + + // Network interface + AXI4S.s s_axis_rx_data, + AXI4S.m m_axis_tx_data, + + // User command + metaIntf.s s_axis_tx_meta, + + // RPC command + metaIntf.m m_axis_rx_rpc_params, + + // Memory + rdmaIntf.m m_axis_mem_read_cmd, + rdmaIntf.m m_axis_mem_write_cmd, + AXI4S.s s_axis_mem_read_data, + AXI4S.m m_axis_mem_write_data, + + // Control + metaIntf.s s_axis_qp_interface, + metaIntf.s s_axis_qp_conn_interface, + input logic [31:0] local_ip_address, + + // Debug + output logic crc_drop_pkg_count_valid, + output logic[31:0] crc_drop_pkg_count_data, + output logic psn_drop_pkg_count_valid, + output logic[31:0] psn_drop_pkg_count_data +); + +// Requests: 104-bit command words produced by the rocev2_ip core, unpacked field by field below +logic [103:0] rd_cmd_data; +logic [103:0] wr_cmd_data; + +assign m_axis_mem_read_cmd.req.vaddr = rd_cmd_data[0+:VADDR_BITS]; +assign m_axis_mem_read_cmd.req.len = rd_cmd_data[64+:LEN_BITS]; +assign m_axis_mem_read_cmd.req.sync = 1'b0; 
+assign m_axis_mem_read_cmd.req.ctl = rd_cmd_data[100+:1]; +assign m_axis_mem_read_cmd.req.stream = rd_cmd_data[64+LEN_BITS+:1]; +assign m_axis_mem_read_cmd.req.id = rd_cmd_data[96+:N_REQUEST_BITS]; +assign m_axis_mem_read_cmd.req.host = rd_cmd_data[101+:1]; + +/* Write command unpacking: vaddr starts at bit 0, len at bit 64, stream at bit 64+LEN_BITS, id at bit 96, ctl is bit 100 and host is bit 101; sync is tied low. NOTE(review): id starts at bit 96 and ctl sits at bit 100, so this layout presumably relies on N_REQUEST_BITS <= 4 and 64+LEN_BITS < 96 - confirm against lynxTypes. */ +assign m_axis_mem_write_cmd.req.vaddr = wr_cmd_data[0+:VADDR_BITS]; +assign m_axis_mem_write_cmd.req.len = wr_cmd_data[64+:LEN_BITS]; +assign m_axis_mem_write_cmd.req.sync = 1'b0; +assign m_axis_mem_write_cmd.req.ctl = wr_cmd_data[100+:1]; +assign m_axis_mem_write_cmd.req.stream = wr_cmd_data[64+LEN_BITS+:1]; +assign m_axis_mem_write_cmd.req.id = wr_cmd_data[96+:N_REQUEST_BITS]; +assign m_axis_mem_write_cmd.req.host = wr_cmd_data[101+:1]; + +rocev2_ip rocev2_inst( + .ap_clk(net_clk), // input aclk + .ap_rst_n(net_aresetn), // input aresetn + + // RX + .s_axis_rx_data_TVALID(s_axis_rx_data.tvalid), + .s_axis_rx_data_TREADY(s_axis_rx_data.tready), + .s_axis_rx_data_TDATA(s_axis_rx_data.tdata), + .s_axis_rx_data_TKEEP(s_axis_rx_data.tkeep), + .s_axis_rx_data_TLAST(s_axis_rx_data.tlast), + + // TX + .m_axis_tx_data_TVALID(m_axis_tx_data.tvalid), + .m_axis_tx_data_TREADY(m_axis_tx_data.tready), + .m_axis_tx_data_TDATA(m_axis_tx_data.tdata), + .m_axis_tx_data_TKEEP(m_axis_tx_data.tkeep), + .m_axis_tx_data_TLAST(m_axis_tx_data.tlast), + + // User commands + .s_axis_tx_meta_V_TVALID(s_axis_tx_meta.valid), + .s_axis_tx_meta_V_TREADY(s_axis_tx_meta.ready), + .s_axis_tx_meta_V_TDATA(s_axis_tx_meta.data), + + // RPC commands + .m_axis_rx_rpc_params_V_data_TVALID(m_axis_rx_rpc_params.valid), + .m_axis_rx_rpc_params_V_data_TREADY(m_axis_rx_rpc_params.ready), + .m_axis_rx_rpc_params_V_data_TDATA(m_axis_rx_rpc_params.data), + + // Memory + // Write commands (the TDATA word is unpacked into m_axis_mem_write_cmd.req by the assigns above) + .m_axis_mem_write_cmd_V_data_TVALID(m_axis_mem_write_cmd.valid), + .m_axis_mem_write_cmd_V_data_TREADY(m_axis_mem_write_cmd.ready), + .m_axis_mem_write_cmd_V_data_TDATA(wr_cmd_data), + // Read commands (the TDATA word is unpacked into m_axis_mem_read_cmd.req by the assigns above) + .m_axis_mem_read_cmd_V_data_TVALID(m_axis_mem_read_cmd.valid), + 
.m_axis_mem_read_cmd_V_data_TREADY(m_axis_mem_read_cmd.ready), + .m_axis_mem_read_cmd_V_data_TDATA(rd_cmd_data), + // Write data + .m_axis_mem_write_data_TVALID(m_axis_mem_write_data.tvalid), + .m_axis_mem_write_data_TREADY(m_axis_mem_write_data.tready), + .m_axis_mem_write_data_TDATA(m_axis_mem_write_data.tdata), + .m_axis_mem_write_data_TKEEP(m_axis_mem_write_data.tkeep), + .m_axis_mem_write_data_TLAST(m_axis_mem_write_data.tlast), + // Read data + .s_axis_mem_read_data_TVALID(s_axis_mem_read_data.tvalid), + .s_axis_mem_read_data_TREADY(s_axis_mem_read_data.tready), + .s_axis_mem_read_data_TDATA(s_axis_mem_read_data.tdata), + .s_axis_mem_read_data_TKEEP(s_axis_mem_read_data.tkeep), + .s_axis_mem_read_data_TLAST(s_axis_mem_read_data.tlast), + + // QP intf + .s_axis_qp_interface_V_TVALID(s_axis_qp_interface.valid), + .s_axis_qp_interface_V_TREADY(s_axis_qp_interface.ready), + .s_axis_qp_interface_V_TDATA(s_axis_qp_interface.data), + .s_axis_qp_conn_interface_V_TVALID(s_axis_qp_conn_interface.valid), + .s_axis_qp_conn_interface_V_TREADY(s_axis_qp_conn_interface.ready), + .s_axis_qp_conn_interface_V_TDATA(s_axis_qp_conn_interface.data), + .local_ip_address_V({local_ip_address,local_ip_address,local_ip_address,local_ip_address}), //Use IPv4 addr: the 32-bit address is replicated four times (128 bits) + + // Debug + .regCrcDropPkgCount_V(crc_drop_pkg_count_data), + .regCrcDropPkgCount_V_ap_vld(crc_drop_pkg_count_valid), + .regInvalidPsnDropCount_V(psn_drop_pkg_count_data), + .regInvalidPsnDropCount_V_ap_vld(psn_drop_pkg_count_valid) + +); + +endmodule \ No newline at end of file diff --git a/hw/hdl/network/rx_interface.v b/hw/hdl/network/rx_interface.v new file mode 100644 index 00000000..3bbce1c8 --- /dev/null +++ b/hw/hdl/network/rx_interface.v @@ -0,0 +1,551 @@ +/******************************************************************************* +** ? Copyright 2010 - 2011 Xilinx, Inc. All rights reserved. +** This file contains confidential and proprietary information of Xilinx, Inc. and +** is protected under U.S. 
and international copyright and other intellectual property laws. +******************************************************************************* +** ____ ____ +** / /\/ / +** /___/ \ / Vendor: Xilinx +** \ \ \/ +** \ \ +** / / +** /___/ /\ +** \ \ / \ Virtex-7 XT Connectivity Domain Targeted Reference Design +** \___\/\___\ +** +** Device: xc7k325t-ffg900-2 +** Version: 1.0 +** +******************************************************************************* +** +*******************************************************************************/ + +/****************************************************************************** +The module was written to perform address filtering on the receive path; in this version the +address match logic is commented out, so no frame is rejected on address grounds. The receive logic FSM detects +a good frame and makes it available to the packet FIFO interface. Two state machines +are implemented: one FSM covers the write data from XGEMAC interface and another FSM controls +the read logic to packet FIFO +*******************************************************************************/ + +`timescale 1ps / 1ps + +module rx_interface #( + parameter FIFO_CNT_WIDTH = 11 +) +( + input [63:0] axi_str_tdata_from_xgmac, + input [7:0] axi_str_tkeep_from_xgmac, + input axi_str_tvalid_from_xgmac, + input axi_str_tlast_from_xgmac, + input axi_str_tuser_from_xgmac, + + input axi_str_tready_from_fifo, + + output [63:0] axi_str_tdata_to_fifo, + output [7:0] axi_str_tkeep_to_fifo, + output axi_str_tvalid_to_fifo, + output axi_str_tlast_to_fifo, + output [15:0] rd_pkt_len, + output reg rx_fifo_overflow = 1'b0, + + input [13:0] rx_statistics_vector, + input rx_statistics_valid, + + output [FIFO_CNT_WIDTH-1:0] rd_data_count , + + input user_clk, + input reset + +); + + //Wire declaration + //wire broadcast_detect; + //wire [47:0] rx_mac_id_i; + wire axis_rd_tlast; + wire axis_rd_tvalid; + wire [63:0] axis_rd_tdata; + wire [7:0] axis_rd_tkeep; + wire axis_wr_tlast; + wire axis_wr_tvalid; + wire [63:0] axis_wr_tdata; + wire [7:0] axis_wr_tkeep; + //wire da_match ; + 
wire full; + wire empty; + wire valid_cmd; + wire crc_pass; + wire [15:0] cmd_out; + wire axis_wr_tready; + wire [FIFO_CNT_WIDTH-1:0] wr_data_count ; + wire [FIFO_CNT_WIDTH-1:0] left_over_space_in_fifo; + wire wr_reached_threshold; + wire wr_reached_threshold_extend; + //wire [47:0] mac_id_sync; + //wire mac_id_valid_sync; + //wire promiscuous_mode_en_sync; + wire frame_len_ctr_valid; + + //Reg declaration + reg [63:0] axi_str_tdata_from_xgmac_r ; + reg [7:0] axi_str_tkeep_from_xgmac_r ; + reg axi_str_tvalid_from_xgmac_r; + reg axi_str_tlast_from_xgmac_r ; + reg axi_str_tuser_from_xgmac_r ; + reg force_tlast_to_fifo='d0 ; + reg address_chk_en = 'd0; + reg assert_rd='d0; + reg [15:0] cmd_in = 'd0; + reg wr_en=1'b0; + reg rd_en=1'b0; + reg axis_rd_tready='d0 ; + reg axis_rd_tvalid_from_fsm=1'b0; + reg [3:0] tkeep_decoded_value; +// reg axi_str_tvalid_from_fsm=1'b0; + reg [12:0] rd_pkt_len_count='d0; + reg [13:0] rx_stats_vec_reg='d0; + + reg [3:0] frame_len_ctr; + +localparam + //states for Write FSM (one-hot) + IDLE_WR = 4'b0001, + DA_DECODE = 4'b0010, + BEGIN_WRITE = 4'b0100, + DROP_FRAME = 4'b1000, + + //states for Read FSM (one-hot; same encodings as the write FSM, held in a separate state register) + IDLE_RD = 4'b0001, + PREP_READ_1 = 4'b0010, + PREP_READ_2 = 4'b0100, + BEGIN_READ = 4'b1000; + +/* Free-space thresholds compared against left_over_space_in_fifo further down. */ +localparam THRESHOLD = 200; +localparam THRESHOLD_EXT = 400; + + reg [3:0] state_wr = IDLE_WR; + reg [3:0] state_rd = IDLE_RD; + + + //Synchronize mac_id, promiscuous_mode_en and mac_id_valid with the destination clock (disabled: the synchronizers and the address match assigns below are commented out) + /*synchronizer_simple #(.DATA_WIDTH (1)) sync_to_mac_clk_0 + ( + .data_in (promiscuous_mode_en), + .new_clk (user_clk), + .data_out (promiscuous_mode_en_sync) + ); + + synchronizer_simple #(.DATA_WIDTH (1)) sync_to_mac_clk_1 + ( + .data_in (mac_id_valid), + .new_clk (user_clk), + .data_out (mac_id_valid_sync) + ); + + synchronizer_simple #(.DATA_WIDTH (48)) sync_to_mac_clk_2 + ( + .data_in (mac_id), + .new_clk (user_clk), + .data_out (mac_id_sync) + );*/ + + //assign broadcast_detect = ((axi_str_tdata_from_xgmac_r[47:0]== {48{1'b1}}) && 
(address_chk_en == 1'b1))?1'b1:1'b0; + + //assign rx_mac_id_i = (address_chk_en == 1'b1)?axi_str_tdata_from_xgmac_r[47:0]:48'b0; + + //assign da_match = ((rx_mac_id_i == mac_id_sync) & mac_id_valid_sync)?1'b1:1'b0; + + //Add a pipelining stage for received data from xgemac interface. + //This is necessary for FSM control logic + always @(posedge user_clk) + begin + axi_str_tdata_from_xgmac_r <= axi_str_tdata_from_xgmac; + axi_str_tkeep_from_xgmac_r <= axi_str_tkeep_from_xgmac; + axi_str_tvalid_from_xgmac_r <= axi_str_tvalid_from_xgmac; + axi_str_tlast_from_xgmac_r <= axi_str_tlast_from_xgmac; + axi_str_tuser_from_xgmac_r <= axi_str_tuser_from_xgmac; + end + + /* Write-side stream (axis_wr_*), built from the registered XGEMAC signals: tvalid is forced low in DROP_FRAME, and a beat is forced when force_tlast_to_fifo fires during BEGIN_WRITE. NOTE(review): the bare begin/end around the first assign is not standard module-level Verilog-2001 syntax - confirm the target tools accept it. */ + + begin + assign axis_wr_tvalid = (state_wr==DROP_FRAME) ? 1'b0 : + (axi_str_tvalid_from_xgmac_r | (force_tlast_to_fifo & (state_wr == BEGIN_WRITE))); + end + + assign axis_wr_tlast = (axi_str_tlast_from_xgmac_r | force_tlast_to_fifo); + assign axis_wr_tkeep = axi_str_tkeep_from_xgmac_r; + assign axis_wr_tdata = axi_str_tdata_from_xgmac_r; + + //Register Rx statistics vector; the write FSM copies it into cmd_in[15:2] as the frame length + //Rx statistics is valid only if rx_statistics_valid is asserted + //from XGEMAC + //- bits 18:5 in stats vector provide frame length including FCS (presumably passed in here as the 14-bit rx_statistics_vector input - confirm), hence + //subtract 4 bytes to get the frame length only. 
+ always @(posedge user_clk) + begin + if(rx_statistics_valid) + rx_stats_vec_reg <= rx_statistics_vector[13:0] - 14'd4; + end + + /* Free-space estimate: 2^(FIFO_CNT_WIDTH-1) minus the write-side fill count. */ + assign left_over_space_in_fifo = {1'b1,{(FIFO_CNT_WIDTH-1){1'b0}}} - wr_data_count[FIFO_CNT_WIDTH-1:0]; + + assign wr_reached_threshold = (left_over_space_in_fifo < THRESHOLD)?1'b1:1'b0; + assign wr_reached_threshold_extend = (left_over_space_in_fifo < THRESHOLD_EXT)?1'b1:1'b0; + + /* force_tlast_to_fifo: set for one cycle when free space is below THRESHOLD and the incoming beat is not already a valid tlast; it clears itself on the next cycle, so it can fire again while the condition persists. */ + always @(posedge user_clk) + begin + if(force_tlast_to_fifo) + force_tlast_to_fifo <= 1'b0; + else if(wr_reached_threshold & !(axi_str_tlast_from_xgmac & axi_str_tvalid_from_xgmac)) + force_tlast_to_fifo <= 1'b1; + end + + // Counter to count frame length when length is less than 64B + // For frame length less than 64B, XGEMAC core reports length including the + // padded characters. To overcome this situation, a separate counter is implemented + // The counter is cleared on a valid tlast, counts valid beats, and holds once it exceeds 8 + always @(posedge user_clk) + begin + if (reset) + frame_len_ctr <= 'd0; + else if (axi_str_tlast_from_xgmac & axi_str_tvalid_from_xgmac) + frame_len_ctr <= 'd0; + else if (frame_len_ctr > 4'h8) + frame_len_ctr <= frame_len_ctr; + else if(axi_str_tvalid_from_xgmac) + frame_len_ctr <= frame_len_ctr+1; + end + + /* Raised only on a valid last beat while 0 < frame_len_ctr < 8. */ + assign frame_len_ctr_valid = (frame_len_ctr != 0) & (frame_len_ctr < 8) & axi_str_tvalid_from_xgmac & axi_str_tlast_from_xgmac; + + // Decoder for TKEEP signal: number of valid bytes (0..8) for the listed low-aligned masks; any other mask decodes to 0. NOTE(review): combinational block written with non-blocking assignments - blocking '=' is the usual style here. + always @(axi_str_tkeep_from_xgmac) + case(axi_str_tkeep_from_xgmac) + 'h00 : tkeep_decoded_value <= 'd0; + 'h01 : tkeep_decoded_value <= 'd1; + 'h03 : tkeep_decoded_value <= 'd2; + 'h07 : tkeep_decoded_value <= 'd3; + 'h0F : tkeep_decoded_value <= 'd4; + 'h1F : tkeep_decoded_value <= 'd5; + 'h3F : tkeep_decoded_value <= 'd6; + 'h7F : tkeep_decoded_value <= 'd7; + 'hFF : tkeep_decoded_value <= 'd8; + default : tkeep_decoded_value <= 'h00; + endcase + + //Two FIFOs are implemented: one for XGEMAC data(data FIFO) and the other for controlling + //read side command(command FIFO). 
+ //Write FSM: 4 states control the entire write operation + //cmd_in is an input to the command FIFO and controls the read side command + //Ethernet packet frame size is available from Rx statistics vector and is + //made available to the read side through command FIFO + //FSM states: + //IDLE_WR : Wait in this state until valid is received from XGEMAC. If the + // data FIFO is full or the free-space threshold (wr_reached_threshold) is hit + // it drops the current frame from XGEMAC + //DA_DECODE: Destination Address decode state. The address match logic is commented out + // in this version, so the state only marks the command valid (cmd_in[1]) + // and the next state is always BEGIN_WRITE + // + //BEGIN_WRITE: The FSM continues to write data into data FIFO until tlast from XGEMAC is hit. + // FSM returns to IDLE_WR if tlast has arrived, or goes to DROP_FRAME if force_tlast_to_fifo fires + //DROP_FRAME: The FSM enters into this state if the data FIFO is full or wr_reached_threshold is set, or after a forced tlast + // In this state, tvalid to FIFO is de-asserted; the state is left on the registered tlast once wr_reached_threshold_extend is clear +always @(posedge user_clk) + begin + if(reset) + state_wr <= IDLE_WR; + else + begin + case(state_wr) + IDLE_WR : begin + cmd_in <= 'b0; + wr_en <= 1'b0; + + if(axi_str_tvalid_from_xgmac & (full | wr_reached_threshold)) + begin + state_wr <= DROP_FRAME; + end + else if(axi_str_tvalid_from_xgmac) + begin + state_wr <= DA_DECODE; + end + else + begin + state_wr <= IDLE_WR; + end + end + DA_DECODE : begin + wr_en <= 1'b0; + cmd_in[1] <= 1'b1; + + state_wr <= BEGIN_WRITE; + end + BEGIN_WRITE : begin + cmd_in[15:2] <= frame_len_ctr_valid ? 
+ ((frame_len_ctr << 3) + tkeep_decoded_value) : rx_stats_vec_reg; + if(force_tlast_to_fifo) + begin + wr_en <= 1'b1; + cmd_in[0] <= 1'b0; + state_wr <= DROP_FRAME; + end + else if(axi_str_tlast_from_xgmac & axi_str_tvalid_from_xgmac) + begin + wr_en <= 1'b1; + cmd_in[0] <= axi_str_tuser_from_xgmac; + state_wr <= IDLE_WR; + end + else + begin + wr_en <= 1'b0; + cmd_in[0] <= 1'b0; + state_wr <= BEGIN_WRITE; + end + end + DROP_FRAME : begin + wr_en <= 1'b0; + if(axi_str_tlast_from_xgmac_r & axi_str_tvalid_from_xgmac_r & !wr_reached_threshold_extend) + + begin + //- valid is already high again: back 2 back packet, so IDLE_WR is skipped + if(axi_str_tvalid_from_xgmac) + begin + state_wr <= DA_DECODE; + end + else + state_wr <= IDLE_WR; + end + else + state_wr <= DROP_FRAME; + end + default : state_wr <= IDLE_WR; + endcase + end + end + + /* Command word read back as cmd_out: bit 1 = valid command, bit 0 = error flag copied from tuser on the write side (crc_pass is its inverse), bits 15:2 = frame length. */ + assign valid_cmd = cmd_out[1]; + assign crc_pass = ~cmd_out[0]; + assign rd_pkt_len = {2'b0,cmd_out[15:2]}; + + //Read FSM reads out the data from data FIFO and present it to the packet FIFO interface + //The read FSM starts reading data from the data FIFO as soon as it decodes a valid command + //from the command FIFO. Various state transitions are basically controlled by the command FIFO + //empty flag and tready assertion from packet FIFO interface + //FSM states + //IDLE_RD: The FSM stays in this state until command FIFO empty is de-asserted and tready from packet + // FIFO interface is asserted. + //PREP_READ_1: This is an idle cycle, used basically to de-assert rd_en so that command FIFO is read only + // once + //PREP_READ_2: The FSM always moves on to BEGIN_READ from this state. If the decoded command from command FIFO + // is valid and CRC detects no error for the frame, tready to FIFO is controlled by tready + // from the packet FIFO interface. 
If CRC fails for a frame, the entire frame is dropped + // by de-asserting tvalid to packet FIFO interface + //BEGIN_READ: In this state, the FSM reads data until tlast from XGEMAC is encountered + always @(posedge user_clk) + begin + if(reset) + begin + state_rd <= IDLE_RD; + end + else + begin + case(state_rd) + IDLE_RD : begin + if(axi_str_tready_from_fifo & !empty) + begin + state_rd <= PREP_READ_1; + rd_en <= 1'b1; + end + else + begin + state_rd <= IDLE_RD; + end + end + PREP_READ_1 : begin + rd_en <= 1'b0; + state_rd <= PREP_READ_2; + end + PREP_READ_2 : begin + //Continue reading data if CRC passes for a forthcoming frame + //CRC check is passed through command FIFO from write side logic + if(valid_cmd & crc_pass) + begin + state_rd <= BEGIN_READ; + end + else + begin + state_rd <= BEGIN_READ; + end + end + BEGIN_READ : begin + //Continue reading data until tlast from XGEMAC is received + if(axis_rd_tlast & axis_rd_tvalid & axis_rd_tready) + begin + state_rd <= IDLE_RD; + end + else + begin + state_rd <= BEGIN_READ; + end + end + default : state_rd <= IDLE_RD; + endcase + end + end + + always @(state_rd, valid_cmd, crc_pass,axis_rd_tlast,axis_rd_tvalid,axi_str_tready_from_fifo) + begin + if(state_rd==PREP_READ_2) + begin + if(valid_cmd & crc_pass) + begin + axis_rd_tready <= axi_str_tready_from_fifo; + axis_rd_tvalid_from_fsm <= axis_rd_tvalid; + // rd_pkt_len_count <= rd_pkt_len; + end + else + begin + axis_rd_tready <= 1'b1; + axis_rd_tvalid_from_fsm <= 1'b0; + end + end + else if(state_rd==BEGIN_READ) + begin + if (valid_cmd & crc_pass) + begin + //if (rd_pkt_len_count > + axis_rd_tready <= axi_str_tready_from_fifo; + axis_rd_tvalid_from_fsm <= axis_rd_tvalid; + //rd_pkt_len_count <= rd_pkt_len_count -= 8; + end + else + begin + axis_rd_tready <= 1'b1; + axis_rd_tvalid_from_fsm <= 1'b0; + end + end + else + begin + axis_rd_tready <= 1'b0; + axis_rd_tvalid_from_fsm <= 1'b0; + end + end + + //-Data FIFO instance: AXI Stream Asynchronous FIFO + //XGEMAC 
interface outputs an entire frame in a single shot + //TREADY signal from slave interface of FIFO is left unconnected + axis_sync_fifo axis_fifo_inst1 ( + .m_axis_tready (axis_rd_tready ), + .s_aresetn (~reset ), + .s_axis_tready (axis_wr_tready ), + //.s_aclk (user_clk ), + .s_axis_tvalid (axis_wr_tvalid ), + .m_axis_tvalid (axis_rd_tvalid ), + .s_aclk (user_clk ), + .m_axis_tlast (axis_rd_tlast ), + .s_axis_tlast (axis_wr_tlast ), + .s_axis_tdata (axis_wr_tdata ), + .m_axis_tdata (axis_rd_tdata ), + .s_axis_tkeep (axis_wr_tkeep ), + .m_axis_tkeep (axis_rd_tkeep ), + //.axis_rd_data_count (rd_data_count ), + //.axis_wr_data_count (wr_data_count ) + .axis_data_count (wr_data_count ) //1024 items = [10:0] + ); + + //command FIFO interface for controlling the read side interface + cmd_fifo_xgemac_rxif cmd_fifo_inst ( + .clk (user_clk ), + .rst (reset ), + .din (cmd_in ), // Bus [15 : 0] + .wr_en (wr_en ), + .rd_en (rd_en ), + .dout (cmd_out ), // Bus [15 : 0] + .full (full ), + .empty (empty ) + ); + + assign axi_str_tdata_to_fifo = axis_rd_tdata; + assign axi_str_tkeep_to_fifo = axis_rd_tkeep; + assign axi_str_tlast_to_fifo = axis_rd_tlast; + assign axi_str_tvalid_to_fifo = axis_rd_tvalid_from_fsm; + + always @(posedge user_clk) + if (reset) + rx_fifo_overflow <= 1'b0; + else if (state_wr==DROP_FRAME) + rx_fifo_overflow <= 1'b1; + + + +/*wire [35:0] control0; +wire [35:0] control1; +wire [63:0] vio_signals; +wire [127:0] debug_signal; + +icon icon_isnt +( + .CONTROL0 (control0), + .CONTROL1 (control1) +); + +ila ila_inst +( + .CLK (user_clk), + .CONTROL (control0), + .TRIG0 (debug_signal) +); + +vio vio_inst +( + .CLK (user_clk), + .CONTROL (control1), + .SYNC_OUT (vio_signals) +); + + +reg[2:0] pkg_count; + +always @(posedge user_clk) +begin + if (reset == 1) begin + pkg_count <= 3'b000; + end + else begin + if ((axi_str_tvalid_from_xgmac == 1'b1) && (axi_str_tlast_from_xgmac == 1'b1)) begin + pkg_count <= pkg_count + 1; + end + end +end + + +assign 
debug_signal[3:0] = frame_len_ctr; +assign debug_signal[4] = frame_len_ctr_valid; +assign debug_signal[8:5] = state_wr; +assign debug_signal[12:9] = state_rd; +assign debug_signal[28:13] = cmd_in; +assign debug_signal[31:29] = cmd_out[3:0]; +assign debug_signal[63:32] = axi_str_tdata_from_xgmac[31:0]; +assign debug_signal[71:64] = axi_str_tkeep_from_xgmac; +assign debug_signal[72] = axi_str_tvalid_from_xgmac; +assign debug_signal[73] = rx_statistics_valid; +assign debug_signal[87:74] = rx_stats_vec_reg; +assign debug_signal[105:90] = axi_str_tdata_to_fifo[15:0]; +assign debug_signal[113:106] = axi_str_tkeep_to_fifo; +assign debug_signal[114] = axi_str_tvalid_to_fifo; +assign debug_signal[115] = axi_str_tready_from_fifo; +//assign debug_signal[118:116] = rx_stats_vec_reg[2:0]; +//assign debug_signal[116] = ap_ready; +//assign debug_signal[117] = ap_done; +//assign debug_signal[118] = ap_idle; +assign debug_signal[119] = axi_str_tlast_from_xgmac; +assign debug_signal[120] = axi_str_tlast_to_fifo; +assign debug_signal[123:121] = pkg_count; +assign debug_signal[124] = force_tlast_to_fifo; + +assign debug_signal[125] = axis_rd_tvalid; +assign debug_signal[126] = axis_rd_tready; +assign debug_signal[127] = axis_rd_tlast;*/ + +endmodule diff --git a/hw/hdl/network/tx_interface.v b/hw/hdl/network/tx_interface.v new file mode 100644 index 00000000..b543d1d1 --- /dev/null +++ b/hw/hdl/network/tx_interface.v @@ -0,0 +1,275 @@ +`timescale 1ns / 1ps +//---------------------------------------------------------- +//Copyright (c) 2016, Xilinx, Inc. +//All rights reserved. +// +//Redistribution and use in source and binary forms, with or without modification, +//are permitted provided that the following conditions are met: +// +//1. Redistributions of source code must retain the above copyright notice, +//this list of conditions and the following disclaimer. +// +//2. 
Redistributions in binary form must reproduce the above copyright notice, +//this list of conditions and the following disclaimer in the documentation +//and/or other materials provided with the distribution. +// +//3. Neither the name of the copyright holder nor the names of its contributors +//may be used to endorse or promote products derived from this software +//without specific prior written permission. +// +//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +//ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +//THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +//IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +//INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +//PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +//HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +//OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +//EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
//////////////////////////////////////////////////////////////////////////////////
// Module Name: tx_interface
//
// Description:
//   Store-and-forward buffer between a user AXI4-Stream source
//   (axi_str_*_from_fifo / axi_str_tready_to_fifo) and the XGEMAC transmit
//   port (axi_str_*_to_xgmac / axi_str_tready_from_xgmac).
//
//   * Incoming beats are written into a data FIFO (axis_sync_fifo).
//   * When the beat carrying tlast has been accepted, the write FSM pushes
//     one token into a command FIFO (cmd_fifo_xgemac_txif).
//   * The read FSM pops one token per frame and only then opens the path
//     from the data FIFO to the MAC (pkg_push), so a frame is presented to
//     the MAC only after it has been completely buffered.
//
//   axi_str_tuser_to_xgmac is tied to 0.
//
// NOTE(review): the write FSM only looks at tvalid in IDLE. A frame whose
// first beat already carries tlast (single-beat frame) does not generate a
// command token until the tlast of the *following* frame is seen. This is
// harmless as long as every frame spans at least two 64-bit beats - confirm
// that this holds for all producers.
//////////////////////////////////////////////////////////////////////////////////
module tx_interface #(
    // Width of the occupancy counter exported by the data FIFO
    // (axis_data_count). Only used to size wr_data_count.
    parameter FIFO_CNT_WIDTH = 11
)
(
    // Stream towards the XGEMAC
    output [63:0]   axi_str_tdata_to_xgmac,
    output [7:0]    axi_str_tkeep_to_xgmac,
    output          axi_str_tvalid_to_xgmac,
    output          axi_str_tlast_to_xgmac,
    output          axi_str_tuser_to_xgmac,
    input           axi_str_tready_from_xgmac,

    // Stream from the user side
    input  [63:0]   axi_str_tdata_from_fifo,
    input  [7:0]    axi_str_tkeep_from_fifo,
    input           axi_str_tvalid_from_fifo,
    output          axi_str_tready_to_fifo,
    input           axi_str_tlast_from_fifo,

    input           user_clk,
    input           reset           // synchronous, active high
);

// FSM encodings. Both FSMs are single-bit; LOAD belongs to the write FSM and
// PUSH to the read FSM, they merely share the value 1.
localparam IDLE = 0;
localparam LOAD = 1;
localparam PUSH = 1;

reg         state_wr;
reg         state_rd;
reg         pkg_push;           // 1 while a buffered frame is streamed to the MAC

reg         cmd_fifo_din;
reg         cmd_fifo_wr_en;
//wire        cmd_fifo_dout;
reg         cmd_fifo_rd_en;
wire        cmd_fifo_full;
wire        cmd_fifo_empty;

// Occupancy of the data FIFO; connected for observability only.
wire [FIFO_CNT_WIDTH-1:0]   wr_data_count;

// Read (master) side of the data FIFO
wire        axis_rd_tready;
wire        axis_rd_tvalid;
wire        axis_rd_tlast;
wire [63:0] axis_rd_tdata;
wire [7:0]  axis_rd_tkeep;

// Write (slave) side of the data FIFO
wire        axis_wr_tready;
wire        axis_wr_tvalid;
wire        axis_wr_tlast;
wire [63:0] axis_wr_tdata;
wire [7:0]  axis_wr_tkeep;

// Accept user data only while both the data FIFO and the command FIFO have room.
assign axi_str_tready_to_fifo  = (!cmd_fifo_full) & axis_wr_tready;
// axis_wr_tvalid is therefore high exactly on accepted beats.
assign axis_wr_tvalid          = axi_str_tvalid_from_fifo & axi_str_tready_to_fifo;
assign axis_wr_tlast           = axi_str_tlast_from_fifo;
assign axis_wr_tdata           = axi_str_tdata_from_fifo;
assign axis_wr_tkeep           = axi_str_tkeep_from_fifo;

// The MAC side is gated by pkg_push so that only complete frames leave.
assign axis_rd_tready          = axi_str_tready_from_xgmac & pkg_push;
assign axi_str_tvalid_to_xgmac = axis_rd_tvalid & pkg_push;
assign axi_str_tlast_to_xgmac  = axis_rd_tlast;
assign axi_str_tdata_to_xgmac  = axis_rd_tdata;
assign axi_str_tkeep_to_xgmac  = axis_rd_tkeep;

assign axi_str_tuser_to_xgmac  = 1'b0;

// Write FSM: observes when a complete frame has entered the data FIFO and
// then writes one token into the command FIFO.
always @(posedge user_clk)
begin
    if (reset == 1) begin
        cmd_fifo_wr_en <= 1'b0;
        cmd_fifo_din   <= 1'b0;
        state_wr       <= IDLE;
    end
    else begin
        case (state_wr)
            IDLE: begin
                cmd_fifo_din   <= 1'b0;
                cmd_fifo_wr_en <= 1'b0;
                if (axis_wr_tvalid) begin
                    state_wr <= LOAD;
                end
            end
            LOAD: begin
                cmd_fifo_wr_en <= 1'b0;
                // axis_wr_tvalid already implies !cmd_fifo_full and
                // axis_wr_tready, so no further qualification is needed.
                if (axis_wr_tlast & axis_wr_tvalid) begin
                    cmd_fifo_din   <= 1'b1;
                    cmd_fifo_wr_en <= 1'b1;
                    state_wr       <= IDLE;
                end
            end
        endcase
    end
end

// Read FSM: pops one command token per frame and keeps pkg_push asserted
// until the tlast beat has been handed to the MAC.
always @(posedge user_clk)
begin
    if (reset == 1) begin
        state_rd       <= IDLE;
        pkg_push       <= 1'b0;
        cmd_fifo_rd_en <= 1'b0;
    end
    else begin
        case (state_rd)
            IDLE: begin
                pkg_push       <= 1'b0;
                cmd_fifo_rd_en <= 1'b0;
                if (!cmd_fifo_empty) begin
                    pkg_push       <= 1'b1;
                    cmd_fifo_rd_en <= 1'b1;   // single-cycle pop
                    state_rd       <= PUSH;
                end
            end
            PUSH: begin
                pkg_push       <= 1'b1;
                cmd_fifo_rd_en <= 1'b0;
                if (axis_rd_tlast & axis_rd_tready & axis_rd_tvalid) begin
                    pkg_push <= 1'b0;
                    state_rd <= IDLE;
                end
            end
        endcase
    end
end

// Data FIFO: AXI4-Stream FIFO clocked from user_clk only (m_aclk is not
// connected). Both the slave-side and master-side tready are wired up.
axis_sync_fifo axis_fifo_inst1 (
    .m_axis_tready      (axis_rd_tready ),
    .s_aresetn          (~reset         ),
    .s_axis_tready      (axis_wr_tready ),
    .s_aclk             (user_clk       ),
    .s_axis_tvalid      (axis_wr_tvalid ),
    .m_axis_tvalid      (axis_rd_tvalid ),
    //.m_aclk             (user_clk       ),
    .m_axis_tlast       (axis_rd_tlast  ),
    .s_axis_tlast       (axis_wr_tlast  ),
    .s_axis_tdata       (axis_wr_tdata  ),
    .m_axis_tdata       (axis_rd_tdata  ),
    .s_axis_tkeep       (axis_wr_tkeep  ),
    .m_axis_tkeep       (axis_rd_tkeep  ),
    //.axis_rd_data_count (rd_data_count  ),
    .axis_data_count    (wr_data_count  )
);

// Command FIFO: one 1-bit token per completely buffered frame. The token
// value itself is not used (dout is left open); only full/empty matter.
cmd_fifo_xgemac_txif cmd_fifo_inst (
    .clk    (user_clk),         // input clk
    .rst    (reset),            // input rst
    .din    (cmd_fifo_din),     // input [0 : 0] din
    .wr_en  (cmd_fifo_wr_en),   // input wr_en
    .rd_en  (cmd_fifo_rd_en),   // input rd_en
    .dout   (),                 // output [0 : 0] dout
    .full   (cmd_fifo_full),    // output full
    .empty  (cmd_fifo_empty)    // output empty
);

// Disabled ChipScope debug scaffold, kept for reference.
/*wire [35:0] control0;
wire [35:0] control1;
wire [63:0] vio_signals;
wire [127:0] debug_signal;

icon icon_isnt
(
    .CONTROL0 (control0),
    .CONTROL1 (control1)
);

ila ila_inst
(
    .CLK (user_clk),
    .CONTROL (control0),
    .TRIG0 (debug_signal)
);

vio vio_inst
(
    .CLK (user_clk),
    .CONTROL (control1),
    .SYNC_OUT (vio_signals)
);

reg[2:0] pkg_count;

always @(posedge user_clk)
begin
    if (reset == 1) begin
        pkg_count <= 0;
    end
    else begin
        if (cmd_fifo_wr_en == 1'b1) begin
            pkg_count <= pkg_count + 1;
        end
    end
end

assign debug_signal[63:0] = axi_str_tdata_from_fifo;
assign debug_signal[71:64] = axi_str_tkeep_from_fifo;
assign debug_signal[72] = axi_str_tvalid_from_fifo;
assign debug_signal[73] = axi_str_tready_to_fifo;
assign debug_signal[74] = axi_str_tlast_from_fifo;
assign debug_signal[75] = 1'b0;
assign debug_signal[78:76] = pkg_count;
//assign debug_signal[79] = pkg_loaded;
assign debug_signal[80] = axis_wr_tready;
assign debug_signal[81] = axis_wr_tvalid;
assign debug_signal[82] = axis_wr_tlast;
assign debug_signal[98:83] = axis_wr_tdata[15:0];
assign debug_signal[106:99] = axis_wr_tkeep;
assign debug_signal[107] = cmd_fifo_din;
assign debug_signal[108] = cmd_fifo_wr_en;
assign debug_signal[109] = cmd_fifo_rd_en;
assign debug_signal[110] = cmd_fifo_full;
assign debug_signal[111] = cmd_fifo_empty;*/

endmodule
import lynxTypes::*;

/**
 * addmul - two-stage multiply/add pipeline on an AXI4-Stream.
 *
 * The input word is treated as N_INTS independent 32-bit lanes. In the first
 * stage every lane is multiplied by mul_factor, in the second stage
 * add_factor is added; both results are kept at 32 bits. tvalid, tkeep and
 * tlast travel through two matching register stages. The whole pipeline
 * advances only while axis_out.tready is high, and that same signal is
 * returned upstream as axis_in.tready.
 */
module addmul #(
    parameter integer ADDMUL_DATA_BITS = AXI_DATA_BITS
) (
    input  logic                        aclk,
    input  logic                        aresetn,

    input  logic [15:0]                 mul_factor,
    input  logic [15:0]                 add_factor,

    AXI4S.s                             axis_in,
    AXI4S.m                             axis_out
);

localparam integer N_INTS    = ADDMUL_DATA_BITS/32;
localparam integer KEEP_BITS = ADDMUL_DATA_BITS/8;

// Sideband pipeline: index 0 = after multiply stage, index 1 = after add stage
logic [1:0]                 vld_q;
logic [1:0]                 last_q;
logic [1:0][KEEP_BITS-1:0]  keep_q;

// Per-lane data registers
logic [N_INTS-1:0][31:0]    prod_q;
logic [N_INTS-1:0][31:0]    sum_q;

// Pipeline enable, driven by downstream back-pressure
logic advance;
assign advance        = axis_out.tready;
assign axis_in.tready = advance;

// Sideband registers
always_ff @(posedge aclk, negedge aresetn) begin
    if (!aresetn) begin
        vld_q  <= 0;
        keep_q <= 0;
        last_q <= 0;
    end
    else if (advance) begin
        // second stage takes what the first stage held
        vld_q[1]  <= vld_q[0];
        keep_q[1] <= keep_q[0];
        last_q[1] <= last_q[0];
        // first stage samples the input
        vld_q[0]  <= axis_in.tvalid;
        keep_q[0] <= axis_in.tkeep;
        last_q[0] <= axis_in.tlast;
    end
end

// Data registers: multiply, then add, lane by lane
always_ff @(posedge aclk, negedge aresetn) begin
    if (!aresetn) begin
        prod_q <= 0;
        sum_q  <= 0;
    end
    else if (advance) begin
        for (int lane = 0; lane < N_INTS; lane++) begin
            prod_q[lane] <= axis_in.tdata[lane*32+:32] * mul_factor;
            sum_q[lane]  <= prod_q[lane] + add_factor;
        end
    end
end

// Output mapping
for (genvar lane = 0; lane < N_INTS; lane++) begin : gen_tdata
    assign axis_out.tdata[lane*32+:32] = sum_q[lane];
end

assign axis_out.tvalid = vld_q[1];
assign axis_out.tkeep  = keep_q[1];
assign axis_out.tlast  = last_q[1];

endmodule
import lynxTypes::*;

/**
 * addmul_slave - AXI4-Lite register file for the addmul operator.
 *
 * Register map (word index = address bits above ADDR_LSB):
 *   0 (WR) : multiply factor, bits [15:0] drive mul_factor
 *   1 (WR) : add factor,      bits [15:0] drive add_factor
 *
 * Both registers are byte-writable through wstrb and can be read back.
 * All responses are OKAY (2'b00).
 *
 * Write handshake: awready and wready are pulsed for one cycle once both
 * awvalid and wvalid are present; aw_en blocks a new write until the
 * previous response has been accepted.
 * Read handshake: arready is pulsed for one cycle, the data is registered
 * in the following cycle and held until the master accepts it.
 */
module addmul_slave (
    input  logic                aclk,
    input  logic                aresetn,

    AXI4L.s                     axi_ctrl,

    output logic [15:0]         mul_factor,
    output logic [15:0]         add_factor
);

//`define  DEBUG_CNFG_SLAVE

// -- Decl ----------------------------------------------------------
// ------------------------------------------------------------------

// Constants
localparam integer N_REGS = 2;
// Number of byte-address bits inside one register word. Identical to the
// previous (AXIL_DATA_BITS/32)+1 for 32- and 64-bit buses and matches the
// expression used by aes_slave.
localparam integer ADDR_LSB = $clog2(AXIL_DATA_BITS/8);
localparam integer ADDR_MSB = $clog2(N_REGS);
localparam integer AXI_ADDR_BITS = ADDR_LSB + ADDR_MSB;

// Internal registers
logic [AXI_ADDR_BITS-1:0] axi_awaddr;
logic axi_awready;
logic [AXI_ADDR_BITS-1:0] axi_araddr;
logic axi_arready;
logic [1:0] axi_bresp;
logic axi_bvalid;
logic axi_wready;
logic [AXIL_DATA_BITS-1:0] axi_rdata;
logic [1:0] axi_rresp;
logic axi_rvalid;

// Registers
logic [N_REGS-1:0][AXIL_DATA_BITS-1:0] slv_reg;
logic slv_reg_rden;
logic slv_reg_wren;
logic aw_en;

// -- Def -----------------------------------------------------------
// ------------------------------------------------------------------

// -- Register map --------------------------------------------------
localparam integer MUL_FACT_REG = 0;
// 0 (WR) : Mul factor
localparam integer ADD_FACT_REG = 1;
// 1 (WR) : Add factor

// Write process
// A register write happens in the single cycle where both address and data
// handshakes complete.
assign slv_reg_wren = axi_wready && axi_ctrl.wvalid && axi_awready && axi_ctrl.awvalid;

always_ff @(posedge aclk, negedge aresetn) begin
    if ( aresetn == 1'b0 ) begin
        slv_reg <= 0;
    end
    else begin
        if(slv_reg_wren) begin
            case (axi_awaddr[ADDR_LSB+ADDR_MSB-1:ADDR_LSB])
                MUL_FACT_REG: // Mul factor
                    for (int i = 0; i < (AXIL_DATA_BITS/8); i++) begin
                        if(axi_ctrl.wstrb[i]) begin
                            slv_reg[MUL_FACT_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8];
                        end
                    end
                ADD_FACT_REG: // Add factor
                    for (int i = 0; i < (AXIL_DATA_BITS/8); i++) begin
                        if(axi_ctrl.wstrb[i]) begin
                            slv_reg[ADD_FACT_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8];
                        end
                    end
                default : ;
            endcase
        end
    end
end

assign mul_factor = slv_reg[MUL_FACT_REG][15:0];
assign add_factor = slv_reg[ADD_FACT_REG][15:0];

// Read process
assign slv_reg_rden = axi_arready & axi_ctrl.arvalid & ~axi_rvalid;

// Read data is captured when the read is accepted and then HELD. It must
// stay stable for as long as rvalid is high, because the master may assert
// rready several cycles later; clearing it every cycle would hand such a
// master all-zero data.
always_ff @(posedge aclk, negedge aresetn) begin
    if( aresetn == 1'b0 ) begin
        axi_rdata <= 0;
    end
    else begin
        if(slv_reg_rden) begin
            case (axi_araddr[ADDR_LSB+ADDR_MSB-1:ADDR_LSB])
                MUL_FACT_REG: // Mul factor
                    axi_rdata <= slv_reg[MUL_FACT_REG];
                ADD_FACT_REG: // Add factor
                    axi_rdata <= slv_reg[ADD_FACT_REG];
                default:      // unmapped address reads as zero
                    axi_rdata <= 0;
            endcase
        end
    end
end

// I/O
assign axi_ctrl.awready = axi_awready;
assign axi_ctrl.arready = axi_arready;
assign axi_ctrl.bresp = axi_bresp;
assign axi_ctrl.bvalid = axi_bvalid;
assign axi_ctrl.wready = axi_wready;
assign axi_ctrl.rdata = axi_rdata;
assign axi_ctrl.rresp = axi_rresp;
assign axi_ctrl.rvalid = axi_rvalid;

// awready and awaddr
always_ff @(posedge aclk, negedge aresetn) begin
    if ( aresetn == 1'b0 )
        begin
            axi_awready <= 1'b0;
            axi_awaddr <= 0;
            aw_en <= 1'b1;
        end
    else
        begin
            if (~axi_awready && axi_ctrl.awvalid && axi_ctrl.wvalid && aw_en)
                begin
                    // accept the address once address and data are both offered
                    axi_awready <= 1'b1;
                    aw_en <= 1'b0;
                    axi_awaddr <= axi_ctrl.awaddr;
                end
            else if (axi_ctrl.bready && axi_bvalid)
                begin
                    // response accepted: allow the next write
                    aw_en <= 1'b1;
                    axi_awready <= 1'b0;
                end
            else
                begin
                    axi_awready <= 1'b0;
                end
        end
end

// arready and araddr
always_ff @(posedge aclk, negedge aresetn) begin
    if ( aresetn == 1'b0 )
        begin
            axi_arready <= 1'b0;
            axi_araddr <= 0;
        end
    else
        begin
            if (~axi_arready && axi_ctrl.arvalid)
                begin
                    axi_arready <= 1'b1;
                    axi_araddr <= axi_ctrl.araddr;
                end
            else
                begin
                    axi_arready <= 1'b0;
                end
        end
end

// bvalid and bresp
always_ff @(posedge aclk, negedge aresetn) begin
    if ( aresetn == 1'b0 )
        begin
            axi_bvalid <= 0;
            axi_bresp <= 2'b0;
        end
    else
        begin
            if (axi_awready && axi_ctrl.awvalid && ~axi_bvalid && axi_wready && axi_ctrl.wvalid)
                begin
                    axi_bvalid <= 1'b1;
                    axi_bresp <= 2'b0;
                end
            else
                begin
                    if (axi_ctrl.bready && axi_bvalid)
                        begin
                            axi_bvalid <= 1'b0;
                        end
                end
        end
end

// wready
always_ff @(posedge aclk, negedge aresetn) begin
    if ( aresetn == 1'b0 )
        begin
            axi_wready <= 1'b0;
        end
    else
        begin
            if (~axi_wready && axi_ctrl.wvalid && axi_ctrl.awvalid && aw_en )
                begin
                    axi_wready <= 1'b1;
                end
            else
                begin
                    axi_wready <= 1'b0;
                end
        end
end

// rvalid and rresp
// rvalid rises in the same cycle in which axi_rdata is loaded and stays high
// until the master accepts the data.
always_ff @(posedge aclk, negedge aresetn) begin
    if ( aresetn == 1'b0 )
        begin
            axi_rvalid <= 0;
            axi_rresp <= 0;
        end
    else
        begin
            if (axi_arready && axi_ctrl.arvalid && ~axi_rvalid)
                begin
                    axi_rvalid <= 1'b1;
                    axi_rresp <= 2'b0;
                end
            else if (axi_rvalid && axi_ctrl.rready)
                begin
                    axi_rvalid <= 1'b0;
                end
        end
end

endmodule // addmul_slave
`timescale 1ns / 1ps

import lynxTypes::*;

/**
 * User logic wrapper for the addmul operator.
 *
 * Data path : axis_sink -> register slice -> addmul -> register slice -> axis_src
 * Control   : axi_ctrl  -> addmul_slave, which supplies the two 16-bit factors
 */
module design_user_logic_0 (
    // Clock and reset
    input  wire                 aclk,
    input  wire[0:0]            aresetn,

    // AXI4 control
    AXI4L.s                     axi_ctrl,

    // AXI4S
    AXI4S.m                     axis_src,
    AXI4S.s                     axis_sink
);

/* -- Tie-off unused interfaces and signals ----------------------------- */
//always_comb axi_ctrl.tie_off_s();
//always_comb axis_src.tie_off_m();
//always_comb axis_sink.tie_off_s();

/* -- USER LOGIC -------------------------------------------------------- */

// Factors programmed through the control slave
logic [15:0] mul_factor;
logic [15:0] add_factor;

// Registered copies of the two stream interfaces
AXI4S axis_sink_r ();
AXI4S axis_src_r ();

// Control slave
addmul_slave inst_slave (
    .aclk       (aclk),
    .aresetn    (aresetn),
    .axi_ctrl   (axi_ctrl),
    .mul_factor (mul_factor),
    .add_factor (add_factor)
);

// Input register slice
axis_reg_rtl inst_reg_sink (
    .aclk     (aclk),
    .aresetn  (aresetn),
    .axis_in  (axis_sink),
    .axis_out (axis_sink_r)
);

// Operator core
addmul #(
    .ADDMUL_DATA_BITS(AXI_DATA_BITS)
) inst_top (
    .aclk       (aclk),
    .aresetn    (aresetn),
    .mul_factor (mul_factor),
    .add_factor (add_factor),
    .axis_in    (axis_sink_r),
    .axis_out   (axis_src_r)
);

// Output register slice
axis_reg_rtl inst_reg_src (
    .aclk     (aclk),
    .aresetn  (aresetn),
    .axis_in  (axis_src_r),
    .axis_out (axis_src)
);

endmodule
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

-- One regular stage of the AES pipeline: the combinational aes_round applied
-- to data_in with key_in, followed by a pipe_reg that registers the round
-- result together with the last / keep / data-valid sideband signals.
entity aes_pipe_stage is
    port(
        clk      : in  std_logic;
        reset_n  : in  std_logic;
        stall    : in  std_logic;
        key_in   : in  std_logic_vector(127 downto 0);  -- round key of this stage
        last_in  : in  std_logic;
        last_out : out std_logic;
        keep_in  : in  std_logic_vector(15 downto 0);
        keep_out : out std_logic_vector(15 downto 0);
        dVal_in  : in  std_logic;
        dVal_out : out std_logic;
        data_in  : in  std_logic_vector(127 downto 0);
        data_out : out std_logic_vector(127 downto 0)
    );
end entity aes_pipe_stage;

architecture RTL of aes_pipe_stage is

    -- Combinational output of the round, input of the stage register
    signal round_result : std_logic_vector(127 downto 0);

begin

    -- Stage register (data plus sideband)
    GEN_AES_REG: entity work.pipe_reg
        port map(
            clk      => clk,
            reset_n  => reset_n,
            stall    => stall,
            dVal_in  => dVal_in,
            dVal_out => dVal_out,
            last_in  => last_in,
            last_out => last_out,
            keep_in  => keep_in,
            keep_out => keep_out,
            data_in  => round_result,
            data_out => data_out
        );

    -- Combinational AES round
    GEN_AES_RND: entity work.aes_round
        port map(
            data_in  => data_in,
            key_in   => key_in,
            data_out => round_result
        );

end architecture RTL;
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

-- Final stage of the AES pipeline: the combinational aes_round_last (which
-- takes two round keys, key_in and key_last) followed by a pipe_reg that
-- registers the result together with the last / keep / data-valid sideband.
entity aes_pipe_stage_last is
    port(
        clk      : in  std_logic;
        reset_n  : in  std_logic;
        stall    : in  std_logic;
        key_in   : in  std_logic_vector(127 downto 0);  -- key applied before the S-box
        key_last : in  std_logic_vector(127 downto 0);  -- key applied after shift rows
        last_in  : in  std_logic;
        last_out : out std_logic;
        keep_in  : in  std_logic_vector(15 downto 0);
        keep_out : out std_logic_vector(15 downto 0);
        dVal_in  : in  std_logic;
        dVal_out : out std_logic;
        data_in  : in  std_logic_vector(127 downto 0);
        data_out : out std_logic_vector(127 downto 0)
    );
end entity aes_pipe_stage_last;

architecture RTL of aes_pipe_stage_last is

    -- Combinational output of the last round, input of the stage register
    signal round_result : std_logic_vector(127 downto 0);

begin

    -- Stage register (data plus sideband)
    GEN_AES_REG: entity work.pipe_reg
        port map(
            clk      => clk,
            reset_n  => reset_n,
            stall    => stall,
            dVal_in  => dVal_in,
            dVal_out => dVal_out,
            last_in  => last_in,
            last_out => last_out,
            keep_in  => keep_in,
            keep_out => keep_out,
            data_in  => round_result,
            data_out => data_out
        );

    -- Combinational last AES round
    GEN_AES_RND: entity work.aes_round_last
        port map(
            data_in  => data_in,
            key_in   => key_in,
            key_last => key_last,
            data_out => round_result
        );

end architecture RTL;
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

-- Ten-stage AES pipeline: nine aes_pipe_stage instances followed by one
-- aes_pipe_stage_last. key_in carries eleven 128-bit round keys; stage i
-- (0..8) uses key i, the last stage uses keys 9 and 10. The data-valid,
-- last and keep signals are forwarded from stage to stage next to the data.
entity aes_pipeline is
    port(
        clk      : in  std_logic;
        reset_n  : in  std_logic;
        stall    : in  std_logic;
        -- Key
        key_in   : in  std_logic_vector(11*128-1 downto 0);
        last_in  : in  std_logic;
        last_out : out std_logic;
        keep_in  : in  std_logic_vector(15 downto 0);
        keep_out : out std_logic_vector(15 downto 0);
        -- Data valid
        dVal_in  : in  std_logic;
        dVal_out : out std_logic;
        -- Data
        data_in  : in  std_logic_vector(127 downto 0);
        data_out : out std_logic_vector(127 downto 0)
    );
end entity aes_pipeline;

architecture RTL of aes_pipeline is

    -- Number of regular (non-final) stages
    constant N_REGULAR : natural := 9;

    -- Stage-to-stage links: index 0 is the pipeline input, index k the
    -- output of regular stage k-1.
    type flag_chain_t is array (0 to N_REGULAR) of std_logic;
    type keep_chain_t is array (0 to N_REGULAR) of std_logic_vector(15 downto 0);
    type data_chain_t is array (0 to N_REGULAR) of std_logic_vector(127 downto 0);

    signal dVal_chain : flag_chain_t;
    signal last_chain : flag_chain_t;
    signal keep_chain : keep_chain_t;
    signal data_chain : data_chain_t;

begin

    -- Feed the pipeline inputs into link 0
    dVal_chain(0) <= dVal_in;
    last_chain(0) <= last_in;
    keep_chain(0) <= keep_in;
    data_chain(0) <= data_in;

    -- Regular AES stages
    GEN_AES: for i in 0 to N_REGULAR-1 generate
        SX: entity work.aes_pipe_stage
            port map(
                clk      => clk,
                reset_n  => reset_n,
                stall    => stall,
                key_in   => key_in(128*i+127 downto 128*i),
                last_in  => last_chain(i),
                last_out => last_chain(i+1),
                keep_in  => keep_chain(i),
                keep_out => keep_chain(i+1),
                dVal_in  => dVal_chain(i),
                dVal_out => dVal_chain(i+1),
                data_in  => data_chain(i),
                data_out => data_chain(i+1)
            );
    end generate GEN_AES;

    -- Last stage
    SL: entity work.aes_pipe_stage_last
        port map(
            clk      => clk,
            reset_n  => reset_n,
            stall    => stall,
            key_in   => key_in(128*9+127 downto 128*9),
            key_last => key_in(128*10+127 downto 128*10),
            last_in  => last_chain(N_REGULAR),
            last_out => last_out,
            keep_in  => keep_chain(N_REGULAR),
            keep_out => keep_out,
            dVal_in  => dVal_chain(N_REGULAR),
            dVal_out => dVal_out,
            data_in  => data_chain(N_REGULAR),
            data_out => data_out
        );

end architecture RTL;
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

-- Combinational regular AES round as used by the pipeline:
--   data_out = mix_columns(shift_rows(s_box(data_in xor key_in)))
-- The round key is applied at the input of the round.
entity aes_round is
    port(
        key_in   : in  std_logic_vector(127 downto 0);
        data_in  : in  std_logic_vector(127 downto 0);
        data_out : out std_logic_vector(127 downto 0)
    );
end entity aes_round;

architecture RTL of aes_round is

    -- Intermediate results between the round steps
    signal keyed_state   : std_logic_vector(127 downto 0);  -- after add round key
    signal subbed_state  : std_logic_vector(127 downto 0);  -- after S-box
    signal shifted_state : std_logic_vector(127 downto 0);  -- after shift rows

begin

    -- Add round key
    keyed_state <= data_in xor key_in;

    -- S-box stage: one lookup per byte
    GEN_SBOX: for b in 0 to 15 generate
        SBOX: entity work.s_box_lut port map(
            data_in  => keyed_state(8*b+7 downto 8*b),
            data_out => subbed_state(8*b+7 downto 8*b)
        );
    end generate GEN_SBOX;

    -- Shift rows
    GEN_SROW: entity work.shift_rows port map(
        data_in  => subbed_state,
        data_out => shifted_state
    );

    -- Mix columns, driving the round output directly
    GEN_MCOL: entity work.mix_columns port map(
        data_in  => shifted_state,
        data_out => data_out
    );

end architecture RTL;
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

-- Combinational final AES round as used by the pipeline:
--   data_out = shift_rows(s_box(data_in xor key_in)) xor key_last
-- Unlike aes_round there is no mix-columns step, and a second round key is
-- applied at the output.
entity aes_round_last is
    port(
        key_in   : in  std_logic_vector(127 downto 0);
        key_last : in  std_logic_vector(127 downto 0);
        data_in  : in  std_logic_vector(127 downto 0);
        data_out : out std_logic_vector(127 downto 0)
    );
end entity aes_round_last;

architecture RTL of aes_round_last is

    -- Intermediate results between the round steps
    signal keyed_state   : std_logic_vector(127 downto 0);  -- after first add round key
    signal subbed_state  : std_logic_vector(127 downto 0);  -- after S-box
    signal shifted_state : std_logic_vector(127 downto 0);  -- after shift rows

begin

    -- Add round key
    keyed_state <= data_in xor key_in;

    -- S-box stage: one lookup per byte
    GEN_SBOX: for b in 0 to 15 generate
        SBOX: entity work.s_box_lut port map(
            data_in  => keyed_state(8*b+7 downto 8*b),
            data_out => subbed_state(8*b+7 downto 8*b)
        );
    end generate GEN_SBOX;

    -- Shift rows
    GEN_SROW: entity work.shift_rows port map(
        data_in  => subbed_state,
        data_out => shifted_state
    );

    -- Last add round key
    data_out <= shifted_state xor key_last;

end architecture RTL;
----------------------------------------------------------------------- +localparam integer KEY_LOW_REG = 0; +// 0 (WR) : Key low +localparam integer KEY_HIGH_REG = 1; +// 1 (WR) : Key high + +// Write process +assign slv_reg_wren = axi_wready && axi_ctrl.wvalid && axi_awready && axi_ctrl.awvalid; + +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) begin + slv_reg <= 0; + + keyStart <= 1'b0; + end + else begin + keyStart <= 1'b0; + + if(slv_reg_wren) begin + case (axi_awaddr[ADDR_LSB+ADDR_MSB-1:ADDR_LSB]) + KEY_LOW_REG: // Key low + for (int i = 0; i < (AXIL_DATA_BITS/8); i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[KEY_LOW_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + KEY_HIGH_REG: // Key high + for (int i = 0; i < (AXIL_DATA_BITS/8); i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[KEY_HIGH_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + keyStart <= 1'b1; + end + end + default : ; + endcase + end + end +end + +assign key_out[63:0] = slv_reg[KEY_LOW_REG]; +assign key_out[127:64] = slv_reg[KEY_HIGH_REG]; + +// Read process +assign slv_reg_rden = axi_arready & axi_ctrl.arvalid & ~axi_rvalid; + +always_ff @(posedge aclk, negedge aresetn) begin + if( aresetn == 1'b0 ) begin + axi_rdata <= 0; + end + else begin + axi_rdata <= 0; + if(slv_reg_rden) begin + case (axi_araddr[ADDR_LSB+ADDR_MSB-1:ADDR_LSB]) + KEY_LOW_REG: // Key low + axi_rdata <= slv_reg[KEY_LOW_REG]; + KEY_HIGH_REG: // Key high + axi_rdata <= slv_reg[KEY_HIGH_REG]; + default: ; + endcase + end + end +end + +// I/O +assign axi_ctrl.awready = axi_awready; +assign axi_ctrl.arready = axi_arready; +assign axi_ctrl.bresp = axi_bresp; +assign axi_ctrl.bvalid = axi_bvalid; +assign axi_ctrl.wready = axi_wready; +assign axi_ctrl.rdata = axi_rdata; +assign axi_ctrl.rresp = axi_rresp; +assign axi_ctrl.rvalid = axi_rvalid; + +// awready and awaddr +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_awready <= 1'b0; + axi_awaddr <= 0; + 
aw_en <= 1'b1; + end + else + begin + if (~axi_awready && axi_ctrl.awvalid && axi_ctrl.wvalid && aw_en) + begin + axi_awready <= 1'b1; + aw_en <= 1'b0; + axi_awaddr <= axi_ctrl.awaddr; + end + else if (axi_ctrl.bready && axi_bvalid) + begin + aw_en <= 1'b1; + axi_awready <= 1'b0; + end + else + begin + axi_awready <= 1'b0; + end + end +end + +// arready and araddr +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_arready <= 1'b0; + axi_araddr <= 0; + end + else + begin + if (~axi_arready && axi_ctrl.arvalid) + begin + axi_arready <= 1'b1; + axi_araddr <= axi_ctrl.araddr; + end + else + begin + axi_arready <= 1'b0; + end + end +end + +// bvalid and bresp +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_bvalid <= 0; + axi_bresp <= 2'b0; + end + else + begin + if (axi_awready && axi_ctrl.awvalid && ~axi_bvalid && axi_wready && axi_ctrl.wvalid) + begin + axi_bvalid <= 1'b1; + axi_bresp <= 2'b0; + end + else + begin + if (axi_ctrl.bready && axi_bvalid) + begin + axi_bvalid <= 1'b0; + end + end + end +end + +// wready +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_wready <= 1'b0; + end + else + begin + if (~axi_wready && axi_ctrl.wvalid && axi_ctrl.awvalid && aw_en ) + begin + axi_wready <= 1'b1; + end + else + begin + axi_wready <= 1'b0; + end + end +end + +// rvalid and rresp (1Del?) 
+always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_rvalid <= 0; + axi_rresp <= 0; + end + else + begin + if (axi_arready && axi_ctrl.arvalid && ~axi_rvalid) + begin + axi_rvalid <= 1'b1; + axi_rresp <= 2'b0; + end + else if (axi_rvalid && axi_ctrl.rready) + begin + axi_rvalid <= 1'b0; + end + end +end + +endmodule // cnfg_slave \ No newline at end of file diff --git a/hw/hdl/operators/aes/aes_top.vhd b/hw/hdl/operators/aes/aes_top.vhd new file mode 100644 index 00000000..3f755280 --- /dev/null +++ b/hw/hdl/operators/aes/aes_top.vhd @@ -0,0 +1,101 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +entity aes_top is + generic( + NPAR : integer := 2 + ); + port( + clk : in std_logic; + reset_n : in std_logic; + stall : in std_logic; + -- Key + key_in : in std_logic_vector(127 downto 0); + keyVal_in : in std_logic; + keyVal_out : out std_logic; + last_in : in std_logic; + last_out : out std_logic; + keep_in : in std_logic_vector(NPAR*16-1 downto 0); + keep_out : out std_logic_vector(NPAR*16-1 downto 0); + -- Data valid + dVal_in : in std_logic; -- Data valid + dVal_out : out std_logic; + -- Data + data_in : in std_logic_vector(NPAR*128-1 downto 0); + data_out : out std_logic_vector(NPAR*128-1 downto 0) + ); +end entity aes_top; + +architecture RTL of aes_top is + + type keep_array is array (NPAR-1 downto 0) of std_logic_vector(15 downto 0); + + -- Internal signals + signal key_exp : std_logic_vector(11*128-1 downto 0); + signal dVal : std_logic_vector(NPAR-1 downto 0); + signal last : std_logic_vector(NPAR-1 downto 0); + signal keep : keep_array; + +begin + + -- Instantiate key pipeline + GEN_KEY_PIPE: entity work.key_pipeline + port map( + clk => clk, + reset_n => reset_n, + keyVal_in => keyVal_in, + keyVal_out => keyVal_out, + key_in => key_in, + key_out => key_exp + ); + + -- Instantiate AES pipelines + GEN_AES_PAR: for i in 0 to NPAR-1 generate + GEN_AES_PIPE: entity work.aes_pipeline + port map( + 
clk => clk, + reset_n => reset_n, + stall => stall, + -- Key + key_in => key_exp, + -- Data valid + dVal_in => dVal_in, + dVal_out => dVal(i), + last_in => last_in, + last_out => last(i), + keep_in => keep_in(i*16+15 downto i*16), + keep_out => keep(i), + -- Data + data_in => data_in(i*128+127 downto i*128), + data_out => data_out(i*128+127 downto i*128) + ); + end generate GEN_AES_PAR; + + + GEN_VALID: process (dVal) is + variable tmp : std_logic; + begin + tmp := '0'; + for i in 0 to NPAR-1 loop + tmp := tmp or dVal(i); + end loop; + dVal_out <= tmp; + end process GEN_VALID; + + GEN_LAST: process (last) is + variable tmp : std_logic; + begin + tmp := '0'; + for i in 0 to NPAR-1 loop + tmp := tmp or last(i); + end loop; + last_out <= tmp; + end process GEN_LAST; + + GEN_KEEP: for i in 0 to NPAR-1 generate + keep_out(i*16+15 downto i*16) <= keep(i); + end generate GEN_KEEP; + + +end architecture RTL; \ No newline at end of file
diff --git a/hw/hdl/operators/aes/aes_user_logic.sv b/hw/hdl/operators/aes/aes_user_logic.sv
new file mode 100644
index 00000000..86434fed
--- /dev/null
+++ b/hw/hdl/operators/aes/aes_user_logic.sv
@@ -0,0 +1,79 @@
+`timescale 1ns / 1ps
+
+import lynxTypes::*;
+
+/**
+ * User logic
+ *
+ * AES operator. The key is written through the AXI4-Lite slave and handed to
+ * aes_top together with a key-valid pulse; every beat arriving on axis_sink
+ * is passed through N_AES_PIPELINES parallel 128-bit lanes of aes_top and
+ * forwarded on axis_src. Back-pressure on axis_src stalls aes_top and is
+ * passed straight through to axis_sink.
+ */
+module design_user_logic_0 (
+    // Clock and reset
+    input  wire                 aclk,
+    input  wire[0:0]            aresetn,
+
+    // AXI4 control
+    AXI4L.s                     axi_ctrl,
+
+    // AXI4S
+    AXI4S.m                     axis_src,
+    AXI4S.s                     axis_sink
+);
+
+/* -- Tie-off unused interfaces and signals ----------------------------- */
+//always_comb axi_ctrl.tie_off_s();
+//always_comb axis_src.tie_off_m();
+//always_comb axis_sink.tie_off_s();
+
+/* -- USER LOGIC -------------------------------------------------------- */
+// Reg input
+//AXI4S axis_sink_r ();
+//AXI4S axis_src_r ();
+//axis_reg_rtl inst_reg_sink (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_sink), .axis_out(axis_sink_r));
+//axis_reg_rtl inst_reg_src (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_src_r), .axis_out(axis_src));
+
+localparam integer N_AES_PIPELINES = 4;
+
+logic [127:0] key;
+logic key_start;
+logic key_done; // key-valid flag out of aes_top; not consumed in this module
+
+// Slave
+// aes_slave declares no keyDone port, so key_done is deliberately not
+// connected here (a named connection to a missing port fails elaboration).
+aes_slave inst_slave (
+    .aclk(aclk),
+    .aresetn(aresetn),
+    .axi_ctrl(axi_ctrl),
+    .key_out(key),
+    .keyStart(key_start)
+);
+
+// AES pipelines
+aes_top #(
+    .NPAR(N_AES_PIPELINES)
+) inst_aes_top (
+    .clk(aclk),
+    .reset_n(aresetn),
+    .stall(~axis_src.tready),
+    .key_in(key),
+    .keyVal_in(key_start),
+    .keyVal_out(key_done),
+    .last_in(axis_sink.tlast),
+    .last_out(axis_src.tlast),
+    .keep_in(axis_sink.tkeep),
+    .keep_out(axis_src.tkeep),
+    .dVal_in(axis_sink.tvalid),
+    .dVal_out(axis_src.tvalid),
+    .data_in(axis_sink.tdata),
+    .data_out(axis_src.tdata)
+);
+
+// Input is accepted exactly when the output side is ready (no stall)
+assign axis_sink.tready = axis_src.tready;
+
+endmodule
diff --git a/hw/hdl/operators/aes/key_expansion.vhd b/hw/hdl/operators/aes/key_expansion.vhd new file mode 100644 index 00000000..9d8f331e --- /dev/null +++ b/hw/hdl/operators/aes/key_expansion.vhd @@ -0,0 +1,54 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +entity key_expansion is + port( + key_in : in std_logic_vector(127 downto 0); + key_out : out std_logic_vector(127 downto 0); + rnd_const : in std_logic_vector(7 downto 0) + ); +end entity key_expansion; + +architecture RTL of key_expansion is + + -- Internal signals + type word_array is array (3 downto 0) of std_logic_vector(31 downto 0); + signal key_word : word_array; + signal key_next : word_array; + signal key_shift : std_logic_vector(31 downto 0); + signal key_s_box : std_logic_vector(31 downto 0); + signal temp : std_logic_vector(31 downto 0); + +begin + + -- Key words + GEN_KW: for i in 0 to 3 generate + key_word(3-i) <= key_in(32*i+31 downto 32*i); + end generate GEN_KW; + + -- Rotate 8 bits + key_shift <= key_word(3)(23 downto 0) & key_word(3)(31 downto 24); + + -- S-box + GEN_SBOX: for i in 0 to 3 generate + SBOX: entity work.s_box_lut port map( + data_in => key_shift(i*8+7 downto i*8), + data_out => key_s_box(i*8+7 downto i*8) + ); + end
generate GEN_SBOX; + + -- Add round constant + temp(31 downto 24) <= key_s_box(31 downto 24) xor rnd_const; + temp(23 downto 0) <= key_s_box(23 downto 0); + + -- Next key + key_next(0) <= key_word(0) xor temp; + key_next(1) <= key_word(1) xor key_next(0); + key_next(2) <= key_word(2) xor key_next(1); + key_next(3) <= key_word(3) xor key_next(2); + + -- Output + key_out <= key_next(0) & key_next(1) & key_next(2) & key_next(3); + +end architecture RTL;
diff --git a/hw/hdl/operators/aes/key_pipe_reg.vhd b/hw/hdl/operators/aes/key_pipe_reg.vhd
new file mode 100644
index 00000000..0a43e96d
--- /dev/null
+++ b/hw/hdl/operators/aes/key_pipe_reg.vhd
@@ -0,0 +1,47 @@
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+-- Key pipeline register: a 128-bit register with a valid flag.
+-- data_in is captured only on clock edges where dVal_in = '1'; dVal_out is '1'
+-- in the cycle after such a capture and '0' otherwise, while data_out keeps
+-- the last captured value. reset_n clears both asynchronously (active low).
+entity key_pipe_reg is
+    port(
+        clk      : in  std_logic;
+        reset_n  : in  std_logic;
+        dVal_in  : in  std_logic; -- Data valid signal
+        dVal_out : out std_logic;
+        data_in  : in  std_logic_vector(127 downto 0);
+        data_out : out std_logic_vector(127 downto 0)
+    );
+end entity key_pipe_reg;
+
+architecture RTL of key_pipe_reg is
+
+    signal valid_q : std_logic;                      -- registered valid flag
+    signal value_q : std_logic_vector(127 downto 0); -- registered payload
+
+begin
+
+    -- Outputs come straight from the registers
+    dVal_out <= valid_q;
+    data_out <= value_q;
+
+    CR_REG: process (clk, reset_n) is
+    begin
+        if reset_n = '0' then
+            valid_q <= '0';
+            value_q <= (others => '0');
+        elsif rising_edge(clk) then
+            if dVal_in = '1' then
+                valid_q <= '1';
+                value_q <= data_in;
+            else
+                -- No new data: drop the flag but keep the stored value
+                valid_q <= '0';
+            end if;
+        end if;
+    end process CR_REG;
+
+end architecture RTL;
\ No newline at end of file
diff --git a/hw/hdl/operators/aes/key_pipe_stage.vhd b/hw/hdl/operators/aes/key_pipe_stage.vhd new file mode 100644 index 00000000..e835a3e8 --- /dev/null +++ b/hw/hdl/operators/aes/key_pipe_stage.vhd @@ -0,0 +1,43 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +entity key_pipe_stage is + port( + clk : in std_logic; + reset_n : in std_logic; + keyVal_in : in std_logic;
-- Key valid signal + keyVal_out : out std_logic; + key_in : in std_logic_vector(127 downto 0); + key_out : out std_logic_vector(127 downto 0); + rnd_const : in std_logic_vector(7 downto 0) + ); +end entity key_pipe_stage; + +architecture RTL of key_pipe_stage is + + -- Internal signals + signal key_exp : std_logic_vector(127 downto 0); + +begin + + -- Instantiate Key expansion + GEN_KEY_EXP: entity work.key_expansion + port map( + key_in => key_in, + key_out => key_exp, + rnd_const => rnd_const + ); + + -- Instantiate Pipe register + GEN_AES_REG: entity work.key_pipe_reg + port map( + clk => clk, + reset_n => reset_n, + dVal_in => keyVal_in, + dVal_out => keyVal_out, + data_in => key_exp, + data_out => key_out + ); + +end architecture RTL; \ No newline at end of file
diff --git a/hw/hdl/operators/aes/key_pipeline.vhd b/hw/hdl/operators/aes/key_pipeline.vhd
new file mode 100644
index 00000000..ac3ad13a
--- /dev/null
+++ b/hw/hdl/operators/aes/key_pipeline.vhd
@@ -0,0 +1,70 @@
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+-- Key schedule pipeline for a 128-bit key.
+-- key_in is registered by a base key_pipe_reg; ten chained key_pipe_stage
+-- instances follow, each fed with its own round constant. All eleven
+-- registered keys appear side by side on key_out (key i on bits
+-- i*128+127 downto i*128) and keyVal_out is the valid flag of the last stage.
+entity key_pipeline is
+    port(
+        clk        : in  std_logic;
+        reset_n    : in  std_logic;
+        keyVal_in  : in  std_logic; -- Key valid
+        keyVal_out : out std_logic;
+        key_in     : in  std_logic_vector(127 downto 0);
+        key_out    : out std_logic_vector(11*128-1 downto 0)
+    );
+end entity key_pipeline;
+
+architecture RTL of key_pipeline is
+
+    constant N_STAGES : natural := 10; -- expansion stages behind the base register
+
+    subtype round_key_t is std_logic_vector(127 downto 0);
+    type round_key_vec is array (N_STAGES downto 0) of round_key_t;
+    type byte_vec is array (natural range <>) of std_logic_vector(7 downto 0);
+
+    -- Round constant handed to expansion stage i
+    constant RCON : byte_vec(0 to N_STAGES-1) :=
+        (X"01", X"02", X"04", X"08", X"10", X"20", X"40", X"80", X"1b", X"36");
+
+    signal valid_chain : std_logic_vector(N_STAGES downto 0); -- key valid flag per stage
+    signal round_keys  : round_key_vec;                       -- registered key per stage
+
+begin
+
+    -- Stage 0: plain register for the incoming key
+    GEN_KEY_BASE: entity work.key_pipe_reg
+        port map(
+            clk      => clk,
+            reset_n  => reset_n,
+            dVal_in  => keyVal_in,
+            dVal_out => valid_chain(0),
+            data_in  => key_in,
+            data_out => round_keys(0)
+        );
+
+    -- Expansion stages: stage s turns key s into key s+1
+    GEN_KEY_EXP: for s in 0 to N_STAGES-1 generate
+        KEY_X: entity work.key_pipe_stage
+            port map(
+                clk        => clk,
+                reset_n    => reset_n,
+                keyVal_in  => valid_chain(s),
+                keyVal_out => valid_chain(s+1),
+                key_in     => round_keys(s),
+                key_out    => round_keys(s+1),
+                rnd_const  => RCON(s)
+            );
+    end generate GEN_KEY_EXP;
+
+    -- Flatten the key array onto the output vector
+    GEN_KEYS_OUT: for k in 0 to N_STAGES generate
+        key_out(k*128+127 downto k*128) <= round_keys(k);
+    end generate GEN_KEYS_OUT;
+
+    keyVal_out <= valid_chain(N_STAGES);
+
+end architecture RTL;
\ No newline at end of file
diff --git a/hw/hdl/operators/aes/mix_columns.vhd b/hw/hdl/operators/aes/mix_columns.vhd new file mode 100644 index 00000000..f73f96c5 --- /dev/null +++ b/hw/hdl/operators/aes/mix_columns.vhd @@ -0,0 +1,51 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +entity mix_columns is + port( + data_in : in std_logic_vector(127 downto 0); + data_out : out std_logic_vector(127 downto 0) + ); +end entity mix_columns; + +architecture RTL of mix_columns is + + -- Internal signals + type data_array is array (15 downto 0) of std_logic_vector(7 downto 0); + -- x1, x2, x3 multiplication + signal in_array, out_array, in_array_x2, in_array_x3 : data_array; + +begin + + -- Input generation + GEN_IN: for i in 15 downto 0 generate + in_array(15-i) <= data_in(i*8+7 downto i*8); + end generate GEN_IN; + + -- Multiplication + GEN_M: for i in 15 downto 0 generate + -- x2 + in_array_x2(15-i) <= + (in_array(15-i)(6 downto 0) & '0') xor "00011011" when in_array(15-i)(7) = '1' else + (in_array(15-i)(6 downto 0) & '0'); + -- x3 + in_array_x3(15-i) <= + (in_array(15-i)(6 downto 0) & '0') xor in_array(15-i) xor "00011011" when in_array(15-i)(7) = '1' else + (in_array(15-i)(6 downto 0) & '0') xor in_array(15-i); + end generate GEN_M; + + -- Mixed columns generation +
GEN_MC: for i in 0 to 3 generate + out_array(4*i) <= in_array_x2(4*i) xor in_array_x3(4*i+1) xor in_array(4*i+2) xor in_array(4*i+3); + out_array(4*i+1) <= in_array(4*i) xor in_array_x2(4*i+1) xor in_array_x3(4*i+2) xor in_array(4*i+3); + out_array(4*i+2) <= in_array(4*i) xor in_array(4*i+1) xor in_array_x2(4*i+2) xor in_array_x3(4*i+3); + out_array(4*i+3) <= in_array_x3(4*i) xor in_array(4*i+1) xor in_array(4*i+2) xor in_array_x2(4*i+3); + end generate; + + -- Output generation + GEN_O: for i in 15 downto 0 generate + data_out(i*8+7 downto i*8) <= out_array(15-i); + end generate GEN_O; + +end architecture RTL; \ No newline at end of file
diff --git a/hw/hdl/operators/aes/pipe_reg.vhd b/hw/hdl/operators/aes/pipe_reg.vhd
new file mode 100644
index 00000000..03f02a34
--- /dev/null
+++ b/hw/hdl/operators/aes/pipe_reg.vhd
@@ -0,0 +1,61 @@
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+-- Stallable pipeline register for the AES data path.
+-- While stall is not '0' every register holds its value. Otherwise dVal_out
+-- becomes '1' after a cycle with dVal_in = '1' and '0' after any other cycle;
+-- last/keep/data are captured only when dVal_in = '1'. reset_n clears all
+-- registers asynchronously (active low).
+entity pipe_reg is
+    port(
+        clk      : in  std_logic;
+        reset_n  : in  std_logic;
+        stall    : in  std_logic;
+        last_in  : in  std_logic;
+        last_out : out std_logic;
+        keep_in  : in  std_logic_vector(15 downto 0);
+        keep_out : out std_logic_vector(15 downto 0);
+        dVal_in  : in  std_logic; -- Data valid signal
+        dVal_out : out std_logic;
+        data_in  : in  std_logic_vector(127 downto 0);
+        data_out : out std_logic_vector(127 downto 0)
+    );
+end entity pipe_reg;
+
+architecture RTL of pipe_reg is
+
+    signal valid_q : std_logic;
+    signal last_q  : std_logic;
+    signal keep_q  : std_logic_vector(15 downto 0);
+    signal data_q  : std_logic_vector(127 downto 0);
+
+begin
+
+    dVal_out <= valid_q;
+    last_out <= last_q;
+    keep_out <= keep_q;
+    data_out <= data_q;
+
+    CR_REG: process (clk, reset_n) is
+    begin
+        if reset_n = '0' then
+            valid_q <= '0';
+            last_q  <= '0';
+            keep_q  <= (others => '0');
+            data_q  <= (others => '0');
+        elsif rising_edge(clk) then
+            -- A stalled stage freezes completely, valid flag included
+            if stall = '0' then
+                if dVal_in = '1' then
+                    valid_q <= '1';
+                    last_q  <= last_in;
+                    keep_q  <= keep_in;
+                    data_q  <= data_in;
+                else
+                    valid_q <= '0';
+                end if;
+            end if;
+        end if;
+    end process CR_REG;
+
+end architecture RTL;
\ No newline at end of file
diff --git a/hw/hdl/operators/aes/s_box_lut.vhd b/hw/hdl/operators/aes/s_box_lut.vhd new file mode 100644 index 00000000..db4cd61a --- /dev/null +++ b/hw/hdl/operators/aes/s_box_lut.vhd @@ -0,0 +1,40 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +entity s_box_lut is + port( + data_in : in std_logic_vector(7 downto 0); + data_out : out std_logic_vector(7 downto 0) + ); +end entity s_box_lut; + +architecture RTL of s_box_lut is + + -- Internal RAM + type ram_type is array(natural range<>) of std_logic_vector(7 downto 0); + constant sbox_ram : ram_type(255 downto 0) := ( + X"16", X"bb", X"54", X"b0", X"0f", X"2d", X"99", X"41", X"68", X"42", X"e6", X"bf", X"0d", X"89", X"a1", X"8c", + X"df", X"28", X"55", X"ce", X"e9", X"87", X"1e", X"9b", X"94", X"8e", X"d9", X"69", X"11", X"98", X"f8", X"e1", + X"9e", X"1d", X"c1", X"86", X"b9", X"57", X"35", X"61", X"0e", X"f6", X"03", X"48", X"66", X"b5", X"3e", X"70", + X"8a", X"8b", X"bd", X"4b", X"1f", X"74", X"dd", X"e8", X"c6", X"b4", X"a6", X"1c", X"2e", X"25", X"78", X"ba", + X"08", X"ae", X"7a", X"65", X"ea", X"f4", X"56", X"6c", X"a9", X"4e", X"d5", X"8d", X"6d", X"37", X"c8", X"e7", + X"79", X"e4", X"95", X"91", X"62", X"ac", X"d3", X"c2", X"5c", X"24", X"06", X"49", X"0a", X"3a", X"32", X"e0", + X"db", X"0b", X"5e", X"de", X"14", X"b8", X"ee", X"46", X"88", X"90", X"2a", X"22", X"dc", X"4f", X"81", X"60", + X"73", X"19", X"5d", X"64", X"3d", X"7e", X"a7", X"c4", X"17", X"44", X"97", X"5f", X"ec", X"13", X"0c", X"cd", + X"d2", X"f3", X"ff", X"10", X"21", X"da", X"b6", X"bc", X"f5", X"38", X"9d", X"92", X"8f", X"40", X"a3", X"51", + X"a8", X"9f", X"3c", X"50", X"7f", X"02", X"f9", X"45", X"85", X"33", X"4d", X"43", X"fb", X"aa", X"ef", X"d0", + X"cf", X"58", X"4c", X"4a", X"39", X"be", X"cb",
X"6a", X"5b", X"b1", X"fc", X"20", X"ed", X"00", X"d1", X"53", + X"84", X"2f", X"e3", X"29", X"b3", X"d6", X"3b", X"52", X"a0", X"5a", X"6e", X"1b", X"1a", X"2c", X"83", X"09", + X"75", X"b2", X"27", X"eb", X"e2", X"80", X"12", X"07", X"9a", X"05", X"96", X"18", X"c3", X"23", X"c7", X"04", + X"15", X"31", X"d8", X"71", X"f1", X"e5", X"a5", X"34", X"cc", X"f7", X"3f", X"36", X"26", X"93", X"fd", X"b7", + X"c0", X"72", X"a4", X"9c", X"af", X"a2", X"d4", X"ad", X"f0", X"47", X"59", X"fa", X"7d", X"c9", X"82", X"ca", + X"76", X"ab", X"d7", X"fe", X"2b", X"67", X"01", X"30", X"c5", X"6f", X"6b", X"f2", X"7b", X"77", X"7c", X"63" + ); + +begin + + -- Output + data_out <= sbox_ram(to_integer(unsigned(data_in))); + +end architecture RTL; \ No newline at end of file diff --git a/hw/hdl/operators/aes/shift_rows.vhd b/hw/hdl/operators/aes/shift_rows.vhd new file mode 100644 index 00000000..74751954 --- /dev/null +++ b/hw/hdl/operators/aes/shift_rows.vhd @@ -0,0 +1,54 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +entity shift_rows is + port( + data_in : in std_logic_vector(127 downto 0); + data_out : out std_logic_vector(127 downto 0) + ); +end entity shift_rows; + +architecture RTL of shift_rows is + + -- Internal signals + type data_array is array (15 downto 0) of std_logic_vector(7 downto 0); + signal in_array, out_array : data_array; + +begin + + -- Input generation + GEN_IN: for i in 15 downto 0 generate + in_array(15-i) <= data_in(i*8+7 downto i*8); + end generate GEN_IN; + + --First mixed column input + out_array(0) <= in_array(0); + out_array(1) <= in_array(5); + out_array(2) <= in_array(10); + out_array(3) <= in_array(15); + + -- Second mixed column input + out_array(4) <= in_array(4); + out_array(5) <= in_array(9); + out_array(6) <= in_array(14); + out_array(7) <= in_array(3); + + -- Third mixed column input + out_array(8) <= in_array(8); + out_array(9) <= in_array(13); + out_array(10) <= in_array(2); + out_array(11) <= in_array(7); + 
+ -- Fourth mixed column input + out_array(12) <= in_array(12); + out_array(13) <= in_array(1); + out_array(14) <= in_array(6); + out_array(15) <= in_array(11); + + -- Output generation + GEN_OUT: for i in 15 downto 0 generate + data_out(i*8+7 downto i*8) <= out_array(15-i); + end generate GEN_OUT; + +end architecture RTL; \ No newline at end of file
diff --git a/hw/hdl/operators/aes/utils.vhd b/hw/hdl/operators/aes/utils.vhd
new file mode 100644
index 00000000..a382bf10
--- /dev/null
+++ b/hw/hdl/operators/aes/utils.vhd
@@ -0,0 +1,44 @@
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+package utils is
+    function bitlength(number : integer) return positive;
+
+end package utils;
+
+package body utils is
+
+    -- purpose: returns the minimum # of bits needed to represent the input number
+    --   number >= 0 : width of the plain binary encoding (0 and 1 both need 1 bit)
+    --   number <  0 : width of the two's-complement encoding, sign bit included
+    --                 (-1 -> 1, as before)
+    -- NOTE(review): inputs below -1 used to yield 0, which is outside the
+    -- `positive` return range; the two's-complement width is an interpretation.
+    function bitlength(number : integer) return positive is
+        variable remaining : natural;
+        variable width     : natural := 0;
+    begin
+        if number = 0 or number = -1 then
+            return 1;
+        end if;
+
+        if number > 0 then
+            remaining := number;
+        else
+            -- -(number + 1) stays in range even for integer'low, unlike -number
+            remaining := -(number + 1);
+            width     := 1; -- sign bit
+        end if;
+
+        -- Halve the value instead of doubling a power-of-two accumulator:
+        -- doubling leaves the integer range for inputs >= 2**30.
+        while remaining > 0 loop
+            remaining := remaining / 2;
+            width     := width + 1;
+        end loop;
+
+        return width;
+    end function bitlength;
+
+end package body utils;
\ No newline at end of file
diff --git a/hw/hdl/operators/chacha/chacha_core.v b/hw/hdl/operators/chacha/chacha_core.v new file mode 100644 index 00000000..45ca870f --- /dev/null +++ b/hw/hdl/operators/chacha/chacha_core.v @@ -0,0 +1,704 @@ +// +// chacha_core.v +// -------------- +// Verilog 2001 implementation of the stream cipher ChaCha. +// This is the internal core with wide interfaces. +// +// +// Copyright (c) 2013 Secworks Sweden AB +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or +// without modification, are permitted provided that the following +// conditions are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2.
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +module chacha_core( + input wire clk, + input wire reset_n, + + input wire init, + input wire next, + + input wire [255 : 0] key, + input wire keylen, + input wire [63 : 0] iv, + input wire [63 : 0] ctr, + input wire [4 : 0] rounds, + + input wire [511 : 0] data_in, + + output wire ready, + + output wire [511 : 0] data_out, + output wire data_out_valid + ); + + + //---------------------------------------------------------------- + // Internal constant and parameter definitions. + //---------------------------------------------------------------- + // Datapath quartterround states names. 
+ localparam QR0 = 0; + localparam QR1 = 1; + + localparam NUM_ROUNDS = 4'h8; + + localparam TAU0 = 32'h61707865; + localparam TAU1 = 32'h3120646e; + localparam TAU2 = 32'h79622d36; + localparam TAU3 = 32'h6b206574; + + localparam SIGMA0 = 32'h61707865; + localparam SIGMA1 = 32'h3320646e; + localparam SIGMA2 = 32'h79622d32; + localparam SIGMA3 = 32'h6b206574; + + localparam CTRL_IDLE = 3'h0; + localparam CTRL_INIT = 3'h1; + localparam CTRL_ROUNDS = 3'h2; + localparam CTRL_FINALIZE = 3'h3; + localparam CTRL_DONE = 3'h4; + + + //---------------------------------------------------------------- + // l2b() + // + // Swap bytes from little to big endian byte order. + //---------------------------------------------------------------- + function [31 : 0] l2b(input [31 : 0] op); + begin + l2b = {op[7 : 0], op[15 : 8], op[23 : 16], op[31 : 24]}; + end + endfunction // b2l + + + //---------------------------------------------------------------- + // Registers including update variables and write enable. + //---------------------------------------------------------------- + reg [31 : 0] state_reg [0 : 15]; + reg [31 : 0] state_new [0 : 15]; + reg state_we; + + reg [511 : 0] data_out_reg; + reg [511 : 0] data_out_new; + + reg data_out_valid_reg; + reg data_out_valid_new; + reg data_out_valid_we; + + reg qr_ctr_reg; + reg qr_ctr_new; + reg qr_ctr_we; + reg qr_ctr_inc; + reg qr_ctr_rst; + + reg [3 : 0] dr_ctr_reg; + reg [3 : 0] dr_ctr_new; + reg dr_ctr_we; + reg dr_ctr_inc; + reg dr_ctr_rst; + + reg [31 : 0] block0_ctr_reg; + reg [31 : 0] block0_ctr_new; + reg block0_ctr_we; + reg [31 : 0] block1_ctr_reg; + reg [31 : 0] block1_ctr_new; + reg block1_ctr_we; + reg block_ctr_inc; + reg block_ctr_set; + + reg ready_reg; + reg ready_new; + reg ready_we; + + reg [2 : 0] chacha_ctrl_reg; + reg [2 : 0] chacha_ctrl_new; + reg chacha_ctrl_we; + + + //---------------------------------------------------------------- + // Wires. 
+ //---------------------------------------------------------------- + reg [31 : 0] init_state_word [0 : 15]; + + reg init_state; + reg update_state; + reg update_output; + + reg [31 : 0] qr0_a; + reg [31 : 0] qr0_b; + reg [31 : 0] qr0_c; + reg [31 : 0] qr0_d; + wire [31 : 0] qr0_a_prim; + wire [31 : 0] qr0_b_prim; + wire [31 : 0] qr0_c_prim; + wire [31 : 0] qr0_d_prim; + + reg [31 : 0] qr1_a; + reg [31 : 0] qr1_b; + reg [31 : 0] qr1_c; + reg [31 : 0] qr1_d; + wire [31 : 0] qr1_a_prim; + wire [31 : 0] qr1_b_prim; + wire [31 : 0] qr1_c_prim; + wire [31 : 0] qr1_d_prim; + + reg [31 : 0] qr2_a; + reg [31 : 0] qr2_b; + reg [31 : 0] qr2_c; + reg [31 : 0] qr2_d; + wire [31 : 0] qr2_a_prim; + wire [31 : 0] qr2_b_prim; + wire [31 : 0] qr2_c_prim; + wire [31 : 0] qr2_d_prim; + + reg [31 : 0] qr3_a; + reg [31 : 0] qr3_b; + reg [31 : 0] qr3_c; + reg [31 : 0] qr3_d; + wire [31 : 0] qr3_a_prim; + wire [31 : 0] qr3_b_prim; + wire [31 : 0] qr3_c_prim; + wire [31 : 0] qr3_d_prim; + + + //---------------------------------------------------------------- + // Instantiation of the qr modules. + //---------------------------------------------------------------- + chacha_qr qr0( + .a(qr0_a), + .b(qr0_b), + .c(qr0_c), + .d(qr0_d), + + .a_prim(qr0_a_prim), + .b_prim(qr0_b_prim), + .c_prim(qr0_c_prim), + .d_prim(qr0_d_prim) + ); + + chacha_qr qr1( + .a(qr1_a), + .b(qr1_b), + .c(qr1_c), + .d(qr1_d), + + .a_prim(qr1_a_prim), + .b_prim(qr1_b_prim), + .c_prim(qr1_c_prim), + .d_prim(qr1_d_prim) + ); + + chacha_qr qr2( + .a(qr2_a), + .b(qr2_b), + .c(qr2_c), + .d(qr2_d), + + .a_prim(qr2_a_prim), + .b_prim(qr2_b_prim), + .c_prim(qr2_c_prim), + .d_prim(qr2_d_prim) + ); + + chacha_qr qr3( + .a(qr3_a), + .b(qr3_b), + .c(qr3_c), + .d(qr3_d), + + .a_prim(qr3_a_prim), + .b_prim(qr3_b_prim), + .c_prim(qr3_c_prim), + .d_prim(qr3_d_prim) + ); + + + //---------------------------------------------------------------- + // Concurrent connectivity for ports etc. 
+ //---------------------------------------------------------------- + assign data_out = data_out_reg; + assign data_out_valid = data_out_valid_reg; + assign ready = ready_reg; + + + //---------------------------------------------------------------- + // reg_update + // + // Update functionality for all registers in the core. + // All registers are positive edge triggered with synchronous + // active low reset. All registers have write enable. + //---------------------------------------------------------------- + always @ (posedge clk) + begin : reg_update + integer i; + + if (!reset_n) + begin + for (i = 0 ; i < 16 ; i = i + 1) + state_reg[i] <= 32'h0; + + data_out_reg <= 512'h0; + data_out_valid_reg <= 0; + qr_ctr_reg <= QR0; + dr_ctr_reg <= 0; + block0_ctr_reg <= 32'h0; + block1_ctr_reg <= 32'h0; + chacha_ctrl_reg <= CTRL_IDLE; + ready_reg <= 1; + end + else + begin + if (state_we) + begin + for (i = 0 ; i < 16 ; i = i + 1) + state_reg[i] <= state_new[i]; + end + + if (update_output) + data_out_reg <= data_out_new; + + if (data_out_valid_we) + data_out_valid_reg <= data_out_valid_new; + + if (qr_ctr_we) + qr_ctr_reg <= qr_ctr_new; + + if (dr_ctr_we) + dr_ctr_reg <= dr_ctr_new; + + if (block0_ctr_we) + block0_ctr_reg <= block0_ctr_new; + + if (block1_ctr_we) + block1_ctr_reg <= block1_ctr_new; + + if (ready_we) + ready_reg <= ready_new; + + if (chacha_ctrl_we) + chacha_ctrl_reg <= chacha_ctrl_new; + end + end // reg_update + + + //---------------------------------------------------------------- + // init_state_logic + // + // Calculates the initial state for a given block. 
+ //---------------------------------------------------------------- + always @* + begin : init_state_logic + reg [31 : 0] key0; + reg [31 : 0] key1; + reg [31 : 0] key2; + reg [31 : 0] key3; + reg [31 : 0] key4; + reg [31 : 0] key5; + reg [31 : 0] key6; + reg [31 : 0] key7; + + key0 = l2b(key[255 : 224]); + key1 = l2b(key[223 : 192]); + key2 = l2b(key[191 : 160]); + key3 = l2b(key[159 : 128]); + key4 = l2b(key[127 : 96]); + key5 = l2b(key[95 : 64]); + key6 = l2b(key[63 : 32]); + key7 = l2b(key[31 : 0]); + + init_state_word[04] = key0; + init_state_word[05] = key1; + init_state_word[06] = key2; + init_state_word[07] = key3; + init_state_word[12] = block0_ctr_reg; + init_state_word[13] = block1_ctr_reg; + init_state_word[14] = l2b(iv[63 : 32]); + init_state_word[15] = l2b(iv[31 : 0]); + + if (keylen) + begin + // 256 bit key. + init_state_word[00] = SIGMA0; + init_state_word[01] = SIGMA1; + init_state_word[02] = SIGMA2; + init_state_word[03] = SIGMA3; + init_state_word[08] = key4; + init_state_word[09] = key5; + init_state_word[10] = key6; + init_state_word[11] = key7; + end + else + begin + // 128 bit key. + init_state_word[00] = TAU0; + init_state_word[01] = TAU1; + init_state_word[02] = TAU2; + init_state_word[03] = TAU3; + init_state_word[08] = key0; + init_state_word[09] = key1; + init_state_word[10] = key2; + init_state_word[11] = key3; + end + end + + + //---------------------------------------------------------------- + // state_logic + // Logic to init and update the internal state. 
+ //---------------------------------------------------------------- + always @* + begin : state_logic + integer i; + + for (i = 0 ; i < 16 ; i = i + 1) + state_new[i] = 32'h0; + state_we = 0; + + qr0_a = 32'h0; + qr0_b = 32'h0; + qr0_c = 32'h0; + qr0_d = 32'h0; + qr1_a = 32'h0; + qr1_b = 32'h0; + qr1_c = 32'h0; + qr1_d = 32'h0; + qr2_a = 32'h0; + qr2_b = 32'h0; + qr2_c = 32'h0; + qr2_d = 32'h0; + qr3_a = 32'h0; + qr3_b = 32'h0; + qr3_c = 32'h0; + qr3_d = 32'h0; + + if (init_state) + begin + for (i = 0 ; i < 16 ; i = i + 1) + state_new[i] = init_state_word[i]; + state_we = 1; + end // if (init_state) + + if (update_state) + begin + state_we = 1; + case (qr_ctr_reg) + QR0: + begin + qr0_a = state_reg[00]; + qr0_b = state_reg[04]; + qr0_c = state_reg[08]; + qr0_d = state_reg[12]; + qr1_a = state_reg[01]; + qr1_b = state_reg[05]; + qr1_c = state_reg[09]; + qr1_d = state_reg[13]; + qr2_a = state_reg[02]; + qr2_b = state_reg[06]; + qr2_c = state_reg[10]; + qr2_d = state_reg[14]; + qr3_a = state_reg[03]; + qr3_b = state_reg[07]; + qr3_c = state_reg[11]; + qr3_d = state_reg[15]; + state_new[00] = qr0_a_prim; + state_new[04] = qr0_b_prim; + state_new[08] = qr0_c_prim; + state_new[12] = qr0_d_prim; + state_new[01] = qr1_a_prim; + state_new[05] = qr1_b_prim; + state_new[09] = qr1_c_prim; + state_new[13] = qr1_d_prim; + state_new[02] = qr2_a_prim; + state_new[06] = qr2_b_prim; + state_new[10] = qr2_c_prim; + state_new[14] = qr2_d_prim; + state_new[03] = qr3_a_prim; + state_new[07] = qr3_b_prim; + state_new[11] = qr3_c_prim; + state_new[15] = qr3_d_prim; + end + + QR1: + begin + qr0_a = state_reg[00]; + qr0_b = state_reg[05]; + qr0_c = state_reg[10]; + qr0_d = state_reg[15]; + qr1_a = state_reg[01]; + qr1_b = state_reg[06]; + qr1_c = state_reg[11]; + qr1_d = state_reg[12]; + qr2_a = state_reg[02]; + qr2_b = state_reg[07]; + qr2_c = state_reg[08]; + qr2_d = state_reg[13]; + qr3_a = state_reg[03]; + qr3_b = state_reg[04]; + qr3_c = state_reg[09]; + qr3_d = state_reg[14]; + 
state_new[00] = qr0_a_prim; + state_new[05] = qr0_b_prim; + state_new[10] = qr0_c_prim; + state_new[15] = qr0_d_prim; + state_new[01] = qr1_a_prim; + state_new[06] = qr1_b_prim; + state_new[11] = qr1_c_prim; + state_new[12] = qr1_d_prim; + state_new[02] = qr2_a_prim; + state_new[07] = qr2_b_prim; + state_new[08] = qr2_c_prim; + state_new[13] = qr2_d_prim; + state_new[03] = qr3_a_prim; + state_new[04] = qr3_b_prim; + state_new[09] = qr3_c_prim; + state_new[14] = qr3_d_prim; + end + endcase // case (quarterround_select) + end // if (update_state) + end // state_logic + + + //---------------------------------------------------------------- + // data_out_logic + // Final output logic that combines the result from state + // update with the input block. This adds a 16 rounds and + // a final layer of XOR gates. + // + // Note that we also remap all the words into LSB format. + //---------------------------------------------------------------- + always @* + begin : data_out_logic + integer i; + reg [31 : 0] msb_block_state [0 : 15]; + reg [31 : 0] lsb_block_state [0 : 15]; + reg [511 : 0] block_state; + + for (i = 0 ; i < 16 ; i = i + 1) + begin + msb_block_state[i] = init_state_word[i] + state_reg[i]; + lsb_block_state[i] = l2b(msb_block_state[i][31 : 0]); + end + + block_state = {lsb_block_state[00], lsb_block_state[01], + lsb_block_state[02], lsb_block_state[03], + lsb_block_state[04], lsb_block_state[05], + lsb_block_state[06], lsb_block_state[07], + lsb_block_state[08], lsb_block_state[09], + lsb_block_state[10], lsb_block_state[11], + lsb_block_state[12], lsb_block_state[13], + lsb_block_state[14], lsb_block_state[15]}; + + data_out_new = data_in ^ block_state; + end // data_out_logic + + + //---------------------------------------------------------------- + // qr_ctr + // Update logic for the quarterround counter, a monotonically + // increasing counter with reset. 
+ //---------------------------------------------------------------- + always @* + begin : qr_ctr + qr_ctr_new = 0; + qr_ctr_we = 0; + + if (qr_ctr_rst) + begin + qr_ctr_new = 0; + qr_ctr_we = 1; + end + + if (qr_ctr_inc) + begin + qr_ctr_new = qr_ctr_reg + 1'b1; + qr_ctr_we = 1; + end + end // qr_ctr + + + //---------------------------------------------------------------- + // dr_ctr + // Update logic for the round counter, a monotonically + // increasing counter with reset. + //---------------------------------------------------------------- + always @* + begin : dr_ctr + dr_ctr_new = 0; + dr_ctr_we = 0; + + if (dr_ctr_rst) + begin + dr_ctr_new = 0; + dr_ctr_we = 1; + end + + if (dr_ctr_inc) + begin + dr_ctr_new = dr_ctr_reg + 1'b1; + dr_ctr_we = 1; + end + end // dr_ctr + + + //---------------------------------------------------------------- + // block_ctr + // Update logic for the 64-bit block counter, a monotonically + // increasing counter with reset. + //---------------------------------------------------------------- + always @* + begin : block_ctr + block0_ctr_new = 32'h0; + block1_ctr_new = 32'h0; + block0_ctr_we = 0; + block1_ctr_we = 0; + + if (block_ctr_set) + begin + block0_ctr_new = ctr[31 : 00]; + block1_ctr_new = ctr[63 : 32]; + block0_ctr_we = 1; + block1_ctr_we = 1; + end + + if (block_ctr_inc) + begin + block0_ctr_new = block0_ctr_reg + 1; + block0_ctr_we = 1; + + // Avoid chaining the 32-bit adders. + if (block0_ctr_reg == 32'hffffffff) + begin + block1_ctr_new = block1_ctr_reg + 1; + block1_ctr_we = 1; + end + end + end // block_ctr + + + //---------------------------------------------------------------- + // chacha_ctrl_fsm + // Logic for the state machine controlling the core behaviour. 
+ //---------------------------------------------------------------- + always @* + begin : chacha_ctrl_fsm + init_state = 0; + update_state = 0; + update_output = 0; + qr_ctr_inc = 0; + qr_ctr_rst = 0; + dr_ctr_inc = 0; + dr_ctr_rst = 0; + block_ctr_inc = 0; + block_ctr_set = 0; + ready_new = 0; + ready_we = 0; + data_out_valid_new = 0; + data_out_valid_we = 0; + chacha_ctrl_new = CTRL_IDLE; + chacha_ctrl_we = 0; + + case (chacha_ctrl_reg) + CTRL_IDLE: + begin + if (init) + begin + block_ctr_set = 1; + ready_new = 0; + ready_we = 1; + chacha_ctrl_new = CTRL_INIT; + chacha_ctrl_we = 1; + end + end + + CTRL_INIT: + begin + init_state = 1; + qr_ctr_rst = 1; + dr_ctr_rst = 1; + chacha_ctrl_new = CTRL_ROUNDS; + chacha_ctrl_we = 1; + end + + CTRL_ROUNDS: + begin + update_state = 1; + qr_ctr_inc = 1; + if (qr_ctr_reg == QR1) + begin + dr_ctr_inc = 1; + if (dr_ctr_reg == (rounds[4 : 1] - 1)) + begin + chacha_ctrl_new = CTRL_FINALIZE; + chacha_ctrl_we = 1; + end + end + end + + CTRL_FINALIZE: + begin + ready_new = 1; + ready_we = 1; + update_output = 1; + data_out_valid_new = 1; + data_out_valid_we = 1; + chacha_ctrl_new = CTRL_DONE; + chacha_ctrl_we = 1; + end + + CTRL_DONE: + begin + data_out_valid_new = 0; + data_out_valid_we = 1; + if (init) + begin + ready_new = 0; + ready_we = 1; + block_ctr_set = 1; + chacha_ctrl_new = CTRL_INIT; + chacha_ctrl_we = 1; + end + else if (next) + begin + ready_new = 0; + ready_we = 1; + block_ctr_inc = 1; + chacha_ctrl_new = CTRL_INIT; + chacha_ctrl_we = 1; + end + end + + default: + begin + + end + endcase // case (chacha_ctrl_reg) + end // chacha_ctrl_fsm +endmodule // chacha_core + +//====================================================================== +// EOF chacha_core.v +//====================================================================== \ No newline at end of file diff --git a/hw/hdl/operators/chacha/chacha_qr.v b/hw/hdl/operators/chacha/chacha_qr.v new file mode 100644 index 00000000..3cbbb0e6 --- /dev/null +++ 
b/hw/hdl/operators/chacha/chacha_qr.v
@@ -0,0 +1,116 @@
+//======================================================================
+//
+// chacha_qr.v
+// -----------
+// Verilog 2001 implementation of the stream cipher ChaCha.
+// This is the combinational QR logic as a separate module to allow
+// us to build versions of the cipher with 1, 2, 4 and even 8
+// parallel qr functions.
+//
+//
+// Copyright (c) 2013 Secworks Sweden AB
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or
+// without modification, are permitted provided that the following
+// conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in
+//    the documentation and/or other materials provided with the
+//    distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+module chacha_qr(
+                 input wire [31 : 0]  a,
+                 input wire [31 : 0]  b,
+                 input wire [31 : 0]  c,
+                 input wire [31 : 0]  d,
+
+                 output wire [31 : 0] a_prim,
+                 output wire [31 : 0] b_prim,
+                 output wire [31 : 0] c_prim,
+                 output wire [31 : 0] d_prim
+                );
+
+  //----------------------------------------------------------------
+  // Wires.
+  //----------------------------------------------------------------
+  reg [31 : 0] internal_a_prim;
+  reg [31 : 0] internal_b_prim;
+  reg [31 : 0] internal_c_prim;
+  reg [31 : 0] internal_d_prim;
+
+
+  //----------------------------------------------------------------
+  // Concurrent connectivity for ports.
+  //----------------------------------------------------------------
+  assign a_prim = internal_a_prim;
+  assign b_prim = internal_b_prim;
+  assign c_prim = internal_c_prim;
+  assign d_prim = internal_d_prim;
+
+
+  //----------------------------------------------------------------
+  // qr
+  //
+  // The actual quarterround function.
+  //----------------------------------------------------------------
+  always @*
+    begin : qr
+      reg [31 : 0] a0;
+      reg [31 : 0] a1;
+
+      reg [31 : 0] b0;
+      reg [31 : 0] b1;
+      reg [31 : 0] b2;
+      reg [31 : 0] b3;
+
+      reg [31 : 0] c0;
+      reg [31 : 0] c1;
+
+      reg [31 : 0] d0;
+      reg [31 : 0] d1;
+      reg [31 : 0] d2;
+      reg [31 : 0] d3;
+
+      a0 = a + b;
+      d0 = d ^ a0;
+      d1 = {d0[15 : 0], d0[31 : 16]};
+      c0 = c + d1;
+      b0 = b ^ c0;
+      b1 = {b0[19 : 0], b0[31 : 20]};
+      a1 = a0 + b1;
+      d2 = d1 ^ a1;
+      d3 = {d2[23 : 0], d2[31 : 24]};
+      c1 = c0 + d3;
+      b2 = b1 ^ c1;
+      b3 = {b2[24 : 0], b2[31 : 25]};
+
+      internal_a_prim = a1;
+      internal_b_prim = b3;
+      internal_c_prim = c1;
+      internal_d_prim = d3;
+    end // qr
+endmodule // chacha_qr
+
+//======================================================================
+// EOF chacha_qr.v
+//======================================================================
\ No newline at end of file
diff --git a/hw/hdl/operators/chacha/user_top.sv b/hw/hdl/operators/chacha/user_top.sv
new file mode 100644
index 00000000..6818ad58
--- /dev/null
+++ b/hw/hdl/operators/chacha/user_top.sv
@@ -0,0 +1,135 @@
+`timescale 1ns / 1ps
+
+import lynxTypes::*;
+
+/**
+ * User logic
+ *
+ * ChaCha stream operator: 512-bit words from axis_host_sink are XOR-ed
+ * with the block produced by chacha_core and sent out on axis_rdma_src.
+ * RDMA read/write requests are passed through to the descriptor bypass.
+ */
+module design_user_logic_0 (
+    // Clock and reset
+    input  wire                 aclk,
+    input  wire[0:0]            aresetn,
+
+    // AXI4 control
+    AXI4L.s                     axi_ctrl,
+
+    // Descriptor bypass
+    reqIntf.m                   rd_req_ul,
+    reqIntf.m                   wr_req_ul,
+
+    // RDMA commands
+    reqIntf.s                   rd_req_rdma,
+    reqIntf.s                   wr_req_rdma,
+
+    // AXI4S RDMA
+    AXI4S.m                     axis_rdma_src,
+    AXI4S.s                     axis_rdma_sink,
+    // AXI4S host
+    AXI4S.m                     axis_host_src,
+    AXI4S.s                     axis_host_sink
+);
+
+/* -- Tie-off unused interfaces and signals ----------------------------- */
+always_comb axi_ctrl.tie_off_s();
+//always_comb rd_req_ul.tie_off_m();
+//always_comb wr_req_ul.tie_off_m();
+//always_comb rd_req_rdma.tie_off_s();
+//always_comb wr_req_rdma.tie_off_s();
+//always_comb axis_rdma_src.tie_off_m();
+always_comb axis_rdma_sink.tie_off_s();
+always_comb axis_host_src.tie_off_m();
+//always_comb axis_host_sink.tie_off_s();
+
+/* -- USER LOGIC -------------------------------------------------------- */
+// Forward the RDMA requests unchanged to the descriptor bypass
+assign rd_req_ul.valid = rd_req_rdma.valid;
+assign rd_req_ul.req = rd_req_rdma.req;
+assign rd_req_rdma.ready = rd_req_ul.ready;
+
+assign wr_req_ul.valid = wr_req_rdma.valid;
+assign wr_req_ul.req = wr_req_rdma.req;
+assign wr_req_rdma.ready = wr_req_ul.ready;
+
+logic cc_valid_in;
+logic cc_ready_in;
+logic cc_valid_out;
+logic [511:0] cc_data_in;
+logic [511:0] cc_data_out;
+
+logic last_C;
+
+// Streams between the two FIFOs and the cipher core
+AXI4S fifo_in ();
+AXI4S fifo_out ();
+
+// Cipher core; key, IV and counter are tied to zero, keylen = 0, rounds = 8
+chacha_core inst_chacha_core (
+    .clk(aclk),
+    .reset_n(aresetn),
+    .init(cc_valid_in),
+    .next(0),
+    .key(0),
+    .keylen(0),
+    .iv(0),
+    .ctr(0),
+    .rounds(8),
+    .data_in(cc_data_in),
+    .ready(cc_ready_in),
+    .data_out(cc_data_out),
+    .data_out_valid(cc_valid_out)
+);
+
+// Accept an input word only while the core and the output FIFO are both ready
+assign fifo_in.tready = fifo_out.tready && cc_ready_in;
+
+assign cc_valid_in = fifo_out.tready && cc_ready_in && fifo_in.tvalid;
+assign cc_data_in = fifo_in.tdata;
+
+assign fifo_out.tvalid = cc_valid_out;
+assign fifo_out.tdata = cc_data_out;
+assign fifo_out.tkeep = ~0;
+assign fifo_out.tlast = last_C;
+
+// Remember tlast of the accepted word so it can be attached to the result
+always_ff @(posedge aclk) begin
+    last_C <= cc_valid_in ? fifo_in.tlast : last_C;
+end
+
+// Input buffer: host stream -> cipher core
+axis_data_fifo_chacha inst_fifo_in (
+    .s_axis_aresetn(aresetn),
+    .s_axis_aclk(aclk),
+    .s_axis_tvalid(axis_host_sink.tvalid),
+    .s_axis_tready(axis_host_sink.tready),
+    .s_axis_tdata(axis_host_sink.tdata),
+    .s_axis_tkeep(axis_host_sink.tkeep),
+    .s_axis_tlast(axis_host_sink.tlast),
+    .m_axis_tvalid(fifo_in.tvalid),
+    .m_axis_tready(fifo_in.tready),
+    .m_axis_tdata(fifo_in.tdata),
+    .m_axis_tkeep(fifo_in.tkeep),
+    .m_axis_tlast(fifo_in.tlast)
+);
+
+// Output buffer: cipher core -> RDMA stream
+axis_data_fifo_chacha inst_fifo_out (
+    .s_axis_aresetn(aresetn),
+    .s_axis_aclk(aclk),
+    .s_axis_tvalid(fifo_out.tvalid),
+    .s_axis_tready(fifo_out.tready),
+    .s_axis_tdata(fifo_out.tdata),
+    .s_axis_tkeep(fifo_out.tkeep),
+    .s_axis_tlast(fifo_out.tlast),
+    .m_axis_tvalid(axis_rdma_src.tvalid),
+    .m_axis_tready(axis_rdma_src.tready),
+    .m_axis_tdata(axis_rdma_src.tdata),
+    .m_axis_tkeep(axis_rdma_src.tkeep),
+    .m_axis_tlast(axis_rdma_src.tlast)
+);
+
+endmodule
+
diff --git a/hw/hdl/operators/dtengine/gbm/dt_gbm_slave.sv b/hw/hdl/operators/dtengine/gbm/dt_gbm_slave.sv
new file mode 100644
index 00000000..f13a1603
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/dt_gbm_slave.sv
@@ -0,0 +1,292 @@
+/**
+ * GBM slave
+ */
+import lynxTypes::*;
+
+module dt_gbm_slave #(
+  parameter integer NFEAUTRES_BITS = 16,
+  parameter integer TREEDEPTH_BITS = 8,
+  parameter integer PUTREES_BITS = 8,
+  parameter integer OUTNUMCLS_BITS = 32,
+  parameter integer LSTOUTMASK_BITS = 16
+) (
+  input  logic                        aclk,
+  input  logic                        aresetn,
+
+  AXI4L.s                             axi_ctrl,
+
+  // User defined arguments
+  output logic                        ap_start,
+  output logic [NFEAUTRES_BITS-1:0]   numFeatures,
+  output logic [TREEDEPTH_BITS-1:0]   treeDepth,
+  output logic [PUTREES_BITS-1:0]     puTrees,
+  output logic [OUTNUMCLS_BITS-1:0]   outputNumCLs,
+  output logic [LSTOUTMASK_BITS-1:0]  lastOutLineMask
+);
+
+// -- Decl ----------------------------------------------------------
+// ------------------------------------------------------------------
+// Constants
+localparam integer N_REGS = 6;                          // number of 'slv_reg' entries
+localparam integer ADDR_LSB = (AXIL_DATA_BITS/32) + 1;  // evaluates to 2 for 32-bit and 3 for 64-bit data
+localparam integer ADDR_MSB = $clog2(N_REGS);           // width of the register index
+localparam integer AXI_ADDR_BITS = ADDR_LSB + ADDR_MSB;
+
+// Internal registers
+logic [AXI_ADDR_BITS-1:0] axi_awaddr;
+logic axi_awready;
+logic [AXI_ADDR_BITS-1:0] axi_araddr;
+logic axi_arready;
+logic [1:0] axi_bresp;
+logic axi_bvalid;
+logic axi_wready;
+logic [AXIL_DATA_BITS-1:0] axi_rdata;
+logic [1:0] axi_rresp;
+logic axi_rvalid;
+
+// Registers
+logic [N_REGS-1:0][AXIL_DATA_BITS-1:0] slv_reg;
+logic slv_reg_rden;
+logic slv_reg_wren;
+logic aw_en;
+
+// -- Def -----------------------------------------------------------
+// ------------------------------------------------------------------
+
+// -- Register map -----------------------------------------------------------------------
+// 0 (W1S) : AP start (cleared again on every cycle without a write to it)
+localparam integer AP_CTRL_REG = 0;
+//              0 - start
+// 1 (WR)  : Number of features
+localparam integer NFEAUTERS_REG = 1;
+// 2 (WR)  : Treedepth
+localparam integer TREEDEPTH_REG = 2;
+// 3 (WR)  : Putrees
+localparam integer PUTREES_REG = 3;
+// 4 (WR)  : OutputNumCLs
+localparam integer OUTNUMCLS_REG = 4;
+// 5 (WR)  : LastOutLineMask
+localparam integer LSTOUTMASK_REG = 5;
+
+// Write process: byte-wise register update, qualified by wstrb
+assign slv_reg_wren = axi_wready && axi_ctrl.wvalid && axi_awready && axi_ctrl.awvalid;
+
+always_ff @(posedge aclk, negedge aresetn) begin
+  if ( aresetn == 1'b0 ) begin
+    slv_reg <= 0;
+  end
+  else begin
+    // Control
+    slv_reg[AP_CTRL_REG] <= 0;
+
+    if(slv_reg_wren) begin
+      case (axi_awaddr[ADDR_LSB+ADDR_MSB-1:ADDR_LSB])
+        3'h0: // Control
+          for (int i = 0; i < (AXIL_DATA_BITS/8); i++) begin
+            if(axi_ctrl.wstrb[i]) begin
+              slv_reg[AP_CTRL_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8];
+            end
+          end
+        3'h1: // Number of features
+          for (int i = 0; i < (AXIL_DATA_BITS/8); i++) begin
+            if(axi_ctrl.wstrb[i]) begin
+              slv_reg[NFEAUTERS_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8];
+            end
+          end
+        3'h2: // Treedepth
+          for (int i = 0; i < (AXIL_DATA_BITS/8); i++) begin
+            if(axi_ctrl.wstrb[i]) begin
+              slv_reg[TREEDEPTH_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8];
+            end
+          end
+        3'h3: // Putrees
+          for (int i = 0; i < (AXIL_DATA_BITS/8); i++) begin
+            if(axi_ctrl.wstrb[i]) begin
+              slv_reg[PUTREES_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8];
+            end
+          end
+        3'h4: // Output number of CLs
+          for (int i = 0; i < (AXIL_DATA_BITS/8); i++) begin
+            if(axi_ctrl.wstrb[i]) begin
+              slv_reg[OUTNUMCLS_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8];
+            end
+          end
+        3'h5: // Last out line mask
+          for (int i = 0; i < (AXIL_DATA_BITS/8); i++) begin
+            if(axi_ctrl.wstrb[i]) begin
+              slv_reg[LSTOUTMASK_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8];
+            end
+          end
+        default : ;
+      endcase
+    end
+  end
+end
+
+// Read process: rdata is only updated on the read strobe, so it stays stable while rvalid waits for rready
+assign slv_reg_rden = axi_arready & axi_ctrl.arvalid & ~axi_rvalid;
+
+always_ff @(posedge aclk, negedge aresetn) begin
+  if( aresetn == 1'b0 ) begin
+    axi_rdata <= 0;
+  end
+  else begin
+    if(slv_reg_rden) begin
+      axi_rdata <= 0; // default for bits/registers not driven below (register 0 reads as zero)
+      case (axi_araddr[ADDR_LSB+ADDR_MSB-1:ADDR_LSB])
+        3'h1: // Number of features
+          axi_rdata[NFEAUTRES_BITS-1:0] <= slv_reg[NFEAUTERS_REG][NFEAUTRES_BITS-1:0];
+        3'h2: // Treedepth
+          axi_rdata[TREEDEPTH_BITS-1:0] <= slv_reg[TREEDEPTH_REG][TREEDEPTH_BITS-1:0];
+        3'h3: // Putrees
+          axi_rdata[PUTREES_BITS-1:0] <= slv_reg[PUTREES_REG][PUTREES_BITS-1:0];
+        3'h4: // Output number of CLs
+          axi_rdata[OUTNUMCLS_BITS-1:0] <= slv_reg[OUTNUMCLS_REG][OUTNUMCLS_BITS-1:0];
+        3'h5: // Last out line mask
+          axi_rdata[LSTOUTMASK_BITS-1:0] <= slv_reg[LSTOUTMASK_REG][LSTOUTMASK_BITS-1:0];
+        default: ;
+      endcase
+    end
+  end
+end
+
+// Output: slices follow the module parameters (identical to the former fixed widths at the defaults)
+always_comb begin
+  ap_start         = slv_reg[AP_CTRL_REG][0];
+  numFeatures      = slv_reg[NFEAUTERS_REG][NFEAUTRES_BITS-1:0];
+  treeDepth        = slv_reg[TREEDEPTH_REG][TREEDEPTH_BITS-1:0];
+  puTrees          = slv_reg[PUTREES_REG][PUTREES_BITS-1:0];
+  outputNumCLs     = slv_reg[OUTNUMCLS_REG][OUTNUMCLS_BITS-1:0];
+  lastOutLineMask  = slv_reg[LSTOUTMASK_REG][LSTOUTMASK_BITS-1:0];
+end
+
+
+/* -------------------------------------------------------------------------------------- */
+/* -- AXI CTRL -------------------------------------------------------------------------- */
+/* -------------------------------------------------------------------------------------- */
+// Don't edit
+
+// I/O
+assign axi_ctrl.awready = axi_awready;
+assign axi_ctrl.arready = axi_arready;
+assign axi_ctrl.bresp = axi_bresp;
+assign axi_ctrl.bvalid = axi_bvalid;
+assign axi_ctrl.wready = axi_wready;
+assign axi_ctrl.rdata = axi_rdata;
+assign axi_ctrl.rresp = axi_rresp;
+assign axi_ctrl.rvalid = axi_rvalid;
+
+// awready and awaddr
+always_ff @(posedge aclk, negedge aresetn) begin
+  if ( aresetn == 1'b0 )
+    begin
+      axi_awready <= 1'b0;
+      axi_awaddr <= 0;
+      aw_en <= 1'b1;
+    end
+  else
+    begin
+      if (~axi_awready && axi_ctrl.awvalid && axi_ctrl.wvalid && aw_en)
+        begin
+          axi_awready <= 1'b1;
+          aw_en <= 1'b0;
+          axi_awaddr <= axi_ctrl.awaddr;
+        end
+      else if (axi_ctrl.bready && axi_bvalid)
+        begin
+          aw_en <= 1'b1;
+          axi_awready <= 1'b0;
+        end
+      else
+        begin
+          axi_awready <= 1'b0;
+        end
+    end
+end
+
+// arready and araddr
+always_ff @(posedge aclk, negedge aresetn) begin
+  if ( aresetn == 1'b0 )
+    begin
+      axi_arready <= 1'b0;
+      axi_araddr  <= 0;
+    end
+  else
+    begin
+      if (~axi_arready && axi_ctrl.arvalid)
+        begin
+          axi_arready <= 1'b1;
+          axi_araddr  <= axi_ctrl.araddr;
+        end
+      else
+        begin
+          axi_arready <= 1'b0;
+        end
+    end
+end
+
+// bvalid and bresp
+always_ff @(posedge aclk, negedge aresetn) begin
+  if ( aresetn == 1'b0 )
+    begin
+      axi_bvalid  <= 0;
+      axi_bresp   <= 2'b0;
+    end
+  else
+    begin
+      if (axi_awready && axi_ctrl.awvalid && ~axi_bvalid && axi_wready && axi_ctrl.wvalid)
+        begin
+          axi_bvalid <= 1'b1;
+          axi_bresp  <= 2'b0;
+        end
+      else
+        begin
+          if (axi_ctrl.bready && axi_bvalid)
+            begin
+              axi_bvalid <= 1'b0;
+            end
+        end
+    end
+end
+
+// wready
+always_ff @(posedge aclk, negedge aresetn) begin
+  if ( aresetn == 1'b0 )
+    begin
+      axi_wready <= 1'b0;
+    end
+  else
+    begin
+      if (~axi_wready && axi_ctrl.wvalid && axi_ctrl.awvalid && aw_en )
+        begin
+          axi_wready <= 1'b1;
+        end
+      else
+
begin
+          axi_wready <= 1'b0;
+        end
+    end
+end
+
+// rvalid and rresp (1Del?)
+always_ff @(posedge aclk, negedge aresetn) begin
+  if ( aresetn == 1'b0 )
+    begin
+      axi_rvalid <= 0;
+      axi_rresp  <= 0;
+    end
+  else
+    begin
+      if (axi_arready && axi_ctrl.arvalid && ~axi_rvalid)
+        begin
+          axi_rvalid <= 1'b1;
+          axi_rresp  <= 2'b0;
+        end
+      else if (axi_rvalid && axi_ctrl.rready)
+        begin
+          axi_rvalid <= 1'b0;
+        end
+    end
+end
+
+endmodule // gbm slave
\ No newline at end of file
diff --git a/hw/hdl/operators/dtengine/gbm/dt_user_logic.sv b/hw/hdl/operators/dtengine/gbm/dt_user_logic.sv
new file mode 100644
index 00000000..5c712eec
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/dt_user_logic.sv
@@ -0,0 +1,242 @@
+`timescale 1ns / 1ps
+
+import lynxTypes::*;
+
+/**
+ * User logic
+ *
+ * Wraps DTProcessor: run parameters come from the AXI4-Lite slave, the
+ * input stream is tagged as trees (first packet after start) or data
+ * (second packet), and results leave on axis_src with a generated tlast.
+ */
+module design_user_logic_0 (
+    // Clock and reset
+    input  wire                 aclk,
+    input  wire[0:0]            aresetn,
+
+    // AXI4 control
+    AXI4L.s                     axi_ctrl,
+
+    // AXI4S
+    AXI4S.m                     axis_src,
+    AXI4S.s                     axis_sink
+);
+
+/* -- Tie-off unused interfaces and signals ----------------------------- */
+//always_comb axi_ctrl.tie_off_s();
+//always_comb axis_src.tie_off_m();
+//always_comb axis_sink.tie_off_s();
+
+/* -- USER LOGIC -------------------------------------------------------- */
+AXI4S axis_sink_r ();
+AXI4S axis_src_r ();
+axis_reg_rtl inst_reg_sink (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_sink), .axis_out(axis_sink_r));
+axis_reg_rtl inst_reg_src (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_src_r), .axis_out(axis_src));
+
+localparam [1:0] IDLE           = 2'b00,
+                 READ_TREES     = 2'b01,
+                 WAIT_ALL_TREES = 2'b10,
+                 READ_DATA      = 2'b11;
+
+reg  [1:0]                  reader_state;
+reg  [1:0]                  nxt_reader_state;
+wire                        trees_read_done;
+wire                        data_read_done;
+
+wire                        wr_tvalid;
+wire                        wr_tready;
+wire [511:0]                wr_tdata;
+
+wire                        rd_tvalid;
+wire                        rd_ttype;
+wire                        rd_tlast;
+wire                        rd_tready;
+wire [511:0]                rd_tdata;
+
+reg  [31:0]                 sentOutCLs;
+wire [31:0]                 next_sentOutCLs;
+
+logic                       ap_start_r = 1'b0;
+logic                       ap_start_pulse_d1 = 1'b0;
+wire                        ap_start ;
+wire                        ap_start_pulse;   // rising edge of ap_start; declared before its first use
+wire [16-1:0]               numFeatures ;
+wire [8-1:0]                treeDepth ;
+wire [8-1:0]                puTrees ;
+wire [16-1:0]               lastOutLineMask ;
+wire [31:0]                 outputNumCLs;
+
+logic [16-1:0]              num_64bit_words_per_tuple;
+logic [8-1:0]               num_trees_per_pu_minus_one;
+
+logic [31:0]                num_data_cls;
+logic [31:0]                num_trees_cls;
+logic [31:0]                num_result_cls;
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////            Parameters on AxiLite          /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// AXI4-Lite slave interface (clocked by aclk, the only clock of this module)
+dt_gbm_slave inst_control_s_axi (
+  .aclk                  ( aclk ),
+  .aresetn               ( aresetn ),
+  .axi_ctrl              ( axi_ctrl ),
+  .ap_start              ( ap_start ),
+  .numFeatures           ( numFeatures ),
+  .treeDepth             ( treeDepth ),
+  .puTrees               ( puTrees ),
+  .outputNumCLs          ( outputNumCLs ),
+  .lastOutLineMask       ( lastOutLineMask )
+);
+
+always @(posedge aclk) begin
+    if (~aresetn) begin
+        num_trees_per_pu_minus_one <= 0;
+        num_64bit_words_per_tuple  <= 0;
+
+        ap_start_r                 <= 0;
+        ap_start_pulse_d1          <= 0;
+    end
+    else begin
+        num_trees_per_pu_minus_one <= puTrees - 1'b1;
+        num_64bit_words_per_tuple  <= numFeatures[7:1] + numFeatures[0];
+
+        ap_start_r                 <= ap_start;
+        ap_start_pulse_d1          <= ap_start_pulse;
+    end
+end
+
+assign ap_start_pulse = ap_start & ~ap_start_r;
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////            Decode Input Streams           /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Reader State
+always@(posedge aclk) begin
+    if(~aresetn) begin
+        reader_state <= IDLE;
+    end
+    else begin
+        reader_state <= nxt_reader_state;
+    end
+end
+
+always@(*) begin
+    case (reader_state)
+        IDLE          : nxt_reader_state = (ap_start_pulse_d1)? READ_TREES : IDLE;
+        READ_TREES    : nxt_reader_state = (trees_read_done)? WAIT_ALL_TREES: READ_TREES;
+        WAIT_ALL_TREES: nxt_reader_state = READ_DATA;
+        READ_DATA     : nxt_reader_state = (data_read_done)? IDLE : READ_DATA;
+        default       : nxt_reader_state = IDLE;
+    endcase
+end
+
+assign trees_read_done = axis_sink_r.tlast && axis_sink_r.tvalid && axis_sink_r.tready;
+assign data_read_done  = axis_sink_r.tlast && axis_sink_r.tvalid && axis_sink_r.tready;
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////                Engine Core                /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Input Streams
+// valid is gated with the same state condition as tready, so the core never
+// sees a word in IDLE / WAIT_ALL_TREES that the sink does not hand over
+assign rd_tdata  = axis_sink_r.tdata;
+assign rd_tvalid = axis_sink_r.tvalid && (reader_state == READ_TREES || reader_state == READ_DATA);
+assign rd_tlast  = axis_sink_r.tlast;
+assign rd_ttype  = reader_state == READ_DATA;
+
+assign axis_sink_r.tready = rd_tready && (reader_state == READ_TREES || reader_state == READ_DATA);
+
+// Output Stream
+assign axis_src_r.tdata  = wr_tdata;
+assign axis_src_r.tkeep  = 64'hffffffffffffffff;
+assign axis_src_r.tvalid = wr_tvalid;
+assign axis_src_r.tlast  = next_sentOutCLs == outputNumCLs;
+
+assign wr_tready = axis_src_r.tready;
+
+// Count output numCLs
+
+assign next_sentOutCLs = sentOutCLs + 1'b1;
+
+always@(posedge aclk) begin
+    if(~aresetn) begin
+        sentOutCLs <= 0;
+    end
+    else begin
+        if(sentOutCLs == outputNumCLs) begin
+            sentOutCLs <= 0;
+        end
+        else if(wr_tvalid && wr_tready) begin
+            sentOutCLs <= next_sentOutCLs;
+        end
+    end
+end
+
+
+DTProcessor DTProcessor(
+    .clk                                (aclk),
+    .rst_n                              (aresetn),
+    .start_core                         (ap_start_pulse_d1),
+    // parameters
+
+    .tuple_length                       (numFeatures[5:0]),
+    .num_trees_per_pu_minus_one         (num_trees_per_pu_minus_one[4:0]),
+    .tree_depth                         (treeDepth[3:0]),
+    .num_lines_per_tuple                ({2'b00, num_64bit_words_per_tuple[6:0]}),
+    // input trees
+    .core_data_in                       (rd_tdata),
+    .core_data_in_type                  (rd_ttype),
+    .core_data_in_valid                 (rd_tvalid),
+    .core_data_in_last                  (rd_tlast),
+    .core_data_in_ready                 (rd_tready),
+    // output
+    .last_result_line                   ( (next_sentOutCLs == outputNumCLs) ),
+    .last_result_line_mask              (lastOutLineMask[15:0]),
+    .core_result_out                    (wr_tdata),
+    .core_result_valid                  (wr_tvalid),
+    .core_result_ready                  (wr_tready)
+);
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// debug counters
+always@(posedge aclk) begin
+    if(~aresetn) begin
+        num_data_cls   <= 0;
+        num_trees_cls  <= 0;
+        num_result_cls <= 0;
+    end
+    else begin
+        //
+        if(ap_start_pulse) begin
+            num_data_cls   <= 0;
+            num_trees_cls  <= 0;
+            num_result_cls <= 0;
+        end
+        else begin
+            //
+            if(rd_tvalid && rd_tready && rd_ttype) begin
+                num_data_cls <= num_data_cls + 1'b1;
+            end
+            //
+            if(rd_tvalid && rd_tready && !rd_ttype) begin
+                num_trees_cls <= num_trees_cls + 1'b1;
+            end
+            //
+            if(wr_tvalid && wr_tready) begin
+                num_result_cls <= num_result_cls + 1'b1;
+            end
+        end
+    end
+end
+
+endmodule
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/DTPackage.sv b/hw/hdl/operators/dtengine/gbm/inferCore/DTPackage.sv
new file mode 100644
index 00000000..73aada7d
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/DTPackage.sv
@@ -0,0 +1,91 @@
+
+/*
+ * Copyright 2019 - 2020 Systems Group, ETH Zurich
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of
the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+
+package DTPackage;   // shared constants and packed record layouts of the inference core
+
+localparam  TREE_OFFSET_BITS         = 10;   // width of TreeInstruction_t.tree_offset
+localparam  TUPLE_OFFSET_BITS        = 9;    // width of the tuple_offset fields
+localparam  TREE_MEM_WIDTH_BITS      = 1;    // extra node_address bits on top of TREE_OFFSET_BITS
+localparam  MAX_TREE_DEPTH_BITS      = 4;    // width of NodeInstruction_t.node_level
+
+localparam  NUM_PUS_PER_CLUSTER_BITS = 5;
+localparam  NUM_PUS_PER_CLUSTER      = 28;
+localparam  NUM_DTPU_CLUSTERS        = 2;
+localparam  NUM_DTPU_CLUSTERS_BITS   = 1;
+localparam  NUM_TREES_PER_PU         = 32;
+localparam  FEATURES_DISTR_DELAY     = 8;
+localparam  DATA_PRECISION           = 32;
+localparam  DATA_LINE_WIDTH          = 64;
+
+
+localparam  TREE_MEM_READ_LATENCY    = 2;
+localparam  DATA_MEM_READ_LATENCY    = 2;
+
+localparam  EMPTY_PIPELINE_WAIT_CYCLES = 128;
+
+localparam  FP_ADDER_LATENCY         = 2;
+
+
+
+typedef struct packed {   // 10 + 9 + 1 + 1 = 21 bits
+    bit   [TREE_OFFSET_BITS-1:0]    tree_offset;
+    bit   [TUPLE_OFFSET_BITS-1:0]   tuple_offset;
+    bit                             last_tree;
+    bit                             empty_tree;
+} TreeInstruction_t;
+
+typedef struct packed {   // 11 + 9 + 4 + 4 flags + 32 = 60 bits
+    bit   [TREE_OFFSET_BITS+TREE_MEM_WIDTH_BITS-1:0]  node_address;
+    bit   [TUPLE_OFFSET_BITS-1:0]                     tuple_offset;
+    bit   [MAX_TREE_DEPTH_BITS-1:0]                   node_level;
+    bit                                               empty_tree;
+    bit                                               node_nop;
+    bit                                               last_tree;
+    bit                                               leaf_node;
+
+    bit   [31:0]                                      node_res_val;
+} NodeInstruction_t;
+
+
+
+typedef struct packed {   // 4 + 8 + 1 + 1 + 2 = 16 bits
+    bit   [3:0]   split_dir;
+    bit   [7:0]   findex;
+    bit           right_child;
+    bit           left_child;
+    bit   [1:0]   op_type;
+} NodeType_t;
+
+typedef struct packed {   // 3 x 16 bits + NodeType_t (16 bits) = 64 bits
+    bit   [15:0]  word_1_h;
+    bit   [15:0]  word_1_l;
+    bit   [15:0]  right_child_offset;
+    NodeType_t    node_type;
+} TreeNode_t;
+
+
+
+endpackage
+
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/DTProcessor.sv b/hw/hdl/operators/dtengine/gbm/inferCore/DTProcessor.sv
new file mode 100644
index 00000000..ac8c72c0
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/DTProcessor.sv
@@ -0,0 +1,543 @@
+
+/*
+ * Copyright 2019 - 2020 Systems Group, ETH Zurich
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+
+   The Core module is where computations happen
+
+   core_data_in carries a stream of Trees/Data for processing in the core
+
+   tuple_out_data carries the result of inference on one tuple, this can be
+   a partial result if the complete model is not stored in the core, or the
+   full result if the complete model fits in the core.
+*/
+
+import DTPackage::*;
+
+//
+// DTProcessor
+//
+// Data path, in the order the logic appears below:
+//   1. InDataFIFO buffers {last, type, 512-bit line} taken from core_data_in.
+//   2. The tracking logic counts the words (16 per 512-bit line) of the
+//      current tree / tuple and tells bus_aligner where the item starts and
+//      ends inside each line. The first word of a tree carries its length.
+//   3. AlignedDataFIFO buffers the aligner output.
+//   4. A two-state FSM selects the destination PU (tree lines) or cluster
+//      (tuple lines) and drives element [0] of the data_line_*_array chain
+//      that runs through the compute_unit instances.
+//   5. Each cluster's 32-bit results are written round-robin into its share
+//      of 16 result FIFOs; the 16 FIFO outputs form core_result_out.
+//
+// start_core acts as a soft reset for the tracking logic and the FSM, and
+// injects ctrl_line (run parameters) at the head of the chain.
+//
+module DTProcessor (
+    input  wire         clk,
+    input  wire         rst_n,                       // active low, sampled on posedge clk
+    // parameters
+    input  wire         start_core,                  // restart + send the control line
+
+    input  wire [5:0]   tuple_length,                // words per tuple
+    input  wire [4:0]   num_trees_per_pu_minus_one,  // forwarded in ctrl_line
+    input  wire [3:0]   tree_depth,                  // forwarded in ctrl_line
+    input  wire [8:0]   num_lines_per_tuple,         // forwarded in ctrl_line
+    // input trees
+    input  wire [511:0] core_data_in,
+    input  wire         core_data_in_type,           // 0: trees, 1: data
+    input  wire         core_data_in_valid,
+    input  wire         core_data_in_last,
+    output wire         core_data_in_ready,
+    // output
+    input  wire         last_result_line,            // accept a partially filled final line
+    input  wire [15:0]  last_result_line_mask,       // result FIFOs treated as valid on that line
+    output wire [511:0] core_result_out,
+    output wire         core_result_valid,
+    input  wire         core_result_ready
+);
+
+localparam IDLE     = 1'b0,
+           RUN_MODE = 1'b1;
+
+wire [511:0] ctrl_line;
+
+wire         in_fifo_re;
+wire         in_fifo_full;
+wire         in_fifo_valid;
+wire         in_fifo_data_last;
+wire         in_fifo_data_type;
+wire [511:0] in_fifo_data;
+
+reg          tree_length_set;
+reg  [9:0]   tree_received_words;
+reg  [9:0]   curr_tree_length;
+
+wire [9:0]   tree_possible_words;
+wire [9:0]   tree_remaining_words;
+wire [9:0]   curr_tree_line_words;
+wire         tree_data_in_last;
+wire         in_fifo_trees_re;
+
+reg          aligned_fifo_data_type_d1;
+reg          tuple_start_set;
+reg  [5:0]   tuple_received_words;
+reg  [5:0]   curr_tuple_off;
+
+wire [5:0]   tuple_possible_words;
+wire [5:0]   tuple_remaining_words;
+wire [5:0]   curr_tuple_line_words;
+wire         tuple_data_in_last;
+wire         in_fifo_tuples_re;
+
+wire         in_fifo_item_last;
+wire [3:0]   in_fifo_data_off;
+wire [4:0]   in_fifo_data_word_count;
+wire [2:0]   in_fifo_data_size;
+wire [4:0]   in_fifo_data_size_t;
+
+wire         aligned_fifo_almfull;
+wire         aligned_fifo_valid;
+wire         aligned_fifo_re;
+wire         aligned_fifo_data_last;
+wire         aligned_fifo_data_type;
+
+wire [2:0]   aligned_fifo_data_size;
+wire [511:0] aligned_fifo_data;
+
+wire         aligner_data_out_valid;
+wire         aligner_data_out_last;
+wire         aligner_data_out_type;
+wire [2:0]   aligner_data_out_size;
+wire [511:0] aligner_data_out;
+
+reg                               core_fsm_state;
+reg  [NUM_DTPU_CLUSTERS_BITS-1:0] data_line_cu;
+reg  [4:0]                        data_line_pu;
+// NOTE(review): the registers below (and aligned_fifo_data_type_d1 above) are
+// written but never read inside this module; kept so the register set and
+// hierarchy are unchanged.
+reg  [NUM_DTPU_CLUSTERS_BITS-1:0] data_line_cu_d1;
+reg  [4:0]                        data_line_pu_d1;
+reg  [511:0]                      data_line;
+reg                               data_line_last;
+reg  [2:0]                        data_line_last_valid_pos;
+reg                               data_line_valid;
+
+// Element [0] is driven here; element [i+1] is driven by compute unit i.
+logic [511:0] data_line_array[NUM_DTPU_CLUSTERS:0];
+logic         data_line_valid_array[NUM_DTPU_CLUSTERS:0];
+logic         data_line_prog_array[NUM_DTPU_CLUSTERS:0];
+logic         data_line_ctrl_array[NUM_DTPU_CLUSTERS:0];
+logic         data_line_last_array[NUM_DTPU_CLUSTERS:0];
+
+logic [2:0]                        data_line_last_valid_pos_array[NUM_DTPU_CLUSTERS:0];
+logic [4:0]                        data_line_pu_array[NUM_DTPU_CLUSTERS:0];
+logic [NUM_DTPU_CLUSTERS_BITS-1:0] data_line_cu_array[NUM_DTPU_CLUSTERS:0];
+
+wire [NUM_DTPU_CLUSTERS-1:0] data_line_ready_array;
+
+wire [31:0]                  cu_tuple_result_out[NUM_DTPU_CLUSTERS-1:0];
+wire [NUM_DTPU_CLUSTERS-1:0] cu_tuple_result_out_valid;
+
+// Per cluster: which of its result FIFOs receives the next result.
+reg [3-NUM_DTPU_CLUSTERS_BITS:0] curr_dest_result_fifo[NUM_DTPU_CLUSTERS-1:0];
+
+wire [15:0]  res_fifo_we;
+wire [511:0] res_fifo_dout;
+wire [15:0]  res_fifo_valid;
+wire [15:0]  res_fifo_full;
+wire         res_fifo_re;
+
+reg          start_core_d1;
+
+reg  [7:0]   data_present_age;
+reg          last_tree_line_sent;
+reg          pipeline_emptied;
+wire         aligned_fifo_empty;
+
+////////////////////////////////////////////////////////////////////////////////
+//                              Core Input FIFO                               //
+////////////////////////////////////////////////////////////////////////////////
+
+// 480 + 7 + 9 + 4 + 4 + 3 + 5 = 512 bits.
+assign ctrl_line = {480'b0, 7'b0, num_lines_per_tuple, 4'h0, tree_depth, 3'b0, num_trees_per_pu_minus_one};
+
+quick_fifo #(
+    .FIFO_WIDTH(514),                 // last flag + type flag + 512-bit data
+    .FIFO_DEPTH_BITS(9),
+    .FIFO_ALMOSTFULL_THRESHOLD(508)
+) InDataFIFO (
+    .clk        (clk),
+    .reset_n    (rst_n),
+    .din        ({core_data_in_last, core_data_in_type, core_data_in}),
+    .we         (core_data_in_valid),
+
+    .re         (in_fifo_re),
+    .dout       ({in_fifo_data_last, in_fifo_data_type, in_fifo_data}),
+    .empty      (),
+    .valid      (in_fifo_valid),
+    .full       (in_fifo_full),
+    .count      (),
+    .almostfull ()
+);
+
+assign core_data_in_ready = ~in_fifo_full;
+// Tuple lines are only consumed once the tree pipeline has drained.
+assign in_fifo_re = ~aligned_fifo_almfull && ((in_fifo_data_type) ? in_fifo_tuples_re && pipeline_emptied : in_fifo_trees_re);
+
+////////////////////////////////////////////////////////////////////////////////
+//                           Tracking Trees/Tuples                            //
+////////////////////////////////////////////////////////////////////////////////
+
+// Trees input stream decoding
+always @(posedge clk) begin
+    // buffer start signal
+    start_core_d1 <= start_core;
+
+    // Reset the rest
+    if (~rst_n || start_core) begin
+        tree_length_set     <= 1'b0;
+        tree_received_words <= 10'd0;
+        curr_tree_length    <= 10'd0;
+        last_tree_line_sent <= 1'b0;
+        data_present_age    <= 0;
+    end
+    else begin
+        if (in_fifo_valid && ~aligned_fifo_almfull && !in_fifo_data_type) begin
+            if (!tree_length_set) begin
+                // First line of a tree: word 0 is the length, 15 payload words follow.
+                curr_tree_length <= in_fifo_data[9:0];
+                if (in_fifo_data[9:0] > 15) begin
+                    tree_length_set     <= 1'b1;
+                    tree_received_words <= 10'd15;
+                end
+            end
+            else if (tree_data_in_last) begin
+                tree_length_set     <= 1'b0;
+                tree_received_words <= 10'd0;
+            end
+            else begin
+                tree_received_words <= tree_received_words + 10'd16;
+            end
+            if (in_fifo_data_last) begin
+                last_tree_line_sent <= 1'b1;
+            end
+        end
+        // Age counter started by the last tree line (8 bits, free running).
+        if (last_tree_line_sent) begin
+            data_present_age <= data_present_age + 1'b1;
+        end
+    end
+end
+
+assign tree_possible_words  = (tree_length_set) ? 16 : 15;
+assign tree_remaining_words = (tree_length_set) ? (curr_tree_length - tree_received_words) : in_fifo_data[9:0];
+assign curr_tree_line_words = (tree_remaining_words > tree_possible_words) ? tree_possible_words : tree_remaining_words;
+assign tree_data_in_last    = (tree_remaining_words > tree_possible_words) ? 1'b0 : 1'b1;
+assign in_fifo_trees_re     = 1'b1;
+
+// Tuples input stream decoding
+always @(posedge clk) begin
+    if (~rst_n || start_core) begin
+        tuple_received_words <= 6'd0;
+        tuple_start_set      <= 1'b0;
+        curr_tuple_off       <= 6'd0;
+
+        pipeline_emptied     <= 1'b0;
+    end
+    else begin
+        if (in_fifo_valid && ~aligned_fifo_almfull && in_fifo_data_type && pipeline_emptied) begin
+            if (!tuple_start_set) begin
+                if ((6'd16 - curr_tuple_off[3:0]) < tuple_length) begin
+                    tuple_start_set      <= 1'b1;
+                    tuple_received_words <= 6'd16 - curr_tuple_off[3:0];
+                end
+            end
+            else if (tuple_data_in_last) begin
+                tuple_start_set      <= 1'b0;
+                tuple_received_words <= 6'd0;
+            end
+            else begin
+                tuple_received_words <= tuple_received_words + 6'd16;
+            end
+            // Tuple offset: only bits [3:0] (word position inside a line) are used.
+            if (tuple_data_in_last) begin
+                curr_tuple_off <= curr_tuple_off + tuple_length;
+            end
+        end
+        // Sticky: set once the wait has elapsed and the aligned FIFO is empty.
+        if ((data_present_age > EMPTY_PIPELINE_WAIT_CYCLES) && aligned_fifo_empty) begin
+            pipeline_emptied <= 1'b1;
+        end
+    end
+end
+
+assign tuple_possible_words  = (tuple_start_set) ? 6'd16 : (6'd16 - curr_tuple_off[3:0]);
+assign tuple_remaining_words = tuple_length - tuple_received_words;
+assign curr_tuple_line_words = (tuple_remaining_words > tuple_possible_words) ? tuple_possible_words : tuple_remaining_words;
+assign tuple_data_in_last    = (tuple_remaining_words > tuple_possible_words) ? 1'b0 : 1'b1;
+// On the last beat of a tuple the line is popped only if the tuple uses up
+// every word still available in it; otherwise the next tuple starts in the
+// same line. The comparison is against tuple_possible_words rather than a
+// constant 16: a tuple that starts at a non-zero offset and ends exactly on
+// the line boundary must also pop the line, otherwise the next tuple would
+// re-read the same line at offset 0. Identical to the previous
+// "== 6'd16" test whenever tuple_length >= 16.
+assign in_fifo_tuples_re     = (tuple_data_in_last) ? (tuple_remaining_words == tuple_possible_words) : 1'b1;
+
+////////////////////////////////////////////////////////////////////////////////
+//                                Bus Aligner                                 //
+////////////////////////////////////////////////////////////////////////////////
+
+assign in_fifo_item_last       = (in_fifo_data_type) ? tuple_data_in_last : tree_data_in_last;
+assign in_fifo_data_off        = (in_fifo_data_type) ? curr_tuple_off[3:0] : 4'd1;
+assign in_fifo_data_size_t     = ((in_fifo_data_type) ? tuple_length[4:0] : ((tree_length_set) ? curr_tree_length[4:0] : in_fifo_data[4:0])) - 5'd1;
+assign in_fifo_data_size       = in_fifo_data_size_t[3:1];
+assign in_fifo_data_word_count = (in_fifo_data_type) ? curr_tuple_line_words : curr_tree_line_words;
+
+bus_aligner bus_aligner (
+    .clk                (clk),
+    .rst_n              (rst_n),
+
+    .data_in            (in_fifo_data),
+    .data_in_last       (in_fifo_item_last),
+    .data_in_type       (in_fifo_data_type),
+    .data_in_valid      (in_fifo_valid && ~aligned_fifo_almfull && (~in_fifo_data_type || pipeline_emptied)),
+    .data_in_off        (in_fifo_data_off),
+    .data_in_size       (in_fifo_data_size),
+    .data_in_word_count (in_fifo_data_word_count),
+    .stream_last        (in_fifo_data_last),
+
+    .data_out           (aligner_data_out),
+    .data_out_last      (aligner_data_out_last),
+    .data_out_type      (aligner_data_out_type),
+    .data_out_size      (aligner_data_out_size),
+    .data_out_valid     (aligner_data_out_valid)
+);
+
+////////////////////////////////////////////////////////////////////////////////
+//                             Aligned Data FIFO                              //
+////////////////////////////////////////////////////////////////////////////////
+
+quick_fifo #(
+    .FIFO_WIDTH(517),                 // last flag + type flag + 3-bit size + 512-bit data
+    .FIFO_DEPTH_BITS(9),
+    .FIFO_ALMOSTFULL_THRESHOLD(490)
+) AlignedDataFIFO (
+    .clk        (clk),
+    .reset_n    (rst_n),
+    .din        ({aligner_data_out_last, aligner_data_out_type, aligner_data_out_size, aligner_data_out}),
+    .we         (aligner_data_out_valid),
+
+    .re         (aligned_fifo_re),
+    .dout       ({aligned_fifo_data_last, aligned_fifo_data_type, aligned_fifo_data_size, aligned_fifo_data}),
+    .empty      (aligned_fifo_empty),
+    .valid      (aligned_fifo_valid),
+    .full       (),
+    .count      (),
+    .almostfull (aligned_fifo_almfull)
+);
+
+////////////////////////////////////////////////////////////////////////////////
+//                          Distribute Trees/Tuples                           //
+////////////////////////////////////////////////////////////////////////////////
+
+always @(posedge clk) begin
+    if (~rst_n || start_core) begin
+        core_fsm_state            <= IDLE;
+        data_line_cu              <= 0;
+        data_line_pu              <= 5'd0;
+        data_line_cu_d1           <= 0;
+        data_line_pu_d1           <= 0;
+        data_line_last_valid_pos  <= 3'd0;   // 3-bit register (was written with a 4-bit literal)
+        data_line_valid           <= 1'b0;
+        data_line_last            <= 1'b0;
+        aligned_fifo_data_type_d1 <= 1'b0;
+    end
+    else begin
+        data_line_last_valid_pos  <= aligned_fifo_data_size;
+        data_line_valid           <= aligned_fifo_valid;
+        data_line_last            <= aligned_fifo_data_last;
+        aligned_fifo_data_type_d1 <= aligned_fifo_data_type;
+        data_line_cu_d1           <= data_line_cu;
+        data_line_pu_d1           <= data_line_pu;
+
+        case (core_fsm_state)
+            IDLE: begin
+                data_line_cu <= 0;
+                data_line_pu <= 5'd0;
+
+                if (start_core_d1) begin
+                    core_fsm_state <= RUN_MODE;
+                end
+            end
+            RUN_MODE: begin
+                if (aligned_fifo_valid && aligned_fifo_re) begin
+                    if (aligned_fifo_data_type == 1'b0) begin // trees stream
+                        // PU: advance to the next PU after the last line of a tree
+                        if (aligned_fifo_data_last) begin
+                            if (data_line_pu == NUM_PUS_PER_CLUSTER-1) begin
+                                data_line_pu <= 5'd0;
+                            end
+                            else begin
+                                data_line_pu <= data_line_pu + 5'd1;
+                            end
+                        end
+                        // CU
+                        data_line_cu <= 0;
+                    end
+                    else begin // tuples stream
+                        // PU
+                        data_line_pu <= 5'd0;
+                        // CU: advance to the next cluster after the last line of a tuple
+                        if (aligned_fifo_data_last) begin
+                            if (data_line_cu == NUM_DTPU_CLUSTERS-1) begin
+                                data_line_cu <= 0;
+                            end
+                            else begin
+                                data_line_cu <= data_line_cu + 1;
+                            end
+                        end
+                    end
+                end
+            end
+            default : begin
+                core_fsm_state <= IDLE;
+                data_line_cu   <= 0;
+                data_line_pu   <= 5'd0;
+            end
+        endcase
+    end
+
+    data_line <= aligned_fifo_data;
+end
+
+////////////////////////////////////////////////////////////////////////////////
+//                              Engine Clusters                               //
+////////////////////////////////////////////////////////////////////////////////
+
+// Head of the chain: the control line while start_core is high, otherwise the
+// line at the output of AlignedDataFIFO. Only the three strobes are reset.
+always @(posedge clk) begin
+    if (~rst_n) begin
+        data_line_ctrl_array[0]  <= 0;
+        data_line_prog_array[0]  <= 0;
+        data_line_valid_array[0] <= 0;
+    end
+    else begin
+        data_line_ctrl_array[0]  <= start_core;
+        data_line_prog_array[0]  <= (start_core || (core_fsm_state == IDLE)) ? 1'b0 : aligned_fifo_valid && ~aligned_fifo_data_type && aligned_fifo_re;
+        data_line_valid_array[0] <= (start_core || (core_fsm_state == IDLE)) ? 1'b0 : aligned_fifo_valid && aligned_fifo_data_type && aligned_fifo_re;
+    end
+
+    data_line_array[0]                <= (start_core) ? ctrl_line : aligned_fifo_data;
+    data_line_last_valid_pos_array[0] <= (start_core) ? 3'b0 : aligned_fifo_data_size;
+    data_line_last_array[0]           <= start_core | aligned_fifo_data_last;
+
+    data_line_pu_array[0]             <= data_line_pu;
+    data_line_cu_array[0]             <= data_line_cu;
+end
+
+// Tuple lines wait for the addressed cluster, tree lines for cluster 0.
+assign aligned_fifo_re = (aligned_fifo_data_type) ? data_line_ready_array[ data_line_cu ] : data_line_ready_array[0];
+
+genvar i;
+generate
+    for (i = 0; i < NUM_DTPU_CLUSTERS; i = i + 1) begin: cus
+        compute_unit #(.CU_ID (i) )
+        cu_x(
+            .clk                          (clk),
+            .rst_n                        (rst_n && ~start_core),
+
+            .data_line_in                 (data_line_array[i]),
+            .data_line_in_valid           (data_line_valid_array[i]),
+            .data_line_in_last_valid_pos  (data_line_last_valid_pos_array[i]),
+            .data_line_in_last            (data_line_last_array[i]),
+            .data_line_in_ctrl            (data_line_ctrl_array[i]),
+            .data_line_in_prog            (data_line_prog_array[i]),
+            .data_line_in_pu              (data_line_pu_array[i]),
+            .data_line_in_cu              (data_line_cu_array[i]),
+            .data_line_in_ready           (data_line_ready_array[i]),
+
+            .data_line_out                (data_line_array[i+1]),
+            .data_line_out_valid          (data_line_valid_array[i+1]),
+            .data_line_out_last_valid_pos (data_line_last_valid_pos_array[i+1]),
+            .data_line_out_last           (data_line_last_array[i+1]),
+            .data_line_out_ctrl           (data_line_ctrl_array[i+1]),
+            .data_line_out_prog           (data_line_prog_array[i+1]),
+            .data_line_out_pu             (data_line_pu_array[i+1]),
+            .data_line_out_cu             (data_line_cu_array[i+1]),
+
+            .tuple_result_out             (cu_tuple_result_out[i]),
+            .tuple_result_out_valid       (cu_tuple_result_out_valid[i]),
+            // NOTE(review): ready looks at result FIFO i (the first FIFO of this
+            // cluster), while the pointer update below looks at the FIFO that is
+            // currently targeted - confirm this asymmetry is intended.
+            .tuple_result_out_ready       ( ~res_fifo_full[i] )
+        );
+
+        // Round-robin write pointer over this cluster's result FIFOs
+        // (FIFO index = pointer * NUM_DTPU_CLUSTERS + i).
+        always @(posedge clk) begin
+            if (~rst_n) begin
+                curr_dest_result_fifo[i] <= 0;
+            end
+            else begin
+                if (cu_tuple_result_out_valid[i]) begin
+                    if (~res_fifo_full[ ({curr_dest_result_fifo[i], {NUM_DTPU_CLUSTERS_BITS{1'b0}}} + i) ]) begin
+                        curr_dest_result_fifo[i] <= curr_dest_result_fifo[i] + 1'b1;
+                    end
+                end
+            end
+        end
+    end
+endgenerate
+
+////////////////////////////////////////////////////////////////////////////////
+//                              Push Results Out                              //
+////////////////////////////////////////////////////////////////////////////////
+
+generate
+    for (i = 0; i < 16; i = i + 1) begin: out_fifos
+        quick_fifo #(
+            .FIFO_WIDTH(32),          // one 32-bit result
+            .FIFO_DEPTH_BITS(9),
+            .FIFO_ALMOSTFULL_THRESHOLD(490)
+        ) ResultsFIFO_x (
+            .clk        (clk),
+            .reset_n    (rst_n),
+            .din        ( cu_tuple_result_out[i%NUM_DTPU_CLUSTERS] ),
+            .we         ( res_fifo_we[i] ),
+
+            .re         (res_fifo_re),
+            .dout       (res_fifo_dout[32*i+31:i*32]),
+            .empty      (),
+            .valid      (res_fifo_valid[i]),
+            .full       (res_fifo_full[i]),
+            .count      (),
+            .almostfull ()
+        );
+
+        // FIFO i belongs to cluster i % NUM_DTPU_CLUSTERS and is that cluster's
+        // slot number i / NUM_DTPU_CLUSTERS.
+        assign res_fifo_we[i] = cu_tuple_result_out_valid[i%NUM_DTPU_CLUSTERS] && (curr_dest_result_fifo[i%NUM_DTPU_CLUSTERS] == (i/NUM_DTPU_CLUSTERS));
+
+    end
+endgenerate
+
+// All 16 FIFOs are read together; a line is valid when every FIFO has data,
+// or, on the final line, when every FIFO not covered by the mask has data.
+assign res_fifo_re       = core_result_ready && core_result_valid;
+assign core_result_out   = res_fifo_dout;
+assign core_result_valid = &res_fifo_valid || ((&(res_fifo_valid | last_result_line_mask)) && last_result_line) ;
+
+
+endmodule
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/Data_Memory.sv b/hw/hdl/operators/dtengine/gbm/inferCore/Data_Memory.sv
new file mode 100644
index 00000000..ee541e4b
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/Data_Memory.sv
@@ -0,0 +1,73 @@
+
+/*
+ * Copyright 2019 - 2020 Systems Group, ETH Zurich
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the
Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+//
+// Data_Memory
+//
+// Wrapper around dual_port_mem with a line-wide write port and a 32-bit read
+// port. A read presents raddr[ADDR_WIDTH-1:1] to the memory and uses raddr[0],
+// delayed by one cycle, to pick the low or high 32-bit word of the returned
+// line. valid_out is re delayed by two clock cycles.
+//
+// The word select needs a line of at least 64 bits. The default DATA_WIDTH of
+// 32 does not satisfy that, so instances are expected to override it
+// (presumably with DTPackage::DATA_LINE_WIDTH = 64 - verify against callers).
+//
+module Data_Memory #(
+    parameter DATA_WIDTH = 32,
+    parameter ADDR_WIDTH = 8
+) (
+    input  wire                  clk,
+    input  wire                  rst_n,      // active low; only clears valid_out
+    input  wire                  we,         // write enable (also enables port A)
+    input  wire                  re,         // read enable
+    input  wire [ADDR_WIDTH-1:0] raddr,      // 32-bit word address
+    input  wire [ADDR_WIDTH-2:0] waddr,      // line address
+    input  wire [DATA_WIDTH-1:0] din,
+    output reg  [31:0]           dout,
+    output reg                   valid_out
+);
+
+// LSB of the upper 32-bit word inside the memory line. Unchanged ([63:32]) for
+// every DATA_WIDTH >= 64; falls back to the low word for narrower lines so the
+// part-select below never leaves the declared range of dline.
+localparam HI_WORD_LSB = (DATA_WIDTH >= 64) ? 32 : 0;
+
+wire [DATA_WIDTH-1:0] dline;
+reg                   raddr_d1;
+reg                   re_d1;
+
+// synthesis translate_off
+initial begin
+    if (DATA_WIDTH < 64)
+        $error("Data_Memory: DATA_WIDTH=%0d is narrower than 64 bits, the upper-word read select is not meaningful", DATA_WIDTH);
+end
+// synthesis translate_on
+
+dual_port_mem Dualport_mem_inst (
+    .clk   ( clk ),
+    .da    ( din ),
+    .wea   ( we ),
+    .ena   ( we ),
+    .addra ( waddr ),
+
+    .web   ( 1'b0 ),
+    .addrb ( raddr[ADDR_WIDTH-1:1] ),
+    .enb   ( re ),
+    .qb    ( dline )
+);
+
+always @(posedge clk) begin
+    raddr_d1 <= raddr[0];
+    re_d1    <= re;
+    dout     <= (raddr_d1) ? dline[HI_WORD_LSB +: 32] : dline[31:0];
+
+    if (~rst_n) begin
+        valid_out <= 1'b0;
+    end
+    else begin
+        valid_out <= re_d1;
+    end
+end
+
+endmodule
+
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/FPAdder_2cycles_latency.sv b/hw/hdl/operators/dtengine/gbm/inferCore/FPAdder_2cycles_latency.sv
new file mode 100644
index 00000000..178202d4
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/FPAdder_2cycles_latency.sv
@@ -0,0 +1,389 @@
+// File 2cycles_latency.vhdl translated with vhd2vl v2.4 VHDL to Verilog RTL translator
+// vhd2vl settings:
+// * Verilog Module Declaration Style: 1995
+
+// vhd2vl is Free (libre) Software:
+// Copyright (C) 2001 Vincenzo Liguori - Ocean Logic Pty Ltd
+// http://www.ocean-logic.com
+// Modifications Copyright (C) 2006 Mark Gonzales - PMC Sierra Inc
+// Modifications (C) 2010 Shankar Giri
+// Modifications Copyright (C) 2002, 2005, 2008-2010 Larry Doolittle - LBNL
+// http://doolittle.icarus.com/~larry/vhd2vl/
+//
+// vhd2vl comes with ABSOLUTELY NO WARRANTY. Always check the resulting
+// Verilog for correctness, ideally with a formal verification tool.
+//
+// You are welcome to redistribute vhd2vl under certain conditions.
+// See the license (GPLv2) file included with the source for details.
+
+// The result of translation follows. Its copyright status should be
+// considered unchanged from the original VHDL.
+
+//------------------------------------------------------------------------------
+// FPAdder_8_23_uid2_RightShifter
+// (RightShifter_24_by_max_26_uid4)
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Florent de Dinechin, Bogdan Pasca (2007,2008,2009,2010)
+//------------------------------------------------------------------------------
+// no timescale needed
+
+// Combinational right shifter used to align the smaller operand's significand.
+//   X : 24-bit significand
+//   S : shift amount
+//   R : X placed in the top 24 bits of a 50-bit word, then shifted right by S;
+//       the low 26 bits keep what was shifted out (for S <= 26, the largest
+//       amount the adder requests).
+// The former stage-by-stage construction (prepend or append 1/2/4/8/16 zeros
+// per bit of S, then drop the low 5 bits) produces exactly this value. The
+// undriven clk/rst nets it declared were never ports of this module and have
+// been removed.
+module FPAdder_8_23_uid2_RightShifter_l2(
+    input  wire [23:0] X,
+    input  wire [4:0]  S,
+    output wire [49:0] R
+);
+
+    assign R = {X, 26'b0} >> S;
+
+endmodule
+
+//------------------------------------------------------------------------------
+// IntAdder_27_f110_uid6
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Bogdan Pasca, Florent de Dinechin (2008-2010)
+//------------------------------------------------------------------------------
+// Pipeline depth: 0 cycles
+// no timescale needed
+
+// 27-bit combinational adder with carry-in: R = X + Y + Cin (modulo 2^27).
+module IntAdder_27_f110_uid6_l2(
+    input  wire [26:0] X,
+    input  wire [26:0] Y,
+    input  wire        Cin,
+    output wire [26:0] R
+);
+
+    assign R = X + Y + Cin;
+
+endmodule
+
+//------------------------------------------------------------------------------
+// LZCShifter_28_to_28_counting_32_uid16
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Florent de Dinechin, Bogdan Pasca (2007)
+//------------------------------------------------------------------------------
+// Pipeline depth: 1 cycles
+
+// Leading-zero counter and normalising left shifter.
+//   I     : 28-bit input
+//   Count : number of leading zeros removed (all five bits set for I == 0)
+//   O     : I shifted left by Count
+// The 16/8/4 stages are evaluated in the input cycle; their count bits and
+// the partially shifted value are registered (held while stall is high) and
+// the 2/1 stages are evaluated in the following cycle.
+module LZCShifter_28_to_28_counting_32_uid16_l2(
+    input  wire        clk,
+    input  wire        stall,
+    input  wire [27:0] I,
+    output wire [4:0]  Count,
+    output wire [27:0] O
+);
+
+    wire [27:0] level5;
+    wire        count4;
+    wire [27:0] level4;
+    wire        count3;
+    wire [27:0] level3;
+    wire        count2;
+    wire [27:0] level2;
+    wire        count1;
+    wire [27:0] level1;
+    wire        count0;
+    wire [27:0] level0;
+
+    reg         count4_d1;
+    reg         count3_d1;
+    reg         count2_d1;
+    reg  [27:0] level2_d1;
+
+    always @(posedge clk) begin
+        if (~stall) begin
+            count4_d1 <= count4;
+            count3_d1 <= count3;
+            count2_d1 <= count2;
+            level2_d1 <= level2;
+        end
+    end
+
+    assign level5 = I;
+    assign count4 = (level5[27:12] == 16'b0);
+    assign level4 = count4 ? {level5[11:0], 16'b0} : level5;
+    assign count3 = (level4[27:20] == 8'b0);
+    assign level3 = count3 ? {level4[19:0], 8'b0} : level4;
+    assign count2 = (level3[27:24] == 4'b0);
+    assign level2 = count2 ? {level3[23:0], 4'b0} : level3;
+    //--------------Synchro barrier, entering cycle 1----------------
+    assign count1 = (level2_d1[27:26] == 2'b00);
+    assign level1 = count1 ? {level2_d1[25:0], 2'b00} : level2_d1;
+    assign count0 = (level1[27] == 1'b0);
+    assign level0 = count0 ? {level1[26:0], 1'b0} : level1;
+
+    assign O     = level0;
+    assign Count = {count4_d1, count3_d1, count2_d1, count1, count0};
+
+endmodule
+
+//------------------------------------------------------------------------------
+// IntAdder_34_f110_uid18
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Bogdan Pasca, Florent de Dinechin (2008-2010)
+//------------------------------------------------------------------------------
+// Pipeline depth: 0 cycles
+
+// 34-bit combinational adder with carry-in: R = X + Y + Cin (modulo 2^34).
+module IntAdder_34_f110_uid18_l2(
+    input  wire [33:0] X,
+    input  wire [33:0] Y,
+    input  wire        Cin,
+    output wire [33:0] R
+);
+
+    assign R = X + Y + Cin;
+
+endmodule
+
+//------------------------------------------------------------------------------
+// FPAdder_8_23_uid2
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Bogdan Pasca, Florent de Dinechin (2010)
+//------------------------------------------------------------------------------
+// Pipeline depth: 2 cycles
+
+// Two-stage pipelined adder for 34-bit operands laid out as
+//   [33:32] exception code, [31] sign, [30:23] exponent, [22:0] fraction.
+// From the case tables below: exception code 00 yields a zero significand,
+// 01 is the ordinary-number path; 10 / 11 are presumably infinity / NaN as in
+// the FloPoCo format - confirm against the FloPoCo documentation.
+//   seq_stall : freezes every pipeline register while high
+//   rst       : unused, kept for interface compatibility
+// R corresponds to the X/Y presented two enabled clock edges earlier.
+//
+// Changes relative to the vhd2vl output: the combinational always blocks use
+// blocking assignments, the sticky comparison uses a literal of the same
+// width as the compared slice, and unused nets were dropped.
+module FPAdder_8_23_uid2_l2(
+    input  wire              clk,
+    input  wire              rst,
+    input  wire              seq_stall,
+    input  wire [8 + 23 + 2:0] X,
+    input  wire [8 + 23 + 2:0] Y,
+    output wire [8 + 23 + 2:0] R
+);
+
+    wire [32:0] excExpFracX;
+    wire [32:0] excExpFracY;
+    wire [8:0]  eXmeY;
+    wire [8:0]  eYmeX;
+    wire        swap;
+    wire [33:0] newX;
+    wire [33:0] newY;
+    wire [7:0]  expX;
+    wire [1:0]  excX;
+    wire [1:0]  excY;
+    wire        signX;
+    wire        signY;
+    wire        EffSub;
+    wire [5:0]  sdsXsYExnXY;
+    wire [23:0] fracY;
+    reg  [1:0]  excRt;
+    wire        signR;
+    wire [8:0]  expDiff;
+    wire        shiftedOut;
+    wire [4:0]  shiftVal;
+    wire [49:0] shiftedFracY;
+    wire        sticky;
+    wire [26:0] fracYfar;
+    wire [26:0] fracYfarXorOp;
+    wire [26:0] fracXfar;
+    wire        cInAddFar;
+    wire [26:0] fracAddResult;
+    wire [27:0] fracGRS;
+    wire [9:0]  extendedExpInc;
+    wire [4:0]  nZerosNew;
+    wire [27:0] shiftedFrac;
+    wire [9:0]  updatedExp;
+    wire        eqdiffsign;
+    wire [33:0] expFrac;
+    wire        stk;
+    wire        rnd;
+    wire        grd;
+    wire        lsb;
+    wire        addToRoundBit;
+    wire [33:0] RoundedExpFrac;
+    wire [1:0]  upExc;
+    wire [22:0] fracR;
+    wire [7:0]  expR;
+    wire [3:0]  exExpExc;
+    reg  [1:0]  excRt2;
+    wire [1:0]  excR;
+    wire [33:0] computedR;
+
+    // Pipeline registers (_d1: one stage, _d2: two stages).
+    reg  [33:0] newX_d1;
+    reg  [7:0]  expX_d1;
+    reg         EffSub_d1, EffSub_d2;
+    reg  [1:0]  excRt_d1, excRt_d2;
+    reg         signR_d1, signR_d2;
+    reg  [49:0] shiftedFracY_d1;
+    reg  [9:0]  extendedExpInc_d1;
+
+    always @(posedge clk) begin
+        if (~seq_stall) begin
+            newX_d1           <= newX;
+            expX_d1           <= expX;
+            EffSub_d1         <= EffSub;
+            EffSub_d2         <= EffSub_d1;
+            excRt_d1          <= excRt;
+            excRt_d2          <= excRt_d1;
+            signR_d1          <= signR;
+            signR_d2          <= signR_d1;
+            shiftedFracY_d1   <= shiftedFracY;
+            extendedExpInc_d1 <= extendedExpInc;
+        end
+    end
+
+    // Exponent difference and swap: newX is the operand with the larger
+    // {exception, exponent, fraction}.
+    assign excExpFracX = {X[33:32], X[30:0]};
+    assign excExpFracY = {Y[33:32], Y[30:0]};
+    assign eXmeY       = {1'b0, X[30:23]} - {1'b0, Y[30:23]};
+    assign eYmeX       = {1'b0, Y[30:23]} - {1'b0, X[30:23]};
+    assign swap        = (excExpFracX >= excExpFracY) ? 1'b0 : 1'b1;
+    assign newX        = swap ? Y : X;
+    assign newY        = swap ? X : Y;
+    assign expX        = newX[30:23];
+    assign excX        = newX[33:32];
+    assign excY        = newY[33:32];
+    assign signX       = newX[31];
+    assign signY       = newY[31];
+    assign EffSub      = signX ^ signY;
+    assign sdsXsYExnXY = {signX, signY, excX, excY};
+    assign fracY       = (excY == 2'b00) ? 24'b0 : {1'b1, newY[22:0]};
+
+    // Exception code of the result derived from signs and input exceptions.
+    always @(*) begin
+        case (sdsXsYExnXY)
+            6'b000000, 6'b010000, 6'b100000, 6'b110000 :
+                excRt = 2'b00;
+            6'b000101, 6'b010101, 6'b100101, 6'b110101,
+            6'b000100, 6'b010100, 6'b100100, 6'b110100,
+            6'b000001, 6'b010001, 6'b100001, 6'b110001 :
+                excRt = 2'b01;
+            6'b111010, 6'b001010,
+            6'b001000, 6'b011000, 6'b101000, 6'b111000,
+            6'b000010, 6'b010010, 6'b100010, 6'b110010,
+            6'b001001, 6'b011001, 6'b101001, 6'b111001,
+            6'b000110, 6'b010110, 6'b100110, 6'b110110 :
+                excRt = 2'b10;
+            default :
+                excRt = 2'b11;
+        endcase
+    end
+
+    assign signR = (sdsXsYExnXY == 6'b100000 || sdsXsYExnXY == 6'b010000) ? 1'b0 : signX;
+
+    //-------------- cycle 0----------------
+    assign expDiff    = swap ? eYmeX : eXmeY;
+    assign shiftedOut = (expDiff >= 25) ? 1'b1 : 1'b0;
+    assign shiftVal   = shiftedOut ? 5'b11010 : expDiff[4:0];
+
+    FPAdder_8_23_uid2_RightShifter_l2 RightShifterComponent(
+        .R(shiftedFracY),
+        .S(shiftVal),
+        .X(fracY));
+
+    //--------------Synchro barrier, entering cycle 1----------------
+    // Sticky bit: OR of the 24 low bits that fall below the adder's operand.
+    assign sticky        = (shiftedFracY_d1[23:0] == 24'b0) ? 1'b0 : 1'b1;
+    assign fracYfar      = {1'b0, shiftedFracY_d1[49:24]};
+    assign fracYfarXorOp = fracYfar ^ {27{EffSub_d1}};
+    assign fracXfar      = {2'b01, newX_d1[22:0], 2'b00};
+    assign cInAddFar     = EffSub_d1 & ~sticky;
+
+    IntAdder_27_f110_uid6_l2 fracAdder(
+        .Cin(cInAddFar),
+        .R(fracAddResult),
+        .X(fracXfar),
+        .Y(fracYfarXorOp));
+
+    assign fracGRS        = {fracAddResult, sticky};
+    assign extendedExpInc = {2'b00, expX_d1} + 1'b1;
+
+    LZCShifter_28_to_28_counting_32_uid16_l2 LZC_component(
+        .clk(clk),
+        .stall(seq_stall),
+        .Count(nZerosNew),
+        .I(fracGRS),
+        .O(shiftedFrac));
+
+    //--------------Synchro barrier, entering cycle 2----------------
+    assign updatedExp = extendedExpInc_d1 - {5'b00000, nZerosNew};
+    assign eqdiffsign = (nZerosNew == 5'b11111) ? 1'b1 : 1'b0;
+    assign expFrac    = {updatedExp, shiftedFrac[26:3]};
+
+    // Rounding: add one at the guard position unless the value is an exact
+    // tie with an even LSB.
+    assign stk = shiftedFrac[1] | shiftedFrac[0];
+    assign rnd = shiftedFrac[2];
+    assign grd = shiftedFrac[3];
+    assign lsb = shiftedFrac[4];
+    assign addToRoundBit = (lsb == 1'b0 && grd == 1'b1 && rnd == 1'b0 && stk == 1'b0) ? 1'b0 : 1'b1;
+
+    IntAdder_34_f110_uid18_l2 roundingAdder(
+        .Cin(addToRoundBit),
+        .R(RoundedExpFrac),
+        .X(expFrac),
+        .Y(34'b0));
+
+    assign upExc    = RoundedExpFrac[33:32];
+    assign fracR    = RoundedExpFrac[23:1];
+    assign expR     = RoundedExpFrac[31:24];
+    assign exExpExc = {upExc, excRt_d2};
+
+    // Final exception code from exponent overflow/underflow bits and excRt.
+    always @(*) begin
+        case (exExpExc)
+            4'b0000, 4'b0100, 4'b1000, 4'b1100, 4'b1001, 4'b1101 :
+                excRt2 = 2'b00;
+            4'b0001 :
+                excRt2 = 2'b01;
+            4'b0010, 4'b0110, 4'b0101 :
+                excRt2 = 2'b10;
+            default :
+                excRt2 = 2'b11;
+        endcase
+    end
+
+    // Exact cancellation of an effective subtraction gives exception code 00.
+    assign excR      = (eqdiffsign == 1'b1 && EffSub_d2 == 1'b1) ? 2'b00 : excRt2;
+    assign computedR = {excR, signR_d2, expR, fracR};
+    assign R         = computedR;
+
+endmodule
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/FPAdder_3cycles_latency.v b/hw/hdl/operators/dtengine/gbm/inferCore/FPAdder_3cycles_latency.v
new file mode 100644
index 00000000..ab8c0e7a
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/FPAdder_3cycles_latency.v
@@ -0,0 +1,458 @@
+//------------------------------------------------------------------------------
+// FPAdder_8_23_uid2_RightShifter
+// (RightShifter_24_by_max_26_uid4)
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Florent de Dinechin, Bogdan Pasca (2007,2008,2009,2010)
+//------------------------------------------------------------------------------
+// no timescale needed
+//
+// Combinational right shifter used to align the smaller operand's fraction.
+// The 24-bit input X is shifted right by S (0..31) through five mux stages
+// (distances 1, 2, 4, 8, 16). Every stage widens the word by its own distance,
+// so after the last stage the word is 55 bits wide; R is its upper 50 bits.
+// clk and rst belong to the port list but are not read inside the module.
+
+module FPAdder_8_23_uid2_RightShifter_l3(
+  input  wire        clk,
+  input  wire        rst,
+  input  wire [23:0] X,   // fraction to shift
+  input  wire [4:0]  S,   // shift distance
+  output wire [49:0] R    // shifted fraction, upper 50 bits of the 55-bit word
+);
+
+  // Stage k prepends 2^k zeros when S[k] is set (shift right by 2^k) and
+  // appends 2^k zeros otherwise (value stays left-aligned).
+  wire [24:0] stage1  = S[0] ? {1'b0, X}                        : {X, 1'b0};
+  wire [26:0] stage2  = S[1] ? {2'b00, stage1}                  : {stage1, 2'b00};
+  wire [30:0] stage4  = S[2] ? {4'b0000, stage2}                : {stage2, 4'b0000};
+  wire [38:0] stage8  = S[3] ? {8'b00000000, stage4}            : {stage4, 8'b00000000};
+  wire [54:0] stage16 = S[4] ? {16'b0000000000000000, stage8}   : {stage8, 16'b0000000000000000};
+
+  // Drop the five least-significant bits of the widened word.
+  assign R = stage16[54:5];
+
+endmodule
+
+//------------------------------------------------------------------------------
+// IntAdder_27_f150_uid6
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Bogdan Pasca, Florent de Dinechin (2008-2010)
+//------------------------------------------------------------------------------
+// Pipeline depth: 0 cycles
+// no timescale needed
+//
+// 27-bit combinational adder with carry-in: R = X + Y + Cin, truncated to
+// 27 bits. clk and rst are part of the port list but are not used.
+
+module IntAdder_27_f150_uid6_l3(
+clk,
+rst,
+X,
+Y,
+Cin,
+R
+);
+
+input clk, rst;
+input [26:0] X;
+input [26:0] Y;
+input Cin;
+output [26:0] R;
+
+wire clk;
+wire rst;
+wire [26:0] X;
+wire [26:0] Y;
+wire Cin;
+wire [26:0] R;
+
+
+  //Alternative
+  assign R = X + Y + Cin;
+
+endmodule
+
+//------------------------------------------------------------------------------
+// LZCShifter_28_to_28_counting_32_uid16
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Florent de Dinechin, Bogdan Pasca (2007)
+//------------------------------------------------------------------------------
+// Pipeline depth: 0 cycles in this variant. The generated operator had one
+// register stage (count4_d1 / level4_d1 / count3_d1); those registers were
+// turned into plain wires ("Added by Babis" below), so the module is purely
+// combinational and clk / rst are unused.
+// no timescale needed
+//
+// Leading-zero counter and normaliser: O is I shifted left until its MSB is 1,
+// Count is the number of positions shifted. Each stage tests the top
+// 16/8/4/2/1 bits and shifts them out when they are all zero. An all-zero
+// input therefore yields Count = 5'b11111 and O = 0.
+
+module LZCShifter_28_to_28_counting_32_uid16_l3(
+clk,
+rst,
+I,
+Count,
+O
+);
+
+input clk, rst;
+input [27:0] I;
+output [4:0] Count;
+output [27:0] O;
+
+wire clk;
+wire rst;
+wire [27:0] I;
+wire [4:0] Count;
+wire [27:0] O;
+wire [27:0] level5;
+
+wire count4;
+//reg count4_d1;
+wire count4_d1;
+
+wire [27:0] level4;
+//reg [27:0] level4_d1;
+wire [27:0] level4_d1;
+
+wire count3;
+//reg count3_d1;
+wire count3_d1;
+
+wire [27:0] level3;
+wire count2;
+wire [27:0] level2;
+wire count1;
+wire [27:0] level1;
+wire count0;
+wire [27:0] level0;
+wire [4:0] sCount;
+
+  assign level5 = I;
+  // countN is 1 when the top 2^N bits of the current level are all zero;
+  // in that case the next level is the current one shifted left by 2^N.
+  assign count4 = level5[27:12] == 16'b0000000000000000 ? 1'b1 : 1'b0;
+  assign level4 = count4 == 1'b0 ? level5[27:0] : {level5[11:0],16'b0000000000000000};
+  assign count3 = level4[27:20] == 8'b00000000 ? 1'b1 : 1'b0;
+  //--------------Synchro barrier, entering cycle 1----------------
+  // (no longer a real barrier: the three signals below are wired straight through)
+
+  /* Added by Babis */
+  assign count4_d1 = count4;
+  assign level4_d1 = level4;
+  assign count3_d1 = count3;
+
+  assign level3 = count3_d1 == 1'b0 ? level4_d1[27:0] : {level4_d1[19:0],8'b00000000};
+  assign count2 = level3[27:24] == 4'b0000 ? 1'b1 : 1'b0;
+  assign level2 = count2 == 1'b0 ? level3[27:0] : {level3[23:0],4'b0000};
+  assign count1 = level2[27:26] == 2'b00 ? 1'b1 : 1'b0;
+  assign level1 = count1 == 1'b0 ? level2[27:0] : {level2[25:0],2'b00};
+  assign count0 = level1[27:27] == 1'b0 ? 1'b1 : 1'b0;
+  assign level0 = count0 == 1'b0 ? level1[27:0] : {level1[26:0],1'b0};
+  assign O = level0;
+  assign sCount = {count4_d1,count3_d1,count2,count1,count0};
+  assign Count = sCount;
+
+endmodule
+
+//------------------------------------------------------------------------------
+// IntAdder_34_f150_uid18
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Bogdan Pasca, Florent de Dinechin (2008-2010)
+//------------------------------------------------------------------------------
+// Pipeline depth: 0 cycles
+// no timescale needed
+//
+// 34-bit combinational adder with carry-in: R = X + Y + Cin, truncated to
+// 34 bits. clk and rst are part of the port list but are not used (the empty
+// clocked always block that used to sit here was dead code and was removed).
+
+module IntAdder_34_f150_uid18_l3(
+clk,
+rst,
+X,
+Y,
+Cin,
+R
+);
+
+input clk, rst;
+input [33:0] X;
+input [33:0] Y;
+input Cin;
+output [33:0] R;
+
+wire clk;
+wire rst;
+wire [33:0] X;
+wire [33:0] Y;
+wire Cin;
+wire [33:0] R;
+
+  //Alternative
+  assign R = X + Y + Cin;
+
+endmodule
+
+//------------------------------------------------------------------------------
+// FPAdder_8_23_uid2
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Bogdan Pasca, Florent de Dinechin (2010)
+//------------------------------------------------------------------------------
+// Pipeline depth: 3 cycles
+// no timescale needed
+//
+// Floating-point adder, 8-bit exponent / 23-bit fraction.
+// X, Y and R are 34-bit words:
+//   [33:32] exception field, [31] sign, [30:23] exponent, [22:0] fraction.
+// An exception field of 2'b00 marks a zero operand (its fraction is forced to
+// zero below). The other encodings presumably follow the FloPoCo convention
+// (01 normal, 10 infinity, 11 NaN) -- confirm against the FloPoCo manual.
+//
+// Pipeline as implemented by the registers of this module:
+//   cycle 0: order the operands (newX >= newY), compute the alignment distance
+//   cycle 1: align newY's fraction, add / subtract the fractions
+//   cycle 2: leading-zero count and normalisation shift
+//   cycle 3: exponent update, rounding, exception resolution -> R
+// All pipeline registers have a synchronous, active-high reset (rst) and keep
+// their value while seq_stall is high.
+
+module FPAdder_8_23_uid2_l3(
+clk,
+rst,
+seq_stall,
+X,
+Y,
+R
+);
+
+input clk, rst, seq_stall;
+input [8 + 23 + 2:0] X;
+input [8 + 23 + 2:0] Y;
+output [8 + 23 + 2:0] R;
+
+wire clk;
+wire rst;
+wire seq_stall;
+wire [8 + 23 + 2:0] X;
+wire [8 + 23 + 2:0] Y;
+wire [8 + 23 + 2:0] R;
+
+
+wire [32:0] excExpFracX;
+wire [32:0] excExpFracY;
+wire [8:0] eXmeY;
+wire [8:0] eYmeX;
+wire swap;
+wire [33:0] newX; reg [33:0] newX_d1;
+wire [33:0] newY;
+wire [7:0] expX; reg [7:0] expX_d1;
+wire [1:0] excX;
+wire [1:0] excY;
+wire signX;
+wire signY;
+wire EffSub; reg EffSub_d1; reg EffSub_d2; reg EffSub_d3;
+wire [5:0] sdsXsYExnXY;
+wire [3:0] sdExnXY;
+wire [23:0] fracY;
+reg [1:0] excRt; reg [1:0] excRt_d1; reg [1:0] excRt_d2; reg [1:0] excRt_d3;
+wire signR; reg signR_d1; reg signR_d2; reg signR_d3;
+wire [8:0] expDiff;
+wire shiftedOut;
+wire [4:0] shiftVal;
+wire [49:0] shiftedFracY;
+
+wire sticky;
+wire [26:0] fracYfar;
+wire [26:0] fracYfarXorOp;
+wire [26:0] fracXfar;
+wire cInAddFar;
+wire [26:0] fracAddResult;
+wire [27:0] fracGRS;
+
+//added by Babis
+reg [27:0] fracGRS_d1;
+
+wire [9:0] extendedExpInc; reg [9:0] extendedExpInc_d1; reg [9:0] extendedExpInc_d2;
+wire [4:0] nZerosNew; reg [4:0] nZerosNew_d1;
+wire [27:0] shiftedFrac;
+reg [27:0] shiftedFrac_d1;
+
+//reg [49:0] shiftedFracY_d1;
+
+/* Added by Babis */
+reg [4:0] shiftVal_d1;
+reg [23:0] fracY_d1;
+
+wire [9:0] updatedExp;
+wire eqdiffsign;
+wire [33:0] expFrac;
+wire stk; reg stk_d1;
+wire rnd; reg rnd_d1;
+wire grd; reg grd_d1;
+wire lsb; reg lsb_d1;
+wire addToRoundBit;
+wire [33:0] RoundedExpFrac;
+wire [1:0] upExc;
+wire [22:0] fracR;
+wire [7:0] expR;
+wire [3:0] exExpExc;
+reg [1:0] excRt2;
+wire [1:0] excR;
+wire [33:0] computedR;
+
+  // Pipeline registers (synchronous reset, frozen while seq_stall is high).
+  always @(posedge clk) begin
+    if(rst == 1'b1) begin
+      newX_d1 <= {34{1'b0}};
+      expX_d1 <= {8{1'b0}};
+      EffSub_d1 <= 1'b0;
+      EffSub_d2 <= 1'b0;
+      EffSub_d3 <= 1'b0;
+      excRt_d1 <= {2{1'b0}};
+      excRt_d2 <= {2{1'b0}};
+      excRt_d3 <= {2{1'b0}};
+      signR_d1 <= 1'b0;
+      signR_d2 <= 1'b0;
+      signR_d3 <= 1'b0;
+      //shiftedFracY_d1 <= {50{1'b0}};
+      /* Added by Babis */
+      shiftVal_d1 <= {5{1'b0}};   // register is 5 bits wide (was a 4-bit replication)
+      fracY_d1 <= {24{1'b0}};
+      fracGRS_d1 <= {28{1'b0}};
+
+      extendedExpInc_d1 <= {10{1'b0}};
+      extendedExpInc_d2 <= {10{1'b0}};
+      nZerosNew_d1 <= {5{1'b0}};
+      shiftedFrac_d1 <= {28{1'b0}};
+      stk_d1 <= 1'b0;
+      rnd_d1 <= 1'b0;
+      grd_d1 <= 1'b0;
+      lsb_d1 <= 1'b0;
+    end
+    else begin
+      if(~seq_stall) begin
+        newX_d1 <= newX;
+        expX_d1 <= expX;
+        EffSub_d1 <= EffSub;
+        EffSub_d2 <= EffSub_d1;
+        EffSub_d3 <= EffSub_d2;
+        excRt_d1 <= excRt;
+        excRt_d2 <= excRt_d1;
+        excRt_d3 <= excRt_d2;
+        signR_d1 <= signR;
+        signR_d2 <= signR_d1;
+        signR_d3 <= signR_d2;
+        //shiftedFracY_d1 <= shiftedFracY;
+        /* Added by Babis */
+        shiftVal_d1 <= shiftVal;
+        fracY_d1 <= fracY;
+        fracGRS_d1 <= fracGRS;
+
+        extendedExpInc_d1 <= extendedExpInc;
+        extendedExpInc_d2 <= extendedExpInc_d1;
+        nZerosNew_d1 <= nZerosNew;
+        shiftedFrac_d1 <= shiftedFrac;
+        stk_d1 <= stk;
+        rnd_d1 <= rnd;
+        grd_d1 <= grd;
+        lsb_d1 <= lsb;
+      end
+    end
+  end
+
+  // Exponent difference and swap --
+  // Operands are compared without their sign bit; newX is the larger one.
+  assign excExpFracX = {X[33:32],X[30:0]};
+  assign excExpFracY = {Y[33:32],Y[30:0]};
+  assign eXmeY = ({1'b0,X[30:23]}) - ({1'b0,Y[30:23]});
+  assign eYmeX = ({1'b0,Y[30:23]}) - ({1'b0,X[30:23]});
+  assign swap = excExpFracX >= excExpFracY ? 1'b0 : 1'b1;
+  assign newX = swap == 1'b0 ? X : Y;
+  assign newY = swap == 1'b0 ? Y : X;
+  assign expX = newX[30:23];
+  assign excX = newX[33:32];
+  assign excY = newY[33:32];
+  assign signX = newX[31];
+  assign signY = newY[31];
+  assign EffSub = signX ^ signY;
+  assign sdsXsYExnXY = {signX,signY,excX,excY};
+  assign sdExnXY = {excX,excY};
+  // A zero operand contributes an all-zero fraction, otherwise the implicit
+  // leading 1 is made explicit.
+  assign fracY = excY == 2'b00 ? 24'b000000000000000000000000 : {1'b1, newY[22:0]};
+
+  // Exception field derived from the operand signs and exception fields only.
+  // Combinational block: blocking assignments.
+  always @(*) begin
+    case(sdsXsYExnXY)
+      6'b000000,6'b010000,6'b100000,6'b110000 : excRt = 2'b00;
+      6'b000101,6'b010101,6'b100101,6'b110101,6'b000100,6'b010100,6'b100100,6'b110100,6'b000001,6'b010001,6'b100001,6'b110001 : excRt = 2'b01;
+      6'b111010,6'b001010,6'b001000,6'b011000,6'b101000,6'b111000,6'b000010,6'b010010,6'b100010,6'b110010,6'b001001,6'b011001,6'b101001,6'b111001,6'b000110,6'b010110,6'b100110,6'b110110 : excRt = 2'b10;
+      default : excRt = 2'b11;
+    endcase
+  end
+
+  assign signR = (sdsXsYExnXY == 6'b100000 || sdsXsYExnXY == 6'b010000) ? 1'b0 : signX;
+  //-------------- cycle 0----------------
+  // Alignment distance, saturated to 26 when the exponents differ by 25 or more.
+  assign expDiff = swap == 1'b0 ? eXmeY : eYmeX;
+  assign shiftedOut = (expDiff >= 25) ? 1'b1 : 1'b0;
+  assign shiftVal = shiftedOut == 1'b0 ? expDiff[4:0] : 5'b11010;
+  // The shifter is fed from the registered copies, so it works in cycle 1.
+  FPAdder_8_23_uid2_RightShifter_l3 RightShifterComponent(
+    .clk(clk),
+    .rst(rst),
+    .R(shiftedFracY),
+    .S(shiftVal_d1),
+    .X(fracY_d1));
+
+  //--------------Synchro barrier, entering cycle 1----------------
+  // sticky = OR of the 24 bits shifted below the adder window
+  // (literal widened from 23 to 24 bits to match the slice).
+  assign sticky = (shiftedFracY[23:0] == 24'b000000000000000000000000) ? 1'b0 : 1'b1;
+  //-------------- cycle 0----------------
+  //--------------Synchro barrier, entering cycle 1----------------
+  assign fracYfar = {1'b0,shiftedFracY[49:24]};
+  // On an effective subtraction the aligned fraction is inverted; the +1 of
+  // the two's complement comes in through cInAddFar.
+  assign fracYfarXorOp = fracYfar ^ ({27{EffSub_d1}});
+  assign fracXfar = {2'b01,(newX_d1[22:0]),2'b00};
+  assign cInAddFar = EffSub_d1 & ~sticky;
+  IntAdder_27_f150_uid6_l3 fracAdder(
+    .clk(clk),
+    .rst(rst),
+    .Cin(cInAddFar),
+    .R(fracAddResult),
+    .X(fracXfar),
+    .Y(fracYfarXorOp));
+
+  assign fracGRS = {fracAddResult,sticky};
+  assign extendedExpInc = ({2'b00,expX_d1}) + 1'b1;
+  // The LZC shifter is combinational in this variant; its input is registered
+  // here, so normalisation happens in cycle 2.
+  LZCShifter_28_to_28_counting_32_uid16_l3 LZC_component(
+    .clk(clk),
+    .rst(rst),
+    .Count(nZerosNew),
+    .I(fracGRS_d1), /* Added by Babis */
+    .O(shiftedFrac));
+
+  //--------------Synchro barrier, entering cycle 2----------------
+  //--------------Synchro barrier, entering cycle 3----------------
+  assign updatedExp = extendedExpInc_d2 - ({5'b00000,nZerosNew_d1});
+  // Count == 31 is what the LZC shifter reports for an all-zero sum.
+  assign eqdiffsign = nZerosNew_d1 == 5'b11111 ? 1'b1 : 1'b0;
+  assign expFrac = {updatedExp,shiftedFrac_d1[26:3]};
+  //-------------- cycle 2----------------
+  assign stk = shiftedFrac[1] | shiftedFrac[0];
+  assign rnd = shiftedFrac[2];
+  assign grd = shiftedFrac[3];
+  assign lsb = shiftedFrac[4];
+  //--------------Synchro barrier, entering cycle 3----------------
+  // 1 is added at the guard position (bit 0 of expFrac) except on an exact tie
+  // with an even lsb, where the result is left unchanged.
+  assign addToRoundBit = (lsb_d1 == 1'b0 && grd_d1 == 1'b1 && rnd_d1 == 1'b0 && stk_d1 == 1'b0) ? 1'b0 : 1'b1;
+  IntAdder_34_f150_uid18_l3 roundingAdder(
+    .clk(clk),
+    .rst(rst),
+    .Cin(addToRoundBit),
+    .R(RoundedExpFrac),
+    .X(expFrac),
+    .Y(34'b0000000000000000000000000000000000));
+
+  //-------------- cycle 3----------------
+  assign upExc = RoundedExpFrac[33:32];
+  assign fracR = RoundedExpFrac[23:1];
+  assign expR = RoundedExpFrac[31:24];
+  assign exExpExc = {upExc,excRt_d3};
+
+  // Final exception field from the two exponent overflow bits and the
+  // exception field computed in cycle 0. Combinational: blocking assignments.
+  always @(*) begin
+    case((exExpExc))
+      4'b0000,4'b0100,4'b1000,4'b1100,4'b1001,4'b1101 : excRt2 = 2'b00;
+      4'b0001 : excRt2 = 2'b01;
+      4'b0010,4'b0110,4'b0101 : excRt2 = 2'b10;
+      default : excRt2 = 2'b11;
+    endcase
+  end
+
+  // An effective subtraction whose sum is all zeros is reported as zero.
+  assign excR = (eqdiffsign == 1'b1 && EffSub_d3 == 1'b1) ? 2'b00 : excRt2;
+  assign computedR = {excR,signR_d3,expR,fracR};
+  assign R = computedR;
+
+endmodule
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/FPAdder_4cycles_latency.v b/hw/hdl/operators/dtengine/gbm/inferCore/FPAdder_4cycles_latency.v
new file mode 100644
index 00000000..f6b677ce
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/FPAdder_4cycles_latency.v
@@ -0,0 +1,420 @@
+//------------------------------------------------------------------------------
+// FPAdder_8_23_uid2_RightShifter
+// (RightShifter_24_by_max_26_uid4)
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Florent de Dinechin, Bogdan Pasca (2007,2008,2009,2010)
+//------------------------------------------------------------------------------
+// no timescale needed
+//
+// Combinational right shifter: X (24 bits) is shifted right by S (0..31) using
+// five mux stages of distance 1, 2, 4, 8 and 16. Each stage widens the word by
+// its distance, giving 55 bits after the last stage; R is the upper 50 bits.
+
+module FPAdder_8_23_uid2_RightShifter_l4(
+  input  wire [23:0] X,   // fraction to shift
+  input  wire [4:0]  S,   // shift distance
+  output wire [49:0] R    // shifted fraction
+);
+
+  // When the select bit is set, zeros are prepended (shift right); otherwise
+  // the same number of zeros is appended and the value stays left-aligned.
+  wire [24:0] by1  = S[0] ? {1'b0, X}                     : {X, 1'b0};
+  wire [26:0] by2  = S[1] ? {2'b00, by1}                  : {by1, 2'b00};
+  wire [30:0] by4  = S[2] ? {4'b0000, by2}                : {by2, 4'b0000};
+  wire [38:0] by8  = S[3] ? {8'b00000000, by4}            : {by4, 8'b00000000};
+  wire [54:0] by16 = S[4] ? {16'b0000000000000000, by8}   : {by8, 16'b0000000000000000};
+
+  // The five lowest bits of the widened word are not part of the result.
+  assign R = by16[54:5];
+
+endmodule
+
+//------------------------------------------------------------------------------
+// IntAdder_27_f200_uid6
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Bogdan Pasca, Florent de Dinechin (2008-2010)
+//------------------------------------------------------------------------------
+// Pipeline depth: 0 cycles
+// no timescale needed
+//
+// 27-bit combinational adder with carry-in; the result is truncated to 27 bits.
+
+module IntAdder_27_f200_uid6_l4(
+  input  wire [26:0] X,
+  input  wire [26:0] Y,
+  input  wire        Cin,
+  output wire [26:0] R
+);
+
+  //Classical
+  assign R = X + Y + Cin;
+
+endmodule
+
+//------------------------------------------------------------------------------
+// LZCShifter_28_to_28_counting_32_uid16
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Florent de Dinechin, Bogdan Pasca (2007)
+//------------------------------------------------------------------------------
+// Pipeline depth: 2 cycles
+// no timescale needed
+//
+// Leading-zero counter and normaliser. O is I shifted left until its MSB is 1
+// and Count is the number of positions shifted; both appear two clock cycles
+// after I. The registers are cleared asynchronously by rst (active high) and
+// hold their value while seq_stall is high.
+
+module LZCShifter_28_to_28_counting_32_uid16_l4(
+  input  wire        clk,
+  input  wire        rst,
+  input  wire        seq_stall,
+  input  wire [27:0] I,
+  output wire [4:0]  Count,
+  output wire [27:0] O
+);
+
+  // ---------------- cycle 0: top 16 bits ----------------
+  wire        zero16 = (I[27:12] == 16'b0000000000000000);
+  wire [27:0] norm16 = zero16 ? {I[11:0], 16'b0000000000000000} : I;
+
+  // Registers between cycle 0 and cycle 1.
+  reg         zero16_q;
+  reg  [27:0] norm16_q;
+
+  // ---------------- cycle 1: top 8, 4 and 2 bits ----------------
+  wire        zero8 = (norm16_q[27:20] == 8'b00000000);
+  wire [27:0] norm8 = zero8 ? {norm16_q[19:0], 8'b00000000} : norm16_q;
+  wire        zero4 = (norm8[27:24] == 4'b0000);
+  wire [27:0] norm4 = zero4 ? {norm8[23:0], 4'b0000} : norm8;
+  wire        zero2 = (norm4[27:26] == 2'b00);
+  wire [27:0] norm2 = zero2 ? {norm4[25:0], 2'b00} : norm4;
+
+  // Registers between cycle 1 and cycle 2: the four count bits found so far
+  // ({16, 8, 4, 2}) and the partially normalised word.
+  reg  [3:0]  zeros_q;
+  reg  [27:0] norm2_q;
+
+  // ---------------- cycle 2: top bit ----------------
+  wire        zero1 = (norm2_q[27] == 1'b0);
+  wire [27:0] norm1 = zero1 ? {norm2_q[26:0], 1'b0} : norm2_q;
+
+  always @(posedge clk, posedge rst) begin
+    if (rst == 1'b1) begin
+      zero16_q <= 1'b0;
+      norm16_q <= {28{1'b0}};
+      zeros_q  <= 4'b0000;
+      norm2_q  <= {28{1'b0}};
+    end
+    else if (~seq_stall) begin
+      zero16_q <= zero16;
+      norm16_q <= norm16;
+      zeros_q  <= {zero16_q, zero8, zero4, zero2};
+      norm2_q  <= norm2;
+    end
+  end
+
+  assign O     = norm1;
+  assign Count = {zeros_q, zero1};
+
+endmodule
+
+//------------------------------------------------------------------------------
+// IntAdder_34_f200_uid18
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Bogdan Pasca, Florent de Dinechin (2008-2010)
+//------------------------------------------------------------------------------
+// Pipeline depth: 1 cycles
+// no timescale needed
+//
+// 34-bit adder with carry-in and registered inputs: R shows X + Y + Cin of the
+// operands sampled at the previous enabled clock edge. The input registers are
+// cleared asynchronously by rst (active high) and hold while seq_stall is high.
+
+module IntAdder_34_f200_uid18_l4(
+  input  wire        clk,
+  input  wire        rst,
+  input  wire        seq_stall,
+  input  wire [33:0] X,
+  input  wire [33:0] Y,
+  input  wire        Cin,
+  output wire [33:0] R
+);
+
+  // Registered copies of the operands.
+  reg [33:0] x_q;
+  reg [33:0] y_q;
+  reg        cin_q;
+
+  always @(posedge clk, posedge rst) begin
+    if (rst == 1'b1) begin
+      x_q   <= {34{1'b0}};
+      y_q   <= {34{1'b0}};
+      cin_q <= 1'b0;
+    end
+    else if (~seq_stall) begin
+      x_q   <= X;
+      y_q   <= Y;
+      cin_q <= Cin;
+    end
+  end
+
+  //Classical
+  //--------------Synchro barrier, entering cycle 1----------------
+  // The addition itself is combinational, downstream of the registers.
+  assign R = x_q + y_q + cin_q;
+
+endmodule
+
+//------------------------------------------------------------------------------
+// FPAdder_8_23_uid2
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Bogdan Pasca, Florent de Dinechin (2010)
+//------------------------------------------------------------------------------
+// Pipeline depth: 4 cycles
+// no timescale needed
+//
+// Floating-point adder, 8-bit exponent / 23-bit fraction.
+// X, Y and R are 34-bit words:
+//   [33:32] exception field, [31] sign, [30:23] exponent, [22:0] fraction.
+// An exception field of 2'b00 marks a zero operand (its fraction is forced to
+// zero below). The other encodings presumably follow the FloPoCo convention
+// (01 normal, 10 infinity, 11 NaN) -- confirm against the FloPoCo manual.
+//
+// Pipeline as implemented here and in the instantiated sub-modules:
+//   cycle 0: order the operands, align newY's fraction
+//   cycle 1: add / subtract the fractions, first leading-zero stage
+//   cycle 2: middle leading-zero stages
+//   cycle 3: last leading-zero stage, exponent update, rounding decision
+//   cycle 4: rounded result and exception resolution -> R
+// The registers of this module hold while seq_stall is high and have no reset;
+// rst is only forwarded to the LZC shifter and the rounding adder.
+
+module FPAdder_8_23_uid2_l4( clk, rst, seq_stall, X, Y, R);
+
+input clk, rst;
+input seq_stall;
+input [8 + 23 + 2:0] X;
+input [8 + 23 + 2:0] Y;
+output [8 + 23 + 2:0] R;
+
+wire clk;
+wire rst;
+wire seq_stall;
+wire [8 + 23 + 2:0] X;
+wire [8 + 23 + 2:0] Y;
+wire [8 + 23 + 2:0] R;
+
+wire [32:0] excExpFracX;
+wire [32:0] excExpFracY;
+wire [8:0] eXmeY;
+wire [8:0] eYmeX;
+wire swap;
+wire [33:0] newX; reg [33:0] newX_d1;
+wire [33:0] newY;
+wire [7:0] expX; reg [7:0] expX_d1;
+wire [1:0] excX;
+wire [1:0] excY;
+wire signX;
+wire signY;
+wire EffSub; reg EffSub_d1; reg EffSub_d2; reg EffSub_d3; reg EffSub_d4;
+wire [5:0] sdsXsYExnXY;
+wire [3:0] sdExnXY;
+wire [23:0] fracY;
+reg [1:0] excRt; reg [1:0] excRt_d1; reg [1:0] excRt_d2; reg [1:0] excRt_d3; reg [1:0] excRt_d4;
+wire signR; reg signR_d1; reg signR_d2; reg signR_d3; reg signR_d4;
+wire [8:0] expDiff;
+wire shiftedOut;
+wire [4:0] shiftVal;
+wire [49:0] shiftedFracY; reg [49:0] shiftedFracY_d1;
+wire sticky;
+wire [26:0] fracYfar;
+wire [26:0] fracYfarXorOp;
+wire [26:0] fracXfar;
+wire cInAddFar;
+wire [26:0] fracAddResult;
+wire [27:0] fracGRS;
+wire [9:0] extendedExpInc; reg [9:0] extendedExpInc_d1; reg [9:0] extendedExpInc_d2;
+wire [4:0] nZerosNew;
+wire [27:0] shiftedFrac;
+wire [9:0] updatedExp;
+wire eqdiffsign; reg eqdiffsign_d1;
+wire [33:0] expFrac;
+wire stk;
+wire rnd;
+wire grd;
+wire lsb;
+wire addToRoundBit;
+wire [33:0] RoundedExpFrac;
+wire [1:0] upExc;
+wire [22:0] fracR;
+wire [7:0] expR;
+wire [3:0] exExpExc;
+reg [1:0] excRt2;
+wire [1:0] excR;
+wire [33:0] computedR;
+
+  // Pipeline registers: no reset, frozen while seq_stall is high.
+  always @(posedge clk) begin
+
+    if(~seq_stall)
+    begin
+      newX_d1 <= newX;
+      expX_d1 <= expX;
+      EffSub_d1 <= EffSub;
+      EffSub_d2 <= EffSub_d1;
+      EffSub_d3 <= EffSub_d2;
+      EffSub_d4 <= EffSub_d3;
+      excRt_d1 <= excRt;
+      excRt_d2 <= excRt_d1;
+      excRt_d3 <= excRt_d2;
+      excRt_d4 <= excRt_d3;
+      signR_d1 <= signR;
+      signR_d2 <= signR_d1;
+      signR_d3 <= signR_d2;
+      signR_d4 <= signR_d3;
+      shiftedFracY_d1 <= shiftedFracY;
+      extendedExpInc_d1 <= extendedExpInc;
+      extendedExpInc_d2 <= extendedExpInc_d1;
+      eqdiffsign_d1 <= eqdiffsign;
+    end
+  end
+
+  // Exponent difference and swap --
+  // Operands are compared without their sign bit; newX is the larger one.
+
+  assign excExpFracX = {X[33:32],X[30:0]};
+  assign excExpFracY = {Y[33:32],Y[30:0]};
+  assign eXmeY = ({1'b0,X[30:23]}) - ({1'b0,Y[30:23]});
+  assign eYmeX = ({1'b0,Y[30:23]}) - ({1'b0,X[30:23]});
+  assign swap = excExpFracX >= excExpFracY ? 1'b0 : 1'b1;
+  assign newX = swap == 1'b0 ? X : Y;
+  assign newY = swap == 1'b0 ? Y : X;
+  assign expX = newX[30:23];
+  assign excX = newX[33:32];
+  assign excY = newY[33:32];
+  assign signX = newX[31];
+  assign signY = newY[31];
+  assign EffSub = signX ^ signY;
+  assign sdsXsYExnXY = {signX,signY,excX,excY};
+  assign sdExnXY = {excX,excY};
+  // A zero operand contributes an all-zero fraction, otherwise the implicit
+  // leading 1 is made explicit.
+  assign fracY = excY == 2'b00 ? 24'b000000000000000000000000 : {1'b1,newY[22:0]};
+
+  // Exception field derived from the operand signs and exception fields only.
+  // Combinational block: blocking assignments.
+  always @(*) begin
+    case(sdsXsYExnXY)
+      6'b000000,6'b010000,6'b100000,6'b110000 : excRt = 2'b00;
+      6'b000101,6'b010101,6'b100101,6'b110101,6'b000100,6'b010100,6'b100100,6'b110100,6'b000001,6'b010001,6'b100001,6'b110001 : excRt = 2'b01;
+      6'b111010,6'b001010,6'b001000,6'b011000,6'b101000,6'b111000,6'b000010,6'b010010,6'b100010,6'b110010,6'b001001,6'b011001,6'b101001,6'b111001,6'b000110,6'b010110,6'b100110,6'b110110 : excRt = 2'b10;
+      default : excRt = 2'b11;
+    endcase
+  end
+
+  assign signR = (sdsXsYExnXY == 6'b100000 || sdsXsYExnXY == 6'b010000) ? 1'b0 : signX;
+
+  //-------------- cycle 0----------------
+  // Alignment distance, saturated to 26 when the exponents differ by 25 or more.
+  assign expDiff = swap == 1'b0 ? eXmeY : eYmeX;
+  assign shiftedOut = (expDiff >= 25) ? 1'b1 : 1'b0;
+  assign shiftVal = shiftedOut == 1'b0 ? expDiff[4:0] : 5'b11010;
+
+  FPAdder_8_23_uid2_RightShifter_l4 RightShifterComponent(
+    .R(shiftedFracY),
+    .S(shiftVal),
+    .X(fracY));
+
+  //--------------Synchro barrier, entering cycle 1----------------
+  // sticky = OR of the 24 bits shifted below the adder window
+  // (literal widened from 23 to 24 bits to match the slice).
+  assign sticky = (shiftedFracY_d1[23:0] == 24'b000000000000000000000000) ? 1'b0 : 1'b1;
+
+  //-------------- cycle 0----------------
+
+  //--------------Synchro barrier, entering cycle 1----------------
+
+  assign fracYfar = {1'b0,shiftedFracY_d1[49:24]};
+  // On an effective subtraction the aligned fraction is inverted; the +1 of
+  // the two's complement comes in through cInAddFar.
+  assign fracYfarXorOp = fracYfar ^ ({27{EffSub_d1}});
+  assign fracXfar = {2'b01,(newX_d1[22:0]),2'b00};
+  assign cInAddFar = EffSub_d1 & ~sticky;
+
+  IntAdder_27_f200_uid6_l4 fracAdder(
+    .Cin(cInAddFar),
+    .R(fracAddResult),
+    .X(fracXfar),
+    .Y(fracYfarXorOp));
+
+  assign fracGRS = {fracAddResult,sticky};
+  assign extendedExpInc = ( {2'b00,expX_d1} ) + 1'b1;
+
+  // Two-stage pipelined leading-zero counter / normaliser.
+  LZCShifter_28_to_28_counting_32_uid16_l4 LZC_component(
+    .clk(clk),
+    .rst(rst),
+    .seq_stall(seq_stall),
+    .Count(nZerosNew),
+    .I(fracGRS),
+    .O(shiftedFrac));
+
+  //--------------Synchro barrier, entering cycle 3----------------
+  assign updatedExp = extendedExpInc_d2 - ({5'b00000,nZerosNew});
+  // Count == 31 is what the LZC shifter reports for an all-zero sum.
+  assign eqdiffsign = nZerosNew == 5'b11111 ? 1'b1 : 1'b0;
+  assign expFrac = {updatedExp,shiftedFrac[26:3]};
+  //-------------- cycle 3----------------
+
+  assign stk = shiftedFrac[1] | shiftedFrac[0];
+  assign rnd = shiftedFrac[2];
+  assign grd = shiftedFrac[3];
+  assign lsb = shiftedFrac[4];
+  // 1 is added at the guard position (bit 0 of expFrac) except on an exact tie
+  // with an even lsb, where the result is left unchanged.
+  assign addToRoundBit = (lsb == 1'b0 && grd == 1'b1 && rnd == 1'b0 && stk == 1'b0) ? 1'b0 : 1'b1;
+
+  // The rounding adder registers its inputs, which forms the last pipeline stage.
+  IntAdder_34_f200_uid18_l4 roundingAdder(
+    .clk(clk),
+    .rst(rst),
+    .seq_stall(seq_stall),
+    .Cin(addToRoundBit),
+    .R(RoundedExpFrac),
+    .X(expFrac),
+    .Y(34'b0000000000000000000000000000000000));
+
+  //-------------- cycle 4----------------
+
+  assign upExc = RoundedExpFrac[33:32];
+  assign fracR = RoundedExpFrac[23:1];
+  assign expR = RoundedExpFrac[31:24];
+  assign exExpExc = {upExc,excRt_d4};
+
+  // Final exception field from the two exponent overflow bits and the
+  // exception field computed in cycle 0. Combinational: blocking assignments.
+  always @(*) begin
+    case((exExpExc))
+      4'b0000,4'b0100,4'b1000,4'b1100,4'b1001,4'b1101 : excRt2 = 2'b00;
+      4'b0001 : excRt2 = 2'b01;
+      4'b0010,4'b0110,4'b0101 : excRt2 = 2'b10;
+      default : excRt2 = 2'b11;
+    endcase
+  end
+
+  // An effective subtraction whose sum is all zeros is reported as zero.
+  assign excR = (eqdiffsign_d1 == 1'b1 && EffSub_d4 == 1'b1) ? 2'b00 : excRt2;
+  assign computedR = {excR,signR_d4,expR,fracR};
+  assign R = computedR;
+
+endmodule
+
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/FPAddersReduceTree.sv b/hw/hdl/operators/dtengine/gbm/inferCore/FPAddersReduceTree.sv
new file mode 100644
index 00000000..f4d88c2d
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/FPAddersReduceTree.sv
@@ -0,0 +1,156 @@
+
+/*
+ * Copyright 2019 - 2020 Systems Group, ETH Zurich
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+// Reduces a vector of NUM_FP_POINTS 32-bit floating-point words to a single
+// sum with a binary tree of FPAdder_8_23_uid2_l3 adders, then hands the tree
+// result to an FPAggregator instance together with delayed valid/last flags.
+//
+// Parameters
+//   NUM_FP_POINTS    number of input words.
+//                    NOTE(review): the generate loops only build a complete
+//                    tree when this is a power of two -- confirm with callers.
+//   FP_ADDER_LATENCY only scales the valid/last delay line; the adders are
+//                    always the "_l3" variant, whatever this value is.
+// Ports
+//   fp_in_vector        input words
+//   fp_in_vector_valid  only element [0] is used
+//   fp_in_vector_last   only element [0] is used
+//   reduce_out*         driven by / fed to the FPAggregator instance
+module FPAddersReduceTree #(parameter NUM_FP_POINTS = 8,
+                            parameter FP_ADDER_LATENCY = 3)
+  (
+    input wire clk,
+    input wire rst_n,
+
+    input wire [31:0] fp_in_vector[NUM_FP_POINTS-1:0],
+    input wire fp_in_vector_valid[NUM_FP_POINTS-1:0],
+    input wire fp_in_vector_last[NUM_FP_POINTS-1:0],
+
+    output wire [31:0] reduce_out,
+    output wire reduce_out_valid,
+    input wire reduce_out_ready
+  );
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////// Local Parameters /////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Number of adder levels: ceil(log2(NUM_FP_POINTS)) up to 32 points, and 6 for
+// any larger value.
+localparam NUM_TREE_LEVELS = (NUM_FP_POINTS <= 2 )? 1 :
+                             (NUM_FP_POINTS <= 4 )? 2 :
+                             (NUM_FP_POINTS <= 8 )? 3 :
+                             (NUM_FP_POINTS <= 16)? 4 :
+                             (NUM_FP_POINTS <= 32)? 5 : 6;
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////// Signals Declarations /////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// tree_data[level][adder][operand]: 34-bit adder words (2 exception bits in
+// front of the 32-bit value). Level 0 holds the converted inputs; the adder at
+// [level][k] writes its result to [level+1][k>>1][k%2].
+wire [33:0] tree_data[NUM_TREE_LEVELS:0][(NUM_FP_POINTS>>1)-1:0][1:0];
+wire [31:0] tree_out;
+
+wire fp_in_valid_delayed;
+wire fp_in_last_delayed;
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////// FP Adders Tree /////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// first level of tree adders
+generate
+  genvar i;
+  for (i = 0; i < (NUM_FP_POINTS>>1); i = i + 1)
+  begin:treeLevel1
+
+    // Prefix each input with a 2-bit exception field: 2'b00 when the word is
+    // all zeros, 2'b01 otherwise.
+    assign tree_data[0][i][0] = {1'b0, {|(fp_in_vector[i<<1])}, fp_in_vector[i<<1] };
+    assign tree_data[0][i][1] = {1'b0, {|(fp_in_vector[(i<<1)+1])}, fp_in_vector[(i<<1)+1] };
+
+    FPAdder_8_23_uid2_l3 fpadder_1_x(
+      .clk (clk),
+      .rst (~rst_n),
+      .seq_stall (1'b0),
+      .X (tree_data[0][i][0]),
+      .Y (tree_data[0][i][1]),
+      .R (tree_data[1][i>>1][i%2])
+    );
+  end
+endgenerate
+
+// the rest of levels
+generate
+  genvar j;
+  for (i = 1; i < NUM_TREE_LEVELS; i = i + 1)
+  begin:treeLevels
+    // Level i instantiates NUM_FP_POINTS >> (i+1) adders.
+    for (j = 0; j < (NUM_FP_POINTS >> (i+1)); j = j + 1)
+    begin:levelAdders
+      FPAdder_8_23_uid2_l3 fpadder_i_x(
+        .clk (clk),
+        .rst (~rst_n),
+        .seq_stall (1'b0),
+        .X (tree_data[i][j][0]),
+        .Y (tree_data[i][j][1]),
+        .R (tree_data[i+1][j>>1][j%2])
+      );
+    end
+  end
+endgenerate
+
+// delay valid and last
+// Only element [0] of the valid/last vectors is delayed, by
+// FP_ADDER_LATENCY cycles per tree level.
+// NOTE(review): the adders above are fixed to the 3-cycle variant, so this
+// presumably only lines up with the data when FP_ADDER_LATENCY is 3 -- confirm.
+delay #(.DATA_WIDTH(1),
+        .DELAY_CYCLES(FP_ADDER_LATENCY*NUM_TREE_LEVELS)
+  ) fpadder_delay(
+
+    .clk (clk),
+    .rst_n (rst_n),
+    .data_in (fp_in_vector_last[0]), //
+    .data_in_valid (fp_in_vector_valid[0]),
+    .data_out (fp_in_last_delayed),
+    .data_out_valid (fp_in_valid_delayed)
+  );
+
+// assign tree output
+// The root sits at tree_data[NUM_TREE_LEVELS][0][0]; its value is replaced by
+// 0 when the adder marks the result with exception field 2'b00.
+assign tree_out = (tree_data[NUM_TREE_LEVELS][0][0][33:32] == 2'b00)? 0 : tree_data[NUM_TREE_LEVELS][0][0][31:0];
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////// FP Aggregator /////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// The aggregator is configured with its own FP_ADDER_LATENCY of 2, independent
+// of this module's parameter; its fp_in_ready output is left unconnected.
+FPAggregator #(.FP_ADDER_LATENCY(2))
+
+  tree_aggregator(
+
+    .clk (clk),
+    .rst_n (rst_n),
+
+    .fp_in (tree_out),
+    .fp_in_valid (fp_in_valid_delayed),
+    .fp_in_last (fp_in_last_delayed),
+    .fp_in_ready (),
+
+    .aggreg_out (reduce_out),
+    .aggreg_out_valid (reduce_out_valid),
+    .aggreg_out_ready (reduce_out_ready)
+  );
+
+
+
+
+
+
+endmodule
+
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/FPAddersReduceTree_sync.sv b/hw/hdl/operators/dtengine/gbm/inferCore/FPAddersReduceTree_sync.sv
new file mode 100644
index 00000000..6d797217
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/FPAddersReduceTree_sync.sv
@@ -0,0 +1,286 @@
+
+/*
+ * Copyright 2019 - 2020 Systems Group, ETH Zurich
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ 
*
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+// FPAddersReduceTree_sync: reduces 28 input words to one through five levels of two-input
+// FPAdder_8_23_uid2_l3 cores (14, 7, 3, 2 and 1 adders) and hands the result to an FPAggregator.
+module FPAddersReduceTree_sync #(parameter NUM_FP_POINTS = 8,    // NOTE(review): the loops below index fp_in_vector[0..27] regardless of this value
+                                 parameter FP_ADDER_LATENCY = 3) // only used to size the valid/last delay line
+   (
+    input wire clk,
+    input wire rst_n,                                    // active low; inverted for the adder cores' rst port
+
+    input wire [31:0] fp_in_vector[NUM_FP_POINTS-1:0],
+    input wire fp_in_vector_valid[NUM_FP_POINTS-1:0],    // only element [0] is read
+    input wire fp_in_vector_last[NUM_FP_POINTS-1:0],     // only element [0] is read
+
+    output wire [31:0] reduce_out,
+    output wire reduce_out_valid,
+    input wire reduce_out_ready
+    );
+// Tree operands are 34 bits wide: {1'b0, |word, word[31:0]}, so bit 32 is set only for a non-zero
+// input word. Presumably bits [33:32] are the adder core's exception field - TODO confirm against
+// the FPAdder_8_23_uid2_l3 source.
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////            Signals Declarations             /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// tree_data_il<N> / tree_data_ol<N>: inputs / outputs of the level-N adders
+wire [33:0] tree_data_il1[27:0];
+wire [33:0] tree_data_ol1[13:0];
+
+wire [33:0] tree_data_il2[13:0];
+wire [33:0] tree_data_ol2[6:0];
+
+wire [33:0] tree_data_il3[6:0];
+wire [33:0] tree_data_ol3[3:0];     // only [2:0] are driven: level 3 has three adders
+
+wire [33:0] tree_data_il4[3:0];
+wire [33:0] tree_data_ol4[1:0];
+
+wire [33:0] tree_data_il5[1:0];
+wire [33:0] tree_data_ol5;          // output of the final adder
+
+reg [31:0] tree_out;                // registered 32-bit result passed to the aggregator
+
+wire fp_in_valid_delayed;
+wire fp_in_last_delayed;
+
+genvar i;
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////              FP Adders Tree                 /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Level 0: introduce delay on even words
+// (odd words are wired straight through; every delay instance in this tree only uses its data path)
+generate for (i = 0; i < 28; i=i+1) begin : Level0
+    if(i%2 == 0) begin
+        delay #(.DATA_WIDTH(34),
+                .DELAY_CYCLES(1)
+        ) level0_regs
+        (
+            .clk (clk),
+            .rst_n (rst_n),
+            .data_in ({1'b0, {|(fp_in_vector[i])}, fp_in_vector[i]}), // widen to the 34-bit operand format
+            .data_in_valid (1'b0),      // assumes delay shifts data_in every cycle regardless of this input - TODO confirm
+            .data_out (tree_data_il1[i]),
+            .data_out_valid ()
+        );
+    end
+    else begin
+        assign tree_data_il1[i] = {1'b0, {|(fp_in_vector[i])}, fp_in_vector[i] };
+    end
+end
+endgenerate
+
+// Level 1: Adders
+generate for (i = 0; i < 28; i = i + 2) begin:treeLevel1
+    // 14 adders; results at even index i/2 (i%4 == 0) pass through a 2-cycle delay line
+    FPAdder_8_23_uid2_l3 fpadder_l1
+    (
+        .clk (clk),
+        .rst (~rst_n),
+        .seq_stall (1'b0),
+        .X (tree_data_il1[i]),
+        .Y (tree_data_il1[i+1]),
+        .R (tree_data_ol1[i/2])
+    );
+
+    if(i%4 == 0) begin
+        delay #(.DATA_WIDTH(34),
+                .DELAY_CYCLES(2)
+        ) level1_regs
+        (
+            .clk (clk),
+            .rst_n (rst_n),
+            .data_in (tree_data_ol1[i/2]), // data path only
+            .data_in_valid (1'b0),
+            .data_out (tree_data_il2[i/2]),
+            .data_out_valid ()
+        );
+    end
+    else begin
+        assign tree_data_il2[i/2] = tree_data_ol1[i/2];
+    end
+end
+endgenerate
+
+// Level 2: Adders
+generate for (i = 0; i < 14; i = i + 2) begin:treeLevel2
+    // 7 adders; results 0, 2 and 4 are delayed by 4 cycles, result 6 (i == 12) is not
+    FPAdder_8_23_uid2_l3 fpadder_l2
+    (
+        .clk (clk),
+        .rst (~rst_n),
+        .seq_stall (1'b0),
+        .X (tree_data_il2[i]),
+        .Y (tree_data_il2[i+1]),
+        .R (tree_data_ol2[i/2])
+    );
+
+    if((i%4 == 0) && (i < 12)) begin
+        delay #(.DATA_WIDTH(34),
+                .DELAY_CYCLES(4)
+        ) level2_regs
+        (
+            .clk (clk),
+            .rst_n (rst_n),
+            .data_in (tree_data_ol2[i/2]), // data path only
+            .data_in_valid (1'b0),
+            .data_out (tree_data_il3[i/2]),
+            .data_out_valid ()
+        );
+    end
+    else begin
+        assign tree_data_il3[i/2] = tree_data_ol2[i/2];
+    end
+end
+endgenerate
+// Level 3: Adders
+generate for (i = 0; i < 7; i = i + 2) begin:treeLevel3
+if(i < 6) begin                         // i = 0, 2, 4: three adders
+    FPAdder_8_23_uid2_l3 fpadder_l3
+    (
+        .clk (clk),
+        .rst (~rst_n),
+        .seq_stall (1'b0),
+        .X (tree_data_il3[i]),
+        .Y (tree_data_il3[i+1]),
+        .R (tree_data_ol3[i/2])
+    );
+
+    if(i%4 == 0) begin
+        delay #(.DATA_WIDTH(34),
+                .DELAY_CYCLES( (i == 0)?8:1)   // result 0 waits 8 cycles, result 2 (i == 4) waits 1
+        ) level3_regs
+        (
+            .clk (clk),
+            .rst_n (rst_n),
+            .data_in (tree_data_ol3[i/2]), // data path only
+            .data_in_valid (1'b0),
+            .data_out (tree_data_il4[i/2]),
+            .data_out_valid ()
+        );
+    end
+    else begin
+        assign tree_data_il4[i/2] = tree_data_ol3[i/2];
+    end
+end
+else begin                              // i == 6: the unpaired seventh operand skips this level
+    assign tree_data_il4[i/2] = tree_data_il3[i];
+end
+end
+endgenerate
+
+// Level 4: Adders
+generate for (i = 0; i < 4; i = i + 2) begin:treeLevel4
+    // 2 adders; result 0 is delayed by 9 cycles, result 1 is wired through
+    FPAdder_8_23_uid2_l3 fpadder_l4
+    (
+        .clk (clk),
+        .rst (~rst_n),
+        .seq_stall (1'b0),
+        .X (tree_data_il4[i]),
+        .Y (tree_data_il4[i+1]),
+        .R (tree_data_ol4[i/2])
+    );
+
+    if(i%4 == 0) begin
+        delay #(.DATA_WIDTH(34),
+                .DELAY_CYCLES(9)
+        ) level4_regs
+        (
+            .clk (clk),
+            .rst_n (rst_n),
+            .data_in (tree_data_ol4[i/2]), // data path only
+            .data_in_valid (1'b0),
+            .data_out (tree_data_il5[i/2]),
+            .data_out_valid ()
+        );
+    end
+    else begin
+        assign tree_data_il5[i/2] = tree_data_ol4[i/2];
+    end
+end
+endgenerate
+
+// Level 5: Adders
+
+    FPAdder_8_23_uid2_l3 fpadder_l5
+    (
+        .clk (clk),
+        .rst (~rst_n),
+        .seq_stall (1'b0),
+        .X (tree_data_il5[0]),
+        .Y (tree_data_il5[1]),
+        .R (tree_data_ol5)
+    );
+
+// delay valid and last
+delay #(.DATA_WIDTH(1),
+        .DELAY_CYCLES(FP_ADDER_LATENCY*5 + 1 + 24)   // path of fp_in_vector[0]: 5 adders + tree_out register + delay lines 1+2+4+8+9
+    ) fpadder_delay(
+    // NOTE(review): the sum above assumes each adder core takes FP_ADDER_LATENCY cycles - confirm
+    .clk (clk),
+    .rst_n (rst_n),
+    .data_in (fp_in_vector_last[0]), // 'last' flag travels as the delayed data bit
+    .data_in_valid (fp_in_vector_valid[0]),
+    .data_out (fp_in_last_delayed),
+    .data_out_valid (fp_in_valid_delayed)
+    );
+
+// assign tree output
+always@(posedge clk) begin
+    tree_out <= (tree_data_ol5[33:32] == 2'b00)? 0 : tree_data_ol5[31:0];   // bits [33:32] == 00 forces an all-zero word
+end
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////              FP Aggregator                  /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+FPAggregator #(.FP_ADDER_LATENCY(3))
+
+    tree_aggregator(
+
+    .clk (clk),
+    .rst_n (rst_n),
+
+    .fp_in (tree_out),
+    .fp_in_valid (fp_in_valid_delayed),
+    .fp_in_last (fp_in_last_delayed),
+    .fp_in_ready (),                     // left unconnected: this module has no way to stall its inputs
+
+    .aggreg_out (reduce_out),
+    .aggreg_out_valid (reduce_out_valid),
+    .aggreg_out_ready (reduce_out_ready)
+    );
+
+
+
+
+
+
+endmodule
+
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/FPAggregator.sv b/hw/hdl/operators/dtengine/gbm/inferCore/FPAggregator.sv
new file mode 100644
index 00000000..4124cd2f
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/FPAggregator.sv
@@ -0,0 +1,174 @@
+
+/*
+ * Copyright 2019 - 2020 Systems Group, ETH Zurich
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+// FPAggregator: accumulates a stream of words in one adder core and emits one result per 'last' flag.
+module FPAggregator #(parameter FP_ADDER_LATENCY = 2) (   // NOTE(review): the core instantiated below is the "_l3" variant; confirm its latency matches this value
+
+    input wire clk,
+    input wire rst_n,                  // active low
+
+    input wire [31:0] fp_in,
+    input wire fp_in_valid,            // write strobe of the input FIFO
+    input wire fp_in_last,             // marks the final word of one accumulation
+    output wire fp_in_ready,           // low while the input FIFO reports full
+
+    output wire [31:0] aggreg_out,
+    output wire aggreg_out_valid,
+    input wire aggreg_out_ready        // read strobe of the output FIFO
+    );
+// Operation: a word is popped from the input FIFO only while aggregator_ready is high. It is
+// applied to the adder together with prev_aggreg_value; FP_ADDER_LATENCY cycles later the delayed
+// valid/last pair either stores the adder result as the new running value or, on 'last', writes
+// it to the output FIFO and clears the running value.
+wire aggreg_in_fifo_full;
+wire aggreg_in_fifo_valid;
+wire aggreg_in_fifo_re;
+wire [32:0] aggreg_in_fifo_dout;       // {last, word}
+
+wire [33:0] input_A;                   // FIFO word widened to the adder's 34-bit operand format
+reg [33:0] prev_aggreg_value;          // running value fed back into the adder
+wire [33:0] aggreg_value;              // adder output
+reg [3:0] fpadder_latency_count;       // cycles left until the adder can take the next word
+
+
+wire fp_in_valid_delayed;
+wire fp_in_last_delayed;
+wire aggregator_ready;
+
+reg [31:0] aggreg_out_d1;
+reg aggreg_out_valid_d1;
+
+wire aggreg_out_fifo_almfull;
+////////////////////////////////////////////////////////////////////////////////
+assign fp_in_ready = ~aggreg_in_fifo_full;
+
+quick_fifo #(.FIFO_WIDTH(32+1),
+             .FIFO_DEPTH_BITS(9),
+             .FIFO_ALMOSTFULL_THRESHOLD(508)
+    ) aggreg_in_fifo (
+    .clk (clk),
+    .reset_n (rst_n),
+    .din ({fp_in_last, fp_in}),
+    .we (fp_in_valid),
+
+    .re (aggreg_in_fifo_re),
+    .dout (aggreg_in_fifo_dout),
+    .empty (),
+    .valid (aggreg_in_fifo_valid),
+    .full (aggreg_in_fifo_full),
+    .count (),
+    .almostfull ()                     // unused: back-pressure is taken from 'full'
+    );
+
+assign aggreg_in_fifo_re = aggregator_ready;
+////////////////////////////////////////////////////////////////////////////////
+
+always @(posedge clk) begin
+    if (~rst_n) begin
+        // reset
+        prev_aggreg_value <= 0;
+        fpadder_latency_count <= 0;
+        aggreg_out_valid_d1 <= 1'b0;
+        aggreg_out_d1 <= 0;
+    end
+    else begin
+        if(aggregator_ready & aggreg_in_fifo_valid) begin
+            fpadder_latency_count <= FP_ADDER_LATENCY;    // a word was accepted: block further reads
+        end
+        else if(!(fpadder_latency_count == 0)) begin
+            fpadder_latency_count <= fpadder_latency_count - 1'b1;
+        end
+        //--------------------- Do aggregation --------------------------//
+        if(fp_in_valid_delayed) begin
+            if(~fp_in_last_delayed) begin
+                prev_aggreg_value <= aggreg_value;
+            end
+            else begin
+                prev_aggreg_value <= 0;                   // start the next accumulation from zero
+            end
+        end
+
+        //--------------------- Tuple Output ----------------------------//
+        aggreg_out_valid_d1 <= 1'b0;                      // default; overridden below on 'last'
+
+        if(fp_in_valid_delayed & fp_in_last_delayed) begin
+            if(aggreg_value[33:32] == 2'b00) begin        // bits [33:32] == 00 forces an all-zero word
+                aggreg_out_d1 <= 0;
+            end
+            else begin
+                aggreg_out_d1 <= aggreg_value[31:0];
+            end
+
+            aggreg_out_valid_d1 <= 1'b1;
+        end
+    end
+end
+
+assign aggregator_ready = (fpadder_latency_count == 0) & ~aggreg_out_fifo_almfull;
+
+assign input_A = {1'b0, {|(aggreg_in_fifo_dout[31:0])}, aggreg_in_fifo_dout[31:0]};
+
+FPAdder_8_23_uid2_l3 fpadder(
+    .clk (clk),
+    .rst (~rst_n),
+    .seq_stall (1'b0),
+    .X (input_A),
+    .Y (prev_aggreg_value),
+    .R (aggreg_value)
+    );
+
+// delay valid, last with FPAdder Latency
+delay #(.DATA_WIDTH(1),
+        .DELAY_CYCLES(FP_ADDER_LATENCY)
+    ) fpadder_delay(
+
+    .clk (clk),
+    .rst_n (rst_n),
+    .data_in (aggreg_in_fifo_dout[32]), // 'last' flag of the word being accepted
+    .data_in_valid (aggreg_in_fifo_valid & aggregator_ready),
+    .data_out (fp_in_last_delayed),
+    .data_out_valid (fp_in_valid_delayed)
+    );
+
+// Result FIFO; its almost-full flag stalls the accumulator through aggregator_ready
+quick_fifo #(.FIFO_WIDTH(32),
+             .FIFO_DEPTH_BITS(9),
+             .FIFO_ALMOSTFULL_THRESHOLD(490)
+    ) aggreg_out_fifo (
+    .clk (clk),
+    .reset_n (rst_n),
+    .din (aggreg_out_d1),
+    .we (aggreg_out_valid_d1),
+
+    .re (aggreg_out_ready),
+    .dout (aggreg_out),
+    .empty (),
+    .valid (aggreg_out_valid),
+    .full (),
+    .count (),
+    .almostfull (aggreg_out_fifo_almfull)
+    );
+
+
+
+
+endmodule
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/LineRateConvertor.sv b/hw/hdl/operators/dtengine/gbm/inferCore/LineRateConvertor.sv
new file mode 100644
index 00000000..dfbb6f8a
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/LineRateConvertor.sv
@@ -0,0 +1,134 @@
+
+/*
+ * Copyright 2019 - 2020 Systems Group, ETH Zurich
+ *
+ *
Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+import DTPackage::*;
+
+// LineRateConvertor: buffers 512-bit lines in a FIFO and replays each one as 64-bit words.
+module LineRateConvertor #(parameter CU_ID = 0 )          // data lines are accepted only when data_line_in_cu == CU_ID
+    (
+    input wire clk,
+    input wire rst_n,                                      // active low
+
+    input wire [511:0] data_line_in,
+    input wire data_line_in_valid,
+    input wire [2:0] data_line_in_last_valid_pos,          // index of the final 64-bit word when data_line_in_last is set
+    input wire data_line_in_last,
+    input wire data_line_in_ctrl,                          // ctrl/prog lines are stored regardless of data_line_in_cu
+    input wire data_line_in_prog,
+    input wire [NUM_PUS_PER_CLUSTER_BITS-1:0] data_line_in_pu,
+    input wire [NUM_DTPU_CLUSTERS_BITS-1:0] data_line_in_cu,
+    output wire data_line_in_ready,                        // low while the line FIFO is almost full
+
+    output reg [DATA_LINE_WIDTH-1:0] data_line_out,        // loaded with one 64-bit word per cycle
+    output reg data_line_out_valid,
+    output reg data_line_out_ctrl,
+    output reg data_line_out_last,
+    output reg data_line_out_prog,
+    output reg [NUM_PUS_PER_CLUSTER_BITS-1:0] data_line_out_pu,
+    input wire data_line_out_ready
+);
+
+// FIFO entry layout, MSB first: {pu, prog, ctrl, last, valid, last_valid_pos[2:0], data[511:0]}
+wire data_line_fifo_we;
+wire data_line_fifo_almfull;
+wire data_line_fifo_valid;
+wire data_line_fifo_re;
+
+wire [63:0] data_line_array[7:0];                          // the FIFO head line split into eight words
+
+wire [511:0] data_line_in_fifo_data;
+wire data_line_in_fifo_valid;
+wire [2:0] data_line_in_fifo_last_valid_pos;
+wire data_line_in_fifo_last;
+wire data_line_in_fifo_ctrl;
+wire data_line_in_fifo_prog;
+wire [NUM_PUS_PER_CLUSTER_BITS-1:0] data_line_in_fifo_pu;
+
+reg [2:0] curr_word;                                       // index of the word currently being sent
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+assign data_line_fifo_we = (data_line_in_valid && (data_line_in_cu == CU_ID)) || data_line_in_ctrl || data_line_in_prog;
+assign data_line_in_ready = ~data_line_fifo_almfull;
+
+// Input Line FIFO. The width is derived from the PU-id width so a wider id is never truncated.
+quick_fifo #(.FIFO_WIDTH(512+3+1+1+1+1+NUM_PUS_PER_CLUSTER_BITS),
+             .FIFO_DEPTH_BITS(9),
+             .FIFO_ALMOSTFULL_THRESHOLD(500)
+    ) data_line_fifo (
+    .clk (clk),
+    .reset_n (rst_n),
+    .din ({data_line_in_pu, data_line_in_prog, data_line_in_ctrl, data_line_in_last, data_line_in_valid, data_line_in_last_valid_pos, data_line_in}),
+    .we (data_line_fifo_we),
+
+    .re (data_line_fifo_re),
+    .dout ({data_line_in_fifo_pu, data_line_in_fifo_prog, data_line_in_fifo_ctrl, data_line_in_fifo_last, data_line_in_fifo_valid, data_line_in_fifo_last_valid_pos, data_line_in_fifo_data}),
+    .empty (),
+    .valid (data_line_fifo_valid),
+    .full (),
+    .count (),
+    .almostfull (data_line_fifo_almfull)
+    );
+
+
+// Put the input data line in an array
+genvar i;
+generate for (i = 0; i < 8; i=i+1) begin
+    assign data_line_array[i] = data_line_in_fifo_data[64*i+63:64*i];
+end
+endgenerate
+
+// Select output 64-bit word
+always@(posedge clk) begin
+    data_line_out <= data_line_array[ curr_word ];
+    data_line_out_valid <= data_line_fifo_valid && data_line_in_fifo_valid && data_line_out_ready;
+    data_line_out_last <= data_line_in_fifo_last && (curr_word == data_line_in_fifo_last_valid_pos);
+    data_line_out_ctrl <= data_line_fifo_valid && data_line_in_fifo_ctrl;
+    data_line_out_prog <= data_line_fifo_valid && data_line_in_fifo_prog;
+    data_line_out_pu <= data_line_in_fifo_pu;
+end
+
+// data_line_fifo_re: pop the line once its final word (last_valid_pos on a 'last' line, else word 7) is sent
+assign data_line_fifo_re = data_line_out_ready && ( (data_line_in_fifo_last && (curr_word == data_line_in_fifo_last_valid_pos)) || (curr_word == 3'b111) );
+
+// curr_word calculation
+always@(posedge clk) begin
+    if(~rst_n) begin
+        curr_word <= 3'b000;
+    end
+    else begin
+        if(data_line_out_ready && data_line_fifo_valid) begin
+            if(data_line_in_fifo_last && (curr_word == data_line_in_fifo_last_valid_pos) ) begin
+                curr_word <= 3'b000;      // final word of a 'last' line: restart for the next line
+            end
+            else if(curr_word == 3'b111) begin
+                curr_word <= 3'b000;
+            end
+            else begin
+                curr_word <= curr_word + 1'b1;
+            end
+        end
+    end
+end
+
+
+endmodule
\ No newline at end of file
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/RegBasedFIFO.sv b/hw/hdl/operators/dtengine/gbm/inferCore/RegBasedFIFO.sv
new file mode 100644
index 00000000..19b8a46e
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/RegBasedFIFO.sv
@@ -0,0 +1,122 @@
+
+/*
+ * Copyright 2019 - 2020 Systems Group, ETH Zurich
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated
documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+// RegBasedFIFO: small FIFO built from 2**FIFO_DEPTH_BITS registers; entry 0 drives the output.
+module RegBasedFIFO #(parameter FIFO_WIDTH = 32,
+                      parameter FIFO_DEPTH_BITS = 2      // NOTE(review): entry FIFO_NUM_REGS-2 is indexed below, so 0 is not usable
+    )(
+    input wire clk,
+    input wire rst_n,                                      // active low; clears the valid flags only
+
+    input wire [FIFO_WIDTH-1:0] data_in,
+    input wire data_in_valid,
+    output wire data_in_ready,                             // low once entry FIFO_NUM_REGS-2 holds data
+
+    output wire [FIFO_WIDTH-1:0] data_out,
+    output wire data_out_valid,
+    input wire data_out_ready                              // shifts the entries toward entry 0, whether or not data_out_valid is set
+    );
+// Each entry i has a data register and a valid flag. While data_out_ready is high, a valid
+// entry loads from entry i+1 if that one is valid, otherwise from data_in. An empty entry
+// tracks data_in and becomes valid under the per-entry conditions below.
+localparam FIFO_NUM_REGS = 2**FIFO_DEPTH_BITS;
+
+
+reg [FIFO_WIDTH-1:0] fifo_reg_data[FIFO_NUM_REGS-1:0];
+reg fifo_reg_valid[FIFO_NUM_REGS-1:0];
+
+// Last Reg in the FIFO
+// (highest index: nothing behind it to shift from, so it only ever loads data_in)
+always@(posedge clk) begin
+    // data
+    if(data_out_ready || ~fifo_reg_valid[FIFO_NUM_REGS-1]) begin
+        fifo_reg_data[FIFO_NUM_REGS-1] <= data_in;
+    end
+    // valid
+    if(~rst_n) begin
+        fifo_reg_valid[FIFO_NUM_REGS-1] <= 1'b0;
+    end
+    else begin
+        if(data_out_ready) begin
+            fifo_reg_valid[FIFO_NUM_REGS-1] <= 1'b0;      // its word moves to the entry below
+        end
+        else if(!fifo_reg_valid[FIFO_NUM_REGS-1] && fifo_reg_valid[FIFO_NUM_REGS-2]) begin
+            fifo_reg_valid[FIFO_NUM_REGS-1] <= data_in_valid;
+        end
+    end
+end
+
+// Rest of Regs
+genvar i;
+
+generate for (i = 0; i < FIFO_NUM_REGS-1; i=i+1) begin: fifo_regs
+    // data and valid flag of entry i
+    always@(posedge clk) begin
+        // Data
+        if(~fifo_reg_valid[i]) begin
+            fifo_reg_data[i] <= data_in;                  // empty entry follows the input
+        end
+        else if(data_out_ready) begin
+            if(fifo_reg_valid[i+1]) begin
+                fifo_reg_data[i] <= fifo_reg_data[i+1];   // shift from the entry above
+            end
+            else begin
+                fifo_reg_data[i] <= data_in;
+            end
+        end
+        // valid
+        if(~rst_n) begin
+            fifo_reg_valid[i] <= 1'b0;
+        end
+        else if(~fifo_reg_valid[i]) begin
+            if(i == 0) begin
+                fifo_reg_valid[i] <= data_in_valid;
+            end
+            else begin
+                if(!data_out_ready && fifo_reg_valid[i-1]) begin   // for i == 0 this branch is never taken, but index i-1 is still elaborated
+                    fifo_reg_valid[i] <= data_in_valid;
+                end
+            end
+        end
+        else if(data_out_ready) begin
+            if(fifo_reg_valid[i+1]) begin
+                fifo_reg_valid[i] <= 1'b1;
+            end
+            else begin
+                fifo_reg_valid[i] <= data_in_valid;
+            end
+        end
+    end
+end
+endgenerate
+
+
+// Interface outputs
+assign data_in_ready = ~fifo_reg_valid[FIFO_NUM_REGS-2];
+assign data_out = fifo_reg_data[0];
+assign data_out_valid = fifo_reg_valid[0];
+
+
+
+
+
+endmodule
\ No newline at end of file
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/Tree_Memory.sv b/hw/hdl/operators/dtengine/gbm/inferCore/Tree_Memory.sv
new file mode 100644
index 00000000..a01e3134
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/Tree_Memory.sv
@@ -0,0 +1,192 @@
+
+/*
+ * Copyright 2019 - 2020 Systems Group, ETH Zurich
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+// Tree_Memory: two 32-bit RAM banks addressed as one array of 32-bit words. Address bit 0 picks
+// the bank (0 -> bank 1, 1 -> bank 2) and bits [10:1] pick the row. Port A writes one 64-bit row
+// and reads 64 bits starting at any word address; port B reads a single 32-bit word.
+// NOTE(review): din[63:32], addr_port_x[10:1] and the 64-bit dout1 values are fixed in the body,
+// so the defaults below (32 / 10) do not cover them; instances presumably override them - confirm.
+module Tree_Memory #(
+    parameter DATA_WIDTH = 32,
+    parameter ADDR_WIDTH = 10
+) (
+    input wire clk,
+    input wire rst_n,                          // active low; clears the valid pipelines only
+    input wire we,                             // port A write enable
+    input wire rea,                            // port A read enable
+    input wire reb,                            // port B read enable
+    input wire [ADDR_WIDTH-1:0] addr_port_b,
+    input wire [ADDR_WIDTH-1:0] addr_port_a,
+    input wire [DATA_WIDTH-1:0] din,
+    output reg [DATA_WIDTH-1:0] dout1,         // port A read data
+    output reg valid_out1,                     // rea delayed by two clock cycles
+    output reg [31:0] dout2,                   // port B read data
+    output reg valid_out2                      // reb delayed by two clock cycles
+);
+
+
+// Port A
+reg rea_d1;
+reg addr_port_a_d1;                            // address bit 0, registered alongside the RAM access
+
+wire [31:0] dout_a_1;
+wire [31:0] dout_a_2;
+
+wire [9:0] addr_a_1;
+wire [9:0] addr_a_2;
+
+// Port B
+reg reb_d1;
+reg addr_port_b_d1;
+
+wire [31:0] dout_b_1;
+wire [31:0] dout_b_2;
+
+
+// Bank 1: lower half of the written row (din[31:0])
+bram_1_in_2_out bram1in2out_inst_1 (
+    .clk ( clk ),
+    .da ( din[31:0] ),
+    .ena (rea || we),
+    .addra ( addr_a_1 ),
+    .wea ( we ),
+    .qa ( dout_a_1 ),
+
+    .enb (reb),
+    .db ( 0 ),
+    .addrb ( addr_port_b[10:1] ),
+    .web ( 1'b0 ),                             // port B is read-only
+    .qb ( dout_b_1 )
+    );
+// Bank 2: upper half of the written row (din[63:32])
+    bram_1_in_2_out bram1in2out_inst_2 (
+    .clk ( clk ),
+    .da ( din[63:32] ),
+    .ena (rea || we),
+    .addra ( addr_a_2 ),
+    .wea ( we ),
+    .qa ( dout_a_2 ),
+
+    .enb (reb),
+    .db ( 0 ),
+    .addrb ( addr_port_b[10:1] ),
+    .web ( 1'b0 ),
+    .qb ( dout_b_2 )
+    );
+
+
+// Disabled code kept from the original source: a four-bank variant with a different port naming.
+/*bram_1_in_2_out bram1in2out_inst_1 (
+    .address_a ( addr_a_1 ),
+    .address_b ( addr_port_b[10:2] ),
+    .clock ( clk ),
+    .data_a ( din[31:0] ),
+    .data_b ( 0 ),
+    .wren_a ( we ),
+    .wren_b ( 1'b0 ),
+    .q_a ( dout_a_1 ),
+    .q_b ( dout_b_1 )
+    );
+
+bram_1_in_2_out bram1in2out_inst_2 (
+    .address_a ( addr_a_2 ),
+    .address_b ( addr_port_b[10:2] ),
+    .clock ( clk ),
+    .data_a ( din[63:32] ),
+    .data_b ( 0 ),
+    .wren_a ( we ),
+    .wren_b ( 1'b0 ),
+    .q_a ( dout_a_2 ),
+    .q_b ( dout_b_2 )
+    );
+
+bram_1_in_2_out bram1in2out_inst_3 (
+    .address_a ( addr_a_3 ),
+    .address_b ( addr_port_b[10:2] ),
+    .clock ( clk ),
+    .data_a ( din[95:64] ),
+    .data_b ( 0 ),
+    .wren_a ( we ),
+    .wren_b ( 1'b0 ),
+    .q_a ( dout_a_3 ),
+    .q_b ( dout_b_3 )
+    );
+
+bram_1_in_2_out bram1in2out_inst_4 (
+    .address_a ( addr_a_4 ),
+    .address_b ( addr_port_b[10:2] ),
+    .clock ( clk ),
+    .data_a ( din[127:96] ),
+    .data_b ( 0 ),
+    .wren_a ( we ),
+    .wren_b ( 1'b0 ),
+    .q_a ( dout_a_4 ),
+    .q_b ( dout_b_4 )
+    );
+*/
+//------------------------ Port A ---------------------------//
+// rd_addr_a: for a read at an odd word address the second word lives in bank 1 on the next row
+assign addr_a_1 = (we || (addr_port_a[0] == 1'b0))? addr_port_a[10:1] : addr_port_a[10:1] + 1'b1;
+assign addr_a_2 = addr_port_a[10:1];
+// valid pipeline and output word ordering
+always @(posedge clk) begin
+    addr_port_a_d1 <= addr_port_a[0];
+
+    if(~rst_n) begin
+        valid_out1 <= 1'b0;
+        rea_d1 <= 1'b0;
+    end
+    else begin
+        rea_d1 <= rea;
+        valid_out1 <= rea_d1;
+    end
+    // the word at the requested address always lands in dout1[31:0]
+    case (addr_port_a_d1)
+        1'b0: dout1 <= {dout_a_2, dout_a_1};
+        1'b1: dout1 <= {dout_a_1, dout_a_2};
+        default: dout1 <= 64'b0;               // reached only for X/Z in simulation
+    endcase
+end
+//----------------------- Port B ----------------------------//
+always @(posedge clk) begin
+
+    addr_port_b_d1 <= addr_port_b[0];
+
+    if(~rst_n) begin
+        reb_d1 <= 1'b0;
+        valid_out2 <= 1'b0;
+    end
+    else begin
+        reb_d1 <= reb;
+        valid_out2 <= reb_d1;
+    end
+    // odd word addresses come from bank 2, even ones from bank 1
+    if (addr_port_b_d1) begin
+        dout2 <= dout_b_2;
+    end
+    else begin
+        dout2 <= dout_b_1;
+    end
+end
+
+
+
+endmodule // Mem1in2out
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/bram.sv b/hw/hdl/operators/dtengine/gbm/inferCore/bram.sv
new file mode 100644
index 00000000..a7d650ff
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/bram.sv
@@ -0,0 +1,55 @@
+
+
+/*
+ * Copyright 2019 - 2020 Systems Group, ETH Zurich
+ *
+ * Permission is hereby granted, free of charge, to any
person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+// bram: single-clock RAM with one synchronous write port and one synchronous read port.
+module bram #(
+    parameter DATA_WIDTH = 32,                // bits per stored word
+    parameter ADDR_WIDTH = 8                  // the array holds 2**ADDR_WIDTH words
+) (
+    input  wire                  clk,
+    input  wire                  we,          // store din at waddr on the rising edge
+    input  wire                  re,          // load dout from raddr on the rising edge
+    input  wire [ADDR_WIDTH-1:0] raddr,
+    input  wire [ADDR_WIDTH-1:0] waddr,
+    input  wire [DATA_WIDTH-1:0] din,
+    output reg  [DATA_WIDTH-1:0] dout         // keeps its value while re is low
+);
+    localparam MEM_WORDS = 2**ADDR_WIDTH;     // number of addressable words
+
+`ifdef VENDOR_XILINX
+    (* ram_extract = "yes", ram_style = "block" *)
+    reg [DATA_WIDTH-1:0] mem[0:MEM_WORDS-1];
+`else
+    (* ramstyle = "no_rw_check" *) reg [DATA_WIDTH-1:0] mem[0:MEM_WORDS-1];
+`endif
+
+    // Write port
+    always @(posedge clk) begin
+        if (we) mem[waddr] <= din;
+    end
+    // Read port: samples the array contents from before any write made on the same edge
+    always @(posedge clk) begin
+        if (re) dout <= mem[raddr];
+    end
+
+endmodule
\ No newline at end of file
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/bram_1_in_2_out.sv b/hw/hdl/operators/dtengine/gbm/inferCore/bram_1_in_2_out.sv
new file mode 100644
index 00000000..7cabab16
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/bram_1_in_2_out.sv
@@ -0,0 +1,136 @@
+//---------------------------------------------------------------------------------------
+// Amazon FPGA Hardware Development Kit
+//
+// Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Licensed under the Amazon Software License (the "License"). You may not use
+// this file except in compliance with the License. A copy of the License is
+// located at
+//
+//    http://aws.amazon.com/asl/
+//
+// or in the "license" file accompanying this file. This file is distributed on
+// an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
+// implied. See the License for the specific language governing permissions and
+// limitations under the License.
+//---------------------------------------------------------------------------------------
+//----------------------------------------------------
+// This is a dual ported BRAM
+//----------------------------------------------------
+module bram_1_in_2_out #(parameter WIDTH=32, parameter ADDR_WIDTH=10, parameter DEPTH=1024, parameter PIPELINE=0, parameter MEMORY_TYPE = "auto")
+(
+   input clk,                        // single clock shared by both ports
+   input wea,                        // port A write enable
+   input ena,                        // port A enable (gates both reads and writes)
+   input[ADDR_WIDTH-1:0] addra,
+   input[WIDTH-1:0] da,              // port A write data
+   output logic[WIDTH-1:0] qa,       // port A read data
+   // Port B: same signals as port A
+   input web,
+   input enb,
+   input[ADDR_WIDTH-1:0] addrb,
+   input[WIDTH-1:0] db,
+   output logic[WIDTH-1:0] qb
+   // The implementation is selected by the NO_XILINX_XPM_RAM define: the Xilinx XPM macro when
+   // it is not defined, a behavioural model otherwise.
+   );
+
+`ifndef NO_XILINX_XPM_RAM
+   // XPM true dual-port RAM, read latency PIPELINE+1 on both ports
+   xpm_memory_tdpram # (
+     // Common module parameters
+     .MEMORY_SIZE        (WIDTH*DEPTH),     //positive integer
+     .MEMORY_PRIMITIVE   (MEMORY_TYPE),     //string; "auto", "distributed", "block" or "ultra";
+     .CLOCKING_MODE      ("common_clock"),  //string; "common_clock", "independent_clock"
+     .MEMORY_INIT_FILE   ("none"),          //string; "none" or ".mem"
+     .MEMORY_INIT_PARAM  (""    ),          //string;
+     .USE_MEM_INIT       (1),               //integer; 0,1
+     .WAKEUP_TIME        ("disable_sleep"), //string; "disable_sleep" or "use_sleep_pin"
+     .MESSAGE_CONTROL    (0),               //integer; 0,1
+     // Port A module parameters
+     .WRITE_DATA_WIDTH_A (WIDTH),           //positive integer
+     .READ_DATA_WIDTH_A  (WIDTH),           //positive integer
+     .BYTE_WRITE_WIDTH_A (WIDTH),           //integer; 8, 9, or WRITE_DATA_WIDTH_A value
+     .ADDR_WIDTH_A       (ADDR_WIDTH),      //positive integer
+     .READ_RESET_VALUE_A ("0"),             //string
+     .READ_LATENCY_A     (PIPELINE+1),      //non-negative integer
+     .WRITE_MODE_A       ("read_first"),    //string; "write_first", "read_first", "no_change"
+     // Port B module parameters
+     .WRITE_DATA_WIDTH_B (WIDTH),           //positive integer
+     .READ_DATA_WIDTH_B  (WIDTH),           //positive integer
+     .BYTE_WRITE_WIDTH_B (WIDTH),           //integer; 8, 9, or WRITE_DATA_WIDTH_B value
+     .ADDR_WIDTH_B       (ADDR_WIDTH),      //positive integer
+     .READ_RESET_VALUE_B ("0"),             //vector of READ_DATA_WIDTH_B bits
+     .READ_LATENCY_B     (PIPELINE+1),      //non-negative integer
+     .WRITE_MODE_B       ("read_first")     //string; "write_first", "read_first", "no_change"
+   ) xpm_memory_tdpram_inst (
+     // Common module ports
+     .sleep          (1'b0),
+     // Port A module ports
+     .clka           (clk),
+     .rsta           (1'b0),
+     .ena            (ena),
+     .regcea         (1'b1),
+     .wea            (wea),
+     .addra          (addra),
+     .dina           (da),
+     .injectsbiterra (1'b0),  //do not change
+     .injectdbiterra (1'b0),  //do not change
+     .douta          (qa),
+     .sbiterra       (),      //do not change
+     .dbiterra       (),      //do not change
+     // Port B module ports
+     .clkb           (clk),
+     .rstb           (1'b0),
+     .enb            (enb),
+     .regceb         (1'b1),
+     .web            (web),
+     .addrb          (addrb),
+     .dinb           (db),
+     .injectsbiterrb (1'b0),  //do not change
+     .injectdbiterrb (1'b0),  //do not change
+     .doutb          (qb),
+     .sbiterrb       (),      //do not change
+     .dbiterrb       ()       //do not change
+   );
+   // End of xpm_memory_tdpram instance declaration
+
+`else
+   // Behavioural model: an enabled write leaves the read register unchanged (NOTE(review): the XPM path is set to "read_first")
+   logic[WIDTH-1:0] ram[DEPTH-1:0];
+
+   logic[WIDTH-1:0] rddata_a, rddata_a_q;
+   logic[WIDTH-1:0] rddata_b, rddata_b_q;
+
+   always @(posedge clk)
+      if (ena)
+      begin
+         if (wea)
+            ram[addra] <= da;
+         else
+            rddata_a <= ram[addra];
+      end
+
+   always @(posedge clk)
+      rddata_a_q <= rddata_a;        // extra output stage, used when PIPELINE is non-zero
+
+   always @(posedge clk)
+      if (enb)
+      begin
+         if (web)
+            ram[addrb] <= db;
+         else
+            rddata_b <= ram[addrb];
+      end
+
+   always @(posedge clk)
+      rddata_b_q <= rddata_b;
+   // Any non-zero PIPELINE adds exactly one stage here, so this matches READ_LATENCY = PIPELINE+1 only for 0 or 1
+   assign qa = (PIPELINE)? rddata_a_q: rddata_a;
+   assign qb = (PIPELINE)? rddata_b_q: rddata_b;
+
+`endif
+
+
+
+endmodule
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/bus_aligner.sv b/hw/hdl/operators/dtengine/gbm/inferCore/bus_aligner.sv
new file mode 100644
index 00000000..b023dce9
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/bus_aligner.sv
@@ -0,0 +1,183 @@
+
+/*
+ * Copyright 2019 - 2020 Systems Group, ETH Zurich
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+import DTPackage::*;
+
+/*
+    bus_aligner
+
+    Registers the incoming 512-bit lines through two stages (_d1 = newest, _d2 = older) and hands
+    both lines, concatenated as {data_in_d1, data_in_d2}, to smart_shifter together with the offset
+    captured in stage 2. The type/size sideband of the stage-2 line bypasses the shifter through a
+    16-cycle delay instance.
+
+    Only the two valid flags are reset (synchronous, active-low rst_n); payload registers are not.
+
+    NOTE(review): the 16-cycle sideband delay presumably matches the latency of smart_shifter --
+    confirm against smart_shifter.sv, which is not visible from this file.
+*/
+module bus_aligner
+   (
+    input  wire         clk,
+    input  wire         rst_n,                  // synchronous, active-low; clears the valid flags only
+
+    input  wire [511:0] data_in,                // incoming line
+    input  wire         data_in_last,           // registered alongside the line; used to form shifter_in_last
+    input  wire         data_in_type,           // sideband, forwarded to data_out_type through the delay
+    input  wire         data_in_valid,
+    input  wire [3:0]   data_in_off,            // offset, forwarded (from stage 2) to smart_shifter.inOffs
+    input  wire [2:0]   data_in_size,           // sideband, forwarded to data_out_size through the delay
+    input  wire [4:0]   data_in_word_count,     // compared against the offset when the newest line is a "last" line
+    input  wire         stream_last,            // registered alongside the line; lets stage 2 drain without a newer line
+
+    output wire [511:0] data_out,
+    output wire         data_out_last,
+    output wire         data_out_type,
+    output wire [2:0]   data_out_size,
+    output wire         data_out_valid
+   );
+
+
+// Stage registers: *_d1 holds the most recently accepted line, *_d2 the one before it.
+reg  [511:0] data_in_d1;
+reg  [511:0] data_in_d2;
+
+reg          data_in_valid_d1;
+reg          data_in_valid_d2;
+
+reg          data_in_last_d1;
+reg          data_in_last_d2;
+
+reg  [3:0]   data_in_off_d1;
+reg  [3:0]   data_in_off_d2;
+
+reg  [2:0]   data_in_size_d1;
+reg  [2:0]   data_in_size_d2;
+
+reg  [4:0]   data_in_word_count_d1;
+reg  [4:0]   data_in_word_count_d2;
+
+reg          stream_last_d1;
+reg          stream_last_d2;
+
+reg          data_in_type_d1;
+reg          data_in_type_d2;
+
+wire         shifter_in_last;
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////        Prepare lines for Shifter       /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+always@(posedge clk)begin
+    ////////////// Signals with reset //////////////////
+    // Line 1
+    if(~rst_n) begin
+        data_in_valid_d1 <= 1'b0;
+        data_in_valid_d2 <= 1'b0;
+    end
+    else begin
+        // Line 1
+        // Stage 1 always follows the input valid.
+        data_in_valid_d1 <= data_in_valid;
+
+        // Line 2
+        if(data_in_valid_d2) begin
+            // Stage 2 is occupied.
+            if(data_in_valid_d1) begin
+                // A newer line is available, so stage 2 is reloaded this cycle (see payload logic below).
+                if(data_in_last_d2) begin
+                    data_in_valid_d2 <= 1'b1;
+                end
+                else if(data_in_last_d1) begin
+                    // The incoming stage-1 line is a "last" line and the current stage-2 line is not:
+                    // stage 2 only stays valid when the last line's word count exceeds the current offset.
+                    data_in_valid_d2 <= data_in_word_count_d1 > data_in_off_d2;
+                end
+                else begin
+                    data_in_valid_d2 <= 1'b1;
+                end
+            end
+            else if(stream_last_d2) begin
+                // No newer line, but stage 2 is flagged as end of stream: release it.
+                data_in_valid_d2 <= 1'b0;
+            end
+            // Otherwise (no newer line, not end of stream) stage 2 keeps its valid flag.
+        end
+        else begin
+            // Stage 2 is empty: take whatever stage 1 holds.
+            data_in_valid_d2 <= data_in_valid_d1;
+        end
+    end
+    ////////////// Signals with no reset //////////////////
+    // Line 1
+    data_in_last_d1       <= data_in_last;
+    data_in_d1            <= data_in;
+    data_in_off_d1        <= data_in_off;
+    data_in_word_count_d1 <= data_in_word_count;
+    data_in_size_d1       <= data_in_size;
+    data_in_type_d1       <= data_in_type;
+    stream_last_d1        <= stream_last;
+
+    // Line 2
+    // An occupied stage 2 holds its payload until stage 1 has a valid line to replace it;
+    // an empty stage 2 loads from stage 1 unconditionally.
+    if(data_in_valid_d2) begin
+        if(data_in_valid_d1) begin
+            data_in_last_d2       <= data_in_last_d1;
+            data_in_d2            <= data_in_d1;
+            data_in_off_d2        <= data_in_off_d1;
+            data_in_word_count_d2 <= data_in_word_count_d1;
+            data_in_size_d2       <= data_in_size_d1;
+            data_in_type_d2       <= data_in_type_d1;
+            stream_last_d2        <= stream_last_d1;
+        end
+    end
+    else begin
+        data_in_last_d2       <= data_in_last_d1;
+        data_in_d2            <= data_in_d1;
+        data_in_off_d2        <= data_in_off_d1;
+        data_in_word_count_d2 <= data_in_word_count_d1;
+        data_in_type_d2       <= data_in_type_d1;
+        data_in_size_d2       <= data_in_size_d1;
+        stream_last_d2        <= stream_last_d1;
+    end
+end
+
+
+// Sideband path: {type, size} of the stage-2 line (1 + 3 = 4 bits) is delayed by 16 cycles.
+// The delayed valid is left unconnected; data_out_valid comes from the shifter instead.
+delay #(.DATA_WIDTH( 4 ),
+        .DELAY_CYCLES(16)
+       ) smart_shifter_delay(
+    .clk            (clk),
+    .rst_n          (rst_n),
+    .data_in        ( {data_in_type_d2, data_in_size_d2} ),   //
+    .data_in_valid  ( (data_in_valid_d1 || stream_last_d2) && data_in_valid_d2 ),
+    .data_out       ( {data_out_type, data_out_size} ),
+    .data_out_valid ( )
+    );
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////            Smart Shifter             /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// "Last" is signalled when the stage-2 line is itself last, or when the stage-1 line is last and
+// its word count does not exceed the offset.
+// NOTE(review): this compares against data_in_off_d1, whereas the valid logic above compares
+// data_in_word_count_d1 against data_in_off_d2 -- confirm the two are meant to differ.
+assign shifter_in_last = data_in_last_d2 || (data_in_last_d1 && (data_in_word_count_d1 <= data_in_off_d1));
+
+// A pair of lines is presented to the shifter when stage 2 is valid and either a newer line is
+// present in stage 1 or stage 2 is flagged as end of stream.
+smart_shifter #(.ELEMENTS_PER_CL(16),
+                .ELEMENTS_PER_CL_BITS(4))
+ smart_shifter_x(
+    .clk      (clk),
+    .rst_n    (rst_n),
+
+    .inValid  ((data_in_valid_d1 || stream_last_d2) && data_in_valid_d2),
+    .inOffs   (data_in_off_d2),
+    .inLast   (shifter_in_last),
+    .inData   ({data_in_d1, data_in_d2}),
+
+    .outValid (data_out_valid),
+    .outLast  (data_out_last),
+    .outData  (data_out)
+    );
+
+
+endmodule
+
+
+
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/compute_unit.sv b/hw/hdl/operators/dtengine/gbm/inferCore/compute_unit.sv
new file mode 100644
index 00000000..08aba473
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/compute_unit.sv
@@ -0,0 +1,241 @@
+
+
+/*
+ * Copyright 2019 - 2020 Systems Group, ETH Zurich
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+import DTPackage::*;
+
+
+/*
+    compute_unit
+
+    One cluster of processing elements. The module does three things with the incoming 512-bit
+    data line stream:
+      1. Registers it once and forwards it unchanged on data_line_out* (the daisy-chain output).
+      2. Feeds it to a LineRateConvertor, whose output drives a cascade of NUM_PUS_PER_CLUSTER
+         processing_element_async instances (each PE passes the line on to the next one).
+      3. Collects the per-PE leaf outputs into a fixed 32-entry vector and reduces them with
+         FPAddersReduceTree_sync into tuple_result_out.
+
+    CU_ID is passed to the LineRateConvertor only.
+*/
+module compute_unit #(parameter CU_ID = 0 )
+   (
+    input  wire                                 clk,
+    input  wire                                 rst_n,                        // synchronous, active-low
+
+    // Incoming data line plus sideband
+    input  wire [511:0]                         data_line_in,
+    input  wire                                 data_line_in_valid,
+    input  wire [2:0]                           data_line_in_last_valid_pos,
+    input  wire                                 data_line_in_last,
+    input  wire                                 data_line_in_ctrl,
+    input  wire                                 data_line_in_prog,
+    input  wire [NUM_PUS_PER_CLUSTER_BITS-1:0]  data_line_in_pu,
+    input  wire [NUM_DTPU_CLUSTERS_BITS-1:0]    data_line_in_cu,
+    output wire                                 data_line_in_ready,           // driven by the LineRateConvertor
+
+    // One-cycle registered copy of the inputs above
+    output reg  [511:0]                         data_line_out,
+    output reg                                  data_line_out_valid,
+    output reg  [2:0]                           data_line_out_last_valid_pos,
+    output reg                                  data_line_out_ctrl,
+    output reg                                  data_line_out_last,
+    output reg                                  data_line_out_prog,
+    output reg  [NUM_PUS_PER_CLUSTER_BITS-1:0]  data_line_out_pu,
+    output reg  [NUM_DTPU_CLUSTERS_BITS-1:0]    data_line_out_cu,
+
+    // Reduced result of all PE leaf outputs
+    output wire [DATA_PRECISION-1:0]            tuple_result_out,
+    output wire                                 tuple_result_out_valid,
+    input  wire                                 tuple_result_out_ready
+
+   );
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////          Signals Declarations          /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+wire [DATA_LINE_WIDTH-1:0]           line_rate_convertor_data_out;
+wire                                 line_rate_convertor_out_valid;
+wire                                 line_rate_convertor_out_last;
+wire                                 line_rate_convertor_out_ctrl;
+wire                                 line_rate_convertor_out_prog;
+wire                                 line_rate_convertor_out_ready;
+wire [NUM_PUS_PER_CLUSTER_BITS-1:0]  line_rate_convertor_out_pu;
+
+
+// PE cascade links: entry [i] is the input of PE i, entry [i+1] its output, hence the
+// NUM_PUS_PER_CLUSTER+1 entries. Entry [NUM_PUS_PER_CLUSTER] (output of the last PE) is not
+// consumed in this module, and data_line_ready_array[NUM_PUS_PER_CLUSTER] is never driven.
+wire [DATA_LINE_WIDTH-1:0]           data_line_array[NUM_PUS_PER_CLUSTER:0];
+wire                                 data_line_valid_array[NUM_PUS_PER_CLUSTER:0];
+wire                                 data_line_last_array[NUM_PUS_PER_CLUSTER:0];
+wire                                 data_line_ready_array[NUM_PUS_PER_CLUSTER:0];
+wire                                 data_line_ctrl_array[NUM_PUS_PER_CLUSTER:0];
+wire                                 data_line_prog_array[NUM_PUS_PER_CLUSTER:0];
+wire [NUM_PUS_PER_CLUSTER_BITS-1:0]  data_line_pu_array[NUM_PUS_PER_CLUSTER:0];
+
+// Per-PE leaf results
+wire [DATA_PRECISION-1:0]            pu_tree_leaf_out[NUM_PUS_PER_CLUSTER-1:0];
+wire                                 pu_tree_leaf_out_valid[NUM_PUS_PER_CLUSTER-1:0];
+wire                                 pu_tree_leaf_out_last[NUM_PUS_PER_CLUSTER-1:0];
+
+// Fixed-size (32 entry) input vector of the reduce tree
+wire [31:0]                          fp_in_vector[31:0];
+wire                                 fp_in_vector_valid[31:0];
+wire                                 fp_in_vector_last[31:0];
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////           Pipeline to next CU           /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// The forwarding register is free-running: it does not look at data_line_in_ready.
+// Only valid/ctrl/prog are cleared on reset; the payload registers are not reset.
+always @(posedge clk) begin
+    // info
+    data_line_out                <= data_line_in;
+    data_line_out_pu             <= data_line_in_pu;
+    data_line_out_last_valid_pos <= data_line_in_last_valid_pos;
+    data_line_out_cu             <= data_line_in_cu;
+    data_line_out_last           <= data_line_in_last;
+
+    // valids
+    if(~rst_n) begin
+        data_line_out_valid <= 1'b0;
+        data_line_out_ctrl  <= 1'b0;
+        data_line_out_prog  <= 1'b0;
+    end
+    else begin
+        data_line_out_valid <= data_line_in_valid;
+        data_line_out_ctrl  <= data_line_in_ctrl;
+        data_line_out_prog  <= data_line_in_prog;
+    end
+end
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////           Line Rate Convertor           /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Takes the 512-bit input line and produces DATA_LINE_WIDTH-wide lines for the PE cascade.
+// It receives the unregistered inputs and generates the back-pressure seen by the upstream.
+LineRateConvertor #(.CU_ID (CU_ID) )
+ bus_convertor(
+    .clk                          (clk),
+    .rst_n                        (rst_n),
+
+    .data_line_in                 (data_line_in),
+    .data_line_in_valid           (data_line_in_valid),
+    .data_line_in_last_valid_pos  (data_line_in_last_valid_pos),
+    .data_line_in_last            (data_line_in_last),
+    .data_line_in_ctrl            (data_line_in_ctrl),
+    .data_line_in_prog            (data_line_in_prog),
+    .data_line_in_pu              (data_line_in_pu),
+    .data_line_in_cu              (data_line_in_cu),
+    .data_line_in_ready           (data_line_in_ready),
+
+
+    .data_line_out                (line_rate_convertor_data_out),
+    .data_line_out_valid          (line_rate_convertor_out_valid),
+    .data_line_out_ctrl           (line_rate_convertor_out_ctrl),
+    .data_line_out_last           (line_rate_convertor_out_last),
+    .data_line_out_prog           (line_rate_convertor_out_prog),
+    .data_line_out_pu             (line_rate_convertor_out_pu),
+    .data_line_out_ready          (line_rate_convertor_out_ready)
+    );
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////         Generate DTPU Instances         /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// initialize input to first PU
+assign data_line_array[0]       = line_rate_convertor_data_out;
+assign data_line_valid_array[0] = line_rate_convertor_out_valid;
+assign data_line_last_array[0]  = line_rate_convertor_out_last;
+assign data_line_ctrl_array[0]  = line_rate_convertor_out_ctrl;
+assign data_line_prog_array[0]  = line_rate_convertor_out_prog;
+assign data_line_pu_array[0]    = line_rate_convertor_out_pu;
+
+// Only the first PE's ready is fed back to the convertor; the PEs have no ready input from
+// their downstream neighbour in this netlist.
+assign line_rate_convertor_out_ready = data_line_ready_array[0];
+
+
+// generate a cascade of PUs
+genvar i;
+generate
+    for (i = 0; i < NUM_PUS_PER_CLUSTER; i = i + 1) begin: pus
+        processing_element_async #(.PE_ID (i) )
+         pe_x(
+            .clk                    (clk),
+            .rst_n                  (rst_n),
+
+            .data_line_in           (data_line_array[i]),
+            .data_line_in_valid     (data_line_valid_array[i]),
+            .data_line_in_last      (data_line_last_array[i]),
+            .data_line_in_ctrl      (data_line_ctrl_array[i]),
+            .data_line_in_prog      (data_line_prog_array[i]),
+            .data_line_in_pu        (data_line_pu_array[i]),
+            .data_line_in_ready     (data_line_ready_array[i]),
+
+            .data_line_out          (data_line_array[i+1]),
+            .data_line_out_valid    (data_line_valid_array[i+1]),
+            .data_line_out_ctrl     (data_line_ctrl_array[i+1]),
+            .data_line_out_last     (data_line_last_array[i+1]),
+            .data_line_out_prog     (data_line_prog_array[i+1]),
+            .data_line_out_pu       (data_line_pu_array[i+1]),
+
+            .pu_tree_leaf_out       (pu_tree_leaf_out[i]),
+            .pu_tree_leaf_out_valid (pu_tree_leaf_out_valid[i]),
+            .pu_tree_leaf_out_last  (pu_tree_leaf_out_last[i])
+            );
+    end
+endgenerate
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////        Instance of FPAdders Tree        /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Map PE outputs onto the 32 reduce-tree inputs; inputs without a PE are tied to zero / not valid.
+// NOTE(review): the tree width is hard-coded to 32 and the padding to 32 bits. If
+// NUM_PUS_PER_CLUSTER can exceed 32, or DATA_PRECISION differs from 32, this needs revisiting --
+// confirm against DTPackage.
+generate for (i = 0; i < 32; i=i+1) begin
+    if( i < NUM_PUS_PER_CLUSTER ) begin
+        assign fp_in_vector[i]       = pu_tree_leaf_out[i];
+        assign fp_in_vector_valid[i] = pu_tree_leaf_out_valid[i];
+        assign fp_in_vector_last[i]  = pu_tree_leaf_out_last[i];
+    end
+    else begin
+        assign fp_in_vector[i]       = 32'b0;
+        assign fp_in_vector_valid[i] = 1'b0;
+        assign fp_in_vector_last[i]  = 1'b0;
+    end
+end
+endgenerate
+
+FPAddersReduceTree_sync #(.NUM_FP_POINTS(32)
+    ) reduce_leaves(
+    .clk                (clk),
+    .rst_n              (rst_n),
+
+    .fp_in_vector       (fp_in_vector),
+    .fp_in_vector_valid (fp_in_vector_valid),
+    .fp_in_vector_last  (fp_in_vector_last),
+
+    .reduce_out         (tuple_result_out),
+    .reduce_out_valid   (tuple_result_out_valid),
+    .reduce_out_ready   (tuple_result_out_ready)
+    );
+
+
+endmodule
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/delay.sv b/hw/hdl/operators/dtengine/gbm/inferCore/delay.sv
new file mode 100644
index
00000000..fe4b7a76 --- /dev/null +++ b/hw/hdl/operators/dtengine/gbm/inferCore/delay.sv @@ -0,0 +1,75 @@ + + +/* + * Copyright 2019 - 2020 Systems Group, ETH Zurich + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+
+/*
+    delay
+
+    Fixed-latency shift register: data_in / data_in_valid appear on data_out / data_out_valid
+    exactly DELAY_CYCLES clock edges later. The valid flags are cleared by the synchronous,
+    active-low reset; the data words are free-running and are not reset.
+
+    DATA_WIDTH   : width of the delayed word.
+    DELAY_CYCLES : number of register stages (the code indexes stage 0, so it must be >= 1).
+*/
+module delay #(parameter DATA_WIDTH   = 32,
+               parameter DELAY_CYCLES = 4
+    ) (
+
+    input  wire                   clk,
+    input  wire                   rst_n,
+    input  wire [DATA_WIDTH-1:0]  data_in,
+    input  wire                   data_in_valid,
+    output wire [DATA_WIDTH-1:0]  data_out,
+    output wire                   data_out_valid
+    );
+
+
+// Stage 0 is the entry register, stage DELAY_CYCLES-1 drives the outputs.
+reg [DATA_WIDTH-1:0] stage_word[DELAY_CYCLES];
+reg                  stage_flag[DELAY_CYCLES];
+
+integer s;
+
+always @(posedge clk) begin
+    // Data words: plain shift, no reset.
+    stage_word[0] <= data_in;
+    for (s = 1; s < DELAY_CYCLES; s = s + 1) begin
+        stage_word[s] <= stage_word[s-1];
+    end
+
+    // Valid flags: cleared together on reset, otherwise shifted alongside the data.
+    if(~rst_n) begin
+        for (s = 0; s < DELAY_CYCLES; s = s + 1) begin
+            stage_flag[s] <= 0;
+        end
+    end
+    else begin
+        stage_flag[0] <= data_in_valid;
+        for (s = 1; s < DELAY_CYCLES; s = s + 1) begin
+            stage_flag[s] <= stage_flag[s-1];
+        end
+    end
+end
+
+assign data_out       = stage_word[DELAY_CYCLES-1];
+assign data_out_valid = stage_flag[DELAY_CYCLES-1];
+
+endmodule // delay
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/dual_port_mem.sv b/hw/hdl/operators/dtengine/gbm/inferCore/dual_port_mem.sv
new file mode 100644
index 00000000..c6640267
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/dual_port_mem.sv
@@ -0,0 +1,128 @@
+//---------------------------------------------------------------------------------------
+// Amazon FPGA Hardware Development Kit
+//
+// Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Licensed under the Amazon Software License (the "License"). You may not use
+// this file except in compliance with the License. A copy of the License is
+// located at
+//
+//    http://aws.amazon.com/asl/
+//
+// or in the "license" file accompanying this file. This file is distributed on
+// an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
+// implied. See the License for the specific language governing permissions and
+// limitations under the License.
+//---------------------------------------------------------------------------------------
+//----------------------------------------------------
+// This is a dual ported BRAM
+//
+// Both ports share one clock and can each read and write.
+//
+// Parameters
+//   WIDTH       : data width of both ports.
+//   ADDR_WIDTH  : address width of both ports.
+//   DEPTH       : number of words (memory size is WIDTH*DEPTH bits).
+//   PIPELINE    : extra output register stages; read latency is PIPELINE+1 cycles in the XPM
+//                 build. The behavioural fallback only distinguishes PIPELINE == 0 (1 cycle)
+//                 from PIPELINE != 0 (2 cycles).
+//   MEMORY_TYPE : forwarded to the XPM MEMORY_PRIMITIVE parameter.
+//
+// Ports (x = a/b)
+//   enx / wex / addrx / dx : enable, write enable, address and write data of port x.
+//   qx                     : read data of port x ("read_first": a simultaneous write to the
+//                            same address returns the old contents).
+//
+// Defining NO_XILINX_XPM_RAM replaces the XPM macro with a behavioural model.
+//----------------------------------------------------
+module dual_port_mem #(parameter WIDTH=64, parameter ADDR_WIDTH=9, parameter DEPTH=512, parameter PIPELINE=0, parameter MEMORY_TYPE = "auto")
+(
+   input                        clk,
+   input                        wea,
+   input                        ena,
+   input[ADDR_WIDTH-1:0]        addra,
+   input[WIDTH-1:0]             da,
+   output logic[WIDTH-1:0]      qa,
+
+   input                        web,
+   input                        enb,
+   input[ADDR_WIDTH-1:0]        addrb,
+   input[WIDTH-1:0]             db,
+   output logic[WIDTH-1:0]      qb
+
+
+   );
+
+`ifndef NO_XILINX_XPM_RAM
+
+   xpm_memory_tdpram # (
+      // Common module parameters
+      .MEMORY_SIZE        (WIDTH*DEPTH),     //positive integer
+      .MEMORY_PRIMITIVE   (MEMORY_TYPE),     //string; "auto", "distributed", "block" or "ultra";
+      .CLOCKING_MODE      ("common_clock"),  //string; "common_clock", "independent_clock"
+      .MEMORY_INIT_FILE   ("none"),          //string; "none" or ".mem"
+      .MEMORY_INIT_PARAM  ("" ),             //string;
+      .USE_MEM_INIT       (1),               //integer; 0,1
+      .WAKEUP_TIME        ("disable_sleep"), //string; "disable_sleep" or "use_sleep_pin"
+      .MESSAGE_CONTROL    (0),               //integer; 0,1
+      // Port A module parameters
+      .WRITE_DATA_WIDTH_A (WIDTH),           //positive integer
+      .READ_DATA_WIDTH_A  (WIDTH),           //positive integer
+      .BYTE_WRITE_WIDTH_A (WIDTH),           //integer; 8, 9, or WRITE_DATA_WIDTH_A value
+      .ADDR_WIDTH_A       (ADDR_WIDTH),      //positive integer
+      .READ_RESET_VALUE_A ("0"),             //string
+      .READ_LATENCY_A     (PIPELINE+1),      //non-negative integer
+      .WRITE_MODE_A       ("read_first"),    //string; "write_first", "read_first", "no_change"
+      // Port B module parameters
+      .WRITE_DATA_WIDTH_B (WIDTH),           //positive integer
+      .READ_DATA_WIDTH_B  (WIDTH),           //positive integer
+      .BYTE_WRITE_WIDTH_B (WIDTH),           //integer; 8, 9, or WRITE_DATA_WIDTH_B value
+      .ADDR_WIDTH_B       (ADDR_WIDTH),      //positive integer
+      .READ_RESET_VALUE_B ("0"),             //vector of READ_DATA_WIDTH_B bits
+      .READ_LATENCY_B     (PIPELINE+1),      //non-negative integer
+      .WRITE_MODE_B       ("read_first")     //string; "write_first", "read_first", "no_change"
+   ) xpm_memory_tdpram_inst (
+      // Common module ports
+      .sleep          (1'b0),
+      // Port A module ports
+      .clka           (clk),
+      .rsta           (1'b0),
+      .ena            (ena),
+      .regcea         (1'b1),
+      .wea            (wea),
+      .addra          (addra),
+      .dina           (da),
+      .injectsbiterra (1'b0),  //do not change
+      .injectdbiterra (1'b0),  //do not change
+      .douta          (qa),
+      .sbiterra       (),      //do not change
+      .dbiterra       (),      //do not change
+      // Port B module ports
+      .clkb           (clk),
+      .rstb           (1'b0),
+      .enb            (enb),
+      .regceb         (1'b1),
+      .web            (web),
+      .addrb          (addrb),
+      .dinb           (db),
+      .injectsbiterrb (1'b0),  //do not change
+      .injectdbiterrb (1'b0),  //do not change
+      .doutb          (qb),
+      .sbiterrb       (),      //do not change
+      .dbiterrb       ()       //do not change
+   );
+   // End of xpm_memory_tdpram instance declaration
+
+`else
+
+   // Behavioural model used when the Xilinx XPM library is not available.
+   //
+   // The previous version of this model only implemented "write on port A, read on port B":
+   // rddata_a was never driven (qa undefined for PIPELINE == 0), rddata_a_q was forced to 0
+   // (qa stuck at zero otherwise) and writes on port B were silently dropped. Both ports are
+   // now symmetric, like the XPM instance above.
+
+   logic[WIDTH-1:0] ram[DEPTH-1:0];
+
+   logic[WIDTH-1:0] rddata_a, rddata_a_q;
+   logic[WIDTH-1:0] rddata_b, rddata_b_q;
+
+   // Read ports. Non-blocking reads sample the array before this edge's writes land,
+   // which gives the "read_first" behaviour configured on the XPM instance.
+   always @(posedge clk)
+      if (ena) begin
+         rddata_a <= ram[addra];
+      end
+
+   always @(posedge clk)
+      if (enb) begin
+         rddata_b <= ram[addrb];
+      end
+
+   // Optional second output register (selected by PIPELINE below).
+   always @(posedge clk) begin
+      rddata_a_q <= rddata_a;
+      rddata_b_q <= rddata_b;
+   end
+
+   // Write ports, kept in one process so the array has a single writer.
+   // Port A keeps its original condition (wea alone); port B mirrors it.
+   // NOTE(review): the XPM macro only writes when the port enable is high as well. Callers
+   // that assert wea/web with ena/enb low would see a simulation/hardware mismatch -- confirm
+   // none do, or gate these with ena/enb.
+   always @(posedge clk) begin
+      if (wea) begin
+         ram[addra] <= da;
+      end
+      if (web) begin
+         ram[addrb] <= db;
+      end
+   end
+
+   assign qa = (PIPELINE)? rddata_a_q: rddata_a;
+   assign qb = (PIPELINE)? rddata_b_q: rddata_b;
+
+`endif
+
+
+
+endmodule
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/pe_datapath.sv b/hw/hdl/operators/dtengine/gbm/inferCore/pe_datapath.sv
new file mode 100644
index 00000000..d0fe1747
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/pe_datapath.sv
@@ -0,0 +1,369 @@
+
+/*
+ * Copyright 2019 - 2020 Systems Group, ETH Zurich
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+    PE Datapath flow:
+
+    - Read Tree Node: This stage consumes either the input tree instruction or the next node instruction.
+                      This stage issues a read address to tree memory and delays all the necessary information
+                      for the tree memory delay. By the end of the stage, the tree node basic and extra part are
+                      available in addition to the delayed information.
+
+    - Read feature:   Use the feature index in the tree node to compute the feature address and read it from
+                      the data memory.
Then, we delay the current node instruction and the obtained tree node info.
+
+    - Read Split Set: In this stage, we compute the address of the large bitset word to read from
+                      tree memory. If there is no large split bitset, the tree memory output is ignored.
+                      In addition, the early tree evaluation occurs. In parallel, we use the feature value to select
+                      from the small bitset, we also compare it to the split value, and we use the feature value
+                      to check if it is outside the boundaries of the large bitset. In addition, if the node_nop flag is zero,
+                      then node_res_val is not used, hence we store W2 of the tree node in its place to save resources.
+                      In addition, we check whether the feature value is NaN.
+    - Evaluate Node:  At this stage we have all the necessary information to process a node.
+                      Then, based on the operation type, the evaluation happens.
+    - Pick a Branch:  In this stage, based on the node evaluation result, we compute the next node
+                      to evaluate, or pick the result if there are no children on the next branch and set NOP if we
+                      are not at the last level. In this stage, if we are at the last level, we also
+                      output the result if the tree is not empty.
+*/
+
+import DTPackage::*;
+
+/*
+    pe_datapath
+
+    Walks one decision tree for one tuple. A node instruction circulates through five stages
+    (read node -> read feature -> read split set -> evaluate -> pick branch) and is fed back to
+    the first stage until node_level reaches tree_depth, at which point the result is emitted.
+    A new tree instruction is only accepted while no fed-back node instruction is present.
+
+    Ports
+      tree_depth              : level at which a walk terminates and the result is output.
+      tree_instruction*       : new (tree, tuple) request; ready is low while a node instruction
+                                is being fed back.
+      data_mem_*              : feature read port (address = {tuple_offset, 0} + feature index).
+      tree_mem_*_a            : tree memory port A, returns the basic tree node.
+      tree_mem_*_b            : tree memory port B, returns the large bitset word or the leaf value.
+      tree_eval_result*       : result, valid when a valid stage-5 instruction is at tree_depth;
+                                forced to zero for an empty tree.
+*/
+module pe_datapath (
+
+    input  wire                             clk,
+    input  wire                             rst_n,
+
+    input  wire [MAX_TREE_DEPTH_BITS-1:0]   tree_depth,
+
+    // tree instruction
+    input  TreeInstruction_t                tree_instruction,
+    input  wire                             tree_instruction_valid,
+    output wire                             tree_instruction_ready,
+
+    // data memory read port
+    output wire [TUPLE_OFFSET_BITS:0]       data_mem_rd_addr,
+    output wire                             data_mem_ren,
+    input  wire [31:0]                      data_mem_feature,
+
+    // tree memory read port A
+    output wire [TREE_OFFSET_BITS:0]        tree_mem_rd_addr_a,
+    output wire                             tree_mem_ren_a,
+    input  TreeNode_t                       tree_node_basic,
+
+    // tree memory read port B
+    output wire [TREE_OFFSET_BITS:0]        tree_mem_rd_addr_b,
+    output wire                             tree_mem_ren_b,
+    input  wire [31:0]                      node_large_bitset,
+
+    // result output
+    output wire [31:0]                      tree_eval_result,
+    output wire                             tree_eval_last,
+    output wire                             tree_eval_result_valid
+);
+
+
+
+
+// Node instruction as it moves through the stages (_sN = input of stage N,
+// _sN_1 = the version updated inside stage N).
+NodeInstruction_t                next_node_instr;
+wire                             next_node_instr_valid;
+
+NodeInstruction_t                curr_node_instr_s1;
+wire                             curr_node_instr_valid_s1;
+NodeInstruction_t                curr_node_instr_s2;
+wire                             curr_node_instr_valid_s2;
+NodeInstruction_t                curr_node_instr_s3;
+NodeInstruction_t                curr_node_instr_s3_1;
+wire                             curr_node_instr_valid_s3;
+NodeInstruction_t                curr_node_instr_s4;
+NodeInstruction_t                curr_node_instr_s4_1;
+wire                             curr_node_instr_valid_s4;
+NodeInstruction_t                curr_node_instr_s5;
+NodeInstruction_t                curr_node_instr_s5_1;
+wire                             curr_node_instr_valid_s5;
+
+// Stage-3 early evaluations of the feature value
+wire [31:0]                      small_bitset;
+wire                             is_Feature_NaN;
+wire                             small_bitset_eval;
+wire                             split_value_eval;
+wire                             feature_data_outside_bitset;
+
+wire                             is_Feature_NaN_s4;
+wire                             small_bitset_eval_s4;
+wire                             split_value_eval_s4;
+wire                             feature_data_outside_bitset_s4;
+wire [4:0]                       feature_data_s4;
+
+wire                             is_Feature_NaN_s5;
+
+TreeNode_t                       tree_node_basic_s3;
+TreeNode_t                       tree_node_basic_s4;
+TreeNode_t                       tree_node_basic_s5;
+
+reg                              go_right;
+reg                              node_eval;
+
+wire [10:0]                      left_child_offset_1;
+wire [10:0]                      left_child_offset_2;
+
+wire [10:0]                      right_child_offset;
+
+wire [10:0]                      next_node_address;
+wire [MAX_TREE_DEPTH_BITS-1:0]   next_node_level;
+
+wire                             next_node_nop;
+wire [10:0]                      next_child_offset;
+wire [10:0]                      result_offset;
+wire                             next_node_leaf;
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////             Read Node Stage             /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+/*
+    Selects the instruction to process: a fed-back node instruction has priority; otherwise a
+    fresh root-node instruction is built from the incoming tree instruction.
+*/
+always @(*) begin
+    if(next_node_instr_valid) begin
+        curr_node_instr_s1 = next_node_instr;
+    end
+    else begin
+        curr_node_instr_s1 = '{node_address: {tree_instruction.tree_offset, 1'b0},
+                               tuple_offset: tree_instruction.tuple_offset,
+                               node_level  : 3'b000,
+                               empty_tree  : tree_instruction.empty_tree,
+                               node_nop    : 1'b0,
+                               last_tree   : tree_instruction.last_tree,
+                               leaf_node   : 1'b0,
+                               node_res_val: 32'b0
+                               };
+    end
+end
+
+assign curr_node_instr_valid_s1 = next_node_instr_valid | tree_instruction_valid;
+
+// A new tree can only enter in a slot that is not taken by a fed-back node.
+assign tree_instruction_ready = !next_node_instr_valid;
+
+// Send read requests to tree memory
+assign tree_mem_rd_addr_a = curr_node_instr_s1.node_address;
+assign tree_mem_ren_a     = curr_node_instr_valid_s1;
+
+// Pipeline to next stage
+delay #(.DATA_WIDTH( $bits(NodeInstruction_t)),
+        .DELAY_CYCLES(TREE_MEM_READ_LATENCY)
+       ) ReadNodeStageDelay(
+    .clk            ( clk ),
+    .rst_n          ( rst_n ),
+    .data_in        ( curr_node_instr_s1 ),   //
+    .data_in_valid  ( curr_node_instr_valid_s1 ),
+    .data_out       ( curr_node_instr_s2 ),
+    .data_out_valid ( curr_node_instr_valid_s2 )
+    );
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////           Read Feature Stage            /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+/*
+    This stage reads the feature data corresponding to current tree node.
+    A feature index of 8'hFF suppresses the read (such a node is turned into a NOP in the
+    next stage unless it is already a leaf).
+*/
+assign data_mem_ren     = curr_node_instr_valid_s2 && (tree_node_basic.node_type.findex != 8'hFF);
+assign data_mem_rd_addr = {curr_node_instr_s2.tuple_offset, 1'b0} + tree_node_basic.node_type.findex; // 10-bit address
+
+// Pipeline to next stage
+delay #(.DATA_WIDTH($bits(NodeInstruction_t) + $bits(TreeNode_t)),
+        .DELAY_CYCLES(DATA_MEM_READ_LATENCY)
+       ) ReadFeatureStageDelay(
+    .clk            (clk),
+    .rst_n          (rst_n),
+    .data_in        ( {curr_node_instr_s2, tree_node_basic} ),   //
+    .data_in_valid  ( curr_node_instr_valid_s2 ),
+    .data_out       ( {curr_node_instr_s3, tree_node_basic_s3} ),
+    .data_out_valid ( curr_node_instr_valid_s3 )
+    );
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////       Read Split Set/Value Stage        /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Port-B address: for a leaf it is the node address itself; otherwise it skips the two basic
+// node words plus one word per missing child, and indexes the bitset by feature[15:5].
+assign tree_mem_rd_addr_b = curr_node_instr_s3.node_address +
+                            ((curr_node_instr_s3.leaf_node)? 0 :
+                             (((!tree_node_basic_s3.node_type.left_child)? 1'b1 : 0) +
+                              ((!tree_node_basic_s3.node_type.right_child)? 1'b1 : 0) +
+                              data_mem_feature[15:5] + 11'b00000010));
+assign tree_mem_ren_b     = curr_node_instr_valid_s3;
+
+// Early Node evaluation
+// A non-leaf node without a feature (findex == 8'hFF) becomes a NOP. While the instruction is
+// not a NOP, node_res_val carries the node's {word_1_h, word_1_l} instead of a result.
+assign curr_node_instr_s3_1 = '{ node_address: curr_node_instr_s3.node_address,
+                                 tuple_offset: curr_node_instr_s3.tuple_offset,
+                                 node_level  : curr_node_instr_s3.node_level,
+                                 empty_tree  : curr_node_instr_s3.empty_tree,
+                                 node_nop    : ((curr_node_instr_s3.node_nop)? 1'b1 : !curr_node_instr_s3.leaf_node && (tree_node_basic_s3.node_type.findex == 8'hFF) ),
+                                 last_tree   : curr_node_instr_s3.last_tree,
+                                 leaf_node   : curr_node_instr_s3.leaf_node,
+                                 node_res_val: ( (curr_node_instr_s3.node_nop)? curr_node_instr_s3.node_res_val : {tree_node_basic_s3.word_1_h, tree_node_basic_s3.word_1_l})
+                                 };
+
+assign small_bitset = {tree_node_basic_s3.word_1_h, tree_node_basic_s3.word_1_l};
+
+// IEEE-754 binary32 NaN: all eight exponent bits [30:23] set and a non-zero mantissa.
+// The exponent must be compared against an 8-bit constant: the former 7'b1111111 was
+// zero-extended to 8'h7F, so real NaNs were never detected and values in (1.0, 2.0) were
+// flagged as NaN instead.
+assign is_Feature_NaN    = (data_mem_feature[30:23] == 8'hFF) && (|data_mem_feature[22:0]);
+// The small bitset only covers feature values 0..31.
+assign small_bitset_eval = (data_mem_feature[31:5] == 0)? small_bitset[ data_mem_feature[4:0] ] : 1'b0;
+// NOTE(review): unsigned comparison of the raw 32-bit patterns -- confirm this is the intended
+// ordering for the feature encoding in use.
+assign split_value_eval  = data_mem_feature < {tree_node_basic_s3.word_1_h, tree_node_basic_s3.word_1_l};
+
+// Large bitset range check: word_1_h is used as the lower bound and word_1_l (in 32-bit words,
+// hence << 5) as the length.
+assign feature_data_outside_bitset = (data_mem_feature < tree_node_basic_s3.word_1_h) ||
+                                     (data_mem_feature > (tree_node_basic_s3.word_1_h + (tree_node_basic_s3.word_1_l << 5) ) );
+
+// Pipeline to next stage
+delay #(.DATA_WIDTH($bits(NodeInstruction_t) + $bits(TreeNode_t) + 5 + 1 + 1 + 1 + 1),
+        .DELAY_CYCLES(TREE_MEM_READ_LATENCY)
+       ) ReadSplitSetStageDelay(
+    .clk            (clk),
+    .rst_n          (rst_n),
+    .data_in        ( {curr_node_instr_s3_1, tree_node_basic_s3, data_mem_feature[4:0], small_bitset_eval, split_value_eval, feature_data_outside_bitset, is_Feature_NaN} ),   //
+    .data_in_valid  ( curr_node_instr_valid_s3 ),
+    .data_out       ( {curr_node_instr_s4, tree_node_basic_s4, feature_data_s4, small_bitset_eval_s4, split_value_eval_s4, feature_data_outside_bitset_s4, is_Feature_NaN_s4} ),
+    .data_out_valid ( curr_node_instr_valid_s4 )
+    );
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////          Node Evaluation Stage          /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+/*
+    Selects the evaluation result according to the node's op_type and registers it, so that
+    node_eval lines up with the stage-5 signals produced by the one-cycle delay below.
+*/
+
+always@(posedge clk)begin
+    if(~rst_n) begin
+        node_eval <= 1'b0;
+    end
+    else begin
+        case (tree_node_basic_s4.node_type.op_type)
+            2'b00: node_eval <= split_value_eval_s4;
+            2'b10: node_eval <= small_bitset_eval_s4;
+            2'b11: node_eval <= (feature_data_outside_bitset_s4)? 1'b0 : node_large_bitset[ feature_data_s4 ];
+            default : node_eval <= 1'b0;
+        endcase
+    end
+end
+
+// Early Node evaluation
+// For a leaf that is not a NOP, the word read on port B is the result value.
+assign curr_node_instr_s4_1 = '{ node_address: curr_node_instr_s4.node_address,
+                                 tuple_offset: curr_node_instr_s4.tuple_offset,
+                                 node_level  : curr_node_instr_s4.node_level,
+                                 empty_tree  : curr_node_instr_s4.empty_tree,
+                                 node_nop    : curr_node_instr_s4.node_nop,
+                                 last_tree   : curr_node_instr_s4.last_tree,
+                                 leaf_node   : curr_node_instr_s4.leaf_node,
+                                 node_res_val: ( (curr_node_instr_s4.node_nop || !curr_node_instr_s4.leaf_node)? curr_node_instr_s4.node_res_val : node_large_bitset )
+                                 };
+
+// Pipeline to next stage
+delay #(.DATA_WIDTH($bits(NodeInstruction_t) + $bits(TreeNode_t) + 1),
+        .DELAY_CYCLES(1)
+       ) EvalNodeStageDelay(
+    .clk            (clk),
+    .rst_n          (rst_n),
+    .data_in        ( {curr_node_instr_s4_1, tree_node_basic_s4, is_Feature_NaN_s4} ),   //
+    .data_in_valid  ( curr_node_instr_valid_s4 ),
+    .data_out       ( {curr_node_instr_s5, tree_node_basic_s5, is_Feature_NaN_s5} ),
+    .data_out_valid ( curr_node_instr_valid_s5 )
+    );
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////            Pick Branch Stage            /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+/*
+    Branch direction:
+      split_dir == 4'b0001 : go right exactly when the feature is NaN (node_eval is ignored).
+      split_dir[0] == 0    : NaN goes left, otherwise follow node_eval.
+      otherwise            : NaN goes right, otherwise follow node_eval.
+*/
+
+always@(*)begin
+    if(tree_node_basic_s5.node_type.split_dir == 4'b0001) begin
+        go_right = is_Feature_NaN_s5;
+    end
+    else if(tree_node_basic_s5.node_type.split_dir[0] == 1'b0) begin
+        if(is_Feature_NaN_s5) begin
+            go_right = 1'b0;
+        end
+        else begin
+            go_right = node_eval;
+        end
+    end
+    else begin
+        if(is_Feature_NaN_s5) begin
+            go_right = 1'b1;
+        end
+        else begin
+            go_right = node_eval;
+        end
+    end
+end
+
+// compute left child offset,
+// two basic node words, plus one word per missing child, plus the large bitset (op_type 3 only)
+assign left_child_offset_1 = 11'b00000000010 + ((tree_node_basic_s5.node_type.left_child)? 0 : 1'b1) + ((tree_node_basic_s5.node_type.right_child)? 0 : 1'b1);
+assign left_child_offset_2 = left_child_offset_1 + ( (tree_node_basic_s5.node_type.op_type != 3)? 11'd0 : tree_node_basic_s5.word_1_l[10:0]);
+// compute right child offset
+assign right_child_offset  = left_child_offset_2 + tree_node_basic_s5.right_child_offset[10:0];
+assign next_child_offset   = (go_right && tree_node_basic_s5.node_type.left_child)? right_child_offset : left_child_offset_2;
+
+// compute leaf node result offset
+assign result_offset  = (go_right)? 11'b00000000010 + ((tree_node_basic_s5.node_type.left_child)? 0 : 1'b1) : 11'b00000000010;
+assign next_node_leaf = (go_right)? !tree_node_basic_s5.node_type.right_child : !tree_node_basic_s5.node_type.left_child;
+
+// Next level node address
+assign next_node_address = curr_node_instr_s5.node_address + ( (next_node_leaf)? result_offset : next_child_offset);
+
+// The level saturates at tree_depth; a leaf reached above the last level turns the
+// instruction into a NOP for the remaining levels.
+assign next_node_level = (curr_node_instr_s5.node_level == (tree_depth))? (tree_depth) : curr_node_instr_s5.node_level + 1'b1;
+assign next_node_nop   = curr_node_instr_s5.node_nop || ((curr_node_instr_s5.node_level < (tree_depth) && curr_node_instr_s5.leaf_node)? 1'b1 : 1'b0);
+
+assign curr_node_instr_s5_1 = '{ node_address: next_node_address,
+                                 tuple_offset: curr_node_instr_s5.tuple_offset,
+                                 node_level  : next_node_level,
+                                 empty_tree  : curr_node_instr_s5.empty_tree,
+                                 node_nop    : next_node_nop,
+                                 last_tree   : curr_node_instr_s5.last_tree,
+                                 leaf_node   : ( curr_node_instr_s5.leaf_node || next_node_leaf),
+                                 node_res_val: curr_node_instr_s5.node_res_val
+                                 };
+// Pipeline to next stage
+// Only instructions that have not yet reached tree_depth are fed back to stage 1.
+delay #(.DATA_WIDTH( $bits(NodeInstruction_t) ),
+        .DELAY_CYCLES(1)
+       ) PickBranchStageDelay(
+    .clk            (clk),
+    .rst_n          (rst_n),
+    .data_in        ( curr_node_instr_s5_1 ),   //
+    .data_in_valid  ( curr_node_instr_valid_s5 && (curr_node_instr_s5.node_level < (tree_depth)) ),
+    .data_out       ( next_node_instr ),
+    .data_out_valid ( next_node_instr_valid )
+    );
+
+
+// Result is emitted when a valid instruction sits at the last level.
+assign tree_eval_result       = (curr_node_instr_s5.empty_tree)? 32'b0 : curr_node_instr_s5.node_res_val;
+assign tree_eval_result_valid = curr_node_instr_valid_s5 && (curr_node_instr_s5.node_level == (tree_depth) );
+assign tree_eval_last         = curr_node_instr_s5.last_tree;
+
+
+
+endmodule
\ No newline at end of file
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/processing_element.sv b/hw/hdl/operators/dtengine/gbm/inferCore/processing_element.sv
new file mode 100644
index 00000000..19194934
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/processing_element.sv
@@ -0,0 +1,442 @@
+
+/*
+ * Copyright 2019 - 2020 Systems Group, ETH Zurich
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or
substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+import DTPackage::*;
+
+/*
+ * processing_element
+ *
+ * Wraps one pe_datapath instance together with its tree memory, its tuple
+ * feature memory and the logic that issues tree instructions to the datapath.
+ *
+ * Input stream (data_line_in*):
+ *   - Every input line and its flags are registered once and re-emitted on
+ *     data_line_out* (presumably towards the next PE in a chain - confirm
+ *     against the instantiating module).
+ *   - data_line_in_ctrl : latches the configuration fields
+ *                         [4:0]   num_trees_per_pu_minus_one
+ *                         [11:8]  tree_depth
+ *                         [24:16] num_lines_per_tuple
+ *   - data_line_in_prog : when data_line_in_pu == PE_ID the line is written
+ *                         into the tree memory; data_line_in_last marks the
+ *                         last line of a tree.
+ *   - data_line_in_valid: the line is written into the feature memory;
+ *                         data_line_in_last marks the last line of a tuple and
+ *                         enqueues one entry in the tuple instruction FIFO.
+ *
+ * Output:
+ *   - pu_tree_leaf_out* : registered copy of the datapath result signals.
+ *
+ * Parameters:
+ *   - PE_ID : index of this PE; used to select programming lines, to derive
+ *             INSTRUCTION_DELAY and in the data_line_in_ready threshold.
+ */
+module processing_element #(parameter PE_ID = 0 )
+   (
+    input  wire                                  clk,
+    input  wire                                  rst_n,
+
+    input  wire [DATA_LINE_WIDTH-1:0]            data_line_in,
+    input  wire                                  data_line_in_valid,
+    input  wire                                  data_line_in_last,
+    input  wire                                  data_line_in_ctrl,
+    input  wire                                  data_line_in_prog,
+    input  wire [NUM_PUS_PER_CLUSTER_BITS-1:0]   data_line_in_pu,
+    output reg                                   data_line_in_ready,
+
+    output reg  [DATA_LINE_WIDTH-1:0]            data_line_out,
+    output reg                                   data_line_out_valid,
+    output reg                                   data_line_out_ctrl,
+    output reg                                   data_line_out_last,
+    output reg                                   data_line_out_prog,
+    output reg  [NUM_PUS_PER_CLUSTER_BITS-1:0]   data_line_out_pu,
+
+    output reg  [DATA_PRECISION-1:0]             pu_tree_leaf_out,
+    output reg                                   pu_tree_leaf_out_valid,
+    output reg                                   pu_tree_leaf_out_last
+    );
+
+// Minimum age (in cycles) of a queued tuple before its instructions are issued.
+localparam INSTRUCTION_DELAY = NUM_PUS_PER_CLUSTER - PE_ID - 1; // PU pipeline depth-1 - PU_ID
+localparam TREE_MODE         = 1'b0;
+localparam DATA_MODE         = 1'b1;
+
+//------------------------------ Signals Declarations ------------------------------//
+
+// tree instruction handshake towards the datapath
+TreeInstruction_t                 dp_tree_instruction;
+wire                              dp_tree_instruction_valid;
+wire                              dp_tree_instruction_ready;
+
+// feature memory read port
+wire                              dp_data_mem_ren;
+wire  [31:0]                      dp_data_mem_feature;
+wire  [TUPLE_OFFSET_BITS:0]       dp_data_mem_rd_addr;
+
+// tree memory read port A
+wire  [TREE_OFFSET_BITS:0]        dp_tree_mem_rd_addr_a;
+wire                              dp_tree_mem_ren_a;
+TreeNode_t                        dp_tree_node_basic;
+wire  [63:0]                      dp_tree_node_basic_vec;
+
+// tree memory read port B
+wire  [TREE_OFFSET_BITS:0]        dp_tree_mem_rd_addr_b;
+wire                              dp_tree_mem_ren_b;
+wire  [31:0]                      dp_node_large_bitset;
+
+// result output
+wire  [31:0]                      dp_tree_eval_result;
+wire                              dp_tree_eval_last;
+wire                              dp_tree_eval_result_valid;
+
+
+// instruction issue state
+reg   [1:0]                       curr_tree_id;
+reg   [8:0]                       curr_tuple_offset;
+reg   [9:0]                       time_stamp;
+reg                               tuple_old_enough_set;
+
+wire                              tuple_instr_fifo_ready;
+wire                              tuple_instr_fifo_valid;
+wire  [9:0]                       tuple_instr_fifo_dout;
+wire                              tuple_instr_re;
+wire                              tuple_old_enough;
+
+
+// feature memory bookkeeping
+wire                              curr_feature_done;
+reg   [9:0]                       features_mem_count;
+reg   [8:0]                       features_wr_addr;
+
+// tree programming bookkeeping
+reg   [2:0]                       local_num_trees;
+reg   [2:0]                       local_num_trees_minus_one;
+reg   [TREE_OFFSET_BITS-1:0]      received_tree_lines;
+reg   [TREE_OFFSET_BITS-1:0]      tree_prog_addr;
+reg   [TREE_OFFSET_BITS-1:0]      tree_offsets[3:0];
+wire  [TREE_OFFSET_BITS  :0]      tree_mem_addr_a;
+
+// configuration latched from control lines
+reg   [4:0]                       num_trees_per_pu_minus_one;
+reg   [3:0]                       tree_depth;
+reg   [8:0]                       num_lines_per_tuple;
+
+// debug capture registers
+reg   [7:0]                       tree_addra[5:0];
+reg   [7:0]                       tree_addrb[5:0];
+
+reg   [2:0]                       rd_a_count;
+reg   [2:0]                       rd_b_count;
+
+reg                               first_word_correct;
+reg   [63:0]                      first_word;
+reg                               pe_state;
+
+reg   [63:0]                      tree_mem_out_a;
+reg   [31:0]                      data_mem_out_a;
+reg                               tree_out_a_set;
+reg                               data_out_a_set;
+wire                              dp_tree_valid_out1;
+wire                              dp_data_mem_feature_valid;
+
+// Debug vectors. This module has no debug ports, so they are kept as internal
+// nets. They are declared explicitly with their full 96-bit width: without a
+// declaration the assigns below would create implicit 1-bit nets and silently
+// truncate the concatenations.
+wire  [95:0]                      pu_debug_counters;
+wire  [95:0]                      pu_debug_counters2;
+
+//------------------------------ PU Programming Logic ------------------------------//
+
+// Forward the input line to the output with one cycle of latency and track
+// whether the PE has started receiving tuple data.
+always @(posedge clk) begin
+    if(~rst_n) begin
+        data_line_out_valid <= 0;
+        data_line_out_last  <= 0;
+        data_line_out_ctrl  <= 0;
+        data_line_out_prog  <= 0;
+
+        pe_state            <= TREE_MODE;
+    end
+    else begin
+        data_line_out_valid <= data_line_in_valid;
+        data_line_out_last  <= data_line_in_last;
+        data_line_out_ctrl  <= data_line_in_ctrl;
+        data_line_out_prog  <= data_line_in_prog;
+
+        // Sticky: the first valid data line switches to DATA_MODE until reset.
+        if(data_line_in_valid | (pe_state == DATA_MODE) ) begin
+            pe_state <= DATA_MODE;
+        end
+    end
+    // Payload registers are not reset; they are qualified by the flags above.
+    data_line_out    <= data_line_in;
+    data_line_out_pu <= data_line_in_pu;
+end
+
+// Latch the configuration carried by a control line.
+always @(posedge clk) begin
+    if(~rst_n) begin
+        num_trees_per_pu_minus_one <= 5'b0;
+        tree_depth                 <= 4'b0;
+        num_lines_per_tuple        <= 9'b0;
+    end
+    else if(data_line_in_ctrl) begin
+        num_trees_per_pu_minus_one <= data_line_in[4:0];
+        tree_depth                 <= data_line_in[8+4-1:8];
+        num_lines_per_tuple        <= data_line_in[16+9-1:16];
+    end
+end
+
+//assign pu_debug_counters = {local_num_trees, received_tree_lines[8:0], tree_offsets[3][8:0], tree_offsets[2][8:0], tree_offsets[1][8:0], tree_offsets[0][8:0]};
+// 12 x 8 bit: low address bytes of the first six port-B and port-A reads.
+assign pu_debug_counters  = {tree_addrb[5], tree_addrb[4], tree_addrb[3], tree_addrb[2], tree_addrb[1], tree_addrb[0],
+                             tree_addra[5], tree_addra[4], tree_addra[3], tree_addra[2], tree_addra[1], tree_addra[0]};
+// 32 + 64 bit: low half of the first tree word read back, first programmed word.
+assign pu_debug_counters2 = {tree_mem_out_a[31:0], first_word};
+
+//---------------------------------- Memory Banks ----------------------------------//
+
+// Debug: record the low 8 address bits of the first six reads on each tree
+// memory port (rd_a_count / rd_b_count select the slot and saturate at 6).
+genvar j;
+generate for ( j = 0; j < 6; j=j+1) begin
+    always@(posedge clk) begin
+        if(~rst_n) begin
+            tree_addra[j] <= 0;
+            tree_addrb[j] <= 0;
+        end
+        else begin
+            if(dp_tree_mem_ren_a && (rd_a_count == j)) begin
+                tree_addra[j] <= tree_mem_addr_a[7:0];
+            end
+
+            if(dp_tree_mem_ren_b && (rd_b_count == j)) begin
+                tree_addrb[j] <= dp_tree_mem_rd_addr_b[7:0];
+            end
+        end
+    end
+end
+endgenerate
+
+// Debug: read counters and one-shot captures of the first memory outputs and
+// of the first word programmed into this PE.
+integer i;
+always@(posedge clk) begin
+    if(~rst_n) begin
+        rd_a_count         <= 0;
+        rd_b_count         <= 0;
+
+        tree_mem_out_a     <= 0;
+        data_mem_out_a     <= 0;
+        tree_out_a_set     <= 1'b0;
+        data_out_a_set     <= 1'b0;
+        first_word_correct <= 0;
+        first_word         <= 0;
+    end
+    else begin
+        if(dp_tree_mem_ren_a && rd_a_count < 6) begin
+            rd_a_count <= rd_a_count + 1'b1;
+        end
+
+        if(dp_tree_mem_ren_b && rd_b_count < 6) begin
+            rd_b_count <= rd_b_count + 1'b1;
+        end
+        // capture only the first valid tree word / feature after reset
+        if(dp_tree_valid_out1 && ~tree_out_a_set) begin
+            tree_mem_out_a <= dp_tree_node_basic_vec;
+            tree_out_a_set <= 1'b1;
+        end
+
+        if(dp_data_mem_feature_valid && ~data_out_a_set) begin
+            data_mem_out_a <= dp_data_mem_feature;
+            data_out_a_set <= 1'b1;
+        end
+
+        // first programming word addressed to this PE
+        if((tree_prog_addr == 0) && (data_line_in_prog && (data_line_in_pu == PE_ID) )) begin
+            first_word_correct <= data_line_in[15:0] == 16'h200c;
+            first_word         <= data_line_in;
+        end
+    end
+end
+
+
+//----------------------------- Tree Nodes Weight memory ------------------------------//
+Tree_Memory #( .DATA_WIDTH(64),
+               .ADDR_WIDTH(11) )
+TreeNodes(
+
+    .clk         ( clk ),
+    .rst_n       ( rst_n ),
+    .we          ( data_line_in_prog && (data_line_in_pu == PE_ID) ),
+    .rea         ( dp_tree_mem_ren_a ),
+    .reb         ( dp_tree_mem_ren_b ),
+    .addr_port_a ( tree_mem_addr_a ),
+    .addr_port_b ( dp_tree_mem_rd_addr_b ),
+    .din         ( data_line_in ),
+    .dout1       ( dp_tree_node_basic_vec ),
+    .valid_out1  ( dp_tree_valid_out1),
+    .dout2       ( dp_node_large_bitset ),
+    .valid_out2  ()
+);
+// Unpack the 64-bit node word into the TreeNode_t fields.
+assign dp_tree_node_basic.word_1_h              = dp_tree_node_basic_vec[63:48];
+assign dp_tree_node_basic.word_1_l              = dp_tree_node_basic_vec[47:32];
+assign dp_tree_node_basic.right_child_offset    = dp_tree_node_basic_vec[31:16];
+assign dp_tree_node_basic.node_type.op_type     = dp_tree_node_basic_vec[1:0];
+assign dp_tree_node_basic.node_type.left_child  = dp_tree_node_basic_vec[2];
+assign dp_tree_node_basic.node_type.right_child = dp_tree_node_basic_vec[3];
+assign dp_tree_node_basic.node_type.findex      = dp_tree_node_basic_vec[11:4];
+assign dp_tree_node_basic.node_type.split_dir   = dp_tree_node_basic_vec[15:12];
+
+
+// Port A address: programming address while in TREE_MODE, datapath read
+// address once the PE has switched to DATA_MODE.
+assign tree_mem_addr_a = ( pe_state == TREE_MODE )? {tree_prog_addr, 1'b0} : dp_tree_mem_rd_addr_a;
+
+// Count programmed lines and record the start offset of every tree after the
+// first one (tree_offsets[0] keeps its reset value 0).
+always @(posedge clk) begin
+    //
+    if(~rst_n) begin
+        tree_prog_addr            <= 0;
+        local_num_trees           <= 0;
+        received_tree_lines       <= 0;
+
+        local_num_trees_minus_one <= 0;
+
+        for (i = 0; i < 4; i++) begin
+            tree_offsets[ i ] <= 0;
+        end
+    end
+    else begin
+        local_num_trees_minus_one <= local_num_trees;
+        //
+        if(data_line_in_prog && (data_line_in_pu == PE_ID)) begin
+            tree_prog_addr      <= tree_prog_addr + 1'b1;
+            received_tree_lines <= received_tree_lines + 1'b1;
+
+            // last line of a tree: the next tree starts at the following line
+            if(data_line_in_last && (local_num_trees[1:0] < num_trees_per_pu_minus_one[1:0])) begin
+                tree_offsets[ local_num_trees[1:0] + 1'b1 ] <= received_tree_lines + 1'b1;
+                local_num_trees                             <= local_num_trees + 1'b1;
+            end
+        end
+    end
+end
+//--------------------------- Input tuple features memory -----------------------------//
+/* We write to the features memory when flags indicate incoming data is tuples and not programming data
+*/
+Data_Memory #( .DATA_WIDTH(64),
+               .ADDR_WIDTH(10) )
+SamplesFeatures_Mem(
+    .clk       (clk),
+    .rst_n     (rst_n),
+    .we        (data_line_in_valid),
+    .re        (dp_data_mem_ren),
+    .raddr     (dp_data_mem_rd_addr),
+    .waddr     (features_wr_addr),
+    .din       (data_line_in),
+    .dout      (dp_data_mem_feature),
+    .valid_out (dp_data_mem_feature_valid)
+);
+
+// Write pointer and occupancy of the feature memory. Occupancy grows by one
+// per written line and shrinks by a whole tuple when its last tree finishes.
+always @(posedge clk) begin
+    if(~rst_n) begin
+        features_wr_addr   <= 9'b0;
+        features_mem_count <= 0;
+    end
+    else begin
+        features_wr_addr   <= features_wr_addr + ((data_line_in_valid)? 9'd1 : 9'd0);
+        features_mem_count <= features_mem_count + ((data_line_in_valid)? 9'd1 : 9'd0) - ((curr_feature_done)? num_lines_per_tuple : 9'b0);
+    end
+end
+
+// A tuple is done when the result of its last tree leaves the datapath.
+assign curr_feature_done = dp_tree_eval_result_valid & dp_tree_eval_last;
+
+// Back-pressure: instruction FIFO has room and the feature memory is below
+// its PE-dependent fill threshold.
+assign data_line_in_ready = tuple_instr_fifo_ready & (features_mem_count < (512- PE_ID - 1));
+
+//----------------------------- Tuple Instruction FIFO -----------------------------//
+/* Once all features of a tuple are in features memory, we enqueue an instruction to execute all the
+   trees in the PU on the current tuple features, the instruction simply include the tuple offset.
+*/
+RegBasedFIFO #(.FIFO_WIDTH(10),
+               .FIFO_DEPTH_BITS(2)
+    ) tuple_instr_fifo (
+    .clk            (clk),
+    .rst_n          (rst_n),
+    .data_in        ( time_stamp ),
+    .data_in_valid  ( data_line_in_valid && data_line_in_last ),
+    .data_out_ready (tuple_instr_re),
+    .data_out       (tuple_instr_fifo_dout),
+    .data_out_valid (tuple_instr_fifo_valid),
+    .data_in_ready  (tuple_instr_fifo_ready)
+    );
+
+// Pop the FIFO entry when the instruction for the last tree is accepted.
+assign tuple_instr_re = dp_tree_instruction_ready && (tuple_old_enough || tuple_old_enough_set) && (curr_tree_id == num_trees_per_pu_minus_one);
+
+// The head tuple may be issued once more than INSTRUCTION_DELAY cycles have
+// passed since it was enqueued; time_stamp <= enqueue stamp also qualifies
+// (presumably to cover counter wrap-around - confirm).
+assign tuple_old_enough = tuple_instr_fifo_valid && ((time_stamp <= tuple_instr_fifo_dout) || ((time_stamp - tuple_instr_fifo_dout) > INSTRUCTION_DELAY));
+
+assign dp_tree_instruction_valid = tuple_instr_fifo_valid && (tuple_old_enough || tuple_old_enough_set);
+
+// One instruction per tree slot; slots above the number of programmed trees
+// are flagged empty.
+assign dp_tree_instruction = '{tree_offset:  tree_offsets[curr_tree_id],
+                               tuple_offset: curr_tuple_offset,
+                               last_tree:    (curr_tree_id == num_trees_per_pu_minus_one),
+                               empty_tree:   (curr_tree_id > local_num_trees)};
+
+// Flags, counters used in issuing tree instructions to the datapath and synchronizing logic with other PEs.
+always @(posedge clk) begin
+    if(~rst_n) begin
+        time_stamp           <= 0;
+        tuple_old_enough_set <= 0;
+        curr_tuple_offset    <= 0;
+        curr_tree_id         <= 0;
+    end
+    else begin
+        // tuple_old_enough_set: remembers that the head tuple qualified, cleared when it is popped
+        tuple_old_enough_set <= (tuple_instr_re && tuple_instr_fifo_valid)? 1'b0 : (tuple_old_enough_set || tuple_old_enough);
+
+        // time_stamp: free-running cycle counter
+        time_stamp <= time_stamp + 1'b1;
+
+        // curr_tuple_offset: advance to the next tuple when the current one is popped
+        if(tuple_instr_fifo_valid && tuple_instr_re) begin
+            curr_tuple_offset <= curr_tuple_offset + num_lines_per_tuple;
+        end
+
+        // curr_tree_id: step through the tree slots for each accepted instruction
+        if(tuple_instr_fifo_valid && dp_tree_instruction_ready && (tuple_old_enough || tuple_old_enough_set)) begin
+            if(curr_tree_id == num_trees_per_pu_minus_one) begin
+                curr_tree_id <= 0;
+            end
+            else begin
+                curr_tree_id <= curr_tree_id + 1'b1;
+            end
+        end
+    end
+end
+//------------------------------------ PE Datapath ----------------------------------//
+pe_datapath pe_datapath(
+    .clk                    (clk),
+    .rst_n                  (rst_n),
+    .tree_depth             (tree_depth),
+    // tree instruction
+    .tree_instruction       (dp_tree_instruction),
+    .tree_instruction_valid (dp_tree_instruction_valid),
+    .tree_instruction_ready (dp_tree_instruction_ready),
+    // data memory read port
+    .data_mem_rd_addr       (dp_data_mem_rd_addr),
+    .data_mem_ren           (dp_data_mem_ren),
+    .data_mem_feature       (dp_data_mem_feature),
+    // tree memory read port A
+    .tree_mem_rd_addr_a     (dp_tree_mem_rd_addr_a),
+    .tree_mem_ren_a         (dp_tree_mem_ren_a),
+    .tree_node_basic        (dp_tree_node_basic),
+    // tree memory read port B
+    .tree_mem_rd_addr_b     (dp_tree_mem_rd_addr_b),
+    .tree_mem_ren_b         (dp_tree_mem_ren_b),
+    .node_large_bitset      (dp_node_large_bitset),
+    // result output
+    .tree_eval_result       (dp_tree_eval_result),
+    .tree_eval_last         (dp_tree_eval_last),
+    .tree_eval_result_valid (dp_tree_eval_result_valid)
+);
+
+// Register the datapath result before it leaves the PE.
+always @(posedge clk) begin
+    if(~rst_n) begin
+        pu_tree_leaf_out       <= 0;
+        pu_tree_leaf_out_last  <= 0;
+        pu_tree_leaf_out_valid <= 0;
+    end
+    else begin
+        pu_tree_leaf_out       <= dp_tree_eval_result;
+        pu_tree_leaf_out_last  <= dp_tree_eval_last;
+        pu_tree_leaf_out_valid <= dp_tree_eval_result_valid;
+    end
+end
+
+
+
+endmodule // DTPU
+
+
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/processing_element_async.sv b/hw/hdl/operators/dtengine/gbm/inferCore/processing_element_async.sv
new file mode 100644
index 00000000..87929fcd
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/processing_element_async.sv
@@ -0,0 +1,453 @@
+
+/*
+ * Copyright 2019 - 2020 Systems Group, ETH Zurich
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+import DTPackage::*;
+
+/*
+ * processing_element_async
+ *
+ * Variant of the processing element that issues tree instructions as soon as
+ * a complete tuple is queued: the tuple instruction FIFO carries a 1-bit token
+ * and there is no age check before issue. It wraps one pe_datapath instance
+ * together with its tree memory and tuple feature memory.
+ *
+ * Input stream (data_line_in*):
+ *   - Every input line and its flags are registered once and re-emitted on
+ *     data_line_out*.
+ *   - data_line_in_ctrl : latches the configuration fields
+ *                         [4:0]   num_trees_per_pu_minus_one
+ *                         [11:8]  tree_depth
+ *                         [24:16] num_lines_per_tuple
+ *   - data_line_in_prog : when data_line_in_pu == PE_ID the line is written
+ *                         into the tree memory; data_line_in_last marks the
+ *                         last line of a tree.
+ *   - data_line_in_valid: the line is written into the feature memory;
+ *                         data_line_in_last marks the last line of a tuple and
+ *                         enqueues one token in the tuple instruction FIFO.
+ *   - data_line_in_ready: registered back-pressure flag.
+ *
+ * Outputs:
+ *   - pu_tree_leaf_out*  : registered copy of the datapath result signals.
+ *   - pu_debug_counters  : low address bytes of the first six reads on tree
+ *                          memory port B ([95:48]) and port A ([47:0]).
+ *   - pu_debug_counters2 : {low 32 bits of the first tree word read back,
+ *                          first 64-bit word programmed into this PE}.
+ *
+ * Parameters:
+ *   - PE_ID : index of this PE; selects programming lines and shifts the
+ *             data_line_in_ready threshold.
+ */
+module processing_element_async #(parameter PE_ID = 0 )
+   (
+    input  wire                                  clk,
+    input  wire                                  rst_n,
+
+    input  wire [DATA_LINE_WIDTH-1:0]            data_line_in,
+    input  wire                                  data_line_in_valid,
+    input  wire                                  data_line_in_last,
+    input  wire                                  data_line_in_ctrl,
+    input  wire                                  data_line_in_prog,
+    input  wire [NUM_PUS_PER_CLUSTER_BITS-1:0]   data_line_in_pu,
+    output reg                                   data_line_in_ready,
+
+    output reg  [DATA_LINE_WIDTH-1:0]            data_line_out,
+    output reg                                   data_line_out_valid,
+    output reg                                   data_line_out_ctrl,
+    output reg                                   data_line_out_last,
+    output reg                                   data_line_out_prog,
+    output reg  [NUM_PUS_PER_CLUSTER_BITS-1:0]   data_line_out_pu,
+
+    output reg  [DATA_PRECISION-1:0]             pu_tree_leaf_out,
+    output reg                                   pu_tree_leaf_out_valid,
+    output reg                                   pu_tree_leaf_out_last,
+
+    // debug counters
+    output wire [95:0]                           pu_debug_counters,
+    output wire [95:0]                           pu_debug_counters2
+
+    );
+
+// NOTE(review): INSTRUCTION_DELAY is not referenced anywhere in this variant.
+localparam INSTRUCTION_DELAY = NUM_PUS_PER_CLUSTER - PE_ID - 1; // PU pipeline depth-1 - PU_ID
+localparam TREE_MODE         = 1'b0;
+localparam DATA_MODE         = 1'b1;
+
+//------------------------------ Signals Declarations ------------------------------//
+
+// tree instruction handshake towards the datapath
+TreeInstruction_t                 dp_tree_instruction;
+wire                              dp_tree_instruction_valid;
+wire                              dp_tree_instruction_ready;
+
+// feature memory read port
+wire                              dp_data_mem_ren;
+wire  [31:0]                      dp_data_mem_feature;
+wire  [TUPLE_OFFSET_BITS:0]       dp_data_mem_rd_addr;
+
+// tree memory read port A
+wire  [TREE_OFFSET_BITS:0]        dp_tree_mem_rd_addr_a;
+wire                              dp_tree_mem_ren_a;
+TreeNode_t                        dp_tree_node_basic;
+wire  [63:0]                      dp_tree_node_basic_vec;
+
+// tree memory read port B
+wire  [TREE_OFFSET_BITS:0]        dp_tree_mem_rd_addr_b;
+wire                              dp_tree_mem_ren_b;
+wire  [31:0]                      dp_node_large_bitset;
+
+// result output
+wire  [31:0]                      dp_tree_eval_result;
+wire                              dp_tree_eval_last;
+wire                              dp_tree_eval_result_valid;
+
+
+// instruction issue state
+reg   [2:0]                       curr_tree_id;
+reg   [8:0]                       curr_tuple_offset;
+// NOTE(review): time_stamp, tuple_old_enough_set and tuple_old_enough are
+// declared but never driven or read in this variant.
+reg   [9:0]                       time_stamp;
+reg                               tuple_old_enough_set;
+
+wire                              tuple_instr_fifo_ready;
+wire                              tuple_instr_fifo_valid;
+// NOTE(review): 10 bits wide while the FIFO below is instantiated with
+// FIFO_WIDTH(1); the value is never read.
+wire  [9:0]                       tuple_instr_fifo_dout;
+wire                              tuple_instr_re;
+wire                              tuple_old_enough;
+
+
+// feature memory bookkeeping
+wire                              curr_feature_done;
+reg   [9:0]                       features_mem_count;
+reg   [8:0]                       features_wr_addr;
+
+// tree programming bookkeeping
+reg   [2:0]                       local_num_trees;
+reg   [2:0]                       local_num_trees_minus_one;
+reg   [TREE_OFFSET_BITS-1:0]      received_tree_lines;
+reg   [TREE_OFFSET_BITS-1:0]      tree_prog_addr;
+reg   [TREE_OFFSET_BITS-1:0]      tree_offsets[7:0];
+wire  [TREE_OFFSET_BITS  :0]      tree_mem_addr_a;
+
+// configuration latched from control lines
+reg   [4:0]                       num_trees_per_pu_minus_one;
+reg   [3:0]                       tree_depth;
+reg   [8:0]                       num_lines_per_tuple;
+
+// debug capture registers
+reg   [7:0]                       tree_addra[5:0];
+reg   [7:0]                       tree_addrb[5:0];
+
+reg   [2:0]                       rd_a_count;
+reg   [2:0]                       rd_b_count;
+
+reg                               first_word_correct;
+reg   [63:0]                      first_word;
+reg                               pe_state;
+
+reg   [63:0]                      tree_mem_out_a;
+reg   [31:0]                      data_mem_out_a;
+reg                               tree_out_a_set;
+reg                               data_out_a_set;
+wire                              dp_tree_valid_out1;
+wire                              dp_data_mem_feature_valid;
+
+reg   [31:0]                      num_tuples_received;
+
+//------------------------------ PU Programming Logic ------------------------------//
+
+// Forward the input line to the output with one cycle of latency, track
+// whether the PE has started receiving tuple data and count received tuples.
+always @(posedge clk) begin
+    if(~rst_n) begin
+        data_line_out_valid <= 0;
+        data_line_out_last  <= 0;
+        data_line_out_ctrl  <= 0;
+        data_line_out_prog  <= 0;
+
+        pe_state            <= TREE_MODE;
+
+        num_tuples_received <= 0;
+    end
+    else begin
+        data_line_out_valid <= data_line_in_valid;
+        data_line_out_last  <= data_line_in_last;
+        data_line_out_ctrl  <= data_line_in_ctrl;
+        data_line_out_prog  <= data_line_in_prog;
+
+        // Sticky: the first valid data line switches to DATA_MODE until reset.
+        if(data_line_in_valid | (pe_state == DATA_MODE) ) begin
+            pe_state <= DATA_MODE;
+        end
+
+        // one tuple per valid line flagged as last
+        if(data_line_in_valid && data_line_in_last) begin
+            num_tuples_received <= num_tuples_received + 1'b1;
+        end
+    end
+    // Payload registers are not reset; they are qualified by the flags above.
+    data_line_out    <= data_line_in;
+    data_line_out_pu <= data_line_in_pu;
+end
+
+// Latch the configuration carried by a control line.
+always @(posedge clk) begin
+    if(~rst_n) begin
+        num_trees_per_pu_minus_one <= 5'b0;
+        tree_depth                 <= 4'b0;
+        num_lines_per_tuple        <= 9'b0;
+    end
+    else if(data_line_in_ctrl) begin
+        num_trees_per_pu_minus_one <= data_line_in[4:0];
+        tree_depth                 <= data_line_in[8+4-1:8];
+        num_lines_per_tuple        <= data_line_in[16+9-1:16];
+    end
+end
+
+//assign pu_debug_counters = {local_num_trees, received_tree_lines[8:0], tree_offsets[3][8:0], tree_offsets[2][8:0], tree_offsets[1][8:0], tree_offsets[0][8:0]};
+assign pu_debug_counters  = {tree_addrb[5], tree_addrb[4], tree_addrb[3], tree_addrb[2], tree_addrb[1], tree_addrb[0],
+                             tree_addra[5], tree_addra[4], tree_addra[3], tree_addra[2], tree_addra[1], tree_addra[0]};
+assign pu_debug_counters2 = {tree_mem_out_a[31:0], first_word};
+
+//---------------------------------- Memory Banks ----------------------------------//
+
+// Debug: record the low 8 address bits of the first six reads on each tree
+// memory port (rd_a_count / rd_b_count select the slot and saturate at 6).
+genvar j;
+generate for ( j = 0; j < 6; j=j+1) begin
+    always@(posedge clk) begin
+        if(~rst_n) begin
+            tree_addra[j] <= 0;
+            tree_addrb[j] <= 0;
+        end
+        else begin
+            if(dp_tree_mem_ren_a && (rd_a_count == j)) begin
+                tree_addra[j] <= tree_mem_addr_a[7:0];
+            end
+
+            if(dp_tree_mem_ren_b && (rd_b_count == j)) begin
+                tree_addrb[j] <= dp_tree_mem_rd_addr_b[7:0];
+            end
+        end
+    end
+end
+endgenerate
+
+// Debug: read counters and one-shot captures of the first memory outputs and
+// of the first word programmed into this PE.
+integer i;
+always@(posedge clk) begin
+    if(~rst_n) begin
+        rd_a_count         <= 0;
+        rd_b_count         <= 0;
+
+        tree_mem_out_a     <= 0;
+        data_mem_out_a     <= 0;
+        tree_out_a_set     <= 1'b0;
+        data_out_a_set     <= 1'b0;
+        first_word_correct <= 0;
+        first_word         <= 0;
+    end
+    else begin
+        if(dp_tree_mem_ren_a && rd_a_count < 6) begin
+            rd_a_count <= rd_a_count + 1'b1;
+        end
+
+        if(dp_tree_mem_ren_b && rd_b_count < 6) begin
+            rd_b_count <= rd_b_count + 1'b1;
+        end
+        // capture only the first valid tree word / feature after reset
+        if(dp_tree_valid_out1 && ~tree_out_a_set) begin
+            tree_mem_out_a <= dp_tree_node_basic_vec;
+            tree_out_a_set <= 1'b1;
+        end
+
+        if(dp_data_mem_feature_valid && ~data_out_a_set) begin
+            data_mem_out_a <= dp_data_mem_feature;
+            data_out_a_set <= 1'b1;
+        end
+
+        // First programming word addressed to this PE. The expected pattern
+        // needs a 16-bit literal: a 4-bit sized literal would be truncated to
+        // 4'hC and the comparison would test for 16'h000C instead.
+        if((tree_prog_addr == 0) && (data_line_in_prog && (data_line_in_pu == PE_ID) )) begin
+            first_word_correct <= data_line_in[15:0] == 16'h200c;
+            first_word         <= data_line_in;
+        end
+    end
+end
+
+
+//----------------------------- Tree Nodes Weight memory ------------------------------//
+Tree_Memory #( .DATA_WIDTH(64),
+               .ADDR_WIDTH(11) )
+TreeNodes(
+
+    .clk         ( clk ),
+    .rst_n       ( rst_n ),
+    .we          ( data_line_in_prog && (data_line_in_pu == PE_ID) ),
+    .rea         ( dp_tree_mem_ren_a ),
+    .reb         ( dp_tree_mem_ren_b ),
+    .addr_port_a ( tree_mem_addr_a ),
+    .addr_port_b ( dp_tree_mem_rd_addr_b ),
+    .din         ( data_line_in ),
+    .dout1       ( dp_tree_node_basic_vec ),
+    .valid_out1  ( dp_tree_valid_out1),
+    .dout2       ( dp_node_large_bitset ),
+    .valid_out2  ()
+);
+// Unpack the 64-bit node word into the TreeNode_t fields.
+assign dp_tree_node_basic.word_1_h              = dp_tree_node_basic_vec[63:48];
+assign dp_tree_node_basic.word_1_l              = dp_tree_node_basic_vec[47:32];
+assign dp_tree_node_basic.right_child_offset    = dp_tree_node_basic_vec[31:16];
+assign dp_tree_node_basic.node_type.op_type     = dp_tree_node_basic_vec[1:0];
+assign dp_tree_node_basic.node_type.left_child  = dp_tree_node_basic_vec[2];
+assign dp_tree_node_basic.node_type.right_child = dp_tree_node_basic_vec[3];
+assign dp_tree_node_basic.node_type.findex      = dp_tree_node_basic_vec[11:4];
+assign dp_tree_node_basic.node_type.split_dir   = dp_tree_node_basic_vec[15:12];
+
+
+// Port A address: programming address while in TREE_MODE, datapath read
+// address once the PE has switched to DATA_MODE.
+assign tree_mem_addr_a = ( pe_state == TREE_MODE )? {tree_prog_addr, 1'b0} : dp_tree_mem_rd_addr_a;
+
+// Count programmed lines and record the start offset of every tree after the
+// first one (tree_offsets[0] keeps its reset value 0).
+always @(posedge clk) begin
+    //
+    if(~rst_n) begin
+        tree_prog_addr            <= 0;
+        local_num_trees           <= 0;
+        received_tree_lines       <= 0;
+
+        local_num_trees_minus_one <= 0;
+
+        for (i = 0; i < 8; i++) begin
+            tree_offsets[ i ] <= 0;
+        end
+    end
+    else begin
+        local_num_trees_minus_one <= local_num_trees;
+        //
+        if(data_line_in_prog && (data_line_in_pu == PE_ID)) begin
+            tree_prog_addr      <= tree_prog_addr + 1'b1;
+            received_tree_lines <= received_tree_lines + 1'b1;
+
+            // last line of a tree: the next tree starts at the following line
+            if(data_line_in_last && (local_num_trees[2:0] < num_trees_per_pu_minus_one[2:0])) begin
+                tree_offsets[ local_num_trees[2:0] + 1'b1 ] <= received_tree_lines + 1'b1;
+                local_num_trees                             <= local_num_trees + 1'b1;
+            end
+        end
+    end
+end
+//--------------------------- Input tuple features memory -----------------------------//
+/* We write to the features memory when flags indicate incoming data is tuples and not programming data
+*/
+Data_Memory #( .DATA_WIDTH(64),
+               .ADDR_WIDTH(10) )
+SamplesFeatures_Mem(
+    .clk       (clk),
+    .rst_n     (rst_n),
+    .we        (data_line_in_valid),
+    .re        (dp_data_mem_ren),
+    .raddr     (dp_data_mem_rd_addr),
+    .waddr     (features_wr_addr),
+    .din       (data_line_in),
+    .dout      (dp_data_mem_feature),
+    .valid_out (dp_data_mem_feature_valid)
+);
+
+// Write pointer and occupancy of the feature memory. Occupancy grows by one
+// per written line and shrinks by a whole tuple when its last tree finishes.
+always @(posedge clk) begin
+    if(~rst_n) begin
+        features_wr_addr   <= 9'b0;
+        features_mem_count <= 0;
+    end
+    else begin
+        features_wr_addr   <= features_wr_addr + ((data_line_in_valid)? 9'd1 : 9'd0);
+        features_mem_count <= features_mem_count + ((data_line_in_valid)? 9'd1 : 9'd0) - ((curr_feature_done)? num_lines_per_tuple : 9'b0);
+    end
+end
+
+// A tuple is done when the result of its last tree leaves the datapath.
+assign curr_feature_done = dp_tree_eval_result_valid & dp_tree_eval_last;
+
+
+// Registered back-pressure: instruction FIFO has room and the feature memory
+// is below its PE-dependent fill threshold.
+always @(posedge clk) begin
+    if(~rst_n) begin
+        data_line_in_ready <= 0;
+    end
+    else begin
+        data_line_in_ready <= tuple_instr_fifo_ready & (features_mem_count < (512- PE_ID - 4));
+    end
+end
+
+//----------------------------- Tuple Instruction FIFO -----------------------------//
+/* Once all features of a tuple are in features memory, we enqueue an instruction to execute all the
+   trees in the PU on the current tuple features, the instruction simply include the tuple offset.
+*/
+RegBasedFIFO #(.FIFO_WIDTH(1),
+               .FIFO_DEPTH_BITS(2)
+    ) tuple_instr_fifo (
+    .clk            (clk),
+    .rst_n          (rst_n),
+    .data_in        ( 1'b1 ),
+    .data_in_valid  ( data_line_in_valid && data_line_in_last ),
+    .data_out_ready (tuple_instr_re),
+    .data_out       (tuple_instr_fifo_dout),
+    .data_out_valid (tuple_instr_fifo_valid),
+    .data_in_ready  (tuple_instr_fifo_ready)
+    );
+
+// Pop the FIFO token when the instruction for the last tree is accepted.
+assign tuple_instr_re = dp_tree_instruction_ready && (curr_tree_id == num_trees_per_pu_minus_one);
+
+// An instruction is offered whenever a tuple token is queued.
+assign dp_tree_instruction_valid = tuple_instr_fifo_valid;
+
+// One instruction per tree slot; slots above the number of programmed trees
+// are flagged empty.
+assign dp_tree_instruction = '{tree_offset:  tree_offsets[curr_tree_id],
+                               tuple_offset: curr_tuple_offset,
+                               last_tree:    (curr_tree_id == num_trees_per_pu_minus_one),
+                               empty_tree:   (curr_tree_id > local_num_trees)};
+
+// Flags, counters used in issuing tree instructions to the datapath and synchronizing logic with other PEs.
+always @(posedge clk) begin
+    if(~rst_n) begin
+        curr_tuple_offset <= 0;
+        curr_tree_id      <= 0;
+    end
+    else begin
+        // curr_tuple_offset: advance to the next tuple when the current one is popped
+        if(tuple_instr_fifo_valid && tuple_instr_re) begin
+            curr_tuple_offset <= curr_tuple_offset + num_lines_per_tuple;
+        end
+
+        // curr_tree_id: step through the tree slots for each accepted instruction
+        if(tuple_instr_fifo_valid && dp_tree_instruction_ready) begin
+            if(curr_tree_id == num_trees_per_pu_minus_one) begin
+                curr_tree_id <= 0;
+            end
+            else begin
+                curr_tree_id <= curr_tree_id + 1'b1;
+            end
+        end
+    end
+end
+//------------------------------------ PE Datapath ----------------------------------//
+pe_datapath pe_datapath(
+    .clk                    (clk),
+    .rst_n                  (rst_n),
+    .tree_depth             (tree_depth),
+    // tree instruction
+    .tree_instruction       (dp_tree_instruction),
+    .tree_instruction_valid (dp_tree_instruction_valid),
+    .tree_instruction_ready (dp_tree_instruction_ready),
+    // data memory read port
+    .data_mem_rd_addr       (dp_data_mem_rd_addr),
+    .data_mem_ren           (dp_data_mem_ren),
+    .data_mem_feature       (dp_data_mem_feature),
+    // tree memory read port A
+    .tree_mem_rd_addr_a     (dp_tree_mem_rd_addr_a),
+    .tree_mem_ren_a         (dp_tree_mem_ren_a),
+    .tree_node_basic        (dp_tree_node_basic),
+    // tree memory read port B
+    .tree_mem_rd_addr_b     (dp_tree_mem_rd_addr_b),
+    .tree_mem_ren_b         (dp_tree_mem_ren_b),
+    .node_large_bitset      (dp_node_large_bitset),
+    // result output
+    .tree_eval_result       (dp_tree_eval_result),
+    .tree_eval_last         (dp_tree_eval_last),
+    .tree_eval_result_valid (dp_tree_eval_result_valid)
+);
+
+
+// Register the datapath result before it leaves the PE.
+always @(posedge clk) begin
+    if(~rst_n) begin
+        pu_tree_leaf_out       <= 0;
+        pu_tree_leaf_out_last  <= 0;
+        pu_tree_leaf_out_valid <= 0;
+    end
+    else begin
+        pu_tree_leaf_out       <= dp_tree_eval_result;
+        pu_tree_leaf_out_last  <= dp_tree_eval_last;
+        pu_tree_leaf_out_valid <= dp_tree_eval_result_valid;
+    end
+end
+
+
+endmodule // DTPU
+
+
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/quick_fifo.sv b/hw/hdl/operators/dtengine/gbm/inferCore/quick_fifo.sv
new file mode 100644
index
00000000..79360b2e --- /dev/null +++ b/hw/hdl/operators/dtengine/gbm/inferCore/quick_fifo.sv @@ -0,0 +1,149 @@ + +/* + * Copyright 2019 - 2020 Systems Group, ETH Zurich + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+
+// quick_fifo
+//
+// Single-clock FIFO whose storage is the external `bram` module. Data read
+// from the memory passes through a two-deep output pipeline tracked by
+// valid_t1 (memory output stage) and valid_t2 (dout stage); the memory is
+// read ahead whenever that pipeline has room.
+//
+//   we / din      : write request and data; ignored by the memory while `full`.
+//   re            : read request; consumes `dout` when `valid` is high.
+//   valid / dout  : registered output word and its qualifier.
+//   count         : number of words accounted to the FIFO (pipeline included).
+//   empty / full / almostfull : registered status flags derived from `count`.
+module quick_fifo #(
+    parameter FIFO_WIDTH = 32,
+    parameter FIFO_DEPTH_BITS = 8,
+    parameter FIFO_ALMOSTFULL_THRESHOLD = 2**FIFO_DEPTH_BITS - 4
+) (
+    input  wire                         clk,
+    input  wire                         reset_n,
+
+    input  wire                         we,          // input write enable
+    input  wire [FIFO_WIDTH - 1:0]      din,         // input write data with configurable width
+
+    input  wire                         re,          // input read enable
+    output reg                          valid,       // dout valid
+    output reg  [FIFO_WIDTH - 1:0]      dout,        // output read data with configurable width
+
+    output reg  [FIFO_DEPTH_BITS - 1:0] count,       // output FIFOcount
+    output reg                          empty,       // output FIFO empty
+    output reg                          full,        // output FIFO full
+    output reg                          almostfull   // output configurable programmable full/ almost full
+);
+
+    // Memory read / write pointers.
+    reg [FIFO_DEPTH_BITS - 1:0] rp = 0;
+    reg [FIFO_DEPTH_BITS - 1:0] wp = 0;
+
+    // Occupancy of the memory alone (output pipeline excluded).
+    reg [FIFO_DEPTH_BITS - 1:0] mem_count = 0;
+    reg                         mem_empty = 1'b1;
+
+    // Output pipeline occupancy flags; valid0 is updated exactly like valid_t1.
+    reg valid_t1 = 0, valid_t2 = 0;
+    reg valid0 = 0;
+
+    wire [FIFO_WIDTH-1:0] dout_mem;
+
+    // Both output stages hold data.
+    wire pipe_full   = valid_t1 & valid_t2;
+    // Advance the memory read side when the pipeline has room or is being drained.
+    wire remem       = re | ~pipe_full;
+    // Writes are dropped while the FIFO reports full.
+    wire wemem       = we & ~full;
+    // A memory read that actually returns a stored word.
+    wire remem_valid = remem & ~mem_empty;
+    // A word leaves the dout stage this cycle.
+    wire pop         = re & valid_t2;
+
+    bram #(.DATA_WIDTH(FIFO_WIDTH),
+           .ADDR_WIDTH(FIFO_DEPTH_BITS)) fifo_mem(
+        .clk   (clk),
+        .we    (wemem),
+        .re    (remem),
+        .raddr (rp),
+        .waddr (wp),
+        .din   (din),
+        .dout  (dout_mem)
+    );
+
+    // data: dout is refreshed when it is empty or being consumed, held otherwise
+    always @(posedge clk) begin
+        if (re | ~valid_t2) begin
+            dout <= dout_mem;
+        end
+    end
+
+    // valids, flags
+    always @(posedge clk) begin
+        if (~reset_n) begin
+            rp         <= 0;
+            wp         <= 0;
+            mem_count  <= 'b0;
+            mem_empty  <= 1'b1;
+            valid_t1   <= 1'b0;
+            valid_t2   <= 1'b0;
+            valid0     <= 0;
+            valid      <= 0;
+            count      <= 0;
+            empty      <= 1'b1;
+            full       <= 1'b0;
+            almostfull <= 1'b0;
+        end
+        else begin
+            // Output-stage valid: reloads from the previous stage when free or
+            // consumed, otherwise it keeps holding its (valid) word.
+            if (re | ~valid) begin
+                valid <= valid0;
+            end
+            if (remem) begin
+                valid0 <= ~mem_empty;
+            end
+
+            if (re | ~valid_t2) begin
+                valid_t2 <= valid_t1;
+            end
+            if (remem) begin
+                valid_t1 <= ~mem_empty;
+            end
+
+            // Pointers
+            if (remem_valid) begin
+                rp <= rp + 1'b1;
+            end
+            if (wemem) begin
+                wp <= wp + 1'b1;
+            end
+
+            // mem_empty
+            if (we) begin
+                mem_empty <= 1'b0;
+            end
+            else if (remem & (mem_count == 1'b1)) begin
+                mem_empty <= 1'b1;
+            end
+
+            // mem_count: unchanged on simultaneous read and write
+            if (wemem & ~remem_valid) begin
+                mem_count <= mem_count + 1'b1;
+            end
+            else if (~wemem & remem_valid) begin
+                mem_count <= mem_count - 1'b1;
+            end
+
+            // empty
+            if (we) begin
+                empty <= 1'b0;
+            end
+            else if ((pop & ~valid_t1) & (count == 1'b1)) begin
+                empty <= 1'b1;
+            end
+
+            // count: unchanged on simultaneous push and pop
+            if (wemem & ~pop) begin
+                count <= count + 1'b1;
+            end
+            else if (~wemem & pop) begin
+                count <= count - 1'b1;
+            end
+
+            // Raise the threshold flags on a write without a read ...
+            if (we & ~re) begin
+                if (count == (2**FIFO_DEPTH_BITS-1)) begin
+                    full <= 1'b1;
+                end
+                if (count == (FIFO_ALMOSTFULL_THRESHOLD-1)) begin
+                    almostfull <= 1'b1;
+                end
+            end
+            // ... and lower them on a read without an accepted write.
+            if ((~we | full) & re) begin
+                full <= 1'b0;
+                if (count == FIFO_ALMOSTFULL_THRESHOLD) begin
+                    almostfull <= 1'b0;
+                end
+            end
+        end
+    end
+
+endmodule
diff --git a/hw/hdl/operators/dtengine/gbm/inferCore/smart_shifter.sv b/hw/hdl/operators/dtengine/gbm/inferCore/smart_shifter.sv
new file mode 100644
index 00000000..65331cae
--- /dev/null
+++ b/hw/hdl/operators/dtengine/gbm/inferCore/smart_shifter.sv
@@ -0,0 +1,83 @@
+
+/*
+ * Copyright 2019 - 2020 Systems Group, ETH Zurich
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+// smart_shifter
+//
+// Pipelined right shifter: a 1024-bit input is shifted right by
+// inOffs * 32 bits, one 32-bit step per pipeline level, and the low 512 bits
+// of the result are presented on outData.
+//
+// There are 15 shift levels plus the output register, so outData lags the
+// input by 16 clock cycles; inValid/inLast are delayed by the same amount
+// through the external `delay` module. The data path has no reset.
+//
+// NOTE: the widths are hard-coded (1024/512-bit data, 4-bit offset); the two
+// module parameters are not referenced in the body.
+module smart_shifter #(parameter ELEMENTS_PER_CL      = 16,
+                       parameter ELEMENTS_PER_CL_BITS = 4)
+    (
+    input  wire          clk,
+    input  wire          rst_n,
+
+    input  wire          inValid,
+    input  wire          inLast,
+    input  wire [3:0]    inOffs,
+    input  wire [1023:0] inData,
+
+    output wire          outValid,
+    output wire          outLast,
+    output reg  [511:0]  outData
+    );
+
+    // Per-level data and remaining shift count.
+    reg [1023:0] shData [14:0];
+    reg [3:0]    shOffs [14:0];
+
+    // True while the incoming word still has 32-bit steps left to shift.
+    wire in_shift_pending = |inOffs;
+
+    integer lvl;
+
+    always @(posedge clk) begin
+        // Level 0: take the input, shifting one step if any shift is requested.
+        shData[0] <= in_shift_pending ? {32'b0, inData[1023:32]} : inData;
+        shOffs[0] <= in_shift_pending ? (inOffs - 4'd1)          : inOffs;
+
+        // Levels 1..14: each level performs at most one further 32-bit step
+        // and decrements the remaining count.
+        for (lvl = 1; lvl < 15; lvl = lvl + 1) begin
+            shData[lvl] <= (|shOffs[lvl-1]) ? {32'b0, shData[lvl-1][1023:32]} : shData[lvl-1];
+            shOffs[lvl] <= (|shOffs[lvl-1]) ? (shOffs[lvl-1] - 4'd1)          : shOffs[lvl-1];
+        end
+
+        // Output register: keep the low half of the fully shifted word.
+        outData <= shData[14][511:0];
+    end
+
+    // Control signals travel alongside the data with matching latency.
+    delay #(.DATA_WIDTH(1),
+            .DELAY_CYCLES(16)
+    ) validDelay(
+        .clk            (clk),
+        .rst_n          (rst_n),
+        .data_in        ( inLast ),
+        .data_in_valid  ( inValid ),
+        .data_out       ( outLast ),
+        .data_out_valid ( outValid )
+    );
+
+endmodule
diff --git a/hw/hdl/operators/dtengine/xgboost/Core.sv b/hw/hdl/operators/dtengine/xgboost/Core.sv
new file mode 100644
index 00000000..f40b8616
--- /dev/null
+++ b/hw/hdl/operators/dtengine/xgboost/Core.sv
@@ -0,0 +1,528 @@
+
+/*
+
+    The Core module is where computations happen
+
+    core_data_in carry a stream of Trees/Data for processing in the core
+
+
tuple_out_data carries the result of inference on one tuple, this can be
+    a partial result if not the complete model is stored in the core or the
+    full result if the complete model fits in the core.
+
+*/
+
+
+import DTEngine_Types::*;
+
+
+// Core
+//
+// Data flow, as implemented below:
+//   1. Incoming lines (CoreDataIn) are buffered in InDataFIFO.
+//   2. Each 512-bit FIFO word is forwarded as two 256-bit halves
+//      (data_line_part selects the half) into a registered fan-out tree with
+//      DATA_LINE_DISTR_LEVELS levels, one leaf per DTPU cluster.
+//   3. A rotating schedule (RLS) selects which clusters accept each line:
+//      prog_schedule for programming lines, proc_schedule for data lines.
+//   4. Cluster partial results are collected round-robin (curr_cluster) and
+//      summed by FPAggregator into tuple_out_data.
+//
+// start_core acts as a synchronous restart: it clears the FSM and counters;
+// one cycle later (start_core_d1) the parameter line is sent to the clusters
+// and the FSM enters PROG_MODE.
+//
+// The 32-bit outputs data_lines .. cluster_out_valids are event counters.
+module Core (
+    input   wire                                   clk,
+    input   wire                                   rst_n,
+    input   wire                                   start_core,
+
+    input   CoreDataIn                             core_data_in,
+    input   wire                                   core_data_in_valid,
+    output  wire                                   core_data_in_ready,
+
+    // parameters
+    input   wire  [NUM_DTPU_CLUSTERS-1:0]          prog_schedule,
+    input   wire  [NUM_DTPU_CLUSTERS-1:0]          proc_schedule,
+
+    input   wire  [31:0]                           missing_value,
+    input   wire  [15:0]                           tree_feature_index_numcls,
+    input   wire  [15:0]                           tree_weights_numcls,
+    input   wire  [15:0]                           tuple_numcls,
+    input   wire  [3 :0]                           num_levels_per_tree_minus_one,
+    input   wire  [7 :0]                           num_trees_per_pu_minus_one,
+    input   wire  [3 :0]                           num_clusters_per_tuple,
+    input   wire  [3 :0]                           num_clusters_per_tuple_minus_one,
+
+    // output
+    output  wire  [DATA_PRECISION-1:0]             tuple_out_data,
+    output  wire                                   tuple_out_data_valid,
+    input   wire                                   tuple_out_data_ready,
+    //
+    output  reg   [31:0]                           data_lines,
+    output  reg   [31:0]                           prog_lines,
+    output  reg   [31:0]                           num_out_tuples,
+
+    output  reg   [31:0]                           aggreg_tuples_in,
+    output  reg   [31:0]                           aggreg_part_res_in,
+
+    output  reg   [1:0]                            core_state,
+    output  reg                                    started,
+    output  reg   [31:0]                           tuples_passed,
+
+    output  reg   [31:0]                           cluster_out_valids,
+
+    output  wire  [31:0]                           cluster_tuples_received[NUM_DTPU_CLUSTERS-1:0],
+    output  wire  [31:0]                           cluster_lines_received[NUM_DTPU_CLUSTERS-1:0],
+    output  wire  [31:0]                           cluster_tuples_res_out[NUM_DTPU_CLUSTERS-1:0],
+    output  wire  [31:0]                           cluster_tree_res_out[NUM_DTPU_CLUSTERS-1:0],
+    output  wire  [31:0]                           cluster_reduce_tree_outs[NUM_DTPU_CLUSTERS-1:0],
+    // NOTE(review): nothing in this module drives cluster_reduce_tree_outs_valids.
+    output  wire  [31:0]                           cluster_reduce_tree_outs_valids[NUM_DTPU_CLUSTERS-1:0]
+
+);
+
+
+
+localparam DATA_LINE_DISTR_LEVELS = (NUM_DTPU_CLUSTERS == 8)? 3 :
+                                    (NUM_DTPU_CLUSTERS == 4)? 2 : 1;
+
+
+localparam [1:0]    IDLE         = 2'b00,
+                    PROG_MODE    = 2'b01,
+                    PROCESS_MODE = 2'b10,
+                    ENGINE_DONE  = 2'b11;
+
+
+reg   [1:0]                            core_fsm_state;
+reg                                    init_w;      // a weights program line has been seen
+reg                                    init_idx;    // a feature-index program line has been seen
+reg                                    init_p;      // processing (data) phase has begun
+
+wire                                   InDataFIFO_re;
+wire                                   InDataFIFO_empty;
+wire                                   InDataFIFO_valid_out;
+wire                                   InDataFIFO_full;
+CoreDataIn                             InDataFIFO_dout;
+
+reg                                    shift_enable;
+reg   [NUM_DTPU_CLUSTERS_BITS-1:0]     shift_count;
+reg   [NUM_DTPU_CLUSTERS-1:0]          schedule_to_shift;
+wire  [NUM_DTPU_CLUSTERS-1:0]          shifted_schedule;
+
+reg                                    data_line_distr_valid[DATA_LINE_DISTR_LEVELS:0][(2**DATA_LINE_DISTR_LEVELS)-1:0];
+reg                                    data_line_distr_last[DATA_LINE_DISTR_LEVELS:0][(2**DATA_LINE_DISTR_LEVELS)-1:0];
+reg                                    data_line_distr_ctrl[DATA_LINE_DISTR_LEVELS:0][(2**DATA_LINE_DISTR_LEVELS)-1:0];
+reg   [1:0]                            data_line_distr_mode[DATA_LINE_DISTR_LEVELS:0][(2**DATA_LINE_DISTR_LEVELS)-1:0];
+reg   [NUM_DTPU_CLUSTERS-1:0]          data_line_distr_en[DATA_LINE_DISTR_LEVELS:0][(2**DATA_LINE_DISTR_LEVELS)-1:0];
+reg   [2:0]                            curr_pu;
+reg   [2:0]                            data_line_distr_pu[DATA_LINE_DISTR_LEVELS:0][(2**DATA_LINE_DISTR_LEVELS)-1:0];
+reg   [2:0]                            num_trees_sent_to_cluster;
+reg   [DATA_BUS_WIDTH-1:0]             data_line_distr[DATA_LINE_DISTR_LEVELS:0][(2**DATA_LINE_DISTR_LEVELS)-1:0];
+
+// Selects which 256-bit half of the current FIFO word is forwarded:
+// 0 = bits [255:0], 1 = bits [511:256]. The FIFO is popped with the second half.
+// (This register was used but never declared in the original source.)
+reg                                    data_line_part;
+
+wire                                   data_line_valid_array[NUM_DTPU_CLUSTERS-1:0];
+wire                                   data_line_last_array[NUM_DTPU_CLUSTERS-1:0];
+wire                                   data_line_ctrl_array[NUM_DTPU_CLUSTERS-1:0];
+wire  [1:0]                            data_line_mode_array[NUM_DTPU_CLUSTERS-1:0];
+wire  [2:0]                            data_line_pu_array[NUM_DTPU_CLUSTERS-1:0];
+wire  [NUM_DTPU_CLUSTERS-1:0]          data_line_en_array[NUM_DTPU_CLUSTERS-1:0];
+wire  [DATA_BUS_WIDTH-1:0]             data_line_array[NUM_DTPU_CLUSTERS-1:0];
+wire                                   data_line_ready_array[NUM_DTPU_CLUSTERS-1:0];
+
+wire  [DATA_PRECISION-1:0]             partial_aggregation_out[NUM_DTPU_CLUSTERS-1:0];
+wire                                   partial_aggregation_out_valid[NUM_DTPU_CLUSTERS-1:0];
+wire                                   partial_aggregation_out_ready[NUM_DTPU_CLUSTERS-1:0];
+
+wire  [NUM_DTPU_CLUSTERS-1:0]          clusters_ready;
+wire                                   target_clusters_ready;
+
+wire  [NUM_DTPU_CLUSTERS_BITS-1:0]     curr_cluster;
+wire                                   curr_cluster_valid;
+
+reg   [31:0]                           partial_leaf_aggreg_value;
+reg                                    partial_leaf_aggreg_value_valid;
+reg                                    partial_leaf_aggreg_value_last;
+reg   [NUM_DTPU_CLUSTERS_BITS-1:0]     tuple_cluster_offset;
+reg   [NUM_DTPU_CLUSTERS_BITS-1:0]     tuple_cluster_base;
+wire                                   aggregator_ready;
+
+reg                                    start_core_d1;
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////          Core State Machine           /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// One-cycle delayed copy of start_core. start_core itself clears the FSM,
+// the delayed copy is what actually launches a run.
+always@(posedge clk) begin
+    if(~rst_n) begin
+        start_core_d1 <= 0;
+    end
+    else begin
+        start_core_d1 <= start_core;
+    end
+end
+
+always@(posedge clk) begin
+    if(~rst_n | start_core) begin
+        core_fsm_state     <= IDLE;
+        init_w             <= 0;
+        init_idx           <= 0;
+        init_p             <= 0;
+
+        data_lines         <= 0;
+        prog_lines         <= 0;
+
+        aggreg_tuples_in   <= 0;
+        aggreg_part_res_in <= 0;
+
+        cluster_out_valids <= 0;
+
+        num_out_tuples     <= 0;
+
+        core_state         <= IDLE;
+
+        started            <= 0;
+    end
+    else begin
+
+        core_state <= core_fsm_state;
+
+        // ---- statistics counters ----
+        if(InDataFIFO_valid_out & InDataFIFO_dout.data_valid & InDataFIFO_re) begin
+            data_lines <= data_lines + 1'b1;
+        end
+
+        if(InDataFIFO_valid_out & ~InDataFIFO_dout.data_valid & InDataFIFO_re) begin
+            prog_lines <= prog_lines + 1'b1;
+        end
+
+        if(tuple_out_data_valid & tuple_out_data_ready) begin
+            num_out_tuples <= num_out_tuples + 1'b1;
+        end
+
+        if(partial_leaf_aggreg_value_valid & aggregator_ready & partial_leaf_aggreg_value_last) begin
+            aggreg_tuples_in <= aggreg_tuples_in + 1'b1;
+        end
+
+        if(partial_leaf_aggreg_value_valid & aggregator_ready) begin
+            aggreg_part_res_in <= aggreg_part_res_in + 1'b1;
+        end
+
+        if(partial_aggregation_out_valid[0]) begin
+            cluster_out_valids <= cluster_out_valids + 1'b1;
+        end
+
+        case (core_fsm_state)
+            IDLE: begin
+
+                started <= 0;
+
+                // FIX: this branch is only reached when start_core is low
+                // (start_core selects the reset branch above), so testing
+                // start_core here could never fire and the FSM was stuck in
+                // IDLE. The delayed pulse is used instead; it is also what
+                // qualifies the parameter line (data_line_distr_ctrl) below.
+                if( start_core_d1 ) begin
+                    core_fsm_state <= PROG_MODE;
+                    started        <= 1'b1;
+                end
+
+                init_w   <= 0;
+                init_idx <= 0;
+                init_p   <= 0;
+            end
+            PROG_MODE: begin
+                // Programming mode ends when the first data line shows up at the FIFO output
+                if(InDataFIFO_valid_out & InDataFIFO_dout.data_valid) begin
+                    core_fsm_state <= PROCESS_MODE;
+                    init_p         <= 1'b1;
+                end
+                else if(InDataFIFO_valid_out) begin
+                    init_w   <= init_w   |  InDataFIFO_dout.prog_mode;
+                    init_idx <= init_idx | ~InDataFIFO_dout.prog_mode;
+                end
+            end
+            PROCESS_MODE: begin
+                // Stays here until the next start_core / reset.
+            end
+        endcase
+    end
+end
+
+
+
+quick_fifo  #(.FIFO_WIDTH($bits(CoreDataIn)),        // data + data valid flag + last flag + prog flags
+              .FIFO_DEPTH_BITS(9),
+              .FIFO_ALMOSTFULL_THRESHOLD(508)
+      ) InDataFIFO (
+        .clk                (clk),
+        .reset_n            (rst_n),
+        .din                (core_data_in),
+        .we                 (core_data_in_valid),
+
+        .re                 (InDataFIFO_re),
+        .dout               (InDataFIFO_dout),
+        .empty              (InDataFIFO_empty),
+        .valid              (InDataFIFO_valid_out),
+        .full               (InDataFIFO_full),
+        .count              (),
+        .almostfull         ()
+    );
+
+assign core_data_in_ready = ~InDataFIFO_full;
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////        Distributing Received Data     /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+/*
+    We read data from the input FIFO and redistribute it to the clusters. The mapping of
+    trees/tuples to clusters is dictated by "prog_schedule" and "proc_schedule".
+*/
+
+RLS #(.DATA_WIDTH(NUM_DTPU_CLUSTERS),
+      .DATA_WIDTH_BITS(NUM_DTPU_CLUSTERS_BITS)
+    ) schedule_shifter(
+    .clk            (clk),
+    .rst_n          (rst_n & ~start_core),
+    .shift_enable   (shift_enable),
+    .data_in        (schedule_to_shift),
+    .shift_count    (shift_count),
+    .data_out       (shifted_schedule)
+   );
+
+// Schedule selection: the first line of each kind uses the configured
+// schedule, later lines continue from the rotated one.
+always @(*) begin
+    if(InDataFIFO_valid_out) begin
+        if(~InDataFIFO_dout.data_valid) begin
+            // Programming line: rotate by one cluster once every PU of the
+            // current cluster has received a tree.
+            shift_enable = InDataFIFO_dout.last & (num_trees_sent_to_cluster == NUM_PUS_PER_CLUSTER-1);
+
+            shift_count  = {{(NUM_DTPU_CLUSTERS_BITS-1){1'b0}}, 1'b1};
+
+            if( (init_w & InDataFIFO_dout.prog_mode) | (init_idx & ~InDataFIFO_dout.prog_mode) ) begin
+                schedule_to_shift = shifted_schedule;
+            end
+            else begin
+                schedule_to_shift = prog_schedule;
+            end
+        end
+        else begin
+            // Data line: rotate by the number of clusters one tuple occupies.
+            shift_enable = InDataFIFO_dout.last & target_clusters_ready;
+
+            shift_count  = num_clusters_per_tuple[NUM_DTPU_CLUSTERS_BITS-1:0];
+
+            if(init_p) begin
+                schedule_to_shift = shifted_schedule;
+            end
+            else begin
+                schedule_to_shift = proc_schedule;
+            end
+        end
+    end
+    else begin
+        shift_enable      = 0;
+        shift_count       = 0;
+        schedule_to_shift = shifted_schedule;
+    end
+end
+
+// Read & Split lines
+
+always @(posedge clk) begin
+    if (~rst_n) begin
+        data_line_distr_valid[0][0] <= 0;
+        data_line_distr_last[0][0]  <= 0;
+        data_line_distr_ctrl[0][0]  <= 0;
+        data_line_distr_mode[0][0]  <= 0;
+        data_line_distr_en[0][0]    <= 0;
+        data_line_distr_pu[0][0]    <= 0;   // previously left without a reset value
+
+        data_line_part              <= 0;
+        curr_pu                     <= 0;
+        tuples_passed               <= 0;
+        num_trees_sent_to_cluster   <= 0;
+    end
+    else begin
+        // The parameter line is flagged one cycle after start_core while the FSM is still IDLE.
+        data_line_distr_ctrl[0][0] <= start_core_d1 & (core_fsm_state == IDLE);
+
+        if (InDataFIFO_valid_out & (target_clusters_ready | ~InDataFIFO_dout.data_valid)) begin
+
+            if(InDataFIFO_dout.last & InDataFIFO_dout.data_valid & data_line_part) begin
+                tuples_passed <= tuples_passed + 1'b1;
+            end
+
+            data_line_part <= ~data_line_part;
+
+            data_line_distr_valid[0][0] <= InDataFIFO_dout.data_valid;
+            data_line_distr_last[0][0]  <= InDataFIFO_dout.last && data_line_part;
+            data_line_distr_mode[0][0]  <= {InDataFIFO_dout.prog_mode, InDataFIFO_dout.data_valid};
+
+            data_line_distr_pu[0][0]    <= curr_pu;
+            data_line_distr_en[0][0]    <= schedule_to_shift;
+
+            if(InDataFIFO_dout.last & data_line_part) begin
+                curr_pu <= curr_pu + 1'b1;
+            end
+
+            // if this is programming data then we count how many trees we send to a cluster
+            if(~InDataFIFO_dout.data_valid & InDataFIFO_dout.last & data_line_part) begin
+                if(num_trees_sent_to_cluster == NUM_PUS_PER_CLUSTER-1) begin
+                    num_trees_sent_to_cluster <= 0;
+                end
+                else begin
+                    num_trees_sent_to_cluster <= num_trees_sent_to_cluster + 1'b1;
+                end
+            end
+        end
+        else begin
+            data_line_distr_valid[0][0] <= 0;
+            data_line_distr_last[0][0]  <= 0;
+            data_line_distr_mode[0][0]  <= 0;
+        end
+    end
+end
+
+// select which part of the cache line to distribute
+// (while IDLE the root of the tree carries the packed run parameters instead)
+always @(posedge clk) begin
+    if(core_fsm_state == IDLE) begin
+        data_line_distr[0][0] <= {152'b0, tuple_numcls, missing_value, tree_feature_index_numcls, tree_weights_numcls, {4'b0, num_levels_per_tree_minus_one}, {8'b0}, num_trees_per_pu_minus_one};
+    end
+    else if( ~data_line_part ) begin
+        data_line_distr[0][0] <= InDataFIFO_dout.data[255:0];
+    end
+    else begin
+        data_line_distr[0][0] <= InDataFIFO_dout.data[511:256];
+    end
+end
+
+// Pop the FIFO together with the second half of the word.
+assign InDataFIFO_re = data_line_part & ((InDataFIFO_valid_out & ~InDataFIFO_dout.data_valid) | target_clusters_ready);
+
+assign target_clusters_ready = |(clusters_ready & schedule_to_shift);
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////        Data Line Distribution Tree    /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Registered binary fan-out: node j of level i+1 copies node j>>1 of level i.
+genvar i, j;
+generate
+for ( i = 0; i < DATA_LINE_DISTR_LEVELS; i=i+1) begin: DL1
+    for( j = 0; j < (1<<(i+1)); j = j+1) begin:DL2
+        // payload is not reset
+        always @(posedge clk) begin
+            data_line_distr[i+1][j] <= data_line_distr[i][j>>1];
+        end
+
+        always @(posedge clk) begin
+            if(~rst_n) begin
+                data_line_distr_valid[i+1][j] <= 0;
+                data_line_distr_last[i+1][j]  <= 0;
+                data_line_distr_ctrl[i+1][j]  <= 0;
+                data_line_distr_mode[i+1][j]  <= 0;
+                data_line_distr_pu[i+1][j]    <= 0;
+                data_line_distr_en[i+1][j]    <= 0;
+            end
+            else begin
+                data_line_distr_valid[i+1][j] <= data_line_distr_valid[i][j>>1];
+                data_line_distr_last[i+1][j]  <= data_line_distr_last[i][j>>1];
+                data_line_distr_ctrl[i+1][j]  <= data_line_distr_ctrl[i][j>>1];
+                data_line_distr_mode[i+1][j]  <= data_line_distr_mode[i][j>>1];
+                data_line_distr_pu[i+1][j]    <= data_line_distr_pu[i][j>>1];
+                data_line_distr_en[i+1][j]    <= data_line_distr_en[i][j>>1];
+            end
+
+        end
+    end
+
+end
+endgenerate
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////              Engine Clusters          /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+generate
+    for (i = 0; i < NUM_DTPU_CLUSTERS; i = i + 1) begin: clusters
+        DTPUCluster cluster_x(
+
+        .clk                                (clk),
+        .rst_n                              (rst_n),
+
+        .data_line_in                       (data_line_array[i]),
+        .data_line_in_valid                 (data_line_valid_array[i]),
+        .data_line_in_last                  (data_line_last_array[i]),
+        .data_line_in_ctrl                  (data_line_ctrl_array[i]),
+        .data_line_in_mode                  (data_line_mode_array[i]),
+        .data_line_in_pu                    (data_line_pu_array[i]),
+        .data_line_in_ready                 (data_line_ready_array[i]),
+
+        .partial_tree_node_index_out        (),
+        .partial_tree_node_index_out_valid  (),
+
+        .partial_aggregation_out            (partial_aggregation_out[i]),
+        .partial_aggregation_out_valid      (partial_aggregation_out_valid[i]),
+        .partial_aggregation_out_ready      (partial_aggregation_out_ready[i]),
+
+        .tuples_received                    (cluster_tuples_received[i]),
+        .lines_received                     (cluster_lines_received[i]),
+        .tuples_res_out                     (cluster_tuples_res_out[i]),
+        .tree_res_out                       (cluster_tree_res_out[i]),
+        .reduce_tree_outs                   (cluster_reduce_tree_outs[i])
+        );
+
+        // Leaf i of the tree feeds cluster i; valid/last/mode are gated by the
+        // cluster's own bit of the schedule, ctrl (parameter line) is broadcast.
+        assign data_line_array[i]       = data_line_distr[DATA_LINE_DISTR_LEVELS][i];
+        assign data_line_valid_array[i] = data_line_distr_valid[DATA_LINE_DISTR_LEVELS][i] & data_line_distr_en[DATA_LINE_DISTR_LEVELS][i][i];
+        assign data_line_last_array[i]  = data_line_distr_last[DATA_LINE_DISTR_LEVELS][i] & data_line_distr_en[DATA_LINE_DISTR_LEVELS][i][i];
+        assign data_line_ctrl_array[i]  = data_line_distr_ctrl[DATA_LINE_DISTR_LEVELS][i];
+        assign data_line_mode_array[i]  = data_line_distr_mode[DATA_LINE_DISTR_LEVELS][i] & {2{data_line_distr_en[DATA_LINE_DISTR_LEVELS][i][i]}};
+        assign data_line_pu_array[i]    = data_line_distr_pu[DATA_LINE_DISTR_LEVELS][i];
+
+        assign partial_aggregation_out_ready[i] = aggregator_ready & (curr_cluster == i);
+
+        assign clusters_ready[i] = data_line_ready_array[i];
+    end
+endgenerate
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+//////////////////////////////          Tree Leafs Aggregation       /////////////////////////
+//////////////////////////////////////                            //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+//---------------- Further aggregating leaf values from multiple clusters --------------------//
+
+assign curr_cluster = tuple_cluster_base + tuple_cluster_offset;
+
+assign curr_cluster_valid = partial_aggregation_out_valid[curr_cluster];
+
+FPAggregator #(.FP_ADDER_LATENCY(FP_ADDER_LATENCY))
+
+ cluster_aggregator(
+
+        .clk                (clk),
+        .rst_n              (rst_n),
+
+        .fp_in              (partial_leaf_aggreg_value),
+        .fp_in_valid        (partial_leaf_aggreg_value_valid),
+        .fp_in_last         (partial_leaf_aggreg_value_last),
+        .fp_in_ready        (aggregator_ready),
+
+        .aggreg_out         (tuple_out_data),
+        .aggreg_out_valid   (tuple_out_data_valid),
+        .aggreg_out_ready   (tuple_out_data_ready)
+    );
+
+always @(posedge clk) begin
+    if(~rst_n) begin
+        partial_leaf_aggreg_value       <= 0;
+        partial_leaf_aggreg_value_valid <= 0;
+        partial_leaf_aggreg_value_last  <= 1'b0;
+        tuple_cluster_offset            <= 0;
+        tuple_cluster_base              <= 0;
+    end
+    else begin
+
+        //---------------------- Select partial aggregation value from a cluster ----------------------------//
+        if(aggregator_ready) begin
+            partial_leaf_aggreg_value       <= partial_aggregation_out[curr_cluster];
+            partial_leaf_aggreg_value_valid <= curr_cluster_valid;
+            partial_leaf_aggreg_value_last  <= 1'b0;
+
+            if(curr_cluster_valid) begin
+                // Last cluster of this tuple: flag it and move the base to the next tuple's clusters.
+                if(tuple_cluster_offset == (num_clusters_per_tuple_minus_one) ) begin
+                    tuple_cluster_offset <= 0;
+
+                    if(num_clusters_per_tuple == NUM_DTPU_CLUSTERS) begin
+                        tuple_cluster_base <= 0;
+                    end
+                    else begin
+                        tuple_cluster_base <= tuple_cluster_base + num_clusters_per_tuple[NUM_DTPU_CLUSTERS_BITS-1:0];
+                    end
+
+                    partial_leaf_aggreg_value_last <= 1'b1;
+                end
+                else begin
+                    tuple_cluster_offset <= tuple_cluster_offset + 1'b1;
+                end
+            end
+        end
+    end
+end
+
+endmodule
\ No newline at end of file
diff --git a/hw/hdl/operators/dtengine/xgboost/DTInference.sv b/hw/hdl/operators/dtengine/xgboost/DTInference.sv
new file mode 100644
index 00000000..05cba473
--- /dev/null
+++ b/hw/hdl/operators/dtengine/xgboost/DTInference.sv
@@ -0,0 +1,193 @@
+
+/*
+    The PCIe RX unit receives data/ trees to local core and other cores in the FPGA network
+    it has a list of all devices addresses, and number of trees (numcls )per core
+
+    While receiving trees it count lines and determine for which device to send them, or it broadcast
+    them to all devices if configured to do that.
+
+    While receiving data it either broadcast the data to all devices if configured that way. Or, it
+    distribute batches of data to each device one after the other.
+ + Modes of Operation: + + - Tree ensemble spread over all the FPGAs and a tuple is broadcasted to all FPGAs + Partial results are forwarded from an FPGA to another and aggregate results. + + We batch at least 4 results together so we send full 128-bit line + + - Tree Ensemble fits in one FPGA and we partition tuples between FPGAs. + For ordering and schedling reasons, we batch every 4 consecutive tuples to one FPGA + so results from one FPGA are in order. +*/ + + +import DTEngine_Types::*; + +module DTInference ( + input wire clk, // Clock + input wire rst_n, // Asynchronous reset active low + + input wire start_core, + + input wire [15:0] tuple_numcls, + input wire [15:0] tree_weights_numcls_minus_one; + input wire [15:0] tree_feature_index_numcls_minus_one; + input wire [4:0] num_trees_per_pu_minus_one, + input wire [3:0] tree_depth, + input wire [8-1:0] prog_schedule, + input wire [8-1:0] proc_schedule, + // input trees + input wire [511:0] core_in, + input wire [1 :0] core_in_type, // 01: trees weights, 00: feature indexes, 10: data + input wire core_in_valid, + input wire core_in_last, + output wire core_in_ready, + // output + output reg [511:0] core_result_out, + output wire core_result_valid, + input wire core_result_ready + ); + + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// ////////////////////////////////// +////////////////////////////// Signals Declarations ///////////////////////// +////////////////////////////////////// ////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +wire start_core; // Triggers the operator + +//wire [3 :0] num_clusters_per_tuple; // Number of Clusters store the complete model +//wire [3 :0] num_clusters_per_tuple_minus_one; + +// Core +CoreDataIn core_data_in; +wire core_data_in_valid; +wire core_data_in_ready; + +// ResultsCombiner +wire 
[31:0] local_core_result; +wire local_core_result_valid; +wire local_core_result_ready; + +reg [31:0] core_result_out_array[15:0]; +reg res_line_valid; +reg [3:0] curr_out_word; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// ////////////////////////////////// +////////////////////////////// Engine Core ///////////////////////// +////////////////////////////////////// ////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +assign core_data_in.data = core_in; +assign core_data_in.data_valid = core_in_type[1]; +assign core_data_in.last = core_in_last; +assign core_data_in.prog_mode = core_in_type[0]; +assign core_data_in_valid = core_in_valid; + +assign core_in_ready = core_data_in_ready; + + +Core engine_core( + .clk (clk), + .rst_n (rst_n), + .start_core (start_core), + + .core_data_in (core_data_in), + .core_data_in_valid (core_data_in_valid), + .core_data_in_ready (core_data_in_ready), + + // parameters + .prog_schedule (prog_schedule), + .proc_schedule (proc_schedule), + + .missing_value (0), + .tree_feature_index_numcls (tree_feature_index_numcls_minus_one), + .tree_weights_numcls (tree_weights_numcls_minus_one), + .tuple_numcls (tuple_numcls), + .num_levels_per_tree_minus_one (tree_depth), + .num_trees_per_pu_minus_one (num_trees_per_pu_minus_one), + .num_clusters_per_tuple (NUM_DTPU_CLUSTERS), + .num_clusters_per_tuple_minus_one (NUM_DTPU_CLUSTERS-1), + + // output + .tuple_out_data (local_core_result), + .tuple_out_data_valid (local_core_result_valid), + .tuple_out_data_ready (local_core_result_ready), + + .data_lines (), + .prog_lines (), + .num_out_tuples (), + .aggreg_tuples_in (), + .aggreg_part_res_in (), + .core_state (), + .started (), + .tuples_passed (), + .cluster_out_valids (), + + .cluster_tuples_received (), + .cluster_lines_received (), + .cluster_tuples_res_out (), + 
.cluster_tree_res_out (), + .cluster_reduce_tree_outs (), + .cluster_reduce_tree_outs_valids () +); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// ////////////////////////////////// +////////////////////////////// PCIe Transmitter ///////////////////////// +////////////////////////////////////// ////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +assign local_core_result_ready = core_result_ready || (curr_out_word != 4'h0); + +always@(posedge clk) begin + if(~rst_n || start_core) begin + curr_out_word <= 4'h0; + res_line_valid <= 1'b0; + end + else begin + // counter + if(local_core_result_valid) begin + if(curr_out_word == 4'h0) begin + if(core_result_ready) begin + curr_out_word <= curr_out_word + 1'b1; + end + end + else begin + curr_out_word <= curr_out_word + 1'b1; + end + end + + // + if(res_line_valid) begin + if(core_result_ready) begin + res_line_valid <= 1'b0; + end + end + else if( (curr_out_word == 4'hF) && local_core_result_valid) begin + res_line_valid <= 1'b1; + end + end + + // Fill in output data line + if((curr_out_word != 4'h0) || core_result_ready) begin + core_result_out_array[curr_out_word] <= local_core_result; + end +end + +// +always@(*) begin + for(i = 0; i < 16; i=i+1) begin + core_result_out[i*32+32-1:i*32] = core_result_out_array[i]; + end +end + +assign core_result_valid = res_line_valid; + + +endmodule + diff --git a/hw/hdl/operators/dtengine/xgboost/DTInferenceTop.sv b/hw/hdl/operators/dtengine/xgboost/DTInferenceTop.sv new file mode 100644 index 00000000..98597284 --- /dev/null +++ b/hw/hdl/operators/dtengine/xgboost/DTInferenceTop.sv @@ -0,0 +1,229 @@ +`timescale 1ns / 1ps +/** + * User logic wrapper + * + */ +module DTInferenceTop ( + // Clock and reset + input wire aclk, + input wire[0:0] aresetn, + + // AXI4 control + AXI4Lite.s axi_ctrl, + + // 
AXI4 data + AXI4.s axi_data, + + // AXI4S host + AXI4S.m axis_host_src, + AXI4S.s axis_host_sink, + + // AXI4S card + AXI4S.m axis_card_src, + AXI4S.s axis_card_sink +); + +/* -- Tie-off unused interfaces and signals ----------------------------- */ +//always_comb axi_ctrl.tie_off_s(); +always_comb axi_data.tie_off_s(); +//always_comb axis_host_src.tie_off_m(); +//always_comb axis_host_sink.tie_off_s(); +always_comb axis_card_src.tie_off_m(); +always_comb axis_card_sink.tie_off_s(); + +/* -- USER LOGIC -------------------------------------------------------- */ +localparam [1:0] IDLE = 2'b00, + READ_TREES = 2'b01, + WAIT_ALL_TREES = 2'b10, + READ_DATA = 2'b11; + +reg [1:0] reader_state; +reg [1:0] nxt_reader_state; +wire trees_read_done; +wire data_read_done; + +wire wr_tvalid; +wire wr_tready; +wire [511:0] wr_tdata; + +wire rd_tvalid; +wire rd_ttype; +wire rd_tlast; +wire rd_tready; +wire [511:0] rd_tdata; + +reg [31:0] sentOutCLs; +wire [31:0] next_sentOutCLs; + +logic ap_start_r = 1'b0; +logic ap_start_pulse_d1 = 1'b0; +wire ap_start ; +wire [16-1:0] tuple_numcls ; +wire [8-1:0] treeDepth ; +wire [8-1:0] puTrees ; +wire [31:0] outputNumCLs; + +wire [8-1:0] prog_schedule ; +wire [8-1:0] proc_schedule ; +wire [16-1:0] tree_weights_numcls_minus_one; +wire [16-1:0] tree_feature_index_numcls_minus_one; + +logic [8-1:0] num_trees_per_pu_minus_one; +//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// ////////////////////////////////// +////////////////////////////// Parameters on AxiLite ///////////////////////// +////////////////////////////////////// ////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// AXI4-Lite slave interface +engineParams #( + .C_ADDR_WIDTH ( 64 ), + .C_DATA_WIDTH ( 64 ) +) +inst_control_s_axi ( + .aclk ( ap_clk ), + .areset ( !aresetn ), + .aclk_en ( 1'b1 ), + .awvalid 
( axi_ctrl.awvalid ), + .awready ( axi_ctrl.awready ), + .awaddr ( axi_ctrl.awaddr ), + .wvalid ( axi_ctrl.wvalid ), + .wready ( axi_ctrl.wready ), + .wdata ( axi_ctrl.wdata ), + .wstrb ( axi_ctrl.wstrb ), + .arvalid ( axi_ctrl.arvalid ), + .arready ( axi_ctrl.arready ), + .araddr ( axi_ctrl.araddr ), + .rvalid ( axi_ctrl.rvalid ), + .rready ( axi_ctrl.rready ), + .rdata ( axi_ctrl.rdata ), + .rresp ( axi_ctrl.rresp ), + .bvalid ( axi_ctrl.bvalid ), + .bready ( axi_ctrl.ready ), + .bresp ( axi_ctrl.bresp ), + .ap_start ( ap_start ), + .tuple_numcls ( tuple_numcls ), + .treeDepth ( treeDepth ), + .puTrees ( puTrees ), + .outputNumCLs ( outputNumCLs ), + .prog_schedule ( prog_schedule ), + .proc_schedule ( proc_schedule ), + + .tree_weights_numcls_minus_one (tree_weights_numcls_minus_one), + .tree_feature_index_numcls_minus_one (tree_feature_index_numcls_minus_one) +); + +always @(posedge aclk) begin + if (~aresetn) begin + num_trees_per_pu_minus_one <= 0; + + ap_start_r <= 0; + ap_start_pulse_d1 <= 0; + end + else begin + num_trees_per_pu_minus_one <= puTrees - 1'b1; + + ap_start_r <= ap_start; + ap_start_pulse_d1 <= ap_start_pulse; + end +end + +assign ap_start_pulse = ap_start & ~ap_start_r; +//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// ////////////////////////////////// +////////////////////////////// Decode Input Streams ///////////////////////// +////////////////////////////////////// ////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// Reader State +always@(posedge aclk) begin + if(~aresetn) begin + reader_state <= IDLE; + end + else begin + reader_state <= nxt_reader_state; + end +end + +always@(*) begin + case (reader_state) + IDLE : nxt_reader_state = (ap_start_pulse_d1)? READ_TREES : IDLE; + READ_TREES : nxt_reader_state = (trees_read_done)? 
WAIT_ALL_TREES: READ_TREES; + WAIT_ALL_TREES: nxt_reader_state = READ_DATA; + READ_DATA : nxt_reader_state = (data_read_done)? IDLE : READ_DATA; + default : nxt_reader_state = IDLE; + endcase +end + +assign trees_read_done = axis_host_sink.tlast && axis_host_sink.tvalid; +assign data_read_done = axis_host_sink.tlast && axis_host_sink.tvalid; + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// ////////////////////////////////// +////////////////////////////// Engine Core ///////////////////////// +////////////////////////////////////// ////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// Input Streams +assign rd_tdata = axis_host_sink.tdata; +assign rd_tvalid = axis_host_sink.tvalid; +assign rd_tlast = axis_host_sink.tlast; +assign rd_ttype = reader_state == READ_DATA; + +assign axis_host_sink.tready = rd_tready; + +// Output Stream +assign axis_host_src.tdata = wr_tdata; +assign axis_host_src.tkeep = 64'hffffffffffffffff; +assign axis_host_src.tvalid = wr_tvalid; +assign axis_host_src.tlast = next_sentOutCLs == outputNumCLs; + +assign wr_tready = axis_host_src.tready; + +// Count output numCLs + +assign next_sentOutCLs = sentOutCLs + 1'b1; + +always@(posedge aclk) begin + if(~aresetn) begin + sentOutCLs <= 0; + end + else begin + if(sentOutCLs == outputNumCLs) begin + sentOutCLs <= 0; + end + else if(wr_tvalid && wr_tready) begin + sentOutCLs <= next_sentOutCLs; + end + end +end + + +DTInference DTInference( + .clk (aclk), + .rst_n (aresetn), + .start_core (ap_start_pulse_d1), + // parameters + + .tuple_numcls (tuple_numcls), + .tree_weights_numcls_minus_one (tree_weights_numcls_minus_one), + .tree_feature_index_numcls_minus_one (tree_feature_index_numcls_minus_one), + .num_trees_per_pu_minus_one (num_trees_per_pu_minus_one[4:0]), + .tree_depth (treeDepth[3:0]), + .prog_schedule 
(prog_schedule),
+    .proc_schedule                       (proc_schedule),
+    // input trees
+    .core_in                             (rd_tdata),
+    .core_in_type                        (rd_ttype),
+    .core_in_valid                       (rd_tvalid),
+    .core_in_last                        (rd_tlast),
+    .core_in_ready                       (rd_tready),
+    // output
+    .core_result_out                     (wr_tdata),
+    .core_result_valid                   (wr_tvalid),
+    .core_result_ready                   (wr_tready)
+);
+
+endmodule
+
diff --git a/hw/hdl/operators/dtengine/xgboost/common/DTEngine_Types.sv b/hw/hdl/operators/dtengine/xgboost/common/DTEngine_Types.sv
new file mode 100644
index 00000000..5461b3f5
--- /dev/null
+++ b/hw/hdl/operators/dtengine/xgboost/common/DTEngine_Types.sv
@@ -0,0 +1,49 @@
+
+package DTEngine_Types;
+
+parameter DATA_BUS_WIDTH = 512; // bit width of the CoreDataIn.data field declared below
+
+parameter NUM_PUS_PER_CLUSTER_BITS = 2;
+parameter NUM_PUS_PER_CLUSTER = 4; // equals 2**NUM_PUS_PER_CLUSTER_BITS with the values set here
+parameter NUM_DTPU_CLUSTERS = 8; // equals 2**NUM_DTPU_CLUSTERS_BITS with the values set here
+parameter NUM_DTPU_CLUSTERS_BITS = 3;
+parameter NUM_TREES_PER_PU = 32;
+
+parameter FEATURES_DISTR_DELAY = 8;
+
+parameter DATA_PRECISION = 32;
+parameter FIXED_POINT_ARITHMATIC = ((DATA_PRECISION < 32)? 1 : 0); // 1 only when DATA_PRECISION is below 32; evaluates to 0 for the value set above
+
+parameter TREE_WEIGHTS_PROG = 1'b0;
+parameter TREE_FEATURE_INDEX_PROG = 1'b1;
+
+parameter WAIT_CYCLES_FOR_LAST_TREE = 16;
+parameter FP_ADDER_LATENCY = 2;
+
+
+
+// Stream type codes (16-bit values)
+parameter [15:0] DATA_STREAM = 1,
+                 TREE_WEIGHT_STREAM = 2,
+                 TREE_FINDEX_STREAM = 3,
+                 RESULTS_STREAM = 4;
+
+
+
+
+typedef struct packed
+{
+    logic [DATA_BUS_WIDTH-1:0] data;
+    logic data_valid;
+    logic last;
+    logic prog_mode; // NOTE(review): this was documented as "1: weights, 0 feature indexes", which is the opposite of TREE_WEIGHTS_PROG (1'b0) / TREE_FEATURE_INDEX_PROG (1'b1) above -- confirm the intended polarity
+} CoreDataIn;
+
+
+
+
+endpackage
+
+
+
+
diff --git a/hw/hdl/operators/dtengine/xgboost/common/FPAdder_2cycles_latency.v b/hw/hdl/operators/dtengine/xgboost/common/FPAdder_2cycles_latency.v
new file mode 100644
index 00000000..178202d4
--- /dev/null
+++ b/hw/hdl/operators/dtengine/xgboost/common/FPAdder_2cycles_latency.v
@@ -0,0 +1,389 @@
+// File 2cycles_latency.vhdl translated with vhd2vl v2.4 VHDL to Verilog RTL translator
+// vhd2vl settings:
+// * Verilog Module Declaration Style: 1995
+
+// vhd2vl is Free (libre) Software:
+// Copyright
(C) 2001 Vincenzo Liguori - Ocean Logic Pty Ltd
+// http://www.ocean-logic.com
+// Modifications Copyright (C) 2006 Mark Gonzales - PMC Sierra Inc
+// Modifications (C) 2010 Shankar Giri
+// Modifications Copyright (C) 2002, 2005, 2008-2010 Larry Doolittle - LBNL
+// http://doolittle.icarus.com/~larry/vhd2vl/
+//
+// vhd2vl comes with ABSOLUTELY NO WARRANTY. Always check the resulting
+// Verilog for correctness, ideally with a formal verification tool.
+//
+// You are welcome to redistribute vhd2vl under certain conditions.
+// See the license (GPLv2) file included with the source for details.
+
+// The result of translation follows. Its copyright status should be
+// considered unchanged from the original VHDL.
+
+//------------------------------------------------------------------------------
+// FPAdder_8_23_uid2_RightShifter
+// (RightShifter_24_by_max_26_uid4)
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Florent de Dinechin, Bogdan Pasca (2007,2008,2009,2010)
+//------------------------------------------------------------------------------
+// no timescale needed; purely combinational: 24-bit X placed into a 50-bit field R according to the 5-bit amount S
+
+module FPAdder_8_23_uid2_RightShifter_l2(
+X,
+S,
+R
+);
+
+input [23:0] X;
+input [4:0] S;
+output [49:0] R;
+
+wire clk; // unused: this module has no clk port and nothing below reads the net
+wire rst; // unused: this module has no rst port and nothing below reads the net
+wire [23:0] X;
+wire [4:0] S;
+wire [49:0] R;
+
+
+wire [23:0] level0;
+wire [4:0] ps;
+wire [24:0] level1;
+wire [26:0] level2;
+wire [30:0] level3;
+wire [38:0] level4;
+wire [54:0] level5;
+
+
+  assign level0 = X;
+  assign ps = S;
+  assign level1 = ps[0] == 1'b1 ? {1'b0,level0} : {level0,1'b0}; // stage k widens the vector by 2**k bits: zeros are prepended when ps[k] is 1, appended when it is 0
+  assign level2 = ps[1] == 1'b1 ? {2'b00,level1} : {level1,2'b00};
+  assign level3 = ps[2] == 1'b1 ? {4'b0000,level2} : {level2,4'b0000};
+  assign level4 = ps[3] == 1'b1 ? {8'b00000000,level3} : {level3,8'b00000000};
+  assign level5 = ps[4] == 1'b1 ? {16'b0000000000000000,level4} : {level4,16'b0000000000000000};
+  assign R = level5[54:5]; // upper 50 of the 55 bits; the 5 lowest bits are discarded
+
+endmodule
+
+//------------------------------------------------------------------------------
+// IntAdder_27_f110_uid6
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Bogdan Pasca, Florent de Dinechin (2008-2010)
+//------------------------------------------------------------------------------
+// Pipeline depth: 0 cycles
+// no timescale needed
+
+module IntAdder_27_f110_uid6_l2(
+X,
+Y,
+Cin,
+R
+);
+
+input [26:0] X;
+input [26:0] Y;
+input Cin;
+output [26:0] R;
+
+wire clk; // unused: no clk port, no reader
+wire rst; // unused: no rst port, no reader
+wire [26:0] X;
+wire [26:0] Y;
+wire Cin;
+wire [26:0] R;
+
+  //Alternative
+  assign R = X + Y + Cin; // 27-bit result; a carry out of bit 26 is not kept
+//------------------------------------------------------------------------------
+// LZCShifter_28_to_28_counting_32_uid16
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Florent de Dinechin, Bogdan Pasca (2007)
+//------------------------------------------------------------------------------
+// Pipeline depth: 1 cycles
+
+endmodule
+
+module LZCShifter_28_to_28_counting_32_uid16_l2(
+clk,
+stall,
+I,
+Count,
+O
+);
+
+input clk, stall;
+input [27:0] I;
+output [4:0] Count;
+output [27:0] O;
+
+wire clk;
+wire rst; // unused: this module has no rst port and nothing below reads the net
+wire [27:0] I;
+wire [4:0] Count;
+wire [27:0] O;
+
+
+wire [27:0] level5;
+wire count4; reg count4_d1;
+wire [27:0] level4;
+wire count3; reg count3_d1;
+wire [27:0] level3;
+wire count2; reg count2_d1;
+wire [27:0] level2; reg [27:0] level2_d1;
+wire count1;
+wire [27:0] level1;
+wire count0;
+wire [27:0] level0;
+wire [4:0] sCount;
+
+  always @(posedge clk) begin
+    if( ~stall ) begin // the pipeline registers only update while stall is low
+      count4_d1 <= count4;
+      count3_d1 <= count3;
+      count2_d1 <= count2;
+      level2_d1 <= level2;
+    end
+  end
+
+  assign level5 = I;
+  assign count4 = level5[27:12] == 16'b0000000000000000 ? 1'b1 : 1'b0; // 1 when the 16 upper bits are all zero; level4 is then the input shifted left by 16
+  assign level4 = count4 == 1'b0 ? level5[27:0] : {level5[11:0],16'b0000000000000000};
+  assign count3 = level4[27:20] == 8'b00000000 ? 1'b1 : 1'b0;
+  assign level3 = count3 == 1'b0 ? level4[27:0] : {level4[19:0],8'b00000000};
+  assign count2 = level3[27:24] == 4'b0000 ? 1'b 1 : 1'b0;
+  assign level2 = count2 == 1'b0 ? level3[27:0] : {level3[23:0],4'b0000};
+  //--------------Synchro barrier, entering cycle 1----------------
+  assign count1 = level2_d1[27:26] == 2'b00 ? 1'b1 : 1'b0; // the last two stages work on the registered level2_d1
+  assign level1 = count1 == 1'b0 ? level2_d1[27:0] : {level2_d1[25:0],2'b00};
+  assign count0 = level1[27:27] == 1'b0 ? 1'b1 : 1'b0;
+  assign level0 = count0 == 1'b0 ? level1[27:0] : {level1[26:0],1'b0};
+  assign O = level0;
+  assign sCount = {count4_d1,count3_d1,count2_d1,count1,count0}; // the bits correspond to left shifts of 16, 8, 4, 2 and 1 applied above
+  assign Count = sCount;
+//------------------------------------------------------------------------------
+// IntAdder_34_f110_uid18
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Bogdan Pasca, Florent de Dinechin (2008-2010)
+//------------------------------------------------------------------------------
+// Pipeline depth: 0 cycles
+
+endmodule
+
+module IntAdder_34_f110_uid18_l2(
+X,
+Y,
+Cin,
+R
+);
+
+
+input [33:0] X;
+input [33:0] Y;
+input Cin;
+output [33:0] R;
+
+wire [33:0] X;
+wire [33:0] Y;
+wire Cin;
+wire [33:0] R;
+
+  //Alternative
+  assign R = X + Y + Cin; // 34-bit result; a carry out of bit 33 is not kept
+//------------------------------------------------------------------------------
+// FPAdder_8_23_uid2
+// This operator is part of the Infinite Virtual Library FloPoCoLib
+// and is distributed under the terms of the GNU Lesser General Public Licence
+// with a Tobin Tax restriction (see README file for details).
+// Authors: Bogdan Pasca, Florent de Dinechin (2010)
+//------------------------------------------------------------------------------
+// Pipeline depth: 2 cycles
+
+endmodule
+
+module FPAdder_8_23_uid2_l2(
+clk,
+rst,
+seq_stall,
+X,
+Y,
+R
+);
+
+input clk, rst;
+input seq_stall;
+input [8 + 23 + 2:0] X; // 34 bits, sliced below as [33:32] exception code, [31] sign, [30:23] exponent, [22:0] fraction
+input [8 + 23 + 2:0] Y; // same layout as X
+output [8 + 23 + 2:0] R; // same layout as X
+
+wire clk;
+wire rst; // rst is a port but is not read anywhere in this module
+wire [8 + 23 + 2:0] X;
+wire [8 + 23 + 2:0] Y;
+wire [8 + 23 + 2:0] R;
+
+
+wire [32:0] excExpFracX;
+wire [32:0] excExpFracY;
+wire [8:0] eXmeY;
+wire [8:0] eYmeX;
+wire swap;
+wire [33:0] newX; reg [33:0] newX_d1;
+wire [33:0] newY;
+wire [7:0] expX; reg [7:0] expX_d1;
+wire [1:0] excX;
+wire [1:0] excY;
+wire signX;
+wire signY;
+wire EffSub; reg EffSub_d1; reg EffSub_d2;
+wire [5:0] sdsXsYExnXY;
+wire [3:0] sdExnXY;
+wire [23:0] fracY;
+reg [1:0] excRt; reg [1:0] excRt_d1; reg [1:0] excRt_d2;
+wire signR; reg signR_d1; reg signR_d2;
+wire [8:0] expDiff;
+wire shiftedOut;
+wire [4:0] shiftVal;
+wire [49:0] shiftedFracY; reg [49:0] shiftedFracY_d1;
+wire sticky;
+wire [26:0] fracYfar;
+wire [26:0] fracYfarXorOp;
+wire [26:0] fracXfar;
+wire cInAddFar;
+wire [26:0] fracAddResult;
+wire [27:0] fracGRS;
+wire [9:0] extendedExpInc; reg [9:0] extendedExpInc_d1;
+wire [4:0] nZerosNew;
+wire [27:0] shiftedFrac;
+wire [9:0] updatedExp;
+wire eqdiffsign;
+wire [33:0] expFrac;
+wire stk;
+wire rnd;
+wire grd;
+wire lsb;
+wire addToRoundBit;
+wire [33:0] RoundedExpFrac;
+wire [1:0] upExc;
+wire [22:0] fracR;
+wire [7:0] expR;
+wire [3:0] exExpExc;
+reg [1:0] excRt2;
+wire [1:0] excR;
+wire [33:0] computedR;
+
+  always @(posedge clk) begin
+    if( ~seq_stall)begin // every pipeline register holds its value while seq_stall is high
+      newX_d1 <= newX;
+      expX_d1 <= expX;
+      EffSub_d1 <= EffSub;
+      EffSub_d2 <= EffSub_d1;
+      excRt_d1 <= excRt;
+      excRt_d2 <= excRt_d1;
+      signR_d1 <= signR;
+      signR_d2 <= signR_d1;
+      shiftedFracY_d1 <= shiftedFracY;
+      extendedExpInc_d1 <= extendedExpInc;
+    end
+  end
+
+  // Exponent difference and swap --
+  assign excExpFracX = {X[33:32],X[30:0]};
+  assign excExpFracY = {Y[33:32],Y[30:0]};
+  assign eXmeY = ({1'b 0,X[30:23]}) - ({1'b 0,Y[30:23]});
+  assign eYmeX = ({1'b 0,Y[30:23]}) - ({1'b 0,X[30:23]});
+  assign swap = excExpFracX >= excExpFracY ? 1'b 0 : 1'b 1; // swap when X compares smaller, so newX always carries the larger {exc,exp,frac}
+  assign newX = swap == 1'b0 ? X : Y;
+  assign newY = swap == 1'b0 ? Y : X;
+  assign expX = newX[30:23];
+  assign excX = newX[33:32];
+  assign excY = newY[33:32];
+  assign signX = newX[31];
+  assign signY = newY[31];
+  assign EffSub = signX ^ signY;
+  assign sdsXsYExnXY = {signX,signY,excX,excY};
+  assign sdExnXY = {excX,excY};
+  assign fracY = excY == 2'b00 ? 24'b000000000000000000000000 : {1'b1,newY[22:0]}; // a leading 1 is prepended to the fraction; all zero when excY is 2'b00
+  always @(*) begin // combinational: blocking '=' is used (the translator emitted non-blocking '<=')
+    case(sdsXsYExnXY)
+      6'b000000,6'b010000,6'b100000,6'b110000 : excRt = 2'b00;
+      6'b000101,6'b010101,6'b100101,6'b110101,6'b000100,6'b010100,6'b100100,6'b110100,6'b000001,6'b010001,6'b100001,6'b110001 : excRt = 2'b01;
+      6'b111010,6'b001010,6'b001000,6'b011000,6'b101000,6'b111000,6'b000010,6'b010010,6'b100010,6'b110010,6'b001001,6'b011001,6'b101001,6'b111001,6'b000110,6'b010110,6'b100110,6'b110110 : excRt = 2'b10;
+      default : excRt = 2'b 11;
+    endcase
+  end
+
+  assign signR = (sdsXsYExnXY == 6'b100000 || sdsXsYExnXY == 6'b010000) ? 1'b0 : signX;
+  //-------------- cycle 0----------------
+  assign expDiff = swap == 1'b0 ? eXmeY : eYmeX;
+  assign shiftedOut = (expDiff >= 25) ? 1'b1 : 1'b0;
+  assign shiftVal = shiftedOut == 1'b0 ? expDiff[4:0] : 5'b11010; // the shift amount is clamped to 26 once the exponent gap reaches 25
+  FPAdder_8_23_uid2_RightShifter_l2 RightShifterComponent(
+      .R(shiftedFracY),
+      .S(shiftVal),
+      .X(fracY));
+
+  //--------------Synchro barrier, entering cycle 1----------------
+  assign sticky = (shiftedFracY_d1[23:0] == 24'b000000000000000000000000) ? 1'b0 : 1'b1; // literal widened from 23 to 24 bits to match the slice; same result, the short literal was zero-extended
+  //-------------- cycle 0----------------
+  //--------------Synchro barrier, entering cycle 1----------------
+  assign fracYfar = {1'b 0,shiftedFracY_d1[49:24]};
+  assign fracYfarXorOp = fracYfar ^ ({27{EffSub_d1}}); // replication replaces the hand-expanded list of EffSub_d1 copies (one per fracYfar bit); inverts fracYfar when EffSub_d1 is 1
+  assign fracXfar = {2'b01,(newX_d1[22:0]),2'b00};
+  assign cInAddFar = EffSub_d1 & ~sticky;
+  IntAdder_27_f110_uid6_l2 fracAdder(
+      .Cin(cInAddFar),
+      .R(fracAddResult),
+      .X(fracXfar),
+      .Y(fracYfarXorOp));
+
+  assign fracGRS = {fracAddResult,sticky};
+  assign extendedExpInc = ({2'b00,expX_d1}) + 1'b1;
+  LZCShifter_28_to_28_counting_32_uid16_l2 LZC_component(
+      .clk(clk),
+      .stall(seq_stall),
+      .Count(nZerosNew),
+      .I(fracGRS),
+      .O(shiftedFrac));
+
+  //--------------Synchro barrier, entering cycle 2----------------
+  assign updatedExp = extendedExpInc_d1 - ({5'b00000,nZerosNew});
+  assign eqdiffsign = nZerosNew == 5'b11111 ? 1'b1 : 1'b0;
+  assign expFrac = {updatedExp,shiftedFrac[26:3]};
+  //-------------- cycle 2----------------
+  assign stk = shiftedFrac[1] | shiftedFrac[0];
+  assign rnd = shiftedFrac[2];
+  assign grd = shiftedFrac[3];
+  assign lsb = shiftedFrac[4];
+  assign addToRoundBit = (lsb == 1'b0 && grd == 1'b1 && rnd == 1'b0 && stk == 1'b0) ? 1'b0 : 1'b1; // 1 is added at the grd position (bit 0 of expFrac) except when lsb = 0, grd = 1 and rnd = stk = 0
+  IntAdder_34_f110_uid18_l2 roundingAdder(
+      .Cin(addToRoundBit),
+      .R(RoundedExpFrac),
+      .X(expFrac),
+      .Y(34'b0000000000000000000000000000000000));
+
+  //-------------- cycle 2----------------
+  assign upExc = RoundedExpFrac[33:32];
+  assign fracR = RoundedExpFrac[23:1];
+  assign expR = RoundedExpFrac[31:24];
+  assign exExpExc = {upExc,excRt_d2};
+  always @(*) begin // combinational: blocking '=' is used (the translator emitted non-blocking '<=')
+    case((exExpExc))
+      4'b0000,4'b0100,4'b1000,4'b1100,4'b1001,4'b1101 : excRt2 = 2'b00;
+      4'b0001 : excRt2 = 2'b01;
+      4'b0010,4'b0110,4'b0101 : excRt2 = 2'b10;
+      default : excRt2 = 2'b11;
+    endcase
+  end
+
+  assign excR = (eqdiffsign == 1'b1 && EffSub_d2 == 1'b1) ? 2'b00 : excRt2;
+  assign computedR = {excR,signR_d2,expR,fracR};
+  assign R = computedR;
+
+endmodule
diff --git a/hw/hdl/operators/dtengine/xgboost/common/delay.v b/hw/hdl/operators/dtengine/xgboost/common/delay.v
new file mode 100644
index 00000000..10d10336
--- /dev/null
+++ b/hw/hdl/operators/dtengine/xgboost/common/delay.v
@@ -0,0 +1,52 @@
+
+
+module delay #(parameter DATA_WIDTH = 32,
+               parameter DELAY_CYCLES = 4
+    ) (
+
+    input wire clk,
+    input wire rst_n,
+    input wire [DATA_WIDTH-1:0] data_in,
+    input wire data_in_valid,
+    output wire [DATA_WIDTH-1:0] data_out,
+    output wire data_out_valid
+    );
+
+
+reg [DATA_WIDTH-1:0] data_array[0:DELAY_CYCLES-1]; // explicit range: the [N] shorthand is SystemVerilog-only and this is a .v file
+reg data_array_valid[0:DELAY_CYCLES-1];
+
+
+always @(posedge clk) begin
+    // Valid Bit (cleared synchronously while rst_n is low)
+    if(~rst_n) begin
+        data_array_valid[0] <= 0;
+    end
+    else begin
+        data_array_valid[0] <= data_in_valid;
+    end
+    // Data word (not reset)
+    data_array[0] <= data_in;
+end
+
+
+genvar i;
+generate for (i = 1; i < DELAY_CYCLES; i = i +1) begin: delayPipe
+    always @(posedge clk) begin
+        // Valid Bit (cleared synchronously while rst_n is low)
+        if(~rst_n) begin
+            data_array_valid[i] <= 0;
+        end
+        else begin
+            data_array_valid[i] <= data_array_valid[i-1];
+        end
+        // Data word (not reset)
+        data_array[i] <= data_array[i-1];
+    end
+end
+endgenerate
+
+assign data_out = data_array[DELAY_CYCLES-1]; // last stage: data_in after DELAY_CYCLES clock edges
+assign data_out_valid = data_array_valid[DELAY_CYCLES-1];
+
+endmodule // delay diff --git a/hw/hdl/operators/dtengine/xgboost/core/DTPU.sv b/hw/hdl/operators/dtengine/xgboost/core/DTPU.sv new file mode 100644 index 00000000..655db66d --- /dev/null +++ b/hw/hdl/operators/dtengine/xgboost/core/DTPU.sv @@ -0,0 +1,772 @@ + +import DTEngine_Types::*; + +/* + PU Constraints: + - MAX NUMBER OF TREE NODES IN TOTAL IT CAN HANDLE: 8192: 2^13 + - MAX NUMBER OF FEATURES PER TUPLE IT CAN HANDLE: 4096: 2^12 (NOTE(review): MAX_NUM_TUPLE_FEATURES_BITS below is 11, i.e. 2^11 = 2048 -- confirm which is right) + - MAX NUMBER OF TREE DEPTH IT CAN HANDLE: Hybrid -> log2(8192/NUM_TREES) Levels on FPGA + rest on CPU, + FPGA Only -> log2(8192/NUM_TREES) Levels including LEAVES. + + +*/ + +module DTPU #(parameter PU_ID = 0 + )( + + input wire clk, + input wire rst_n, + + input wire [DATA_BUS_WIDTH-1:0] data_line_in, + input wire data_line_in_valid, + input wire data_line_in_last, + input wire data_line_in_ctrl, + input wire [1:0] data_line_in_mode, + input wire [NUM_PUS_PER_CLUSTER_BITS-1:0] data_line_in_pu, + output reg data_line_in_ready, + + output reg [DATA_BUS_WIDTH-1:0] data_line_out, + output reg data_line_out_valid, + output reg data_line_out_ctrl, + output reg data_line_out_last, + output reg [1:0] data_line_out_prog, + output reg [NUM_PUS_PER_CLUSTER_BITS-1:0] data_line_out_pu, + + output reg [15:0] pu_tree_node_index_out, + output reg pu_tree_node_index_out_valid, + + output wire [DATA_PRECISION-1:0] pu_tree_leaf_out, + output wire pu_tree_leaf_out_valid, + output wire pu_tree_leaf_out_last + + ); + + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// ////////////////////////////////// +////////////////////////////// Local Parameters ///////////////////////// +////////////////////////////////////// ////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// + + + +localparam MAX_NUMBER_OF_TREES_BITS = 4; +localparam MAX_TREE_DEPTH_BITS = 4; + +localparam
MAX_NUM_TUPLE_FEATURES_BITS = 11; +localparam MAX_NUM_TREE_NODES_BITS = 13; +localparam NODE_WORD_OFFSET_BITS = 2; +localparam NUM_WORDS_PER_LINE = 4; +localparam NODE_FEATURE_INDEX_WIDTH = 16; + +localparam MEM_OUTPUT_PIPELINE_DEPTH = 2; +localparam INDEXES_PER_LINE_BITS = (DATA_BUS_WIDTH == 256)? 4 : + (DATA_BUS_WIDTH == 128)? 3 : 2; + + +localparam FINDEX_OFFSET_BITS = MAX_NUM_TREE_NODES_BITS - INDEXES_PER_LINE_BITS; +localparam TUPLE_OFFSET_BITS = MAX_NUM_TUPLE_FEATURES_BITS - NODE_WORD_OFFSET_BITS; // Feature memory max depth is 512 +localparam TREE_OFFSET_BITS = MAX_NUM_TREE_NODES_BITS - NODE_WORD_OFFSET_BITS; // Weights memory max depth is 1024 + +localparam READ_TNODE_LATENCY = 1+MEM_OUTPUT_PIPELINE_DEPTH; +localparam READ_FEATURE_LATENCY = 1+MEM_OUTPUT_PIPELINE_DEPTH; + +localparam INSTRUCTION_DELAY = 3+7 - PU_ID; // PU pipeline depth-1 - PU_ID +localparam INSTRUCTION_DELAY_NEGATIVE = -1*(3+7 - PU_ID); // PU pipeline depth-1 - PU_ID + +localparam INSTRUCTION_WIDTH = TREE_OFFSET_BITS + TREE_OFFSET_BITS + TUPLE_OFFSET_BITS + 1 + 1; + +localparam NUM_TREES_PER_PU_BITS = MAX_NUMBER_OF_TREES_BITS; +//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// ////////////////////////////////// +////////////////////////////// Signals Declarations ///////////////////////// +////////////////////////////////////// ////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +wire TWM_reb; +wire [MAX_NUM_TREE_NODES_BITS-1:0] TWM_res_raddr; +wire TWM_wen; +wire TWM_rea; +wire TWM_res_valid; +wire TWM_weight_valid; +wire [MAX_NUM_TREE_NODES_BITS-1:0] TWM_weight_wraddr; +wire [DATA_BUS_WIDTH-1:0] TWM_wr_data; +wire [DATA_PRECISION-1:0] TWM_res_data; +wire [DATA_PRECISION-1:0] TWM_weight_data; + +reg [TREE_OFFSET_BITS-1: 0] tree_prog_addr; +reg [NUM_TREES_PER_PU_BITS-1:0] local_num_trees; + +wire TFI_wen; +wire 
TFI_ren; +wire TFI_rd_data_valid; + +reg [FINDEX_OFFSET_BITS-1:0] TFI_wr_addr; +wire [DATA_BUS_WIDTH-1:0] TFI_wr_data; + +wire [MAX_NUM_TREE_NODES_BITS-1:0] TFI_rd_addr; +wire [NODE_FEATURE_INDEX_WIDTH-1:0] TFI_rd_data; + +wire features_wen; +wire features_ren; +wire features_rd_data_valid; + +reg [TUPLE_OFFSET_BITS-1:0] features_wr_addr; +wire [DATA_BUS_WIDTH-1:0] features_wr_data; + +wire [MAX_NUM_TUPLE_FEATURES_BITS-1:0] features_rd_addr; +wire [DATA_PRECISION-1:0] features_rd_data; + +reg [TUPLE_OFFSET_BITS-1:0] tuple_offset; +reg tuple_valid; +reg tuple_offset_set; + +reg [MAX_NUMBER_OF_TREES_BITS-1:0] num_trees_per_pu_minus_one; +reg PartialTrees; +reg [MAX_TREE_DEPTH_BITS-1:0] LastLevelIndex; +reg [TREE_OFFSET_BITS-1:0] num_lines_per_tree_weights; +reg [TREE_OFFSET_BITS-1:0] num_lines_per_tree_findex; + +wire delayed_instruction_we; +wire delayed_instruction_re; +wire delayed_instruction_valid_f; +wire [TUPLE_OFFSET_BITS+19:0] delayed_instruction_i; +wire [TUPLE_OFFSET_BITS+19:0] delayed_instruction_o; +reg [3:0] instr_delay_cycles; +reg tuple_instruction_valid; +reg [TREE_OFFSET_BITS-1:0] curr_tree_w_offset; +reg [TREE_OFFSET_BITS-1:0] curr_tree_f_offset; +reg [MAX_NUMBER_OF_TREES_BITS-1:0] curr_tree_index; +wire last_tree; +wire instr_NOP; +wire tuple_instruction_we; +wire tuple_instruction_re; +wire tuple_instruction_valid_f; +wire [INSTRUCTION_WIDTH-1:0] tuple_instruction; + +wire [TREE_OFFSET_BITS-1:0] tree_w_offset_s1; +wire [TREE_OFFSET_BITS-1:0] tree_w_offset_d1; +wire [TREE_OFFSET_BITS-1:0] tree_f_offset_s1; +wire [TREE_OFFSET_BITS-1:0] tree_f_offset_d1; + +wire [MAX_NUM_TREE_NODES_BITS-1:0] tree_node_offset_s1; +wire [MAX_NUM_TREE_NODES_BITS-1:0] tree_w_node_addr_s1; +wire [MAX_NUM_TREE_NODES_BITS-1:0] tree_f_node_addr_s1; + +wire [MAX_NUM_TREE_NODES_BITS-1:0] next_tree_node_offset_s1; +wire [MAX_NUM_TREE_NODES_BITS-1:0] next_tree_w_node_addr_s1; +wire [MAX_NUM_TREE_NODES_BITS-1:0] next_tree_f_node_addr_s1; + +wire [MAX_NUM_TREE_NODES_BITS-1:0] 
next_tree_node_offset_d1; +wire [MAX_NUM_TREE_NODES_BITS-1:0] next_tree_w_node_addr_d1; +wire [MAX_NUM_TREE_NODES_BITS-1:0] next_tree_f_node_addr_d1; + +wire [TUPLE_OFFSET_BITS-1:0] tuple_offset_s1; +wire [TUPLE_OFFSET_BITS-1:0] tuple_offset_d1; + +wire tree_instr_NOP_s1; +wire last_tree_s1; +wire [MAX_TREE_DEPTH_BITS-1:0] tree_node_level_s1; +wire tree_instr_NOP_d1; +wire last_tree_d1; +wire [MAX_TREE_DEPTH_BITS-1:0] tree_node_level_d1; + +wire tree_node_ren; + +wire [DATA_PRECISION-1:0] weight_data_d2; +wire [2:0] feature_index_data_d2; +wire tree_node_rd_stage_valid; +wire feature_rd_stage_valid; + +wire tree_instr_NOP_d2; +wire last_tree_d2; +wire [MAX_TREE_DEPTH_BITS-1:0] tree_node_level_d2; + +wire [TUPLE_OFFSET_BITS-1:0] tuple_offset_d2; +wire [MAX_NUM_TREE_NODES_BITS-1:0] next_tree_node_offset_d2; +wire [MAX_NUM_TREE_NODES_BITS-1:0] next_tree_w_node_addr_d2; +wire [MAX_NUM_TREE_NODES_BITS-1:0] next_tree_f_node_addr_d2; +wire [TREE_OFFSET_BITS-1:0] tree_f_offset_d2; +wire [TREE_OFFSET_BITS-1:0] tree_w_offset_d2; + +wire isFeatureMissing; +wire isFeatureSmaller; +wire isRightChild; +wire isMissingRight; +wire isNextNodeLeaf; +wire isLastLevel; +wire goToOutput; +wire goToOutput_d3; + +wire incrementNodeOffset; +wire incrementNodeOffset_d3; +wire comparison_stage_valid; + +wire [TUPLE_OFFSET_BITS-1:0] tuple_offset_d3; +wire [MAX_NUM_TREE_NODES_BITS-1:0] next_tree_node_offset_d3; +wire [MAX_NUM_TREE_NODES_BITS-1:0] next_tree_w_node_addr_d3; +wire [MAX_NUM_TREE_NODES_BITS-1:0] next_tree_f_node_addr_d3; +wire [TREE_OFFSET_BITS-1:0] tree_f_offset_d3; +wire [TREE_OFFSET_BITS-1:0] tree_w_offset_d3; + +wire tree_instr_NOP_d3; +wire last_tree_d3; +wire [MAX_TREE_DEPTH_BITS-1:0] tree_node_level_d3; + +reg tree_instruction_valid; +reg tree_output_valid; +reg tree_instruction_type_NOP; +reg tree_instruction_type_EMPTY; +reg tree_instruction_last_flag; + +reg [TREE_OFFSET_BITS-1:0] tree_instruction_tree_w_offset; +reg [TREE_OFFSET_BITS-1:0] tree_instruction_tree_f_offset; 
+ +reg [MAX_NUM_TREE_NODES_BITS-1:0] tree_instruction_node_w_addr; +reg [MAX_NUM_TREE_NODES_BITS-1:0] tree_instruction_node_f_addr; + +reg [TUPLE_OFFSET_BITS-1:0] tree_instruction_tuple_offset; +reg [MAX_NUM_TREE_NODES_BITS-1:0] tree_instruction_node_offset; + +reg [MAX_TREE_DEPTH_BITS-1:0] tree_instruction_node_level; + +reg [31:0] MissingFeatureValue; + +wire TupleInstrctionFIFO_full; + +reg [9:0] features_mem_count; +reg [9:0] tuple_numlines; + +wire delayed_instruction_fifo_almostfull; +wire delayed_instruction_fifo_full; + +reg [31:0] num_tuples_received; + +reg [19:0] time_stamp; +wire [20:0] time_stamp_diff; +wire tuple_old_enough; +reg tuple_old_enough_set; + +wire pu_tree_leaf_zero; +wire tree_instr_EMPTY_s1; +wire tree_instr_EMPTY_d1; +wire tree_instr_EMPTY_d2; +wire tree_instr_EMPTY_d3; + +wire curr_feature_done; +//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// ////////////////////////////////// +////////////////////////////// Memory Banks ///////////////////////// +////////////////////////////////////// ////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//----------------------------- Tree Nodes Weight memory ------------------------------// +Mem1in2out #( .DATA_WIDTH(DATA_BUS_WIDTH), + .ADDR_WIDTH(TREE_OFFSET_BITS), + .LINE_ADDR_WIDTH(NODE_WORD_OFFSET_BITS), + .WORD_WIDTH(DATA_PRECISION), + .NUM_PIPELINE_LEVELS(MEM_OUTPUT_PIPELINE_DEPTH) ) +WeightsMem( + + .clk (clk), + .rst_n (rst_n), + .we (TWM_wen), + .rea (TWM_rea), + .reb (TWM_reb), + .raddr (TWM_res_raddr), + .wraddr (TWM_weight_wraddr), + .din (TWM_wr_data), + .dout1 (TWM_weight_data), + .valid_out1 (TWM_weight_valid), + .dout2 (TWM_res_data), + .valid_out2 (TWM_res_valid) +); + + +assign TWM_wen = ~data_line_in_mode[0] & data_line_in_mode[1] & (data_line_in_pu == PU_ID); +assign TWM_wr_data = data_line_in; + 
+always @(posedge clk) begin + if(~rst_n) begin + tree_prog_addr <= 0; + local_num_trees <= 0; + end + else if(TWM_wen) begin + tree_prog_addr <= tree_prog_addr + 1'b1; + + if(data_line_in_last) begin + local_num_trees <= local_num_trees + 1'b1; + end + end +end + +//------------------------ Tree Nodes Feature indexes memory --------------------------// +DualPortMem #( .DATA_WIDTH(DATA_BUS_WIDTH), + .ADDR_WIDTH(FINDEX_OFFSET_BITS), + .WORD_WIDTH(NODE_FEATURE_INDEX_WIDTH), + .LINE_ADDR_WIDTH(INDEXES_PER_LINE_BITS), + .NUM_PIPELINE_LEVELS(MEM_OUTPUT_PIPELINE_DEPTH) ) +TreeFeatureIndex_Mem( + + .clk (clk), + .rst_n (rst_n), + .we (TFI_wen), + .re (TFI_ren), + .raddr (TFI_rd_addr), + .waddr (TFI_wr_addr), + .din (TFI_wr_data), + .dout (TFI_rd_data), + .valid_out (TFI_rd_data_valid) +); + +assign TFI_ren = tree_node_ren; +assign TFI_rd_addr = tree_f_node_addr_s1; + +assign TFI_wen = ~data_line_in_mode[0] & ~data_line_in_mode[1] & (data_line_in_pu == PU_ID); +assign TFI_wr_data = data_line_in; + + +always @(posedge clk) begin + if(~rst_n) begin + TFI_wr_addr <= 0; + end + else if(TFI_wen) begin + TFI_wr_addr <= TFI_wr_addr + 1'b1; + end +end +//--------------------------- Input tuple features memory -----------------------------// + +/* We write to the features memory when flags indicate + incoming data is tuples and nor programming data +*/ +DualPortMem #( .DATA_WIDTH(DATA_BUS_WIDTH), + .ADDR_WIDTH(TUPLE_OFFSET_BITS), + .WORD_WIDTH(DATA_PRECISION), + .LINE_ADDR_WIDTH(NODE_WORD_OFFSET_BITS), + .NUM_PIPELINE_LEVELS(MEM_OUTPUT_PIPELINE_DEPTH) ) +SamplesFeatures_Mem( + + .clk (clk), + .rst_n (rst_n), + .we (features_wen), + .re (features_ren), + .raddr (features_rd_addr), + .waddr (features_wr_addr), + .din (features_wr_data), + .dout (features_rd_data), + .valid_out (features_rd_data_valid) +); + + +always @(posedge clk) begin + if(~rst_n) begin + features_wr_addr <= 9'b0; + tuple_offset <= 0; + tuple_offset_set <= 0; + //tuple_valid <= 0; + end + else begin + 
if(~tuple_offset_set) begin + tuple_offset <= features_wr_addr; + end + if(features_wen) begin + features_wr_addr <= features_wr_addr + 1'b1; + tuple_offset_set <= ~data_line_in_last; + end + //tuple_valid <= features_wen & data_line_in_last; + end +end + +assign features_wen = data_line_in_valid; +assign features_wr_data = data_line_in; +assign curr_feature_done = last_tree_d3 & goToOutput_d3 & comparison_stage_valid; + +always @(posedge clk) begin + if (~rst_n) begin + // reset + data_line_in_ready <= 1'b0; + features_mem_count <= 0; + end + else begin + data_line_in_ready <= (features_mem_count < (512-FEATURES_DISTR_DELAY)) & ~delayed_instruction_fifo_almostfull; + + if(features_wen & curr_feature_done) begin + features_mem_count <= features_mem_count + 1'b1 - tuple_numlines; + end + else if(features_wen) begin + features_mem_count <= features_mem_count + 1'b1; + end + else if (curr_feature_done) begin + features_mem_count <= features_mem_count - tuple_numlines; + end + end +end + +//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// ////////////////////////////////// +////////////////////////////// PU Programming Logic ///////////////////////// +////////////////////////////////////// ////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +always @(posedge clk) begin + if(~rst_n) begin + num_trees_per_pu_minus_one <= 0; + PartialTrees <= 0; + LastLevelIndex <= 0; + num_lines_per_tree_weights <= 0; + num_lines_per_tree_findex <= 0; + MissingFeatureValue <= 0; + end + else if(data_line_in_ctrl) begin + num_trees_per_pu_minus_one <= data_line_in[MAX_NUMBER_OF_TREES_BITS-1:0]; // First Byte 4 bits: MAX NUMBER OF TREES PER PU = 16 + PartialTrees <= data_line_in[8]; // Second Byte 1 bit : 1 means send partial results, 0 all trees fit on the FPGA + LastLevelIndex <= 
data_line_in[16+MAX_TREE_DEPTH_BITS-1:16]; // Third Byte 4 bits: MAX NUMBER OF LEVELS PER TREE = 12 + 1 LEAF LEVEL + num_lines_per_tree_weights <= {data_line_in[24+TREE_OFFSET_BITS-2:24]}; // B5B4 10 bits: MAX NUMBER OF LINES PER TREE = 1024 (consume all PU memory) + num_lines_per_tree_findex <= {data_line_in[40+TREE_OFFSET_BITS-2:40]}; // B7B6 10 bits: MAX NUMBER OF LINES PER TREE = 1024 (consume all PU memory) + MissingFeatureValue <= data_line_in[87: 56]; // B11B10B9B8 + tuple_numlines <= data_line_in[103:88]; // B13B12 + end +end + +//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// ////////////////////////////////// +////////////////////////////// Tuple Instruction FIFO ///////////////////////// +////////////////////////////////////// ////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// +/* Once all features of a tuple are in features memory, we enqueue an instruction to execute all the + trees in the PU on the current tuple features, the instruction simply include the tuple offset. 
+ +*/ + +assign delayed_instruction_we = features_wen & data_line_in_last; +assign delayed_instruction_i = {time_stamp, tuple_offset}; + +assign delayed_instruction_re = (tuple_old_enough | tuple_old_enough_set) & (curr_tree_index == num_trees_per_pu_minus_one) & ~TupleInstrctionFIFO_full; + +quick_fifo #(.FIFO_WIDTH(TUPLE_OFFSET_BITS+20), + .FIFO_DEPTH_BITS(9), + .FIFO_ALMOSTFULL_THRESHOLD(508) + ) DelayedTupleInstrctionFIFO ( + .clk (clk), + .reset_n (rst_n), + .din (delayed_instruction_i), + .we (delayed_instruction_we), + + .re (delayed_instruction_re), + .dout (delayed_instruction_o), + .empty (), + .valid (delayed_instruction_valid_f), + .full (delayed_instruction_fifo_full), + .count (), + .almostfull (delayed_instruction_fifo_almostfull) + ); + +assign time_stamp_diff = {1'b0, time_stamp} - {1'b0, delayed_instruction_o[TUPLE_OFFSET_BITS+19:TUPLE_OFFSET_BITS]}; + +assign tuple_old_enough = (time_stamp_diff[20])? (time_stamp_diff <= INSTRUCTION_DELAY_NEGATIVE) : (time_stamp_diff >= INSTRUCTION_DELAY); + +always @(posedge clk) begin + if(~rst_n) begin + instr_delay_cycles <= 0; + num_tuples_received <= 0; + time_stamp <= 0; + end + else begin + // + time_stamp <= time_stamp + 1'b1; + // + if(delayed_instruction_we) begin + num_tuples_received <= num_tuples_received + 1'b1; + end + // + if(delayed_instruction_valid_f) begin + if((instr_delay_cycles == INSTRUCTION_DELAY) & (curr_tree_index == num_trees_per_pu_minus_one) & ~TupleInstrctionFIFO_full) begin + instr_delay_cycles <= 0; + end + else if((instr_delay_cycles < INSTRUCTION_DELAY)) begin + instr_delay_cycles <= instr_delay_cycles + 1'b1; + end + end + end +end + +// issuing a copy of the instruction for each tree +always @(posedge clk) begin + if(~rst_n) begin + curr_tree_w_offset <= 0; + curr_tree_f_offset <= 0; + curr_tree_index <= 0; + tuple_old_enough_set <= 0; + end + else if((tuple_old_enough | tuple_old_enough_set) & delayed_instruction_valid_f) begin + if(~TupleInstrctionFIFO_full) begin + 
if(curr_tree_index < num_trees_per_pu_minus_one) begin + curr_tree_w_offset <= curr_tree_w_offset + num_lines_per_tree_weights; + curr_tree_f_offset <= curr_tree_f_offset + num_lines_per_tree_findex; + curr_tree_index <= curr_tree_index + 1'b1; + tuple_old_enough_set <= 1'b1; + end + else begin + curr_tree_w_offset <= 0; + curr_tree_f_offset <= 0; + curr_tree_index <= 0; + tuple_old_enough_set <= 1'b0; + end + end + end + else begin + curr_tree_w_offset <= 0; + curr_tree_f_offset <= 0; + curr_tree_index <= 0; + tuple_old_enough_set <= 1'b0; + end +end + +assign last_tree = (curr_tree_index == num_trees_per_pu_minus_one); +assign instr_NOP = ~(curr_tree_index < local_num_trees); + + + +quick_fifo #(.FIFO_WIDTH(INSTRUCTION_WIDTH), + .FIFO_DEPTH_BITS(9), + .FIFO_ALMOSTFULL_THRESHOLD(16) + ) TupleInstrctionFIFO ( + .clk (clk), + .reset_n (rst_n), + .din ( {last_tree, instr_NOP, delayed_instruction_o[TUPLE_OFFSET_BITS-1:0], curr_tree_f_offset, curr_tree_w_offset} ), + .we (tuple_instruction_we), + + .re (tuple_instruction_re), + .dout (tuple_instruction), + .empty (), + .valid (tuple_instruction_valid_f), + .full (TupleInstrctionFIFO_full), + .count (), + .almostfull () + ); + + +assign tuple_instruction_we = delayed_instruction_valid_f & (tuple_old_enough | tuple_old_enough_set); +assign tuple_instruction_re = ~tree_instruction_valid; +//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// ////////////////////////////////// +////////////////////////////// Read Node Stage ///////////////////////// +////////////////////////////////////// ////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// +/* + This stage execute a tree or tuple instruction: + this instruction issues a read request to Weights and Feature Indexes memories. +*/ + +assign tree_w_offset_s1 = (tree_instruction_valid)? 
tree_instruction_tree_w_offset : tuple_instruction[TREE_OFFSET_BITS-1:0]; +assign tree_f_offset_s1 = (tree_instruction_valid)? tree_instruction_tree_f_offset : tuple_instruction[2*TREE_OFFSET_BITS-1:TREE_OFFSET_BITS]; // 10-bit tree address +assign tree_node_offset_s1 = (tree_instruction_valid)? tree_instruction_node_offset : 0; // 11 bit node offset +assign tree_w_node_addr_s1 = (tree_instruction_valid)? tree_instruction_node_w_addr : {tuple_instruction[TREE_OFFSET_BITS-1:0], {(NODE_WORD_OFFSET_BITS){1'b0}}}; // 13 bit node addr +assign tree_f_node_addr_s1 = (tree_instruction_valid)? tree_instruction_node_f_addr : {tuple_instruction[2*TREE_OFFSET_BITS-2:TREE_OFFSET_BITS], {(NODE_WORD_OFFSET_BITS+1){1'b0}}}; // 13 bit node addr + +assign tuple_offset_s1 = (tree_instruction_valid)? tree_instruction_tuple_offset : tuple_instruction[TUPLE_OFFSET_BITS+2*TREE_OFFSET_BITS-1:2*TREE_OFFSET_BITS]; // 9 bits tuple offset +assign tree_instr_NOP_s1 = (tree_instruction_valid)? tree_instruction_type_NOP : 1'b0; +assign tree_instr_EMPTY_s1 = (tree_instruction_valid)? tree_instruction_type_EMPTY : tuple_instruction[TUPLE_OFFSET_BITS + 2*TREE_OFFSET_BITS]; // 1 bit NOP operation flag +assign tree_node_level_s1 = (tree_instruction_valid)? tree_instruction_node_level + 1'b1 : 0; // 4 bits Tree Node level + +assign last_tree_s1 = (tree_instruction_valid)? tree_instruction_last_flag : tuple_instruction[TUPLE_OFFSET_BITS + 2*TREE_OFFSET_BITS + 1]; + +assign tree_node_ren = tree_instruction_valid | (tuple_instruction_valid_f); + +assign next_tree_w_node_addr_s1 = {tree_w_offset_s1, {(NODE_WORD_OFFSET_BITS){1'b0}}} + {tree_node_offset_s1[MAX_NUM_TREE_NODES_BITS-2:0], 1'b1}; +assign next_tree_f_node_addr_s1 = {tree_f_offset_s1[TREE_OFFSET_BITS-2:0], {(NODE_WORD_OFFSET_BITS+1){1'b0}}} + {tree_node_offset_s1[MAX_NUM_TREE_NODES_BITS-2:0], 1'b1}; +assign next_tree_node_offset_s1 = (tree_instr_NOP_s1)? 
tree_node_offset_s1 : {tree_node_offset_s1[MAX_NUM_TREE_NODES_BITS-2:0], 1'b1}; + +// Send read requests to Weights and Feature Indexes memories +assign TWM_weight_wraddr = (~data_line_in_mode[0])? {tree_prog_addr, {(NODE_WORD_OFFSET_BITS){1'b0}}} : tree_w_node_addr_s1; +assign TWM_rea = tree_node_ren; + +assign TFI_rd_addr = tree_f_node_addr_s1; +assign TFI_ren = tree_node_ren; + +// Pipeline to next stage +delay #(.DATA_WIDTH(3*MAX_NUM_TREE_NODES_BITS+2*TREE_OFFSET_BITS+TUPLE_OFFSET_BITS+1+MAX_TREE_DEPTH_BITS + 1+1), + .DELAY_CYCLES(READ_TNODE_LATENCY) + ) ReadNodeStageDelay( + + .clk (clk), + .rst_n (rst_n), + .data_in ( {tree_instr_EMPTY_s1, last_tree_s1, tree_node_level_s1, tree_instr_NOP_s1, /*tuple_UID_s1,*/ tuple_offset_s1, next_tree_f_node_addr_s1, next_tree_w_node_addr_s1, tree_f_offset_s1, tree_w_offset_s1, next_tree_node_offset_s1 } ), // + .data_in_valid (tree_node_ren), + .data_out ( {tree_instr_EMPTY_d1, last_tree_d1, tree_node_level_d1, tree_instr_NOP_d1, /*tuple_UID_d1,*/ tuple_offset_d1, next_tree_f_node_addr_d1, next_tree_w_node_addr_d1, tree_f_offset_d1, tree_w_offset_d1, next_tree_node_offset_d1 } ), + .data_out_valid (tree_node_rd_stage_valid) + ); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// ////////////////////////////////// +////////////////////////////// Read Feature Stage ///////////////////////// +////////////////////////////////////// ////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// +/* + This stage reads the feature data corresponding to current tree node. 
+*/ + +assign features_ren = tree_node_rd_stage_valid; +assign features_rd_addr = {TFI_rd_data[MAX_NUM_TUPLE_FEATURES_BITS-1:0]} + {tuple_offset_d1, {NODE_WORD_OFFSET_BITS{1'b0}} }; + +// Pipeline to next stage +delay #(.DATA_WIDTH(3*MAX_NUM_TREE_NODES_BITS+2*TREE_OFFSET_BITS+TUPLE_OFFSET_BITS+1+MAX_TREE_DEPTH_BITS + 1 + DATA_PRECISION+2+1+1), + .DELAY_CYCLES(READ_FEATURE_LATENCY) + ) ReadFeatureStageDelay( + + .clk (clk), + .rst_n (rst_n), + .data_in ( {TFI_rd_data[NODE_FEATURE_INDEX_WIDTH-1:NODE_FEATURE_INDEX_WIDTH-3], TWM_weight_data, tree_instr_EMPTY_d1, last_tree_d1, tree_node_level_d1, tree_instr_NOP_d1, tuple_offset_d1, next_tree_f_node_addr_d1, next_tree_w_node_addr_d1, tree_f_offset_d1, tree_w_offset_d1, next_tree_node_offset_d1 } ), // + .data_in_valid (features_ren), + .data_out ( {feature_index_data_d2, weight_data_d2, tree_instr_EMPTY_d2, last_tree_d2, tree_node_level_d2, tree_instr_NOP_d2, tuple_offset_d2, next_tree_f_node_addr_d2, next_tree_w_node_addr_d2, tree_f_offset_d2, tree_w_offset_d2, next_tree_node_offset_d2 } ), + .data_out_valid (feature_rd_stage_valid) + ); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// ////////////////////////////////// +////////////////////////////// Comparison Stage ///////////////////////// +////////////////////////////////////// ////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// +/* + In this stage we compare feature and weight values, take a decision to output the tree result + or continue to next stage +*/ + +assign isFeatureMissing = features_rd_data == MissingFeatureValue; + +assign isFeatureSmaller = {~features_rd_data[DATA_PRECISION-1], features_rd_data} < {~weight_data_d2[DATA_PRECISION-1], weight_data_d2}; + +assign isRightChild = ~isFeatureSmaller; + +assign isMissingRight = feature_index_data_d2[0]; + +assign isNextNodeLeaf = 
feature_index_data_d2[1]; + +assign isLastLevel = tree_node_level_d2 == LastLevelIndex; + +assign goToOutput = isLastLevel; + +assign incrementNodeOffset = (isFeatureMissing)? isMissingRight : isRightChild; +// Pipeline to next stage +delay #(.DATA_WIDTH(3*MAX_NUM_TREE_NODES_BITS+2*TREE_OFFSET_BITS+TUPLE_OFFSET_BITS+1+MAX_TREE_DEPTH_BITS+1+1+1+1), + .DELAY_CYCLES(1) + ) ComparisonStageDelay( + + .clk (clk), + .rst_n (rst_n), + .data_in ( {goToOutput, incrementNodeOffset, tree_instr_EMPTY_d2, last_tree_d2, tree_node_level_d2, (tree_instr_NOP_d2 | isNextNodeLeaf), tuple_offset_d2, next_tree_f_node_addr_d2, next_tree_w_node_addr_d2, tree_f_offset_d2, tree_w_offset_d2, next_tree_node_offset_d2 } ), // + .data_in_valid (feature_rd_stage_valid), + .data_out ( {goToOutput_d3, incrementNodeOffset_d3, tree_instr_EMPTY_d3, last_tree_d3, tree_node_level_d3, tree_instr_NOP_d3, tuple_offset_d3, next_tree_f_node_addr_d3, next_tree_w_node_addr_d3, tree_f_offset_d3, tree_w_offset_d3, next_tree_node_offset_d3 } ), + .data_out_valid (comparison_stage_valid) + ); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// ////////////////////////////////// +////////////////////////////// Prepare to Next Stage ///////////////////////// +////////////////////////////////////// ////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// +/* + Set next stage instruction, correct next node offset and addr using comparison stage results +*/ + +always @(posedge clk) begin + if(~rst_n) begin + tree_instruction_valid <= 0; + tree_output_valid <= 0; + end + else begin + if( comparison_stage_valid ) begin + tree_instruction_valid <= ~goToOutput_d3; + tree_output_valid <= goToOutput_d3; + end + else begin + tree_instruction_valid <= 0; + tree_output_valid <= 0; + end + end +end + +always @(posedge clk) begin + if( comparison_stage_valid ) 
begin + tree_instruction_tuple_offset <= tuple_offset_d3; + tree_instruction_node_w_addr <= next_tree_w_node_addr_d3 + incrementNodeOffset_d3; + tree_instruction_node_f_addr <= next_tree_f_node_addr_d3 + incrementNodeOffset_d3; + tree_instruction_node_offset <= (tree_instr_NOP_d3)? next_tree_node_offset_d3 : next_tree_node_offset_d3 + incrementNodeOffset_d3; + tree_instruction_tree_w_offset <= tree_w_offset_d3; + tree_instruction_tree_f_offset <= tree_f_offset_d3; + tree_instruction_type_NOP <= tree_instr_NOP_d3; + tree_instruction_type_EMPTY <= tree_instr_EMPTY_d3; + tree_instruction_node_level <= tree_node_level_d3; + tree_instruction_last_flag <= last_tree_d3; + end +end + +//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////// ////////////////////////////////// +////////////////////////////// PU Output ///////////////////////// +////////////////////////////////////// ////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// +/* + prepare output of the current PU +*/ + +assign TWM_res_raddr = tree_instruction_node_w_addr; +assign TWM_reb = tree_output_valid & ~PartialTrees; + +//----------------------------------------- PU Output signals ------------------------------------// +// Tree node index +always @(posedge clk) begin + if(~rst_n) begin + pu_tree_node_index_out <= 0; + pu_tree_node_index_out_valid <= 0; + end + else begin + pu_tree_node_index_out <= next_tree_node_offset_d3; + pu_tree_node_index_out_valid <= goToOutput_d3 & PartialTrees; + end +end + +delay #(.DATA_WIDTH(1+1), + .DELAY_CYCLES(READ_TNODE_LATENCY+1) + ) PULeafOutputDelay( + + .clk (clk), + .rst_n (rst_n), + .data_in ( {tree_instr_EMPTY_d3, last_tree_d3} ), // + .data_in_valid (TWM_reb), + .data_out ( {pu_tree_leaf_zero, pu_tree_leaf_out_last} ), + .data_out_valid () + ); + +// Leaf value +assign pu_tree_leaf_out = 
(pu_tree_leaf_zero | ~TWM_res_valid)? 0 : TWM_res_data;
+assign pu_tree_leaf_out_valid = TWM_res_valid;
+
+
+//-----------------------------------------------------------------------------------------------//
+always @(posedge clk) begin
+
+    data_line_out <= data_line_in;
+    data_line_out_last <= data_line_in_last;
+    data_line_out_pu <= data_line_in_pu;
+
+    if(~rst_n) begin
+        data_line_out_valid <= 0;
+        data_line_out_prog <= 0;
+        data_line_out_ctrl <= 0;
+    end
+    else begin
+        data_line_out_valid <= data_line_in_valid;
+        data_line_out_prog <= data_line_in_mode;
+        data_line_out_ctrl <= data_line_in_ctrl;
+
+    end
+end
+
+
+
+endmodule // DTPU
+
+
diff --git a/hw/hdl/operators/dtengine/xgboost/core/DTPUCluster.sv b/hw/hdl/operators/dtengine/xgboost/core/DTPUCluster.sv
new file mode 100644
index 00000000..8d84a6e0
--- /dev/null
+++ b/hw/hdl/operators/dtengine/xgboost/core/DTPUCluster.sv
@@ -0,0 +1,219 @@
+
+import DTEngine_Types::*;
+// DTPUCluster: daisy-chains NUM_PUS_PER_CLUSTER DTPU instances on one input data-line stream,
+// reduces their leaf outputs with FPAddersReduceTree and queues the results in a quick_fifo.
+module DTPUCluster (
+    input wire clk,
+    input wire rst_n,
+    // Input data-line stream; each PU forwards it to the next one through data_line_*_array
+    input wire [DATA_BUS_WIDTH-1:0] data_line_in,
+    input wire data_line_in_valid,
+    input wire data_line_in_last,
+    input wire data_line_in_ctrl,
+    input wire [1:0] data_line_in_mode,
+    input wire [NUM_PUS_PER_CLUSTER_BITS-1:0] data_line_in_pu,
+    output reg data_line_in_ready, // registered copy of data_line_ready_array[0]
+    // One 16-bit node index per PU, PU i in bits [16*i+15:16*i];
+    // the valid flag is taken from PU 0 only.
+    output wire [NUM_PUS_PER_CLUSTER*16-1:0] partial_tree_node_index_out,
+    output wire partial_tree_node_index_out_valid,
+    // Read side of leaves_aggreg_result_fifo (read enable = partial_aggregation_out_ready)
+    output wire [DATA_PRECISION-1:0] partial_aggregation_out,
+    output wire partial_aggregation_out_valid,
+    input wire partial_aggregation_out_ready,
+    // 32-bit event counters, cleared while rst_n is low
+    output reg [31:0] tuples_received, // input lines with valid & last
+    output reg [31:0] lines_received, // input lines with valid
+    output reg [31:0] tuples_res_out, // PU 0 leaf outputs with valid & last
+    output reg [31:0] tree_res_out, // PU 0 leaf outputs with valid
+    output reg [31:0] reduce_tree_outs, // reduce-tree outputs with valid & ready
+    output reg [31:0] reduce_tree_outs_valids // reduce-tree outputs with valid
+
+    );
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////// Signals Declarations /////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Stream arrays: index i feeds PU i, index i+1 is what PU i forwards (index 0 = module inputs).
+wire [DATA_BUS_WIDTH-1:0] data_line_array[NUM_PUS_PER_CLUSTER:0];
+wire data_line_valid_array[NUM_PUS_PER_CLUSTER:0];
+wire data_line_last_array[NUM_PUS_PER_CLUSTER:0];
+wire data_line_ready_array[NUM_PUS_PER_CLUSTER:0]; // [i] = ready output of PU i; only [0] is read in this module
+wire data_line_ctrl_array[NUM_PUS_PER_CLUSTER:0];
+wire [1:0] data_line_mode_array[NUM_PUS_PER_CLUSTER:0];
+wire [NUM_PUS_PER_CLUSTER_BITS-1:0] data_line_pu_array[NUM_PUS_PER_CLUSTER:0];
+// Per-PU partial-tree node index outputs
+wire [15:0] pu_tree_node_index_out[NUM_PUS_PER_CLUSTER-1:0];
+wire pu_tree_node_index_out_valid[NUM_PUS_PER_CLUSTER-1:0];
+// Per-PU leaf outputs; *_ready is driven by reduce_leaves and is not connected to any DTPU port here
+wire [DATA_PRECISION-1:0] pu_tree_leaf_out[NUM_PUS_PER_CLUSTER-1:0];
+wire pu_tree_leaf_out_valid[NUM_PUS_PER_CLUSTER-1:0];
+wire pu_tree_leaf_out_last[NUM_PUS_PER_CLUSTER-1:0];
+wire pu_tree_leaf_out_ready[NUM_PUS_PER_CLUSTER-1:0];
+
+// Reduce-tree result and its handshake with leaves_aggreg_result_fifo
+wire [DATA_PRECISION-1:0] aggregation_out;
+wire aggregation_out_valid;
+wire leaves_aggreg_result_fifo_full;
+wire aggregation_out_ready;
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////// Generate DTPU Instances /////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Registered ready plus the statistics counters (the leaf counters watch PU 0 only).
+always @(posedge clk) begin
+    if (~rst_n) begin
+        // reset
+        data_line_in_ready <= 1'b0;
+
+        tuples_received <= 0;
+        lines_received <= 0;
+        tuples_res_out <= 0;
+        tree_res_out <= 0;
+        reduce_tree_outs <= 0;
+        reduce_tree_outs_valids <= 0;
+    end
+    else begin
+        data_line_in_ready <= data_line_ready_array[0];
+
+        if(data_line_in_valid & data_line_in_last) begin
+            tuples_received <= tuples_received + 1'b1;
+        end
+
+        if(data_line_in_valid) begin
+            lines_received <= lines_received + 1'b1;
+        end
+
+        if(pu_tree_leaf_out_valid[0] & pu_tree_leaf_out_last[0]) begin
+            tuples_res_out <= tuples_res_out + 1'b1;
+        end
+        if(pu_tree_leaf_out_valid[0]) begin
+            tree_res_out <= tree_res_out + 1'b1;
+        end
+
+        if(aggregation_out_valid & aggregation_out_ready) begin
+            reduce_tree_outs <= reduce_tree_outs + 1'b1;
+        end
+
+        if(aggregation_out_valid) begin
+            reduce_tree_outs_valids <= reduce_tree_outs_valids + 1'b1;
+        end
+
+    end
+end
+
+// initialize input to first PU
+assign data_line_array[0] = data_line_in;
+assign data_line_valid_array[0] = data_line_in_valid;
+assign data_line_last_array[0] = data_line_in_last;
+assign data_line_ctrl_array[0] = data_line_in_ctrl;
+assign data_line_mode_array[0] = data_line_in_mode;
+assign data_line_pu_array[0] = data_line_in_pu;
+
+
+// generate a cascade of PUs
+genvar i;
+
+generate
+    for (i = 0; i < NUM_PUS_PER_CLUSTER; i = i + 1) begin:pus
+        DTPU #(.PU_ID (i)
+        ) pu_x(
+
+            .clk (clk),
+            .rst_n (rst_n),
+
+            .data_line_in (data_line_array[i]),
+            .data_line_in_valid (data_line_valid_array[i]),
+            .data_line_in_last (data_line_last_array[i]),
+            .data_line_in_ctrl (data_line_ctrl_array[i]),
+            .data_line_in_mode (data_line_mode_array[i]),
+            .data_line_in_pu (data_line_pu_array[i]),
+            .data_line_in_ready (data_line_ready_array[i]),
+
+            .data_line_out (data_line_array[i+1]),
+            .data_line_out_valid (data_line_valid_array[i+1]),
+            .data_line_out_ctrl (data_line_ctrl_array[i+1]),
+            .data_line_out_last (data_line_last_array[i+1]),
+            .data_line_out_prog (data_line_mode_array[i+1]),
+            .data_line_out_pu (data_line_pu_array[i+1]),
+
+            .pu_tree_node_index_out (pu_tree_node_index_out[i]),
+            .pu_tree_node_index_out_valid (pu_tree_node_index_out_valid[i]),
+
+            .pu_tree_leaf_out (pu_tree_leaf_out[i]),
+            .pu_tree_leaf_out_valid (pu_tree_leaf_out_valid[i]),
+            .pu_tree_leaf_out_last (pu_tree_leaf_out_last[i])
+        );
+
+
+        // place PU i's 16-bit node index into its slice of the flattened output bus
+        assign partial_tree_node_index_out[16*i+15:i*16] = pu_tree_node_index_out[i];
+
+    end
+endgenerate
+
+// As all PUs are synchronized to start processing at the same time and do the same amount of
+// processing, looking at the valid signal from the 1st PU is enough.
+
+assign partial_tree_node_index_out_valid = pu_tree_node_index_out_valid[0];
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////// Instance of FPAdders Tree /////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+FPAddersReduceTree #(.NUM_FP_POINTS(NUM_PUS_PER_CLUSTER)
+    ) reduce_leaves(
+        .clk (clk),
+        .rst_n (rst_n),
+
+        .fp_in_vector (pu_tree_leaf_out),
+        .fp_in_vector_valid (pu_tree_leaf_out_valid),
+        .fp_in_vector_last (pu_tree_leaf_out_last),
+        .fp_in_vector_ready (pu_tree_leaf_out_ready),
+
+        .reduce_out (aggregation_out),
+        .reduce_out_valid (aggregation_out_valid),
+        .reduce_out_ready (aggregation_out_ready)
+    );
+
+assign aggregation_out_ready = ~leaves_aggreg_result_fifo_full;
+// putting FPAdders tree output in a FIFO
+// NOTE(review): .we is aggregation_out_valid with no ~full gate; presumably quick_fifo guards writes when full - confirm.
+quick_fifo #(.FIFO_WIDTH(DATA_PRECISION),
+        .FIFO_DEPTH_BITS(9),
+        .FIFO_ALMOSTFULL_THRESHOLD(508)
+    ) leaves_aggreg_result_fifo (
+        .clk (clk),
+        .reset_n (rst_n),
+        .din (aggregation_out),
+        .we (aggregation_out_valid),
+
+        .re (partial_aggregation_out_ready),
+        .dout (partial_aggregation_out),
+        .empty (),
+        .valid (partial_aggregation_out_valid),
+        .full (leaves_aggreg_result_fifo_full),
+        .count (),
+        .almostfull ()
+    );
+
+endmodule
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/hw/hdl/operators/dtengine/xgboost/core/FPAddersReduceTree.sv b/hw/hdl/operators/dtengine/xgboost/core/FPAddersReduceTree.sv
new file mode 100644
index 00000000..2f37511e
--- /dev/null
+++
b/hw/hdl/operators/dtengine/xgboost/core/FPAddersReduceTree.sv
@@ -0,0 +1,187 @@
+// FPAddersReduceTree: pairwise FP adder tree over NUM_FP_POINTS 32-bit inputs; the tree output is
+// handed to an FPAggregator, which drives reduce_out. Intended for power-of-two NUM_FP_POINTS >= 2.
+module FPAddersReduceTree #(parameter NUM_FP_POINTS = 8,
+                            parameter FP_ADDER_LATENCY = 2)
+    (
+    input wire clk,
+    input wire rst_n,
+    // Per-input values; valid/last are sampled from element 0 only
+    input wire [31:0] fp_in_vector[NUM_FP_POINTS-1:0],
+    input wire fp_in_vector_valid[NUM_FP_POINTS-1:0],
+    input wire fp_in_vector_last[NUM_FP_POINTS-1:0],
+    output reg fp_in_vector_ready[NUM_FP_POINTS-1:0],
+    // Aggregated result stream (driven by the FPAggregator instance)
+    output wire [31:0] reduce_out,
+    output wire reduce_out_valid,
+    input wire reduce_out_ready
+    );
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////// Local Parameters /////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// One adder level per halving of the input vector. Same values as the former 16/8/4 lookup
+// table (2 -> 1, 4 -> 2, 8 -> 3, 16 -> 4), and now also correct for 32, 64, ... inputs.
+localparam NUM_TREE_LEVELS = (NUM_FP_POINTS > 2)? $clog2(NUM_FP_POINTS) : 1;
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////// Signals Declarations /////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// tree_data[level][pair][operand]; level 0 is built from the inputs, higher levels are adder outputs
+wire [33:0] tree_data[NUM_TREE_LEVELS:0][(NUM_FP_POINTS>>1)-1:0][1:0];
+wire [31:0] tree_out;
+reg [31:0] tree_out_d1; // registered copy of tree_out; not read anywhere in this module
+
+wire fp_in_valid_delayed;
+wire fp_in_last_delayed;
+
+wire fp_in_ready;
+// The three wires below are referenced only by the commented-out tree_aggregator_in_fifo
+wire tree_aggregator_in_fifo_valid;
+wire tree_aggregator_in_fifo_almfull;
+wire [32:0] tree_aggregator_in_fifo_dout;
+
+genvar i;
+generate
+    // every fp_in_vector_ready[i] is the same registered copy of fp_in_ready
+    for (i = 0; i < NUM_FP_POINTS; i = i + 1)
+    begin:fpInReady
+
+        always@(posedge clk) begin
+            if(~rst_n) begin
+                fp_in_vector_ready[i] <= 1'b0;
+            end
+            else begin
+                fp_in_vector_ready[i] <= fp_in_ready;
+            end
+        end
+    end
+endgenerate
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////// FP Adders Tree /////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// first level of tree adders
+generate
+    for (i = 0; i < (NUM_FP_POINTS>>1); i = i + 1)
+    begin:treeLevel1
+        // widen each input to the 34-bit adder word: {1'b0, OR of all value bits, value}
+        assign tree_data[0][i][0] = {1'b0, {|(fp_in_vector[i<<1])}, fp_in_vector[i<<1] };
+        assign tree_data[0][i][1] = {1'b0, {|(fp_in_vector[(i<<1)+1])}, fp_in_vector[(i<<1)+1] };
+
+        FPAdder_8_23_uid2_l2 fpadder_1_x(
+            .clk (clk),
+            .rst (~rst_n),
+            .seq_stall (1'b0),
+            .X (tree_data[0][i][0]),
+            .Y (tree_data[0][i][1]),
+            .R (tree_data[1][i>>1][i%2])
+        );
+    end
+endgenerate
+
+// the rest of the levels: level i adds operand pairs of tree_data[i] into tree_data[i+1]
+generate
+    genvar j;
+    for (i = 1; i < NUM_TREE_LEVELS; i = i + 1)
+    begin:treeLevels
+        for (j = 0; j < (NUM_FP_POINTS >> (i+1)); j = j + 1)
+        begin:levelAdders
+            FPAdder_8_23_uid2_l2 fpadder_i_x(
+                .clk (clk),
+                .rst (~rst_n),
+                .seq_stall (1'b0),
+                .X (tree_data[i][j][0]),
+                .Y (tree_data[i][j][1]),
+                .R (tree_data[i+1][j>>1][j%2])
+            );
+        end
+    end
+endgenerate
+
+// delay valid and last (both taken from input 0) alongside the adder tree
+delay #(.DATA_WIDTH(1),
+        .DELAY_CYCLES(FP_ADDER_LATENCY*NUM_TREE_LEVELS+1)
+    ) fpadder_delay(
+
+        .clk (clk),
+        .rst_n (rst_n),
+        .data_in (fp_in_vector_last[0]), //
+        .data_in_valid (fp_in_vector_valid[0]),
+        .data_out (fp_in_last_delayed),
+        .data_out_valid (fp_in_valid_delayed)
+    );
+
+// assign tree output: forced to 0 when the two top bits of the root word are 2'b00
+assign tree_out = (tree_data[NUM_TREE_LEVELS][0][0][33:32] == 2'b00)? 0 : tree_data[NUM_TREE_LEVELS][0][0][31:0];
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////// FP Aggregator /////////////////////////
+////////////////////////////////////// //////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/*
+quick_fifo #(.FIFO_WIDTH(32+1),
+        .FIFO_DEPTH_BITS(9),
+        .FIFO_ALMOSTFULL_THRESHOLD(500)
+    ) tree_aggregator_in_fifo (
+        .clk (clk),
+        .reset_n (rst_n),
+        .din ({fp_in_last_delayed, tree_out}),
+        .we (fp_in_valid_delayed),
+
+        .re (fp_in_ready),
+        .dout (tree_aggregator_in_fifo_dout),
+        .empty (),
+        .valid (tree_aggregator_in_fifo_valid),
+        .full (),
+        .count (),
+        .almostfull (tree_aggregator_in_fifo_almfull)
+    );
+
+*/
+
+always@(posedge clk) begin
+    if(~rst_n) begin
+        tree_out_d1 <= 1'b0;
+    end
+    else begin
+        tree_out_d1 <= tree_out;
+    end
+end
+
+
+FPAggregator #(.FP_ADDER_LATENCY(FP_ADDER_LATENCY))
+
+    tree_aggregator(
+
+        .clk (clk),
+        .rst_n (rst_n),
+
+        .fp_in (tree_out),
+        .fp_in_valid (fp_in_valid_delayed),
+        .fp_in_last (fp_in_last_delayed),
+        .fp_in_ready (fp_in_ready),
+
+        .aggreg_out (reduce_out),
+        .aggreg_out_valid (reduce_out_valid),
+        .aggreg_out_ready (reduce_out_ready)
+    );
+
+
+
+
+
+
+endmodule
+
diff --git a/hw/hdl/operators/dtengine/xgboost/core/FPAggregator.v b/hw/hdl/operators/dtengine/xgboost/core/FPAggregator.v
new file mode 100644
index 00000000..26c70a90
--- /dev/null
+++ b/hw/hdl/operators/dtengine/xgboost/core/FPAggregator.v
@@ -0,0 +1,152 @@
+
+module FPAggregator #(parameter FP_ADDER_LATENCY = 2) (
+
+    input wire clk,
+    input wire rst_n,
+
+    input wire [31:0] fp_in,
+    input wire fp_in_valid,
+    input wire fp_in_last,
+    output wire fp_in_ready,
+
+    output wire [31:0] aggreg_out,
+    output wire aggreg_out_valid,
+    input wire aggreg_out_ready
+    );
+
+
+
+
+wire aggreg_in_fifo_full;
+wire aggreg_in_fifo_valid;
+wire aggreg_in_fifo_re;
+wire [32:0] aggreg_in_fifo_dout;
+
+wire [33:0] input_A;
+reg [33:0] prev_aggreg_value;
+wire [33:0] aggreg_value;
+reg [3:0] fpadder_latency_count;
+
+
+wire fp_in_valid_delayed;
+wire fp_in_last_delayed;
+wire aggregator_ready;
+
+reg [31:0] aggreg_out_d1;
+reg aggreg_out_valid_d1;
+
+wire aggreg_out_fifo_almfull;
+////////////////////////////////////////////////////////////////////////////////
+assign fp_in_ready = ~aggreg_in_fifo_full;
+
+quick_fifo #(.FIFO_WIDTH(32+1),
+        .FIFO_DEPTH_BITS(9),
+        .FIFO_ALMOSTFULL_THRESHOLD(508)
+    ) aggreg_in_fifo (
+        .clk (clk),
+        .reset_n (rst_n),
+        .din ({fp_in_last, fp_in}),
+        .we (fp_in_valid),
+
+        .re (aggreg_in_fifo_re),
+        .dout (aggreg_in_fifo_dout),
+        .empty (),
+        .valid (aggreg_in_fifo_valid),
+        .full (aggreg_in_fifo_full),
+        .count (),
+        .almostfull ()
+    );
+
+assign aggreg_in_fifo_re = aggregator_ready;
+////////////////////////////////////////////////////////////////////////////////
+
+always @(posedge clk) begin
+    if (~rst_n) begin
+        // reset
+        prev_aggreg_value <= 0;
+        fpadder_latency_count <= 0;
+        aggreg_out_valid_d1 <= 1'b0;
+        aggreg_out_d1 <= 0;
+    end
+    else begin
+        if(aggregator_ready &
aggreg_in_fifo_valid) begin
+            fpadder_latency_count <= FP_ADDER_LATENCY-1; // an input was accepted: hold aggregator_ready low while this counts down
+        end
+        else if(!(fpadder_latency_count == 0)) begin
+            fpadder_latency_count <= fpadder_latency_count - 1'b1;
+        end
+        //--------------------- Do aggregation --------------------------//
+        if(fp_in_valid_delayed) begin
+            if(~fp_in_last_delayed) begin
+                prev_aggreg_value <= aggreg_value; // keep the running sum as the adder's Y operand
+            end
+            else begin
+                prev_aggreg_value <= 0; // last element: restart the accumulation from zero
+            end
+        end
+
+        //--------------------- Tuple Output ----------------------------//
+        aggreg_out_valid_d1 <= 1'b0; // default; overridden below when a last element arrives
+
+        if(fp_in_valid_delayed & fp_in_last_delayed) begin
+            if(aggreg_value[33:32] == 2'b00) begin
+                aggreg_out_d1 <= 0;
+            end
+            else begin
+                aggreg_out_d1 <= aggreg_value[31:0];
+            end
+
+            aggreg_out_valid_d1 <= 1'b1;
+        end
+    end
+end
+// A new input is taken only when the countdown is at 0 and the output FIFO is not almost full.
+assign aggregator_ready = (fpadder_latency_count == 0) & ~aggreg_out_fifo_almfull;
+// 34-bit adder operand: {1'b0, OR of the 32 data bits, 32 data bits}
+assign input_A = {1'b0, {|(aggreg_in_fifo_dout[31:0])}, aggreg_in_fifo_dout[31:0]};
+
+FPAdder_8_23_uid2_l2 fpadder(
+        .clk (clk),
+        .rst (~rst_n),
+        .seq_stall (1'b0),
+        .X (input_A),
+        .Y (prev_aggreg_value),
+        .R (aggreg_value)
+    );
+
+// delay valid, last with FPAdder Latency (data_in is the "last" flag stored in FIFO bit 32)
+delay #(.DATA_WIDTH(1),
+        .DELAY_CYCLES(FP_ADDER_LATENCY)
+    ) fpadder_delay(
+
+        .clk (clk),
+        .rst_n (rst_n),
+        .data_in (aggreg_in_fifo_dout[32]), //
+        .data_in_valid (aggreg_in_fifo_valid & aggregator_ready),
+        .data_out (fp_in_last_delayed),
+        .data_out_valid (fp_in_valid_delayed)
+    );
+
+// Result FIFO: read by aggreg_out_ready; its almostfull flag feeds aggregator_ready above.
+quick_fifo #(.FIFO_WIDTH(32),
+        .FIFO_DEPTH_BITS(9),
+        .FIFO_ALMOSTFULL_THRESHOLD(500)
+    ) aggreg_out_fifo (
+        .clk (clk),
+        .reset_n (rst_n),
+        .din (aggreg_out_d1),
+        .we (aggreg_out_valid_d1),
+
+        .re (aggreg_out_ready),
+        .dout (aggreg_out),
+        .empty (),
+        .valid (aggreg_out_valid),
+        .full (),
+        .count (),
+        .almostfull (aggreg_out_fifo_almfull)
+    );
+
+
+
+
+endmodule
diff --git a/hw/hdl/operators/dtengine/xgboost/core/Mem1in2out.v b/hw/hdl/operators/dtengine/xgboost/core/Mem1in2out.v
new file mode 100644
index 00000000..4ddc318a
--- /dev/null
+++
b/hw/hdl/operators/dtengine/xgboost/core/Mem1in2out.v
@@ -0,0 +1,127 @@
+
+
+// Mem1in2out
+//
+// Line-organised block RAM with one write port and two word-wide read outputs.
+// Each RAM line is DATA_WIDTH bits wide; a PipelinedMUX per output picks one
+// WORD_WIDTH word out of the line that was read.
+//
+// Address layout of wraddr / raddr:
+//   [LINE_ADDR_WIDTH+ADDR_WIDTH-1 : LINE_ADDR_WIDTH]  RAM line address
+//   [LINE_ADDR_WIDTH-1 : 0]                           word select inside the line
+//
+// Ports:
+//   we, wraddr, din  - write one full line through RAM port A.
+//   rea              - read-valid for output 1; output 1 is read through port A,
+//                      i.e. it is addressed by wraddr as well.
+//   reb, raddr       - read-valid / address for output 2 (RAM port B).
+//   dout1/valid_out1 - selected word of the port-A line and its valid flag.
+//   dout2/valid_out2 - selected word of the port-B line and its valid flag.
+//
+// valid_out1/valid_out2 are rea/reb delayed by NUM_PIPELINE_LEVELS+1 clock
+// cycles (one register for stage 0 plus NUM_PIPELINE_LEVELS generated stages).
+//
+// NOTE(review): the bramin1out2 wrapper added by this patch has fixed 10-bit
+// addresses and 256-bit data ports, so this module presumably has to be
+// instantiated with ADDR_WIDTH = 10 and DATA_WIDTH = 256 - confirm against the
+// instantiating code.
+module Mem1in2out #(
+    parameter DATA_WIDTH          = 32,
+    parameter ADDR_WIDTH          = 10,
+    parameter LINE_ADDR_WIDTH     = 3,
+    parameter WORD_WIDTH          = 32,
+    parameter NUM_PIPELINE_LEVELS = 1
+) (
+    input  wire                                  clk,
+    input  wire                                  rst_n,
+    input  wire                                  we,
+    input  wire                                  rea,
+    input  wire                                  reb,
+    input  wire [LINE_ADDR_WIDTH+ADDR_WIDTH-1:0] raddr,
+    input  wire [LINE_ADDR_WIDTH+ADDR_WIDTH-1:0] wraddr,
+    input  wire [DATA_WIDTH-1:0]                 din,
+    output wire [WORD_WIDTH-1:0]                 dout1,
+    output wire                                  valid_out1,
+    output wire [WORD_WIDTH-1:0]                 dout2,
+    output wire                                  valid_out2
+);
+
+// Read-valid shift registers, indices 0 .. NUM_PIPELINE_LEVELS.
+// Declared with an explicit range so the file is also legal Verilog-2001.
+reg rea_p[0:NUM_PIPELINE_LEVELS];
+reg reb_p[0:NUM_PIPELINE_LEVELS];
+// Full lines read from RAM port A / port B.
+wire [DATA_WIDTH-1:0] dline_a;
+wire [DATA_WIDTH-1:0] dline_b;
+
+// Word-select bits, delayed one cycle before they are handed to the output muxes.
+reg [LINE_ADDR_WIDTH-1:0] raddr_d1;
+reg [LINE_ADDR_WIDTH-1:0] wraddr_d1;
+/*
+
+(* ramstyle = "no_rw_check" *) reg [DATA_WIDTH-1:0] mem[0:2**ADDR_WIDTH-1];
+
+
+    always @(posedge clk) begin
+        if (we)
+            mem[ wraddr[LINE_ADDR_WIDTH+ADDR_WIDTH-1:LINE_ADDR_WIDTH] ] <= din;
+
+        if(rea)
+            dline_a <= mem[ wraddr[LINE_ADDR_WIDTH+ADDR_WIDTH-1:LINE_ADDR_WIDTH] ];
+        //
+        if(reb)
+            dline_b <= mem[ raddr[LINE_ADDR_WIDTH+ADDR_WIDTH-1:LINE_ADDR_WIDTH] ];
+    end
+
+*/
+// Port A: write port, also read back as dline_a. Port B: read-only (wren_b tied low).
+bramin1out2 bram1in2out_inst (
+    .address_a ( wraddr[LINE_ADDR_WIDTH+ADDR_WIDTH-1:LINE_ADDR_WIDTH] ),
+    .address_b ( raddr[LINE_ADDR_WIDTH+ADDR_WIDTH-1:LINE_ADDR_WIDTH] ),
+    .clock     ( clk ),
+    .data_a    ( din ),
+    .data_b    ( {DATA_WIDTH{1'b0}} ),   // port B never writes; sized zero avoids a width-mismatch warning
+    .wren_a    ( we ),
+    .wren_b    ( 1'b0 ),
+    .q_a       ( dline_a ),
+    .q_b       ( dline_b )
+    );
+
+
+//------------------------ Out MUX Pipelines ------------------------//
+// pipeline re i = 0,
+always @(posedge clk) begin
+    if(~rst_n) begin
+        rea_p[0] <= 0;
+        reb_p[0] <= 0;
+    end
+    else begin
+        rea_p[0] <= rea;
+        reb_p[0] <= reb;
+    end
+
+    // Word-select bits are not reset; they only matter while the matching valid flag is set.
+    wraddr_d1 <= wraddr[LINE_ADDR_WIDTH-1:0];
+    raddr_d1  <= raddr[LINE_ADDR_WIDTH-1:0];
+end
+
+// pipeline re i = 1 to NUM_PIPELINE_LEVELS (inclusive),
+genvar i;
+generate for (i = 1; i < NUM_PIPELINE_LEVELS+1; i=i+1) begin: PipelineOutMux
+    always @(posedge clk) begin
+        if(~rst_n) begin
+            rea_p[i] <= 0;
+            reb_p[i] <= 0;
+        end
+        else begin
+            rea_p[i] <= rea_p[i-1];
+            reb_p[i] <= reb_p[i-1];
+        end
+    end
+end
+endgenerate
+
+// Word selector for output 1 (line read through port A).
+PipelinedMUX #(
+    .DATA_WIDTH          (DATA_WIDTH),
+    .ADDR_WIDTH          (LINE_ADDR_WIDTH),
+    .WORD_WIDTH          (WORD_WIDTH),
+    .NUM_PIPELINE_LEVELS (NUM_PIPELINE_LEVELS)
+) muxa(
+    .clk   (clk),
+    .rst_n (rst_n),
+
+    .din   (dline_a),
+    .addr  (wraddr_d1),
+    .dout  (dout1)
+);
+
+// Word selector for output 2 (line read through port B).
+PipelinedMUX #(
+    .DATA_WIDTH          (DATA_WIDTH),
+    .ADDR_WIDTH          (LINE_ADDR_WIDTH),
+    .WORD_WIDTH          (WORD_WIDTH),
+    .NUM_PIPELINE_LEVELS (NUM_PIPELINE_LEVELS)
+) muxb(
+    .clk   (clk),
+    .rst_n (rst_n),
+
+    .din   (dline_b),
+    .addr  (raddr_d1),
+    .dout  (dout2)
+);
+
+
+// The last stage of each valid shift register flags the matching dout.
+assign valid_out1 = rea_p[NUM_PIPELINE_LEVELS];
+assign valid_out2 = reb_p[NUM_PIPELINE_LEVELS];
+
+
+
+endmodule // Mem1in2out
diff --git a/hw/hdl/operators/dtengine/xgboost/core/PipelinedMUX.sv b/hw/hdl/operators/dtengine/xgboost/core/PipelinedMUX.sv
new file mode 100644
index 00000000..20404f94
--- /dev/null
+++ b/hw/hdl/operators/dtengine/xgboost/core/PipelinedMUX.sv
@@ -0,0 +1,152 @@
+
+
+// PipelinedMUX
+//
+// Selects one WORD_WIDTH-bit word out of the DATA_WIDTH-bit line `din`, using
+// `addr` as the word index, and presents it on the registered output `dout`.
+// The selection is spread over register stages so that wide multiplexers do not
+// end up in a single combinational path.
+//
+// Supported configurations (chosen at elaboration time):
+//   * NUM_PIPELINE_LEVELS == 1                : one mux, one output register.
+//   * ADDR_WIDTH % NUM_PIPELINE_LEVELS == 0   : NUM_PIPELINE_LEVELS stages, each
+//                                               resolving NUM_BITS_PER_LEVEL
+//                                               address bits (LSBs first).
+//   * ADDR_WIDTH == 4 (uneven split)          : three stages resolving 2, 1, 1 bits;
+//                                               uses addr_d[2], so it needs
+//                                               NUM_PIPELINE_LEVELS == 3.
+//   * ADDR_WIDTH == 3 (uneven split)          : two stages resolving 2, 1 bits;
+//                                               uses addr_d[1], so it needs
+//                                               NUM_PIPELINE_LEVELS == 2.
+// Any other combination matches no branch and leaves `dout` undriven.
+//
+// rst_n only clears the delayed address registers; the data registers are not reset.
+module PipelinedMUX #(
+    parameter DATA_WIDTH          = 32,
+    parameter ADDR_WIDTH          = 3,
+    parameter WORD_WIDTH          = 32,
+    parameter NUM_PIPELINE_LEVELS = 1
+)(
+    input  wire                   clk,    // Clock
+    input  wire                   rst_n,  // Synchronous reset, active low (sampled on posedge clk)
+
+    input  wire [DATA_WIDTH-1:0]  din,
+    input  wire [ADDR_WIDTH-1:0]  addr,
+    output wire [WORD_WIDTH-1:0]  dout
+);
+
+
+
+localparam NUM_WORDS_PER_LINE = DATA_WIDTH / WORD_WIDTH;
+
+// Address bits resolved per stage, and the remainder when ADDR_WIDTH does not
+// divide evenly across the stages (non-zero selects the hand-written branches).
+localparam NUM_BITS_PER_LEVEL = ADDR_WIDTH / NUM_PIPELINE_LEVELS;
+localparam NOT_SIMILAR_LEVELS = ADDR_WIDTH % NUM_PIPELINE_LEVELS;
+
+wire [WORD_WIDTH-1:0] dline_Array[NUM_WORDS_PER_LINE-1:0];
+// addr_d[k] is `addr` delayed by k clock cycles; stage k uses addr_d[k].
+reg  [ADDR_WIDTH-1:0] addr_d[NUM_PIPELINE_LEVELS-1:0];
+
+genvar i,j,w;
+
+generate
+    always @(*) begin
+        addr_d[0] = addr;
+    end
+
+    for (i = 1; i < NUM_PIPELINE_LEVELS; i = i + 1)
+    begin:addr_pipeline
+        always @(posedge clk) begin
+            if (~rst_n) begin
+                // reset
+                addr_d[i] <= 0;
+            end
+            else begin
+                addr_d[i] <= addr_d[i-1];
+            end
+        end
+    end
+endgenerate
+// Slice the flat input line into an array of words (word 0 in the LSBs).
+generate
+for (i = 0; i < NUM_WORDS_PER_LINE; i=i+1) begin: VectorToArray
+    assign dline_Array[i] = din[i*WORD_WIDTH+WORD_WIDTH-1:i*WORD_WIDTH];
+end
+endgenerate
+
+generate
+    // One Pipeline Level, i.e. register output only.
+    if(NUM_PIPELINE_LEVELS == 1) begin: pipeLevs
+        //-------------------------------------------------------------------------------------//
+        reg [WORD_WIDTH-1:0] dword_out;
+
+        always @(posedge clk) begin
+            dword_out <= dline_Array[ addr_d[0][ADDR_WIDTH-1:0] ];
+        end
+
+        assign dout = dword_out;
+        //-------------------------------------------------------------------------------------//
+    end
+    else if(NOT_SIMILAR_LEVELS == 0) begin
+        //-------------------------------------------------------------------------------------//
+        // levArray[level][group][element]: every group holds 2**NUM_BITS_PER_LEVEL
+        // candidate words; each stage keeps one word per group and regroups the
+        // survivors for the next stage.
+        reg [WORD_WIDTH-1:0] levArray[NUM_PIPELINE_LEVELS+1:0][(NUM_WORDS_PER_LINE >> NUM_BITS_PER_LEVEL)-1:0][(2**NUM_BITS_PER_LEVEL)-1:0];
+
+        // Level 0 is a combinational copy of the input words.
+        for (i = 0; i < NUM_WORDS_PER_LINE; i=i+(2**NUM_BITS_PER_LEVEL)) begin: L1
+            for (w = 0; w < (2**NUM_BITS_PER_LEVEL); w=w+1) begin: L2
+                always @(*) begin
+                    levArray[0][i>>NUM_BITS_PER_LEVEL][w] = din[(i+w)*WORD_WIDTH+WORD_WIDTH-1:(i+w)*WORD_WIDTH];
+                end
+            end
+        end
+        //
+        for (i = 0; i < NUM_PIPELINE_LEVELS; i=i+1) begin:L3
+            for (j = 0; j < (NUM_WORDS_PER_LINE >> ((i+1)*NUM_BITS_PER_LEVEL)); j=j+1) begin:L4
+                always @(posedge clk) begin
+                    // The winner of group j becomes element (j mod group size) of
+                    // group (j / group size) on the next level. Indexing the element
+                    // with plain j ran past the last dimension whenever a level still
+                    // had more than one group of survivors (three or more levels).
+                    levArray[i+1][j>>NUM_BITS_PER_LEVEL][j % (2**NUM_BITS_PER_LEVEL)] <= levArray[i][j][ addr_d[i][i*NUM_BITS_PER_LEVEL+NUM_BITS_PER_LEVEL-1:i*NUM_BITS_PER_LEVEL] ];
+                end
+            end
+        end
+
+        assign dout = levArray[NUM_PIPELINE_LEVELS][0][0];
+        //-------------------------------------------------------------------------------------//
+    end
+    else if(ADDR_WIDTH == 4) begin // This means Pipeline level = 3
+
+        reg [WORD_WIDTH-1:0] lev1Array[3:0][3:0];
+        reg [WORD_WIDTH-1:0] lev2Array[1:0][1:0];
+        reg [WORD_WIDTH-1:0] lev3Array[1:0];
+        reg [WORD_WIDTH-1:0] dword_out;
+
+        for (i = 0; i < NUM_WORDS_PER_LINE; i=i+4) begin:L5
+            for (w = 0; w < 4; w=w+1) begin:L6
+                always @(*) begin
+                    lev1Array[i>>2][w] = din[(i+w)*WORD_WIDTH+WORD_WIDTH-1:(i+w)*WORD_WIDTH];
+                end
+            end
+        end
+        // Level 1 Muxes
+        for (j = 0; j < 4; j=j+1) begin:L7
+            always @(posedge clk) begin
+                lev2Array[j>>1][j%2] <= lev1Array[j][ addr_d[0][1:0] ];
+            end
+        end
+        // Level 2 Muxes
+        for (j = 0; j < 2; j=j+1) begin:L8
+            always @(posedge clk) begin
+                lev3Array[j] <= lev2Array[j][ addr_d[1][2] ];
+            end
+        end
+        // Level 3 Muxes
+        always @(posedge clk) begin
+            dword_out <= lev3Array[ addr_d[2][3] ];
+        end
+
+        assign dout = dword_out;
+    end
+    else if(ADDR_WIDTH == 3) begin // This means Pipeline level = 2
+        wire [WORD_WIDTH-1:0] lev1Array[1:0][3:0];
+        reg  [WORD_WIDTH-1:0] lev2Array[1:0];
+        reg  [WORD_WIDTH-1:0] dword_out;
+
+        for (i = 0; i < NUM_WORDS_PER_LINE; i=i+4) begin:L9
+            for (w = 0; w < 4; w=w+1) begin:L10
+                assign lev1Array[i>>2][w] = din[(i+w)*WORD_WIDTH+WORD_WIDTH-1:(i+w)*WORD_WIDTH];
+            end
+        end
+        // Level 1 Muxes
+        for (j = 0; j < 2; j=j+1) begin:L11
+            always @(posedge clk) begin
+                lev2Array[j] <= lev1Array[j][ addr_d[0][1:0] ];
+            end
+        end
+        // Level 2 Muxes
+        always @(posedge clk) begin
+            dword_out <= lev2Array[ addr_d[1][2] ];
+        end
+
+        assign dout = dword_out;
+
+    end
+endgenerate
+
+
+
+
+
+endmodule
diff --git a/hw/hdl/operators/dtengine/xgboost/core/Qdualport_mem.qip b/hw/hdl/operators/dtengine/xgboost/core/Qdualport_mem.qip
new file mode 100644
index 00000000..9325329b
--- /dev/null
+++ b/hw/hdl/operators/dtengine/xgboost/core/Qdualport_mem.qip
@@ -0,0 +1,5 @@
+set_global_assignment -name IP_TOOL_NAME "RAM: 2-PORT"
+set_global_assignment -name IP_TOOL_VERSION "13.1"
+set_global_assignment -name VERILOG_FILE [file join $::quartus(qip_path) "Qdualport_mem.v"]
+set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "Qdualport_mem_inst.v"]
+set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "Qdualport_mem_bb.v"]
diff --git
a/hw/hdl/operators/dtengine/xgboost/core/Qdualport_mem.v b/hw/hdl/operators/dtengine/xgboost/core/Qdualport_mem.v new file mode 100644 index 00000000..87e791d8 --- /dev/null +++ b/hw/hdl/operators/dtengine/xgboost/core/Qdualport_mem.v @@ -0,0 +1,220 @@ +// megafunction wizard: %RAM: 2-PORT% +// GENERATION: STANDARD +// VERSION: WM1.0 +// MODULE: altsyncram + +// ============================================================ +// File Name: Qdualport_mem.v +// Megafunction Name(s): +// altsyncram +// +// Simulation Library Files(s): +// altera_mf +// ============================================================ +// ************************************************************ +// THIS IS A WIZARD-GENERATED FILE. DO NOT EDIT THIS FILE! +// +// 13.1.0 Build 162 10/23/2013 SJ Full Version +// ************************************************************ + + +//Copyright (C) 1991-2013 Altera Corporation +//Your use of Altera Corporation's design tools, logic functions +//and other software and tools, and its AMPP partner logic +//functions, and any output files from any of the foregoing +//(including device programming or simulation files), and any +//associated documentation or information are expressly subject +//to the terms and conditions of the Altera Program License +//Subscription Agreement, Altera MegaCore Function License +//Agreement, or other applicable license agreement, including, +//without limitation, that your use is for the sole purpose of +//programming logic devices manufactured by Altera and sold by +//Altera or its authorized distributors. Please refer to the +//applicable agreement for further details. 
+
+
+// synopsys translate_off
+`timescale 1 ps / 1 ps
+// synopsys translate_on
+// Qdualport_mem: wizard-generated wrapper around a single altsyncram instance
+// configured as a simple dual-port RAM ("DUAL_PORT"), 512 words x 256 bits,
+// with both ports on the one `clock`.
+//   Port A (write): wraddress[8:0], data[255:0], wren.
+//   Port B (read) : rdaddress[8:0], rden, q[255:0].
+// The read address and read control are registered on CLOCK0 and the read
+// data output is left unregistered (see the defparam list below).
+module Qdualport_mem (
+    clock,
+    data,
+    rdaddress,
+    rden,
+    wraddress,
+    wren,
+    q);
+
+    input clock;
+    input [255:0] data;
+    input [8:0] rdaddress;
+    input rden;
+    input [8:0] wraddress;
+    input wren;
+    output [255:0] q;
+`ifndef ALTERA_RESERVED_QIS
+// synopsys translate_off
+`endif
+    // Net types give unconnected inputs a default level:
+    // clock and rden pull to 1, wren pulls to 0.
+    tri1 clock;
+    tri1 rden;
+    tri0 wren;
+`ifndef ALTERA_RESERVED_QIS
+// synopsys translate_on
+`endif
+
+    wire [255:0] sub_wire0;
+    wire [255:0] q = sub_wire0[255:0];
+
+    // Only port A writes and only port B reads; the unused halves of each
+    // port are tied off (wren_b = 0, q_a left open).
+    altsyncram altsyncram_component (
+        .address_a (wraddress),
+        .clock0 (clock),
+        .data_a (data),
+        .rden_b (rden),
+        .wren_a (wren),
+        .address_b (rdaddress),
+        .q_b (sub_wire0),
+        .aclr0 (1'b0),
+        .aclr1 (1'b0),
+        .addressstall_a (1'b0),
+        .addressstall_b (1'b0),
+        .byteena_a (1'b1),
+        .byteena_b (1'b1),
+        .clock1 (1'b1),
+        .clocken0 (1'b1),
+        .clocken1 (1'b1),
+        .clocken2 (1'b1),
+        .clocken3 (1'b1),
+        .data_b ({256{1'b1}}),
+        .eccstatus (),
+        .q_a (),
+        .rden_a (1'b1),
+        .wren_b (1'b0));
+    defparam
+        altsyncram_component.address_aclr_b = "NONE",
+        altsyncram_component.address_reg_b = "CLOCK0",
+        altsyncram_component.clock_enable_input_a = "BYPASS",
+        altsyncram_component.clock_enable_input_b = "BYPASS",
+        altsyncram_component.clock_enable_output_b = "BYPASS",
+        altsyncram_component.intended_device_family = "Stratix V",
+        altsyncram_component.lpm_type = "altsyncram",
+        altsyncram_component.numwords_a = 512,
+        altsyncram_component.numwords_b = 512,
+        altsyncram_component.operation_mode = "DUAL_PORT",
+        altsyncram_component.outdata_aclr_b = "NONE",
+        altsyncram_component.outdata_reg_b = "UNREGISTERED",
+        altsyncram_component.power_up_uninitialized = "FALSE",
+        altsyncram_component.rdcontrol_reg_b = "CLOCK0",
+        altsyncram_component.read_during_write_mode_mixed_ports = "DONT_CARE",
+        altsyncram_component.widthad_a = 9,
+        altsyncram_component.widthad_b = 9,
+        altsyncram_component.width_a = 256,
+        altsyncram_component.width_b = 256,
+        altsyncram_component.width_byteena_a = 1;
+
+
+endmodule
+
+// ============================================================
+// CNX file retrieval info
+// ============================================================
+// Retrieval info: PRIVATE: ADDRESSSTALL_A NUMERIC "0"
+// Retrieval info: PRIVATE: ADDRESSSTALL_B NUMERIC "0"
+// Retrieval info: PRIVATE: BYTEENA_ACLR_A NUMERIC "0"
+// Retrieval info: PRIVATE: BYTEENA_ACLR_B NUMERIC "0"
+// Retrieval info: PRIVATE: BYTE_ENABLE_A NUMERIC "0"
+// Retrieval info: PRIVATE: BYTE_ENABLE_B NUMERIC "0"
+// Retrieval info: PRIVATE: BYTE_SIZE NUMERIC "8"
+// Retrieval info: PRIVATE: BlankMemory NUMERIC "1"
+// Retrieval info: PRIVATE: CLOCK_ENABLE_INPUT_A NUMERIC "0"
+// Retrieval info: PRIVATE: CLOCK_ENABLE_INPUT_B NUMERIC "0"
+// Retrieval info: PRIVATE: CLOCK_ENABLE_OUTPUT_A NUMERIC "0"
+// Retrieval info: PRIVATE: CLOCK_ENABLE_OUTPUT_B NUMERIC "0"
+// Retrieval info: PRIVATE: CLRdata NUMERIC "0"
+// Retrieval info: PRIVATE: CLRq NUMERIC "0"
+// Retrieval info: PRIVATE: CLRrdaddress NUMERIC "0"
+// Retrieval info: PRIVATE: CLRrren NUMERIC "0"
+// Retrieval info: PRIVATE: CLRwraddress NUMERIC "0"
+// Retrieval info: PRIVATE: CLRwren NUMERIC "0"
+// Retrieval info: PRIVATE: Clock NUMERIC "0"
+// Retrieval info: PRIVATE: Clock_A NUMERIC "0"
+// Retrieval info: PRIVATE: Clock_B NUMERIC "0"
+// Retrieval info: PRIVATE: IMPLEMENT_IN_LES NUMERIC "0"
+// Retrieval info: PRIVATE: INDATA_ACLR_B NUMERIC "0"
+// Retrieval info: PRIVATE: INDATA_REG_B NUMERIC "0"
+// Retrieval info: PRIVATE: INIT_FILE_LAYOUT STRING "PORT_B"
+// Retrieval info: PRIVATE: INIT_TO_SIM_X NUMERIC "0"
+// Retrieval info: PRIVATE: INTENDED_DEVICE_FAMILY STRING "Stratix V"
+// Retrieval info: PRIVATE: JTAG_ENABLED NUMERIC "0"
+// Retrieval info: PRIVATE: JTAG_ID STRING "NONE"
+// Retrieval info: PRIVATE: MAXIMUM_DEPTH NUMERIC "0"
+// Retrieval info: PRIVATE: MEMSIZE NUMERIC "131072"
+// Retrieval info: PRIVATE: MEM_IN_BITS NUMERIC "0"
+// Retrieval info:
PRIVATE: MIFfilename STRING "" +// Retrieval info: PRIVATE: OPERATION_MODE NUMERIC "2" +// Retrieval info: PRIVATE: OUTDATA_ACLR_B NUMERIC "0" +// Retrieval info: PRIVATE: OUTDATA_REG_B NUMERIC "0" +// Retrieval info: PRIVATE: RAM_BLOCK_TYPE NUMERIC "0" +// Retrieval info: PRIVATE: READ_DURING_WRITE_MODE_MIXED_PORTS NUMERIC "2" +// Retrieval info: PRIVATE: READ_DURING_WRITE_MODE_PORT_A NUMERIC "3" +// Retrieval info: PRIVATE: READ_DURING_WRITE_MODE_PORT_B NUMERIC "3" +// Retrieval info: PRIVATE: REGdata NUMERIC "1" +// Retrieval info: PRIVATE: REGq NUMERIC "1" +// Retrieval info: PRIVATE: REGrdaddress NUMERIC "1" +// Retrieval info: PRIVATE: REGrren NUMERIC "1" +// Retrieval info: PRIVATE: REGwraddress NUMERIC "1" +// Retrieval info: PRIVATE: REGwren NUMERIC "1" +// Retrieval info: PRIVATE: SYNTH_WRAPPER_GEN_POSTFIX STRING "0" +// Retrieval info: PRIVATE: USE_DIFF_CLKEN NUMERIC "0" +// Retrieval info: PRIVATE: UseDPRAM NUMERIC "1" +// Retrieval info: PRIVATE: VarWidth NUMERIC "0" +// Retrieval info: PRIVATE: WIDTH_READ_A NUMERIC "256" +// Retrieval info: PRIVATE: WIDTH_READ_B NUMERIC "256" +// Retrieval info: PRIVATE: WIDTH_WRITE_A NUMERIC "256" +// Retrieval info: PRIVATE: WIDTH_WRITE_B NUMERIC "256" +// Retrieval info: PRIVATE: WRADDR_ACLR_B NUMERIC "0" +// Retrieval info: PRIVATE: WRADDR_REG_B NUMERIC "0" +// Retrieval info: PRIVATE: WRCTRL_ACLR_B NUMERIC "0" +// Retrieval info: PRIVATE: enable NUMERIC "0" +// Retrieval info: PRIVATE: rden NUMERIC "1" +// Retrieval info: LIBRARY: altera_mf altera_mf.altera_mf_components.all +// Retrieval info: CONSTANT: ADDRESS_ACLR_B STRING "NONE" +// Retrieval info: CONSTANT: ADDRESS_REG_B STRING "CLOCK0" +// Retrieval info: CONSTANT: CLOCK_ENABLE_INPUT_A STRING "BYPASS" +// Retrieval info: CONSTANT: CLOCK_ENABLE_INPUT_B STRING "BYPASS" +// Retrieval info: CONSTANT: CLOCK_ENABLE_OUTPUT_B STRING "BYPASS" +// Retrieval info: CONSTANT: INTENDED_DEVICE_FAMILY STRING "Stratix V" +// Retrieval info: CONSTANT: LPM_TYPE STRING 
"altsyncram" +// Retrieval info: CONSTANT: NUMWORDS_A NUMERIC "512" +// Retrieval info: CONSTANT: NUMWORDS_B NUMERIC "512" +// Retrieval info: CONSTANT: OPERATION_MODE STRING "DUAL_PORT" +// Retrieval info: CONSTANT: OUTDATA_ACLR_B STRING "NONE" +// Retrieval info: CONSTANT: OUTDATA_REG_B STRING "UNREGISTERED" +// Retrieval info: CONSTANT: POWER_UP_UNINITIALIZED STRING "FALSE" +// Retrieval info: CONSTANT: RDCONTROL_REG_B STRING "CLOCK0" +// Retrieval info: CONSTANT: READ_DURING_WRITE_MODE_MIXED_PORTS STRING "DONT_CARE" +// Retrieval info: CONSTANT: WIDTHAD_A NUMERIC "9" +// Retrieval info: CONSTANT: WIDTHAD_B NUMERIC "9" +// Retrieval info: CONSTANT: WIDTH_A NUMERIC "256" +// Retrieval info: CONSTANT: WIDTH_B NUMERIC "256" +// Retrieval info: CONSTANT: WIDTH_BYTEENA_A NUMERIC "1" +// Retrieval info: USED_PORT: clock 0 0 0 0 INPUT VCC "clock" +// Retrieval info: USED_PORT: data 0 0 256 0 INPUT NODEFVAL "data[255..0]" +// Retrieval info: USED_PORT: q 0 0 256 0 OUTPUT NODEFVAL "q[255..0]" +// Retrieval info: USED_PORT: rdaddress 0 0 9 0 INPUT NODEFVAL "rdaddress[8..0]" +// Retrieval info: USED_PORT: rden 0 0 0 0 INPUT VCC "rden" +// Retrieval info: USED_PORT: wraddress 0 0 9 0 INPUT NODEFVAL "wraddress[8..0]" +// Retrieval info: USED_PORT: wren 0 0 0 0 INPUT GND "wren" +// Retrieval info: CONNECT: @address_a 0 0 9 0 wraddress 0 0 9 0 +// Retrieval info: CONNECT: @address_b 0 0 9 0 rdaddress 0 0 9 0 +// Retrieval info: CONNECT: @clock0 0 0 0 0 clock 0 0 0 0 +// Retrieval info: CONNECT: @data_a 0 0 256 0 data 0 0 256 0 +// Retrieval info: CONNECT: @rden_b 0 0 0 0 rden 0 0 0 0 +// Retrieval info: CONNECT: @wren_a 0 0 0 0 wren 0 0 0 0 +// Retrieval info: CONNECT: q 0 0 256 0 @q_b 0 0 256 0 +// Retrieval info: GEN_FILE: TYPE_NORMAL Qdualport_mem.v TRUE +// Retrieval info: GEN_FILE: TYPE_NORMAL Qdualport_mem.inc FALSE +// Retrieval info: GEN_FILE: TYPE_NORMAL Qdualport_mem.cmp FALSE +// Retrieval info: GEN_FILE: TYPE_NORMAL Qdualport_mem.bsf FALSE +// Retrieval info: 
GEN_FILE: TYPE_NORMAL Qdualport_mem_inst.v TRUE
+// Retrieval info: GEN_FILE: TYPE_NORMAL Qdualport_mem_bb.v TRUE
+// Retrieval info: LIB_FILE: altera_mf
diff --git a/hw/hdl/operators/dtengine/xgboost/core/RLS.v b/hw/hdl/operators/dtengine/xgboost/core/RLS.v
new file mode 100644
index 00000000..b1b3b9c6
--- /dev/null
+++ b/hw/hdl/operators/dtengine/xgboost/core/RLS.v
@@ -0,0 +1,73 @@
+
+import DTEngine_Types::*;
+
+// RLS: registered rotate-left stage.
+//
+// On every rising clock edge data_out is loaded with data_in, rotated left
+// (MSBs wrap around into the LSBs) when shift_enable is high and shift_count
+// is one of the supported amounts:
+//   NUM_DTPU_CLUSTERS == 8 : rotate by 1, 2 or 4
+//   otherwise              : rotate by 1 or 2
+// Any other shift_count, or shift_enable low, passes data_in through unrotated.
+//
+// NUM_DTPU_CLUSTERS is not declared in this module; presumably it comes from
+// the imported DTEngine_Types package - confirm. rst_n is not used here.
+module RLS #(parameter DATA_WIDTH      = 8,
+             parameter DATA_WIDTH_BITS = 3)
+   (
+    input  wire                        clk,
+    input  wire                        rst_n,
+    input  wire                        shift_enable,
+    input  wire [DATA_WIDTH-1:0]       data_in,
+    input  wire [DATA_WIDTH_BITS-1:0]  shift_count,
+
+    output wire [DATA_WIDTH-1:0]       data_out
+   );
+
+// Output register.
+reg [DATA_WIDTH-1:0] rotated_q;
+
+assign data_out = rotated_q;
+
+generate
+    if (NUM_DTPU_CLUSTERS == 8) begin
+        always @(posedge clk) begin
+            // Pass-through unless a supported rotation overrides it below
+            // (the last non-blocking assignment in the block wins).
+            rotated_q <= data_in;
+
+            if (shift_enable) begin
+                case (shift_count)
+                    3'b001:  rotated_q <= {data_in[DATA_WIDTH-2:0], data_in[DATA_WIDTH-1]};
+                    3'b010:  rotated_q <= {data_in[DATA_WIDTH-3:0], data_in[DATA_WIDTH-1:DATA_WIDTH-2]};
+                    3'b100:  rotated_q <= {data_in[DATA_WIDTH-5:0], data_in[DATA_WIDTH-1:DATA_WIDTH-4]};
+                    default: ;  // unsupported amount: keep the pass-through value
+                endcase
+            end
+        end
+    end
+    else begin
+        always @(posedge clk) begin
+            // Same scheme as above, without the rotate-by-4 option.
+            rotated_q <= data_in;
+
+            if (shift_enable) begin
+                case (shift_count)
+                    2'b01:   rotated_q <= {data_in[DATA_WIDTH-2:0], data_in[DATA_WIDTH-1]};
+                    2'b10:   rotated_q <= {data_in[DATA_WIDTH-3:0], data_in[DATA_WIDTH-1:DATA_WIDTH-2]};
+                    default: ;  // unsupported amount: keep the pass-through value
+                endcase
+            end
+        end
+    end
+
+endgenerate
+
+
+endmodule
\ No newline at end of file
diff --git
a/hw/hdl/operators/dtengine/xgboost/core/bram1in2out.v b/hw/hdl/operators/dtengine/xgboost/core/bram1in2out.v new file mode 100644 index 00000000..1a056a31 --- /dev/null +++ b/hw/hdl/operators/dtengine/xgboost/core/bram1in2out.v @@ -0,0 +1,242 @@ +// megafunction wizard: %RAM: 2-PORT% +// GENERATION: STANDARD +// VERSION: WM1.0 +// MODULE: altsyncram + +// ============================================================ +// File Name: bram1in2out.v +// Megafunction Name(s): +// altsyncram +// +// Simulation Library Files(s): +// altera_mf +// ============================================================ +// ************************************************************ +// THIS IS A WIZARD-GENERATED FILE. DO NOT EDIT THIS FILE! +// +// 13.1.0 Build 162 10/23/2013 SJ Full Version +// ************************************************************ + + +//Copyright (C) 1991-2013 Altera Corporation +//Your use of Altera Corporation's design tools, logic functions +//and other software and tools, and its AMPP partner logic +//functions, and any output files from any of the foregoing +//(including device programming or simulation files), and any +//associated documentation or information are expressly subject +//to the terms and conditions of the Altera Program License +//Subscription Agreement, Altera MegaCore Function License +//Agreement, or other applicable license agreement, including, +//without limitation, that your use is for the sole purpose of +//programming logic devices manufactured by Altera and sold by +//Altera or its authorized distributors. Please refer to the +//applicable agreement for further details. 
+
+
+// synopsys translate_off
+`timescale 1 ps / 1 ps
+// synopsys translate_on
+// bram1in2out: wizard-generated wrapper around a single altsyncram instance
+// configured as a true dual-port RAM ("BIDIR_DUAL_PORT"), 1024 words x 256
+// bits, with both ports on the one `clock`.
+//   Port A: address_a[9:0], data_a[255:0], wren_a, q_a[255:0].
+//   Port B: address_b[9:0], data_b[255:0], wren_b, q_b[255:0].
+// Both read outputs are registered on CLOCK0 (outdata_reg_a/b = "CLOCK0");
+// that is the visible difference from the sibling bramin1out2 wrapper, whose
+// outputs are unregistered.
+// NOTE(review): Mem1in2out in this patch instantiates bramin1out2, not this
+// module - confirm whether bram1in2out is used anywhere.
+module bram1in2out (
+    address_a,
+    address_b,
+    clock,
+    data_a,
+    data_b,
+    wren_a,
+    wren_b,
+    q_a,
+    q_b);
+
+    input [9:0] address_a;
+    input [9:0] address_b;
+    input clock;
+    input [255:0] data_a;
+    input [255:0] data_b;
+    input wren_a;
+    input wren_b;
+    output [255:0] q_a;
+    output [255:0] q_b;
+`ifndef ALTERA_RESERVED_QIS
+// synopsys translate_off
+`endif
+    // Net types give unconnected inputs a default level:
+    // clock pulls to 1, both write enables pull to 0.
+    tri1 clock;
+    tri0 wren_a;
+    tri0 wren_b;
+`ifndef ALTERA_RESERVED_QIS
+// synopsys translate_on
+`endif
+
+    wire [255:0] sub_wire0;
+    wire [255:0] sub_wire1;
+    wire [255:0] q_a = sub_wire0[255:0];
+    wire [255:0] q_b = sub_wire1[255:0];
+
+    // Read enables are tied high, so both ports read on every clock.
+    altsyncram altsyncram_component (
+        .clock0 (clock),
+        .wren_a (wren_a),
+        .address_b (address_b),
+        .data_b (data_b),
+        .wren_b (wren_b),
+        .address_a (address_a),
+        .data_a (data_a),
+        .q_a (sub_wire0),
+        .q_b (sub_wire1),
+        .aclr0 (1'b0),
+        .aclr1 (1'b0),
+        .addressstall_a (1'b0),
+        .addressstall_b (1'b0),
+        .byteena_a (1'b1),
+        .byteena_b (1'b1),
+        .clock1 (1'b1),
+        .clocken0 (1'b1),
+        .clocken1 (1'b1),
+        .clocken2 (1'b1),
+        .clocken3 (1'b1),
+        .eccstatus (),
+        .rden_a (1'b1),
+        .rden_b (1'b1));
+    defparam
+        altsyncram_component.address_reg_b = "CLOCK0",
+        altsyncram_component.clock_enable_input_a = "BYPASS",
+        altsyncram_component.clock_enable_input_b = "BYPASS",
+        altsyncram_component.clock_enable_output_a = "BYPASS",
+        altsyncram_component.clock_enable_output_b = "BYPASS",
+        altsyncram_component.indata_reg_b = "CLOCK0",
+        altsyncram_component.intended_device_family = "Stratix V",
+        altsyncram_component.lpm_type = "altsyncram",
+        altsyncram_component.numwords_a = 1024,
+        altsyncram_component.numwords_b = 1024,
+        altsyncram_component.operation_mode = "BIDIR_DUAL_PORT",
+        altsyncram_component.outdata_aclr_a = "NONE",
+        altsyncram_component.outdata_aclr_b = "NONE",
+        altsyncram_component.outdata_reg_a = "CLOCK0",
+        altsyncram_component.outdata_reg_b = "CLOCK0",
+        altsyncram_component.power_up_uninitialized = "FALSE",
+        altsyncram_component.read_during_write_mode_mixed_ports = "DONT_CARE",
+        altsyncram_component.read_during_write_mode_port_a = "NEW_DATA_NO_NBE_READ",
+        altsyncram_component.read_during_write_mode_port_b = "NEW_DATA_NO_NBE_READ",
+        altsyncram_component.widthad_a = 10,
+        altsyncram_component.widthad_b = 10,
+        altsyncram_component.width_a = 256,
+        altsyncram_component.width_b = 256,
+        altsyncram_component.width_byteena_a = 1,
+        altsyncram_component.width_byteena_b = 1,
+        altsyncram_component.wrcontrol_wraddress_reg_b = "CLOCK0";
+
+
+endmodule
+
+// ============================================================
+// CNX file retrieval info
+// ============================================================
+// Retrieval info: PRIVATE: ADDRESSSTALL_A NUMERIC "0"
+// Retrieval info: PRIVATE: ADDRESSSTALL_B NUMERIC "0"
+// Retrieval info: PRIVATE: BYTEENA_ACLR_A NUMERIC "0"
+// Retrieval info: PRIVATE: BYTEENA_ACLR_B NUMERIC "0"
+// Retrieval info: PRIVATE: BYTE_ENABLE_A NUMERIC "0"
+// Retrieval info: PRIVATE: BYTE_ENABLE_B NUMERIC "0"
+// Retrieval info: PRIVATE: BYTE_SIZE NUMERIC "8"
+// Retrieval info: PRIVATE: BlankMemory NUMERIC "1"
+// Retrieval info: PRIVATE: CLOCK_ENABLE_INPUT_A NUMERIC "0"
+// Retrieval info: PRIVATE: CLOCK_ENABLE_INPUT_B NUMERIC "0"
+// Retrieval info: PRIVATE: CLOCK_ENABLE_OUTPUT_A NUMERIC "0"
+// Retrieval info: PRIVATE: CLOCK_ENABLE_OUTPUT_B NUMERIC "0"
+// Retrieval info: PRIVATE: CLRdata NUMERIC "0"
+// Retrieval info: PRIVATE: CLRq NUMERIC "0"
+// Retrieval info: PRIVATE: CLRrdaddress NUMERIC "0"
+// Retrieval info: PRIVATE: CLRrren NUMERIC "0"
+// Retrieval info: PRIVATE: CLRwraddress NUMERIC "0"
+// Retrieval info: PRIVATE: CLRwren NUMERIC "0"
+// Retrieval info: PRIVATE: Clock NUMERIC "0"
+// Retrieval info: PRIVATE: Clock_A NUMERIC "0"
+// Retrieval info: PRIVATE: Clock_B NUMERIC "0"
+// Retrieval info: PRIVATE: IMPLEMENT_IN_LES NUMERIC "0"
+// Retrieval info: PRIVATE: INDATA_ACLR_B
NUMERIC "0" +// Retrieval info: PRIVATE: INDATA_REG_B NUMERIC "1" +// Retrieval info: PRIVATE: INIT_FILE_LAYOUT STRING "PORT_A" +// Retrieval info: PRIVATE: INIT_TO_SIM_X NUMERIC "0" +// Retrieval info: PRIVATE: INTENDED_DEVICE_FAMILY STRING "Stratix V" +// Retrieval info: PRIVATE: JTAG_ENABLED NUMERIC "0" +// Retrieval info: PRIVATE: JTAG_ID STRING "NONE" +// Retrieval info: PRIVATE: MAXIMUM_DEPTH NUMERIC "0" +// Retrieval info: PRIVATE: MEMSIZE NUMERIC "262144" +// Retrieval info: PRIVATE: MEM_IN_BITS NUMERIC "0" +// Retrieval info: PRIVATE: MIFfilename STRING "" +// Retrieval info: PRIVATE: OPERATION_MODE NUMERIC "3" +// Retrieval info: PRIVATE: OUTDATA_ACLR_B NUMERIC "0" +// Retrieval info: PRIVATE: OUTDATA_REG_B NUMERIC "1" +// Retrieval info: PRIVATE: RAM_BLOCK_TYPE NUMERIC "0" +// Retrieval info: PRIVATE: READ_DURING_WRITE_MODE_MIXED_PORTS NUMERIC "2" +// Retrieval info: PRIVATE: READ_DURING_WRITE_MODE_PORT_A NUMERIC "3" +// Retrieval info: PRIVATE: READ_DURING_WRITE_MODE_PORT_B NUMERIC "3" +// Retrieval info: PRIVATE: REGdata NUMERIC "1" +// Retrieval info: PRIVATE: REGq NUMERIC "1" +// Retrieval info: PRIVATE: REGrdaddress NUMERIC "0" +// Retrieval info: PRIVATE: REGrren NUMERIC "0" +// Retrieval info: PRIVATE: REGwraddress NUMERIC "1" +// Retrieval info: PRIVATE: REGwren NUMERIC "1" +// Retrieval info: PRIVATE: SYNTH_WRAPPER_GEN_POSTFIX STRING "0" +// Retrieval info: PRIVATE: USE_DIFF_CLKEN NUMERIC "0" +// Retrieval info: PRIVATE: UseDPRAM NUMERIC "1" +// Retrieval info: PRIVATE: VarWidth NUMERIC "0" +// Retrieval info: PRIVATE: WIDTH_READ_A NUMERIC "256" +// Retrieval info: PRIVATE: WIDTH_READ_B NUMERIC "256" +// Retrieval info: PRIVATE: WIDTH_WRITE_A NUMERIC "256" +// Retrieval info: PRIVATE: WIDTH_WRITE_B NUMERIC "256" +// Retrieval info: PRIVATE: WRADDR_ACLR_B NUMERIC "0" +// Retrieval info: PRIVATE: WRADDR_REG_B NUMERIC "1" +// Retrieval info: PRIVATE: WRCTRL_ACLR_B NUMERIC "0" +// Retrieval info: PRIVATE: enable NUMERIC "0" +// Retrieval info: 
PRIVATE: rden NUMERIC "0" +// Retrieval info: LIBRARY: altera_mf altera_mf.altera_mf_components.all +// Retrieval info: CONSTANT: ADDRESS_REG_B STRING "CLOCK0" +// Retrieval info: CONSTANT: CLOCK_ENABLE_INPUT_A STRING "BYPASS" +// Retrieval info: CONSTANT: CLOCK_ENABLE_INPUT_B STRING "BYPASS" +// Retrieval info: CONSTANT: CLOCK_ENABLE_OUTPUT_A STRING "BYPASS" +// Retrieval info: CONSTANT: CLOCK_ENABLE_OUTPUT_B STRING "BYPASS" +// Retrieval info: CONSTANT: INDATA_REG_B STRING "CLOCK0" +// Retrieval info: CONSTANT: INTENDED_DEVICE_FAMILY STRING "Stratix V" +// Retrieval info: CONSTANT: LPM_TYPE STRING "altsyncram" +// Retrieval info: CONSTANT: NUMWORDS_A NUMERIC "1024" +// Retrieval info: CONSTANT: NUMWORDS_B NUMERIC "1024" +// Retrieval info: CONSTANT: OPERATION_MODE STRING "BIDIR_DUAL_PORT" +// Retrieval info: CONSTANT: OUTDATA_ACLR_A STRING "NONE" +// Retrieval info: CONSTANT: OUTDATA_ACLR_B STRING "NONE" +// Retrieval info: CONSTANT: OUTDATA_REG_A STRING "CLOCK0" +// Retrieval info: CONSTANT: OUTDATA_REG_B STRING "CLOCK0" +// Retrieval info: CONSTANT: POWER_UP_UNINITIALIZED STRING "FALSE" +// Retrieval info: CONSTANT: READ_DURING_WRITE_MODE_MIXED_PORTS STRING "DONT_CARE" +// Retrieval info: CONSTANT: READ_DURING_WRITE_MODE_PORT_A STRING "NEW_DATA_NO_NBE_READ" +// Retrieval info: CONSTANT: READ_DURING_WRITE_MODE_PORT_B STRING "NEW_DATA_NO_NBE_READ" +// Retrieval info: CONSTANT: WIDTHAD_A NUMERIC "10" +// Retrieval info: CONSTANT: WIDTHAD_B NUMERIC "10" +// Retrieval info: CONSTANT: WIDTH_A NUMERIC "256" +// Retrieval info: CONSTANT: WIDTH_B NUMERIC "256" +// Retrieval info: CONSTANT: WIDTH_BYTEENA_A NUMERIC "1" +// Retrieval info: CONSTANT: WIDTH_BYTEENA_B NUMERIC "1" +// Retrieval info: CONSTANT: WRCONTROL_WRADDRESS_REG_B STRING "CLOCK0" +// Retrieval info: USED_PORT: address_a 0 0 10 0 INPUT NODEFVAL "address_a[9..0]" +// Retrieval info: USED_PORT: address_b 0 0 10 0 INPUT NODEFVAL "address_b[9..0]" +// Retrieval info: USED_PORT: clock 0 0 0 0 INPUT VCC "clock" 
+// Retrieval info: USED_PORT: data_a 0 0 256 0 INPUT NODEFVAL "data_a[255..0]" +// Retrieval info: USED_PORT: data_b 0 0 256 0 INPUT NODEFVAL "data_b[255..0]" +// Retrieval info: USED_PORT: q_a 0 0 256 0 OUTPUT NODEFVAL "q_a[255..0]" +// Retrieval info: USED_PORT: q_b 0 0 256 0 OUTPUT NODEFVAL "q_b[255..0]" +// Retrieval info: USED_PORT: wren_a 0 0 0 0 INPUT GND "wren_a" +// Retrieval info: USED_PORT: wren_b 0 0 0 0 INPUT GND "wren_b" +// Retrieval info: CONNECT: @address_a 0 0 10 0 address_a 0 0 10 0 +// Retrieval info: CONNECT: @address_b 0 0 10 0 address_b 0 0 10 0 +// Retrieval info: CONNECT: @clock0 0 0 0 0 clock 0 0 0 0 +// Retrieval info: CONNECT: @data_a 0 0 256 0 data_a 0 0 256 0 +// Retrieval info: CONNECT: @data_b 0 0 256 0 data_b 0 0 256 0 +// Retrieval info: CONNECT: @wren_a 0 0 0 0 wren_a 0 0 0 0 +// Retrieval info: CONNECT: @wren_b 0 0 0 0 wren_b 0 0 0 0 +// Retrieval info: CONNECT: q_a 0 0 256 0 @q_a 0 0 256 0 +// Retrieval info: CONNECT: q_b 0 0 256 0 @q_b 0 0 256 0 +// Retrieval info: GEN_FILE: TYPE_NORMAL bram1in2out.v TRUE +// Retrieval info: GEN_FILE: TYPE_NORMAL bram1in2out.inc TRUE +// Retrieval info: GEN_FILE: TYPE_NORMAL bram1in2out.cmp TRUE +// Retrieval info: GEN_FILE: TYPE_NORMAL bram1in2out.bsf TRUE +// Retrieval info: GEN_FILE: TYPE_NORMAL bram1in2out_inst.v TRUE +// Retrieval info: GEN_FILE: TYPE_NORMAL bram1in2out_bb.v TRUE +// Retrieval info: LIB_FILE: altera_mf diff --git a/hw/hdl/operators/dtengine/xgboost/core/bramin1out2.qip b/hw/hdl/operators/dtengine/xgboost/core/bramin1out2.qip new file mode 100644 index 00000000..5e319d00 --- /dev/null +++ b/hw/hdl/operators/dtengine/xgboost/core/bramin1out2.qip @@ -0,0 +1,5 @@ +set_global_assignment -name IP_TOOL_NAME "RAM: 2-PORT" +set_global_assignment -name IP_TOOL_VERSION "13.1" +set_global_assignment -name VERILOG_FILE [file join $::quartus(qip_path) "bramin1out2.v"] +set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "bramin1out2_inst.v"] +set_global_assignment 
-name MISC_FILE [file join $::quartus(qip_path) "bramin1out2_bb.v"] diff --git a/hw/hdl/operators/dtengine/xgboost/core/bramin1out2.v b/hw/hdl/operators/dtengine/xgboost/core/bramin1out2.v new file mode 100644 index 00000000..f2f02f19 --- /dev/null +++ b/hw/hdl/operators/dtengine/xgboost/core/bramin1out2.v @@ -0,0 +1,242 @@ +// megafunction wizard: %RAM: 2-PORT% +// GENERATION: STANDARD +// VERSION: WM1.0 +// MODULE: altsyncram + +// ============================================================ +// File Name: bramin1out2.v +// Megafunction Name(s): +// altsyncram +// +// Simulation Library Files(s): +// altera_mf +// ============================================================ +// ************************************************************ +// THIS IS A WIZARD-GENERATED FILE. DO NOT EDIT THIS FILE! +// +// 13.1.0 Build 162 10/23/2013 SJ Full Version +// ************************************************************ + + +//Copyright (C) 1991-2013 Altera Corporation +//Your use of Altera Corporation's design tools, logic functions +//and other software and tools, and its AMPP partner logic +//functions, and any output files from any of the foregoing +//(including device programming or simulation files), and any +//associated documentation or information are expressly subject +//to the terms and conditions of the Altera Program License +//Subscription Agreement, Altera MegaCore Function License +//Agreement, or other applicable license agreement, including, +//without limitation, that your use is for the sole purpose of +//programming logic devices manufactured by Altera and sold by +//Altera or its authorized distributors. Please refer to the +//applicable agreement for further details. 
+// bramin1out2: single-clock, two-port RAM built on the Altera altsyncram megafunction.
+// Both ports have address/data/wren inputs and a q output; geometry per the defparams below is 1024 x 256 bit.
+// synopsys translate_off
+`timescale 1 ps / 1 ps
+// synopsys translate_on
+module bramin1out2 (
+	address_a,
+	address_b,
+	clock,
+	data_a,
+	data_b,
+	wren_a,
+	wren_b,
+	q_a,
+	q_b);
+
+	input	[9:0]  address_a;	// port A word address
+	input	[9:0]  address_b;	// port B word address
+	input	  clock;		// shared by both ports (drives clock0 only)
+	input	[255:0]  data_a;	// port A write data
+	input	[255:0]  data_b;	// port B write data
+	input	  wren_a;		// port A write enable
+	input	  wren_b;		// port B write enable
+	output	[255:0]  q_a;		// port A read data
+	output	[255:0]  q_b;		// port B read data
+`ifndef ALTERA_RESERVED_QIS
+// synopsys translate_off
+`endif
+	tri1	  clock;
+	tri0	  wren_a;
+	tri0	  wren_b;
+`ifndef ALTERA_RESERVED_QIS
+// synopsys translate_on
+`endif
+
+	wire [255:0] sub_wire0;
+	wire [255:0] sub_wire1;
+	wire [255:0] q_a = sub_wire0[255:0];
+	wire [255:0] q_b = sub_wire1[255:0];
+
+	altsyncram	altsyncram_component (
+				.clock0 (clock),
+				.wren_a (wren_a),
+				.address_b (address_b),
+				.data_b (data_b),
+				.wren_b (wren_b),
+				.address_a (address_a),
+				.data_a (data_a),
+				.q_a (sub_wire0),
+				.q_b (sub_wire1),
+				.aclr0 (1'b0),
+				.aclr1 (1'b0),
+				.addressstall_a (1'b0),
+				.addressstall_b (1'b0),
+				.byteena_a (1'b1),	// single byte-enable bit, tied high
+				.byteena_b (1'b1),
+				.clock1 (1'b1),		// second clock input is tied off
+				.clocken0 (1'b1),
+				.clocken1 (1'b1),
+				.clocken2 (1'b1),
+				.clocken3 (1'b1),
+				.eccstatus (),
+				.rden_a (1'b1),		// read enables tied high on both ports
+				.rden_b (1'b1));
+	defparam
+		altsyncram_component.address_reg_b = "CLOCK0",
+		altsyncram_component.clock_enable_input_a = "BYPASS",
+		altsyncram_component.clock_enable_input_b = "BYPASS",
+		altsyncram_component.clock_enable_output_a = "BYPASS",
+		altsyncram_component.clock_enable_output_b = "BYPASS",
+		altsyncram_component.indata_reg_b = "CLOCK0",
+		altsyncram_component.intended_device_family = "Stratix V",
+		altsyncram_component.lpm_type = "altsyncram",
+		altsyncram_component.numwords_a = 1024,	// NOTE(review): the CNX retrieval info at the end of this file records NUMWORDS 512 / WIDTHAD 9,
+		altsyncram_component.numwords_b = 1024,	// so these values look hand-edited - confirm before regenerating with the wizard
+		altsyncram_component.operation_mode = "BIDIR_DUAL_PORT",
+		altsyncram_component.outdata_aclr_a = "NONE",
+		altsyncram_component.outdata_aclr_b = "NONE",
+		altsyncram_component.outdata_reg_a = "UNREGISTERED",
+		altsyncram_component.outdata_reg_b = "UNREGISTERED",
+		altsyncram_component.power_up_uninitialized = "FALSE",
+		altsyncram_component.read_during_write_mode_mixed_ports = "DONT_CARE",
+		altsyncram_component.read_during_write_mode_port_a = "NEW_DATA_NO_NBE_READ",
+		altsyncram_component.read_during_write_mode_port_b = "NEW_DATA_NO_NBE_READ",
+		altsyncram_component.widthad_a = 10,	// matches the [9:0] address ports above
+		altsyncram_component.widthad_b = 10,
+		altsyncram_component.width_a = 256,
+		altsyncram_component.width_b = 256,
+		altsyncram_component.width_byteena_a = 1,
+		altsyncram_component.width_byteena_b = 1,
+		altsyncram_component.wrcontrol_wraddress_reg_b = "CLOCK0";
+
+
+endmodule
+
+// ============================================================
+// CNX file retrieval info
+// ============================================================
+// Retrieval info: PRIVATE: ADDRESSSTALL_A NUMERIC "0"
+// Retrieval info: PRIVATE: ADDRESSSTALL_B NUMERIC "0"
+// Retrieval info: PRIVATE: BYTEENA_ACLR_A NUMERIC "0"
+// Retrieval info: PRIVATE: BYTEENA_ACLR_B NUMERIC "0"
+// Retrieval info: PRIVATE: BYTE_ENABLE_A NUMERIC "0"
+// Retrieval info: PRIVATE: BYTE_ENABLE_B NUMERIC "0"
+// Retrieval info: PRIVATE: BYTE_SIZE NUMERIC "8"
+// Retrieval info: PRIVATE: BlankMemory NUMERIC "1"
+// Retrieval info: PRIVATE: CLOCK_ENABLE_INPUT_A NUMERIC "0"
+// Retrieval info: PRIVATE: CLOCK_ENABLE_INPUT_B NUMERIC "0"
+// Retrieval info: PRIVATE: CLOCK_ENABLE_OUTPUT_A NUMERIC "0"
+// Retrieval info: PRIVATE: CLOCK_ENABLE_OUTPUT_B NUMERIC "0"
+// Retrieval info: PRIVATE: CLRdata NUMERIC "0"
+// Retrieval info: PRIVATE: CLRq NUMERIC "0"
+// Retrieval info: PRIVATE: CLRrdaddress NUMERIC "0"
+// Retrieval info: PRIVATE: CLRrren NUMERIC "0"
+// Retrieval info: PRIVATE: CLRwraddress NUMERIC "0"
+// Retrieval info: PRIVATE: CLRwren NUMERIC "0"
+// Retrieval info: PRIVATE: Clock NUMERIC "0"
+// Retrieval info: PRIVATE: Clock_A NUMERIC "0"
+// Retrieval info: PRIVATE: Clock_B NUMERIC "0"
+// Retrieval info: PRIVATE: IMPLEMENT_IN_LES NUMERIC "0"
+// Retrieval info: PRIVATE: INDATA_ACLR_B
NUMERIC "0" +// Retrieval info: PRIVATE: INDATA_REG_B NUMERIC "1" +// Retrieval info: PRIVATE: INIT_FILE_LAYOUT STRING "PORT_A" +// Retrieval info: PRIVATE: INIT_TO_SIM_X NUMERIC "0" +// Retrieval info: PRIVATE: INTENDED_DEVICE_FAMILY STRING "Stratix V" +// Retrieval info: PRIVATE: JTAG_ENABLED NUMERIC "0" +// Retrieval info: PRIVATE: JTAG_ID STRING "NONE" +// Retrieval info: PRIVATE: MAXIMUM_DEPTH NUMERIC "0" +// Retrieval info: PRIVATE: MEMSIZE NUMERIC "131072" +// Retrieval info: PRIVATE: MEM_IN_BITS NUMERIC "0" +// Retrieval info: PRIVATE: MIFfilename STRING "" +// Retrieval info: PRIVATE: OPERATION_MODE NUMERIC "3" +// Retrieval info: PRIVATE: OUTDATA_ACLR_B NUMERIC "0" +// Retrieval info: PRIVATE: OUTDATA_REG_B NUMERIC "0" +// Retrieval info: PRIVATE: RAM_BLOCK_TYPE NUMERIC "0" +// Retrieval info: PRIVATE: READ_DURING_WRITE_MODE_MIXED_PORTS NUMERIC "2" +// Retrieval info: PRIVATE: READ_DURING_WRITE_MODE_PORT_A NUMERIC "3" +// Retrieval info: PRIVATE: READ_DURING_WRITE_MODE_PORT_B NUMERIC "3" +// Retrieval info: PRIVATE: REGdata NUMERIC "1" +// Retrieval info: PRIVATE: REGq NUMERIC "0" +// Retrieval info: PRIVATE: REGrdaddress NUMERIC "0" +// Retrieval info: PRIVATE: REGrren NUMERIC "0" +// Retrieval info: PRIVATE: REGwraddress NUMERIC "1" +// Retrieval info: PRIVATE: REGwren NUMERIC "1" +// Retrieval info: PRIVATE: SYNTH_WRAPPER_GEN_POSTFIX STRING "0" +// Retrieval info: PRIVATE: USE_DIFF_CLKEN NUMERIC "0" +// Retrieval info: PRIVATE: UseDPRAM NUMERIC "1" +// Retrieval info: PRIVATE: VarWidth NUMERIC "0" +// Retrieval info: PRIVATE: WIDTH_READ_A NUMERIC "256" +// Retrieval info: PRIVATE: WIDTH_READ_B NUMERIC "256" +// Retrieval info: PRIVATE: WIDTH_WRITE_A NUMERIC "256" +// Retrieval info: PRIVATE: WIDTH_WRITE_B NUMERIC "256" +// Retrieval info: PRIVATE: WRADDR_ACLR_B NUMERIC "0" +// Retrieval info: PRIVATE: WRADDR_REG_B NUMERIC "1" +// Retrieval info: PRIVATE: WRCTRL_ACLR_B NUMERIC "0" +// Retrieval info: PRIVATE: enable NUMERIC "0" +// Retrieval info: 
PRIVATE: rden NUMERIC "0"
+// Retrieval info: LIBRARY: altera_mf altera_mf.altera_mf_components.all
+// Retrieval info: CONSTANT: ADDRESS_REG_B STRING "CLOCK0"
+// Retrieval info: CONSTANT: CLOCK_ENABLE_INPUT_A STRING "BYPASS"
+// Retrieval info: CONSTANT: CLOCK_ENABLE_INPUT_B STRING "BYPASS"
+// Retrieval info: CONSTANT: CLOCK_ENABLE_OUTPUT_A STRING "BYPASS"
+// Retrieval info: CONSTANT: CLOCK_ENABLE_OUTPUT_B STRING "BYPASS"
+// Retrieval info: CONSTANT: INDATA_REG_B STRING "CLOCK0"
+// Retrieval info: CONSTANT: INTENDED_DEVICE_FAMILY STRING "Stratix V"
+// Retrieval info: CONSTANT: LPM_TYPE STRING "altsyncram"
+// Retrieval info: CONSTANT: NUMWORDS_A NUMERIC "512"
+// Retrieval info: CONSTANT: NUMWORDS_B NUMERIC "512"
+// Retrieval info: CONSTANT: OPERATION_MODE STRING "BIDIR_DUAL_PORT"
+// Retrieval info: CONSTANT: OUTDATA_ACLR_A STRING "NONE"
+// Retrieval info: CONSTANT: OUTDATA_ACLR_B STRING "NONE"
+// Retrieval info: CONSTANT: OUTDATA_REG_A STRING "UNREGISTERED"
+// Retrieval info: CONSTANT: OUTDATA_REG_B STRING "UNREGISTERED"
+// Retrieval info: CONSTANT: POWER_UP_UNINITIALIZED STRING "FALSE"
+// Retrieval info: CONSTANT: READ_DURING_WRITE_MODE_MIXED_PORTS STRING "DONT_CARE"
+// Retrieval info: CONSTANT: READ_DURING_WRITE_MODE_PORT_A STRING "NEW_DATA_NO_NBE_READ"
+// Retrieval info: CONSTANT: READ_DURING_WRITE_MODE_PORT_B STRING "NEW_DATA_NO_NBE_READ"
+// Retrieval info: CONSTANT: WIDTHAD_A NUMERIC "9"
+// Retrieval info: CONSTANT: WIDTHAD_B NUMERIC "9"
+// Retrieval info: CONSTANT: WIDTH_A NUMERIC "256"
+// Retrieval info: CONSTANT: WIDTH_B NUMERIC "256"
+// Retrieval info: CONSTANT: WIDTH_BYTEENA_A NUMERIC "1"
+// Retrieval info: CONSTANT: WIDTH_BYTEENA_B NUMERIC "1"
+// Retrieval info: CONSTANT: WRCONTROL_WRADDRESS_REG_B STRING "CLOCK0"
+// Retrieval info: USED_PORT: address_a 0 0 9 0 INPUT NODEFVAL "address_a[8..0]"
+// Retrieval info: USED_PORT: address_b 0 0 9 0 INPUT NODEFVAL "address_b[8..0]"
+// Retrieval info: USED_PORT: clock 0 0 0 0 INPUT VCC
"clock"
+// Retrieval info: USED_PORT: data_a 0 0 256 0 INPUT NODEFVAL "data_a[255..0]"
+// Retrieval info: USED_PORT: data_b 0 0 256 0 INPUT NODEFVAL "data_b[255..0]"
+// Retrieval info: USED_PORT: q_a 0 0 256 0 OUTPUT NODEFVAL "q_a[255..0]"
+// Retrieval info: USED_PORT: q_b 0 0 256 0 OUTPUT NODEFVAL "q_b[255..0]"
+// Retrieval info: USED_PORT: wren_a 0 0 0 0 INPUT GND "wren_a"
+// Retrieval info: USED_PORT: wren_b 0 0 0 0 INPUT GND "wren_b"
+// Retrieval info: CONNECT: @address_a 0 0 9 0 address_a 0 0 9 0
+// Retrieval info: CONNECT: @address_b 0 0 9 0 address_b 0 0 9 0
+// Retrieval info: CONNECT: @clock0 0 0 0 0 clock 0 0 0 0
+// Retrieval info: CONNECT: @data_a 0 0 256 0 data_a 0 0 256 0
+// Retrieval info: CONNECT: @data_b 0 0 256 0 data_b 0 0 256 0
+// Retrieval info: CONNECT: @wren_a 0 0 0 0 wren_a 0 0 0 0
+// Retrieval info: CONNECT: @wren_b 0 0 0 0 wren_b 0 0 0 0
+// Retrieval info: CONNECT: q_a 0 0 256 0 @q_a 0 0 256 0
+// Retrieval info: CONNECT: q_b 0 0 256 0 @q_b 0 0 256 0
+// Retrieval info: GEN_FILE: TYPE_NORMAL bramin1out2.v TRUE
+// Retrieval info: GEN_FILE: TYPE_NORMAL bramin1out2.inc FALSE
+// Retrieval info: GEN_FILE: TYPE_NORMAL bramin1out2.cmp FALSE
+// Retrieval info: GEN_FILE: TYPE_NORMAL bramin1out2.bsf FALSE
+// Retrieval info: GEN_FILE: TYPE_NORMAL bramin1out2_inst.v TRUE
+// Retrieval info: GEN_FILE: TYPE_NORMAL bramin1out2_bb.v TRUE
+// Retrieval info: LIB_FILE: altera_mf
diff --git a/hw/hdl/operators/dtengine/xgboost/core/dualport_mem.v b/hw/hdl/operators/dtengine/xgboost/core/dualport_mem.v
new file mode 100644
index 00000000..4e689de6
--- /dev/null
+++ b/hw/hdl/operators/dtengine/xgboost/core/dualport_mem.v
@@ -0,0 +1,85 @@
+// DualPortMem: line-wide memory (Qdualport_mem) followed by a pipelined output mux (PipelinedMUX).
+// The upper ADDR_WIDTH bits of raddr address a DATA_WIDTH line; the lower LINE_ADDR_WIDTH bits are
+// registered once and handed to the mux as its select. valid_out is `re` delayed NUM_PIPELINE_LEVELS+1 cycles.
+module DualPortMem #(
+    parameter DATA_WIDTH          = 32,   // width of one stored line (din / memory q)
+    parameter ADDR_WIDTH          = 8,    // line address width (waddr, upper raddr bits)
+    parameter WORD_WIDTH          = 16,   // width of dout
+    parameter LINE_ADDR_WIDTH     = 3,    // lower raddr bits passed to the mux
+    parameter NUM_PIPELINE_LEVELS = 1     // forwarded to PipelinedMUX; also sets the valid_out delay
+) (
+    input  wire                                  clk,
+    input  wire                                  rst_n,     // active low, sampled on posedge clk
+    input  wire                                  we,        // write enable for the line at waddr
+    input  wire                                  re,        // read enable
+    input  wire [ADDR_WIDTH+LINE_ADDR_WIDTH-1:0] raddr,     // {line address, mux select}
+    input  wire [ADDR_WIDTH-1:0]                 waddr,
+    input  wire [DATA_WIDTH-1:0]                 din,
+    output wire [WORD_WIDTH-1:0]                 dout,
+    output wire                                  valid_out
+);
+
+reg                        re_p[0:NUM_PIPELINE_LEVELS]; // explicit range: the [N+1] size shorthand is SystemVerilog-only, this file is .v
+wire [DATA_WIDTH-1:0]      dline;                       // full line read from the memory
+reg  [LINE_ADDR_WIDTH-1:0] raddr_d1;                    // mux select, delayed one cycle
+
+
+
+Qdualport_mem Qdualport_mem_inst (
+    .clock     ( clk ),
+    .data      ( din ),
+    .rdaddress ( raddr[LINE_ADDR_WIDTH+ADDR_WIDTH-1:LINE_ADDR_WIDTH] ),
+    .rden      ( re ),
+    .wraddress ( waddr ),
+    .wren      ( we ),
+    .q         ( dline )
+    );
+
+
+always @(posedge clk) begin
+    raddr_d1 <= raddr[LINE_ADDR_WIDTH-1:0];
+end
+
+//------------------------ Out MUX Pipelines ------------------------//
+// pipeline re i = 0,
+always @(posedge clk) begin
+    if(~rst_n) begin
+        re_p[0] <= 0;
+    end
+    else begin
+        re_p[0] <= re;
+    end
+end
+
+genvar i;
+// pipeline re i = 1 to NUM_PIPELINE_LEVELS (inclusive),
+generate for (i = 1; i < NUM_PIPELINE_LEVELS+1; i=i+1) begin: PipelineOutMux
+    always @(posedge clk) begin
+        if(~rst_n) begin
+            re_p[i] <= 0;
+        end
+        else begin
+            re_p[i] <= re_p[i-1];
+        end
+    end
+end
+endgenerate
+
+PipelinedMUX #(
+    .DATA_WIDTH          (DATA_WIDTH),
+    .ADDR_WIDTH          (LINE_ADDR_WIDTH),
+    .WORD_WIDTH          (WORD_WIDTH),
+    .NUM_PIPELINE_LEVELS (NUM_PIPELINE_LEVELS)
+) muxa(
+    .clk   (clk),
+    .rst_n (rst_n),
+
+    .din   (dline),
+    .addr  (raddr_d1),
+    .dout  (dout)
+);
+
+assign valid_out = re_p[NUM_PIPELINE_LEVELS]; // last stage of the re delay line
+
+endmodule
+
diff --git a/hw/hdl/operators/dtengine/xgboost/engineParams.sv b/hw/hdl/operators/dtengine/xgboost/engineParams.sv
new file mode 100644
index 00000000..bcbe8907
--- /dev/null
+++ b/hw/hdl/operators/dtengine/xgboost/engineParams.sv
@@ -0,0 +1,334 @@
+// This is a generated file. Use and modify at your own risk.
+////////////////////////////////////////////////////////////////////////////////
+
+// default_nettype of none prevents implicit wire declaration.
+`default_nettype none
+`timescale 1ns/1ps
+module engineParams #( // AXI4-Lite slave that holds the engine parameters in registers and drives them out as plain wires
+  parameter integer C_ADDR_WIDTH = 12,
+  parameter integer C_DATA_WIDTH = 32
+)
+(
+  // AXI4-Lite slave signals (aclk_en gates every state/register update; areset is synchronous, active high)
+  input  wire                      aclk        ,
+  input  wire                      areset      ,
+  input  wire                      aclk_en     ,
+  input  wire                      awvalid     ,
+  output wire                      awready     ,
+  input  wire [C_ADDR_WIDTH-1:0]   awaddr      ,
+  input  wire                      wvalid      ,
+  output wire                      wready      ,
+  input  wire [C_DATA_WIDTH-1:0]   wdata       ,
+  input  wire [C_DATA_WIDTH/8-1:0] wstrb       ,
+  input  wire                      arvalid     ,
+  output wire                      arready     ,
+  input  wire [C_ADDR_WIDTH-1:0]   araddr      ,
+  output wire                      rvalid      ,
+  input  wire                      rready      ,
+  output wire [C_DATA_WIDTH-1:0]   rdata       ,
+  output wire [2-1:0]              rresp       ,
+  output wire                      bvalid      ,
+  input  wire                      bready      ,
+  output wire [2-1:0]              bresp       ,
+  output wire                      ap_start    ,
+  // User defined arguments (each mirrors one register below)
+  output wire [16-1:0]             tuple_numcls ,
+  output wire [8-1:0]              treeDepth ,
+  output wire [8-1:0]              puTrees ,
+  output wire [32-1:0]             outputNumCLs ,
+  output wire [8-1:0]              prog_schedule ,
+  output wire [8-1:0]              proc_schedule ,
+  output wire [16-1:0]             tree_weights_numcls_minus_one,
+  output wire [16-1:0]             tree_feature_index_numcls_minus_one
+);
+
+///////////////////////////////////////////////////////////////////////////////
+// Local Parameters
+///////////////////////////////////////////////////////////////////////////////
+localparam [C_ADDR_WIDTH-1:0] LP_ADDR_AP_CTRL         = 12'h000; // 0
+localparam [C_ADDR_WIDTH-1:0] LP_ADDR_TUPLENUMCLS_0   = 12'h008; // 1
+localparam [C_ADDR_WIDTH-1:0] LP_ADDR_TREEDEPTH_0     = 12'h010; // 2
+localparam [C_ADDR_WIDTH-1:0] LP_ADDR_PUTREES_0       = 12'h018; // 3
+localparam [C_ADDR_WIDTH-1:0] LP_ADDR_OUTPUTNUMCLS_0  = 12'h020; // 4
+localparam [C_ADDR_WIDTH-1:0] LP_ADDR_TREEWNUMCLS_0   = 12'h028; // 5
+localparam [C_ADDR_WIDTH-1:0] LP_ADDR_TREEFNUMCLS_0   = 12'h030; // 6
+localparam [C_ADDR_WIDTH-1:0] LP_ADDR_PROG_SCHEDULE_0 = 12'h038; // 7
+localparam [C_ADDR_WIDTH-1:0] LP_ADDR_PROC_SCHEDULE_0 = 12'h040; // 8
+
+localparam integer LP_SM_WIDTH = 2;
+localparam [LP_SM_WIDTH-1:0] SM_WRIDLE  = 2'd0;
+localparam [LP_SM_WIDTH-1:0] SM_WRDATA  = 2'd1;
+localparam [LP_SM_WIDTH-1:0] SM_WRRESP  = 2'd2;
+localparam [LP_SM_WIDTH-1:0] SM_WRRESET = 2'd3;
+localparam [LP_SM_WIDTH-1:0] SM_RDIDLE  = 2'd0;
+localparam [LP_SM_WIDTH-1:0] SM_RDDATA  = 2'd1;
+localparam [LP_SM_WIDTH-1:0] SM_RDRESET = 2'd3;
+
+///////////////////////////////////////////////////////////////////////////////
+// Wires and Variables
+///////////////////////////////////////////////////////////////////////////////
+reg  [LP_SM_WIDTH-1:0]  wstate = SM_WRRESET;
+reg  [LP_SM_WIDTH-1:0]  wnext ;
+reg  [C_ADDR_WIDTH-1:0] waddr ;   // write address captured at the AW handshake
+wire [C_DATA_WIDTH-1:0] wmask ;   // wstrb expanded to one bit per data bit
+wire                    aw_hs ;
+wire                    w_hs ;
+reg  [LP_SM_WIDTH-1:0]  rstate = SM_RDRESET;
+reg  [LP_SM_WIDTH-1:0]  rnext ;
+reg  [C_DATA_WIDTH-1:0] rdata_r ;
+wire                    ar_hs ;
+wire [C_ADDR_WIDTH-1:0] raddr ;
+// internal registers
+reg                     int_ap_start = 1'b0;
+wire ap_done = 1'b0, int_ap_done = 1'b0, int_ap_idle = 1'b0; // were referenced but never declared (does not elaborate); NOTE(review): constant-0 placeholders, no done/idle source exists in this module - ap_start stays set until areset
+reg  [16-1:0]           int_tuple_numcls  = 16'd0;
+reg  [8-1:0]            int_treeDepth     = 8'd0;
+reg  [8-1:0]            int_puTrees       = 8'd0;
+reg  [8-1:0]            int_prog_schedule = 8'd0;
+reg  [8-1:0]            int_proc_schedule = 8'd0;
+reg  [32-1:0]           int_outputNumCLs  = 32'd0;
+reg  [16-1:0]           int_treew_numcls  = 16'd0;
+reg  [16-1:0]           int_treef_numcls  = 16'd0;
+
+///////////////////////////////////////////////////////////////////////////////
+// Begin RTL
+///////////////////////////////////////////////////////////////////////////////
+
+//------------------------AXI write fsm------------------
+assign awready = (wstate == SM_WRIDLE);
+assign wready  = (wstate == SM_WRDATA);
+assign bresp   = 2'b00;  // OKAY
+assign bvalid  = (wstate == SM_WRRESP);
+
+genvar i;
+generate for (i = 0; i < C_DATA_WIDTH/8; i=i+1) begin: wmask_g
+  assign wmask[8*i+7:8*i] = {8{wstrb[i]}};
+end
+endgenerate
+
+assign aw_hs = awvalid & awready;
+assign w_hs  = wvalid & wready;
+
+// wstate
+always @(posedge aclk) begin
+  if (areset)
+    wstate <= SM_WRRESET;
+  else if (aclk_en)
+    wstate <= wnext;
+end
+
+// wnext: IDLE -(awvalid)-> DATA -(wvalid)-> RESP -(bready)-> IDLE
+always @(*) begin
+  case (wstate)
+    SM_WRIDLE:
+      if (awvalid)
+        wnext = SM_WRDATA;
+      else
+        wnext = SM_WRIDLE;
+    SM_WRDATA:
+      if (wvalid)
+        wnext = SM_WRRESP;
+      else
+        wnext = SM_WRDATA;
+    SM_WRRESP:
+      if (bready)
+        wnext = SM_WRIDLE;
+      else
+        wnext = SM_WRRESP;
+    // SM_WRRESET
+    default:
+      wnext = SM_WRIDLE;
+  endcase
+end
+
+// waddr
+always @(posedge aclk) begin
+  if (aclk_en) begin
+    if (aw_hs)
+      waddr <= awaddr;
+  end
+end
+
+//------------------------AXI read fsm-------------------
+assign arready = (rstate == SM_RDIDLE);
+assign rdata   = rdata_r;
+assign rresp   = 2'b00;  // OKAY
+assign rvalid  = (rstate == SM_RDDATA);
+assign ar_hs   = arvalid & arready;
+assign raddr   = araddr;
+
+// rstate
+always @(posedge aclk) begin
+  if (areset)
+    rstate <= SM_RDRESET;
+  else if (aclk_en)
+    rstate <= rnext;
+end
+
+// rnext: IDLE -(arvalid)-> DATA -(rready)-> IDLE
+always @(*) begin
+  case (rstate)
+    SM_RDIDLE:
+      if (arvalid)
+        rnext = SM_RDDATA;
+      else
+        rnext = SM_RDIDLE;
+    SM_RDDATA:
+      if (rready & rvalid)
+        rnext = SM_RDIDLE;
+      else
+        rnext = SM_RDDATA;
+    // SM_RDRESET:
+    default:
+      rnext = SM_RDIDLE;
+  endcase
+end
+
+// rdata_r: loaded once at the AR handshake and held until the next one
+always @(posedge aclk) begin
+  if (aclk_en) begin
+    if (ar_hs) begin
+      rdata_r <= {C_DATA_WIDTH{1'b0}};
+      case (raddr)
+        LP_ADDR_AP_CTRL: begin
+          rdata_r[0] <= int_ap_start;
+          rdata_r[1] <= int_ap_done;
+          rdata_r[2] <= int_ap_idle;
+          rdata_r[3+:C_DATA_WIDTH-3] <= {C_DATA_WIDTH-3{1'b0}};
+        end
+        LP_ADDR_TUPLENUMCLS_0: begin
+          rdata_r <= {16'b0, int_tuple_numcls[0+:16]};
+        end
+        LP_ADDR_TREEDEPTH_0: begin
+          rdata_r <= {24'b0, int_treeDepth[0+:8]};
+        end
+        LP_ADDR_PUTREES_0: begin
+          rdata_r <= {24'b0, int_puTrees[0+:8]};
+        end
+        LP_ADDR_OUTPUTNUMCLS_0: begin
+          rdata_r <= int_outputNumCLs[0+:32];
+        end
+        LP_ADDR_TREEWNUMCLS_0: begin
+          rdata_r <= {16'b0, int_treew_numcls[0+:16]};
+        end
+        LP_ADDR_TREEFNUMCLS_0: begin
+          rdata_r <= {16'b0, int_treef_numcls[0+:16]};
+        end
+        LP_ADDR_PROG_SCHEDULE_0: rdata_r <= {24'b0, int_prog_schedule[0+:8]}; // readback added: register was write-only
+        LP_ADDR_PROC_SCHEDULE_0: rdata_r <= {24'b0, int_proc_schedule[0+:8]}; // readback added: register was write-only
+        default: rdata_r <= {C_DATA_WIDTH{1'b0}};
+      endcase
+    end
+  end
+end
+
+//------------------------Register logic-----------------
+assign ap_start      = int_ap_start;
+assign tuple_numcls  = int_tuple_numcls;
+assign treeDepth     = int_treeDepth;
+assign puTrees       = int_puTrees;
+assign outputNumCLs  = int_outputNumCLs;
+assign proc_schedule = int_proc_schedule;
+assign prog_schedule = int_prog_schedule;
+
+
+assign tree_weights_numcls_minus_one       = int_treew_numcls;
+assign tree_feature_index_numcls_minus_one = int_treef_numcls;
+
+// int_ap_start: set by writing 1 to bit 0 of AP_CTRL, cleared by ap_done
+always @(posedge aclk) begin
+  if (areset)
+    int_ap_start <= 1'b0;
+  else if (aclk_en) begin
+    if (w_hs && waddr[11:0] == LP_ADDR_AP_CTRL && wstrb[0] && wdata[0])
+      int_ap_start <= 1'b1;
+    else if (ap_done)
+      int_ap_start <= 1'b0;
+  end
+end
+
+// int_tuple_numcls[16-1:0] (byte lanes merged through wmask, as for every register below)
+always @(posedge aclk) begin
+  if (areset)
+    int_tuple_numcls[0+:16] <= 16'd0;
+  else if (aclk_en) begin
+    if (w_hs && waddr[11:0] == LP_ADDR_TUPLENUMCLS_0)
+      int_tuple_numcls[0+:16] <= (wdata[0+:16] & wmask[0+:16]) | (int_tuple_numcls[0+:16] & ~wmask[0+:16]);
+  end
+end
+
+// int_treeDepth[8-1:0]
+always @(posedge aclk) begin
+  if (areset)
+    int_treeDepth[0+:8] <= 8'd0;
+  else if (aclk_en) begin
+    if (w_hs && waddr[11:0] == LP_ADDR_TREEDEPTH_0)
+      int_treeDepth[0+:8] <= (wdata[0+:8] & wmask[0+:8]) | (int_treeDepth[0+:8] & ~wmask[0+:8]);
+  end
+end
+
+// int_puTrees[8-1:0]
+always @(posedge aclk) begin
+  if (areset)
+    int_puTrees[0+:8] <= 8'd0;
+  else if (aclk_en) begin
+    if (w_hs && waddr[11:0] == LP_ADDR_PUTREES_0)
+      int_puTrees[0+:8] <= (wdata[0+:8] & wmask[0+:8]) | (int_puTrees[0+:8] & ~wmask[0+:8]);
+  end
+end
+
+// int_outputNumCLs[32-1:0]
+always @(posedge aclk) begin
+  if (areset)
+    int_outputNumCLs[0+:32] <= 32'd0;
+  else if (aclk_en) begin
+    if (w_hs && waddr[11:0] == LP_ADDR_OUTPUTNUMCLS_0)
+      int_outputNumCLs[0+:32] <= (wdata[0+:32] & wmask[0+:32]) | (int_outputNumCLs[0+:32] & ~wmask[0+:32]);
+  end
+end
+
+// int_treew_numcls[16-1:0]
+always @(posedge aclk) begin
+  if (areset)
+    int_treew_numcls[0+:16] <= 16'd0;
+  else if (aclk_en) begin
+    if (w_hs && waddr[11:0] == LP_ADDR_TREEWNUMCLS_0)
+      int_treew_numcls[0+:16] <= (wdata[0+:16] & wmask[0+:16]) | (int_treew_numcls[0+:16] & ~wmask[0+:16]);
+  end
+end
+
+// int_treef_numcls[16-1:0]
+always @(posedge aclk) begin
+  if (areset)
+    int_treef_numcls[0+:16] <= 16'd0;
+  else if (aclk_en) begin
+    if (w_hs && waddr[11:0] == LP_ADDR_TREEFNUMCLS_0)
+      int_treef_numcls[0+:16] <= (wdata[0+:16] & wmask[0+:16]) | (int_treef_numcls[0+:16] & ~wmask[0+:16]);
+  end
+end
+
+// int_prog_schedule[8-1:0]
+always @(posedge aclk) begin
+  if (areset)
+    int_prog_schedule[0+:8] <= 8'd0;
+  else if (aclk_en) begin
+    if (w_hs && waddr[11:0] == LP_ADDR_PROG_SCHEDULE_0)
+      int_prog_schedule[0+:8] <= (wdata[0+:8] & wmask[0+:8]) | (int_prog_schedule[0+:8] & ~wmask[0+:8]);
+  end
+end
+
+// int_proc_schedule[8-1:0]
+always @(posedge aclk) begin
+  if (areset)
+    int_proc_schedule[0+:8] <= 8'd0;
+  else if (aclk_en) begin
+    if (w_hs && waddr[11:0] == LP_ADDR_PROC_SCHEDULE_0)
+      int_proc_schedule[0+:8] <= (wdata[0+:8] & wmask[0+:8]) | (int_proc_schedule[0+:8] & ~wmask[0+:8]);
+  end
+end
+
+
+endmodule
+
+`default_nettype wire
+
diff --git a/hw/hdl/operators/hll/hll_slave.sv b/hw/hdl/operators/hll/hll_slave.sv
new file mode 100644
index 00000000..9e4d4024
--- /dev/null
+++ b/hw/hdl/operators/hll/hll_slave.sv
@@ -0,0 +1,237 @@
+/**
+ * HLL config slave (AXI4-Lite control/status/result registers)
+ */
+module hll_slave (
+  input  logic              aclk,
+  input  logic              aresetn,
+
+  AXI4L.s                   axi_ctrl,
+
+  input  logic              resultValid,
+  input  logic [63:0]       result,
+  output logic              resetn_hll
+);
+
+//`define DEBUG_CNFG_SLAVE
+
+// -- Decl ----------------------------------------------------------
+// ------------------------------------------------------------------
+
+// Constants
+localparam integer N_REGS = 3;
+localparam integer ADDR_LSB = (AXIL_DATA_BITS/32) + 1;
+localparam integer ADDR_MSB = $clog2(N_REGS);
+localparam integer SLV_ADDR_BITS = ADDR_LSB + ADDR_MSB;
+
+// Internal registers
+logic [SLV_ADDR_BITS-1:0] axi_awaddr;
+logic                     axi_awready;
+logic [SLV_ADDR_BITS-1:0] axi_araddr;
+logic                     axi_arready;
+logic [1:0]               axi_bresp;
+logic                     axi_bvalid;
+logic
axi_wready;
+logic [AXIL_DATA_BITS-1:0] axi_rdata;
+logic [1:0]               axi_rresp;
+logic                     axi_rvalid;
+
+// Registers
+logic [N_REGS-1:0][AXIL_DATA_BITS-1:0] slv_reg;
+logic                     slv_reg_rden;
+logic                     slv_reg_wren;
+logic [AXIL_DATA_BITS-1:0] slv_data_out; // not used in this module
+logic                     aw_en;
+
+logic done; // not used in this module
+
+// -- Def -----------------------------------------------------------
+// ------------------------------------------------------------------
+
+/* -- Register map -----------------------------------------------------------------------
+/ 0 (RW) : Control
+/ 1 (RO) : Status
+/ 2 (RO) : Result
+*/
+
+// Write process
+assign slv_reg_wren = axi_wready && axi_ctrl.wvalid && axi_awready && axi_ctrl.awvalid;
+
+always_ff @(posedge aclk, negedge aresetn) begin
+  if ( aresetn == 1'b0 ) begin
+    for (int i = 0; i < N_REGS; i++) begin
+      slv_reg[i] <= 0;
+    end
+  end
+  else begin
+    slv_reg[0][0] <= 0; // control bit 0 self-clears, so a write of 1 yields a one-cycle pulse
+
+    if(resultValid) begin
+      slv_reg[1][0] <= 1'b1;   // status: result available
+      slv_reg[2] <= result;
+    end
+    else if(slv_reg[0][0]) begin
+      slv_reg[1][0] <= 1'b0;   // the control pulse clears the status flag
+    end
+
+    if(slv_reg_wren) begin
+      case (axi_awaddr[ADDR_LSB+ADDR_MSB-1:ADDR_LSB])
+        2'h0: begin // Control (only byte 0 is writable)
+          for (int i = 0; i < 1; i++) begin
+            if(axi_ctrl.wstrb[i]) begin
+              slv_reg[0][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8];
+            end
+          end
+        end
+        default : ;
+      endcase
+    end
+  end
+end
+
+assign resetn_hll = ~slv_reg[0][0]; // active-low pulse while control bit 0 is set
+
+// Read process
+assign slv_reg_rden = axi_arready & axi_ctrl.arvalid & ~axi_rvalid;
+
+always_ff @(posedge aclk, negedge aresetn) begin
+  if( aresetn == 1'b0 ) begin
+    axi_rdata <= 0;
+  end
+  else begin
+    if(slv_reg_rden) begin
+      axi_rdata <= 0; // fix: clear only when a new read is latched, so rdata stays stable while rvalid waits for rready
+      case (axi_araddr[ADDR_LSB+ADDR_MSB-1:ADDR_LSB])
+        2'h1: // Status
+          axi_rdata[0] <= slv_reg[1][0];
+        2'h2: // Result
+          axi_rdata <= slv_reg[2];
+        default: ; // control and unmapped addresses read as 0
+      endcase
+    end
+  end
+end
+
+// I/O
+assign axi_ctrl.awready = axi_awready;
+assign axi_ctrl.arready = axi_arready;
+assign axi_ctrl.bresp = axi_bresp;
+assign axi_ctrl.bvalid = axi_bvalid;
+assign axi_ctrl.wready = axi_wready;
+assign axi_ctrl.rdata = axi_rdata;
+assign axi_ctrl.rresp = axi_rresp;
+assign axi_ctrl.rvalid = axi_rvalid;
+
+// awready and awaddr (address accepted only when awvalid and wvalid are both present; aw_en blocks a second accept until the response is taken)
+always_ff @(posedge aclk, negedge aresetn) begin
+  if ( aresetn == 1'b0 )
+    begin
+      axi_awready <= 1'b0;
+      axi_awaddr <= 0;
+      aw_en <= 1'b1;
+    end
+  else
+    begin
+      if (~axi_awready && axi_ctrl.awvalid && axi_ctrl.wvalid && aw_en)
+        begin
+          axi_awready <= 1'b1;
+          aw_en <= 1'b0;
+          axi_awaddr <= axi_ctrl.awaddr;
+        end
+      else if (axi_ctrl.bready && axi_bvalid)
+        begin
+          aw_en <= 1'b1;
+          axi_awready <= 1'b0;
+        end
+      else
+        begin
+          axi_awready <= 1'b0;
+        end
+    end
+end
+
+// arready and araddr (one-cycle arready pulse; the address is captured on that pulse)
+always_ff @(posedge aclk, negedge aresetn) begin
+  if ( aresetn == 1'b0 )
+    begin
+      axi_arready <= 1'b0;
+      axi_araddr <= 0;
+    end
+  else
+    begin
+      if (~axi_arready && axi_ctrl.arvalid)
+        begin
+          axi_arready <= 1'b1;
+          axi_araddr <= axi_ctrl.araddr;
+        end
+      else
+        begin
+          axi_arready <= 1'b0;
+        end
+    end
+end
+
+// bvalid and bresp (response is always OKAY)
+always_ff @(posedge aclk, negedge aresetn) begin
+  if ( aresetn == 1'b0 )
+    begin
+      axi_bvalid <= 0;
+      axi_bresp <= 2'b0;
+    end
+  else
+    begin
+      if (axi_awready && axi_ctrl.awvalid && ~axi_bvalid && axi_wready && axi_ctrl.wvalid)
+        begin
+          axi_bvalid <= 1'b1;
+          axi_bresp <= 2'b0;
+        end
+      else
+        begin
+          if (axi_ctrl.bready && axi_bvalid)
+            begin
+              axi_bvalid <= 1'b0;
+            end
+        end
+    end
+end
+
+// wready (pulsed in the same cycle as awready)
+always_ff @(posedge aclk, negedge aresetn) begin
+  if ( aresetn == 1'b0 )
+    begin
+      axi_wready <= 1'b0;
+    end
+  else
+    begin
+      if (~axi_wready && axi_ctrl.wvalid && axi_ctrl.awvalid && aw_en )
+        begin
+          axi_wready <= 1'b1;
+        end
+      else
+        begin
+          axi_wready <= 1'b0;
+        end
+    end
+end
+
+// rvalid and rresp (1Del?)
+always_ff @(posedge aclk, negedge aresetn) begin
+  if ( aresetn == 1'b0 )
+    begin
+      axi_rvalid <= 0;
+      axi_rresp <= 0;
+    end
+  else
+    begin
+      if (axi_arready && axi_ctrl.arvalid && ~axi_rvalid)
+        begin
+          axi_rvalid <= 1'b1;
+          axi_rresp <= 2'b0;
+        end
+      else if (axi_rvalid && axi_ctrl.rready)
+        begin
+          axi_rvalid <= 1'b0; // read data handshake completed
+        end
+    end
+end
+
+endmodule // hll_slave
\ No newline at end of file
diff --git a/hw/hdl/operators/hll/hll_user_logic.sv b/hw/hdl/operators/hll/hll_user_logic.sv
new file mode 100644
index 00000000..e3410272
--- /dev/null
+++ b/hw/hdl/operators/hll/hll_user_logic.sv
@@ -0,0 +1,59 @@
+`timescale 1ns / 1ps
+
+import lynxTypes::*;
+
+/**
+ * User logic
+ * Registers the sink stream, feeds it to hyperloglog_0 and exposes the result through hll_slave.
+ */
+module design_user_logic_6 (
+    // Clock and reset
+    input  wire                 aclk,
+    input  wire[0:0]            aresetn,
+
+    // AXI4 control
+    AXI4L.s                     axi_ctrl,
+
+    // AXI4S
+    AXI4S.m                     axis_src,
+    AXI4S.s                     axis_sink
+);
+
+  /* -- Tie-off unused interfaces and signals ----------------------------- */
+  //always_comb axi_ctrl.tie_off_s();
+  always_comb axis_src.tie_off_m();
+  //always_comb axis_sink.tie_off_s();
+
+  /* -- USER LOGIC -------------------------------------------------------- */
+  AXI4S axis_sink_r();
+  axis_reg_rtl inst_r_in (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_sink), .axis_out(axis_sink_r));
+
+  logic resultValid;
+  logic [63:0] result;
+  logic resetn_hll;
+
+  // Register slave: latches result/resultValid and generates resetn_hll from its control register
+  hll_slave inst_hll_slv (
+    .aclk(aclk),
+    .aresetn(aresetn),
+    .axi_ctrl(axi_ctrl),
+    .resultValid(resultValid),
+    .result(result),
+    .resetn_hll(resetn_hll)
+  );
+
+  hyperloglog_0 inst_hll (
+    .s_axis_input_tuple_TVALID(axis_sink_r.tvalid),
+    .s_axis_input_tuple_TREADY(axis_sink_r.tready),
+    .s_axis_input_tuple_TDATA(axis_sink_r.tdata),
+    .s_axis_input_tuple_TKEEP(axis_sink_r.tkeep),
+    .s_axis_input_tuple_TLAST(axis_sink_r.tlast),
+    .regResult_V(result),
+    .res_valid_V(resultValid),
+    .ap_clk(aclk),
+    .ap_rst_n(resetn_hll) // NOTE(review): reset comes only from the register slave, not from aresetn - confirm this is intended
+  );
+
+
+endmodule
+
diff --git a/hw/hdl/operators/kmeans/Formatter.sv b/hw/hdl/operators/kmeans/Formatter.sv
new file mode 100755
index 00000000..b7129884
--- /dev/null
+++ b/hw/hdl/operators/kmeans/Formatter.sv
@@ -0,0 +1,153 @@
+// Formatter: hands centroid cache lines to c_lane_splitter and deals tuple cache lines across NUM_PIPELINE FIFOs.
+import kmeansTypes::*;
+
+module Formatter (
+  input wire clk, // Clock
+  input wire rst_n, // Active-low reset; registered once (rst_n_reg) and then used synchronously below
+
+  input wire [NUM_CLUSTER_BITS:0] num_cluster,// the actual number of cluster that will be used
+  input wire [MAX_DEPTH_BITS:0] data_dim, //input the actual dimension of the data
+
+  //interface to fetch engine
+  input wire [511:0] tuple_cl,
+  input wire tuple_cl_valid,
+
+  input wire [511:0] centroid_cl, //not in bit-weaving format
+  input wire centroid_cl_valid,
+
+  output wire centroid_cl_ready,
+
+  //interface to pipeline
+  output wire [32-1:0] centroid,
+  output wire centroid_valid,
+  output wire last_dim_of_all_centroid,
+  output wire last_dim_of_one_centroid,
+
+  //interface to pipelines
+  output reg [NUM_PIPELINE-1:0][32-1:0] tuple,
+  output reg tuple_valid,
+  output reg last_dim_of_one_tuple,
+
+  //debug counters
+  output wire [2:0] [31:0] formatter_debug_cnt // NOTE(review): nothing in this module drives this output - confirm it is meant to be left unconnected
+
+);
+
+
+reg rst_n_reg;
+wire c_lane_ready;
+
+always @ (posedge clk) begin
+  rst_n_reg <= rst_n;
+end
+
+assign centroid_cl_ready = c_lane_ready;
+
+//--------------------split the centroid cache line---------------------//
+
+
+c_lane_splitter c_lane_splitter
+(
+  .clk (clk),
+  .rst_n (rst_n_reg),
+  .num_cluster (num_cluster),
+  .data_dim (data_dim),
+  .centroid_cl (centroid_cl),
+  .centroid_cl_valid (centroid_cl_valid),
+  .c_lane_ready (c_lane_ready),
+  .centroid (centroid),
+  .centroid_valid (centroid_valid),
+  .last_dim_of_all_centroid(last_dim_of_all_centroid),
+  .last_dim_of_one_centroid(last_dim_of_one_centroid) );
+
+
+//----------------------distribute the tuples----------------------//
+// data_dim_cnt counts the dimensions received for the current tuple; pipe_index selects the FIFO being filled.
+reg [MAX_DEPTH_BITS:0] data_dim_cnt;
+reg [NUM_PIPELINE_BITS:0] pipe_index;
+
+always @ (posedge clk) begin
+  if(~rst_n_reg) begin
+    data_dim_cnt <= '0;
+    pipe_index <= '0;
+  end
+  else begin
+    if(tuple_cl_valid) begin
+      data_dim_cnt <= data_dim_cnt + 16; //assume the data dimension is multiple of 16
+      if(data_dim_cnt + 16 >= data_dim) begin // tuple complete: later non-blocking assignments override the increment above
+        data_dim_cnt <= '0;
+        pipe_index <= pipe_index + 1'b1;
+        if(pipe_index == NUM_PIPELINE-1) begin
+          pipe_index <= '0; // wrap back to the first FIFO
+        end
+      end
+    end
+  end
+end
+
+wire [NUM_PIPELINE-1:0] tuple_fifo_we, tuple_fifo_re, tuple_fifo_valid, tuple_fifo_empty, tuple_fifo_full, tuple_almost_full;
+wire [NUM_PIPELINE-1:0][512-1:0] tuple_fifo_dout, tuple_fifo_din;
+reg [NUM_PIPELINE_BITS:0] multiplex_cnt; // slice index within a FIFO word, compared against 15 below; assumes NUM_PIPELINE_BITS >= 3 - TODO confirm in kmeansTypes
+reg [MAX_DEPTH_BITS:0] sent_dim_cnt;     // dimensions already sent for the current tuple
+
+generate
+  for (genvar n = 0; n < NUM_PIPELINE; n++) begin: pipe_buffer
+    // one 512-bit FIFO per pipeline; written only while pipe_index == n
+    assign tuple_fifo_we[n] = tuple_cl_valid & (pipe_index == n);
+    assign tuple_fifo_din[n] = tuple_cl;
+    assign tuple_fifo_re[n] = (&tuple_fifo_valid) & (multiplex_cnt == 15); // all FIFOs pop together after slice 15
+
+    quick_fifo #(.FIFO_WIDTH(512), .FIFO_DEPTH_BITS(BUFFER_DEPTH_BITS))
+    tuple_fifo
+    (
+      .clk,
+      .reset_n(rst_n_reg),
+      .we(tuple_fifo_we[n]),
+      .din(tuple_fifo_din[n]),
+      .re(tuple_fifo_re[n]),
+      .valid(tuple_fifo_valid[n]),
+      .dout(tuple_fifo_dout[n]), //contains the data and the terminate bit
+      .count(),
+      .empty(tuple_fifo_empty[n]),
+      .full(tuple_fifo_full[n]),
+      .almostfull(tuple_almost_full[n]) // NOTE(review): full/almostfull are not used for back-pressure in this module
+    );
+
+  end
+endgenerate
+
+always @ (posedge clk) begin
+  if(~rst_n_reg) begin
+    multiplex_cnt <= '0;
+    sent_dim_cnt <= '0;
+  end
+  else begin
+    if(&tuple_fifo_valid) begin // advance only while every FIFO has a word
+      multiplex_cnt <= multiplex_cnt + 1'b1;
+      sent_dim_cnt <= sent_dim_cnt + 1'b1;
+      if(multiplex_cnt == 15) begin
+        multiplex_cnt <= '0;
+      end
+      if(sent_dim_cnt == (data_dim -1)) begin
+        sent_dim_cnt <= '0;
+      end
+    end
+  end
+end
+
+//-----------------------output path----------------//
+generate
+  for (genvar i = 0; i < NUM_PIPELINE; i++) begin: formatter_output
+    always @ (posedge clk) begin
+      tuple[i] <= tuple_fifo_dout[i][multiplex_cnt*MAX_DIM_WIDTH +: MAX_DIM_WIDTH]; // assumes MAX_DIM_WIDTH matches the 32-bit tuple lane - TODO confirm
+    end
+  end
+endgenerate
+
+always @ (posedge clk) begin
+  tuple_valid <= &tuple_fifo_valid;
+  last_dim_of_one_tuple <= (&tuple_fifo_valid) & (sent_dim_cnt == (data_dim -1));
+end
+
+
+endmodule
diff --git a/hw/hdl/operators/kmeans/adder_tree.sv b/hw/hdl/operators/kmeans/adder_tree.sv
new file mode 100755
index 00000000..f14d196f
--- /dev/null
+++ b/hw/hdl/operators/kmeans/adder_tree.sv
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2017 - 2018, Zeke Wang, Systems Group, ETH Zurich
+ *
+ * This hardware operator is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+import kmeansTypes::*;
+
+
+module kmeans_adder_tree #(
+  parameter TREE_DEPTH = NUM_PIPELINE_BITS,
+  parameter TREE_WIDTH = 2**TREE_DEPTH
+)(
+  input wire clk,
+  input wire rst_n,
+  //--------------------------Begin/Stop-----------------------------//
+
+  //---------------------Input: External Memory rd response-----------------//
+  input wire [TREE_WIDTH-1:0] [63:0] v_input, //
+  input wire v_input_valid, //
+
+  //------------------Output: disptach resp data to b of each bank---------------//
+  output wire [63:0] v_output,
+  output wire v_output_valid
+);
+
+
+reg rst_n_reg;
+
+always @ (posedge clk) begin
+  rst_n_reg <= rst_n;
+end
+
+reg [63:0] v_intermdiate_result[TREE_DEPTH-1:0][TREE_WIDTH-1:0];
+reg v_intermdiate_result_valid[TREE_DEPTH-1:0];
+
+reg [63:0] v_intermdiate_result_reg[TREE_DEPTH-1:0][TREE_WIDTH-1:0];
+reg v_intermdiate_result_valid_reg[TREE_DEPTH-1:0];
+
+
+genvar d, w, b;
+generate
+  for( d = 0; d < TREE_DEPTH; d = d + 1) begin: inst_adder_tree_depth
+    for( w = 0; w < ( TREE_WIDTH/(2**(d+1)) ); w = w + 1) begin:
inst_adder_tree_width + always @(posedge clk) begin + if(d == 0) begin + v_intermdiate_result_reg[d][w] <= v_input[2*w] + v_input[2*w+1]; + v_intermdiate_result[d][w] <= v_intermdiate_result_reg[d][w]; + end + else if(d > 0) begin + // v_intermdiate_result_reg[d-1][2*w] <= v_intermdiate_result[d-1][2*w]; + // v_intermdiate_result_reg[d-1][2*w+1]<=v_intermdiate_result[d-1][2*w+1]; + // v_intermdiate_result[d][w] <= v_intermdiate_result_reg[d-1][2*w] + v_intermdiate_result_reg[d-1][2*w+1]; + v_intermdiate_result_reg[d][w] <= v_intermdiate_result[d-1][2*w] + v_intermdiate_result[d-1][2*w+1]; + v_intermdiate_result [d][w] <= v_intermdiate_result_reg[d][w]; + end + else begin + v_intermdiate_result[d][w] <= v_intermdiate_result[d-1][2*w] + v_intermdiate_result[d-1][2*w+1]; + end + end + end + end +endgenerate + +generate + for( d = 0; d < TREE_DEPTH; d = d + 1) begin: inst_adder_tree_valid + + always @(posedge clk) + begin + if(~rst_n_reg) + v_intermdiate_result_valid[d] <= 1'b0; + else + begin + if(d == 0) begin + v_intermdiate_result_valid_reg[d] <= v_input_valid; + v_intermdiate_result_valid[d] <= v_intermdiate_result_valid_reg[d]; + end + else if(d > 0) begin + v_intermdiate_result_valid_reg[d] <= v_intermdiate_result_valid[d-1]; + v_intermdiate_result_valid[d] <= v_intermdiate_result_valid_reg[d] ; + end + else begin + v_intermdiate_result_valid[d] <= v_intermdiate_result_valid[d-1]; + end + end + end +end +endgenerate + +assign v_output = v_intermdiate_result[TREE_DEPTH-1][0]; +assign v_output_valid = v_intermdiate_result_valid[TREE_DEPTH-1]; + + + +endmodule \ No newline at end of file diff --git a/hw/hdl/operators/kmeans/adder_tree_low_resource.sv b/hw/hdl/operators/kmeans/adder_tree_low_resource.sv new file mode 100755 index 00000000..9e2e71ba --- /dev/null +++ b/hw/hdl/operators/kmeans/adder_tree_low_resource.sv @@ -0,0 +1,91 @@ +/* + * Copyright 2017 - 2018, Zeke Wang, Systems Group, ETH Zurich + * + * This hardware operator is free software: you can 
redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +import kmeansTypes::*; + + +module kmeans_adder_tree_low_resource #( + parameter TREE_DEPTH = NUM_PIPELINE_BITS, + parameter TREE_WIDTH = 2**TREE_DEPTH, + parameter BIT_WIDTH = 32 +)( + input wire clk, + input wire rst_n, + //--------------------------Begin/Stop-----------------------------// + + //---------------------Input: External Memory rd response-----------------// + input wire [TREE_WIDTH-1:0] [BIT_WIDTH-1:0] v_input, // + input wire v_input_valid, // + + //------------------Output: disptach resp data to b of each bank---------------// + output wire [BIT_WIDTH-1:0] v_output, + output wire v_output_valid +); + + +reg rst_n_reg; + +always @ (posedge clk) begin + rst_n_reg <= rst_n; +end + +reg [BIT_WIDTH-1:0] v_intermdiate_result[TREE_DEPTH-1:0][TREE_WIDTH-1:0]; +reg v_intermdiate_result_valid[TREE_DEPTH-1:0]; + + +genvar d, w, b; +generate + for( d = 0; d < TREE_DEPTH; d = d + 1) begin: inst_adder_tree_depth + for( w = 0; w < ( TREE_WIDTH/(2**(d+1)) ); w = w + 1) begin: inst_adder_tree_width + always @(posedge clk) begin + if(d == 0) begin + v_intermdiate_result[d][w] <= v_input[2*w] + v_input[2*w+1]; + end + else begin + v_intermdiate_result[d][w] <= v_intermdiate_result[d-1][2*w] + v_intermdiate_result[d-1][2*w+1]; + end + end + end + end +endgenerate + +generate + for( d = 0; d < TREE_DEPTH; d = d + 1) begin: inst_adder_tree_valid + + always @(posedge clk) + begin + 
if(~rst_n_reg) + v_intermdiate_result_valid[d] <= 1'b0; + else + begin + if(d == 0) begin + v_intermdiate_result_valid[d] <= v_input_valid; + end + else begin + v_intermdiate_result_valid[d] <= v_intermdiate_result_valid[d-1]; + end + end + end +end +endgenerate + +assign v_output = v_intermdiate_result[TREE_DEPTH-1][0]; +assign v_output_valid = v_intermdiate_result_valid[TREE_DEPTH-1]; + + + +endmodule \ No newline at end of file diff --git a/hw/hdl/operators/kmeans/agg_div.sv b/hw/hdl/operators/kmeans/agg_div.sv new file mode 100755 index 00000000..c825fb6c --- /dev/null +++ b/hw/hdl/operators/kmeans/agg_div.sv @@ -0,0 +1,134 @@ +`default_nettype none +import kmeansTypes::*; + +module agg_div +( + input wire clk, // Clock + input wire rst_n, // Asynchronous reset active low + + input wire start_operator, + + input wire [MAX_DEPTH_BITS:0] data_dim, //input the actual dimension of the data + input wire [NUM_CLUSTER_BITS:0] num_cluster, //input the actual number of cluster + + //interface with pipelines + // output wire agg_ready, + // input wire [NUM_PIPELINE-1:0] accu_finish, + + input wire [63:0] agg_data, + input wire agg_valid, + + //interface with the + //input wire write_engine_ready, + //input wire update_ready, + output wire [MAX_DIM_WIDTH-1:0] update, + output wire update_valid, + output wire update_last, + output wire update_last_dim, + + //debug counter + output reg [7:0][31:0] agg_div_debug_cnt + +); + + wire [63:0] div_sum; + wire [63:0] div_count; + wire div_valid; + wire div_last_dim; + wire div_last; + + wire [63:0] sse; + wire sse_valid; + wire sse_converge; + + wire div_dout_last_dim; + wire div_dout_last; + wire [MAX_DIM_WIDTH-1:0] div_dout; + wire div_dout_valid; + + reg [MAX_DEPTH_BITS:0] data_dim_reg; + reg [NUM_CLUSTER_BITS:0] num_cluster_reg; + + wire [7:0] [31:0] k_means_aggregation_debug_cnt; + wire [31:0] k_means_division_debug_cnt; + + reg rst_n_reg; + reg start_operator_reg; + always @ (posedge clk) begin + rst_n_reg <= rst_n; + 
start_operator_reg <= start_operator; + end + + k_means_aggregation agg ( + .clk (clk), + .rst_n (rst_n_reg), + .start_operator (start_operator_reg), + .data_dim (data_dim_reg), + .num_cluster (num_cluster_reg), + // .agg_ready (agg_ready), + // .accu_finish (accu_finish), + .agg_data (agg_data), + .agg_valid (agg_valid), + .div_sum (div_sum), + .div_count (div_count), + .div_valid (div_valid), + .div_last_dim(div_last_dim), + .div_last (div_last), + .sse (sse), + .sse_valid (sse_valid), + .sse_converge(sse_converge), + .k_means_aggregation_debug_cnt(k_means_aggregation_debug_cnt) + ); + + k_means_division division + ( + .clk (clk), + .rst_n (rst_n_reg), + .start_operator (start_operator_reg), + .div_sum (div_sum), + .div_count (div_count), + .div_valid (div_valid), + .div_last_dim (div_last_dim), + .div_last (div_last), + .div_dout_last_dim(div_dout_last_dim), + .div_dout_last (div_dout_last), + .div_dout (div_dout), + .div_dout_valid (div_dout_valid), + .k_means_division_debug_cnt(k_means_division_debug_cnt) + ); + + + div_buffer div_buffer( + .clk (clk), + .rst_n (rst_n_reg), + .div_dout (div_dout), + .div_dout_valid (div_dout_valid), + .div_dout_last_dim(div_dout_last_dim), + .div_dout_last (div_dout_last), + //.update_ready (write_engine_ready&update_ready), + .update (update), + .update_valid (update_valid), + .update_last (update_last), + .update_last_dim (update_last_dim) + ); + + + always @ (posedge clk) begin + if(~rst_n_reg) begin + num_cluster_reg <= '0; + data_dim_reg <= '0; + end + else begin + num_cluster_reg <= num_cluster; + data_dim_reg <= data_dim; + end + + // agg_div_debug_cnt[0] <= k_means_aggregation_debug_cnt[0]; + // agg_div_debug_cnt[1] <= k_means_division_debug_cnt; + agg_div_debug_cnt <= k_means_aggregation_debug_cnt; + end + + + +endmodule +`default_nettype wire diff --git a/hw/hdl/operators/kmeans/dist_processor.sv b/hw/hdl/operators/kmeans/dist_processor.sv new file mode 100755 index 00000000..e4881252 --- /dev/null +++ 
b/hw/hdl/operators/kmeans/dist_processor.sv @@ -0,0 +1,85 @@ + +import kmeansTypes::*; + +module dist_processor #(parameter INDEX_PROCESSOR = 0) +( + input wire clk, // Clock + input wire rst_n, // Asynchronous reset active low + + input wire enable, + + //----------- bit serial data---------------// + input wire data_valid_i, + input wire [31:0] data_i, + input wire data_last_dim, + + //-----------input centroid----------------------// + input wire [31:0] centroid_i, + + + //---------------previous processor assignment result---// + input wire min_dist_valid_i, + input wire [63:0] min_dist_i, + input wire [NUM_CLUSTER_BITS:0] cluster_i, + + //--------------current processor assignment result--------// + output wire min_dist_valid_o, + output wire [63:0] min_dist_o, + output wire [NUM_CLUSTER_BITS:0] cluster_o +); + +//////////////////////////////input register////////////////////////////////////////////// + +reg rst_n_reg; +reg enable_reg; +always @(posedge clk ) begin + rst_n_reg <= rst_n; + enable_reg <= enable; +end + + +///////////////////Calculate distance/////////////////////////////// + +wire [63:0] distance; +wire dist_valid; + +mul_accu mul_accu +( + .clk (clk), + .rst_n (rst_n_reg), + .x (centroid_i), + .a_valid (data_valid_i), + .a (data_i), + .a_last_dim (data_last_dim), + .result (distance), + .result_valid(dist_valid) + ); + + +////////////////////////min dist selection///////////////////////////// + +reg min_dist_valid_reg; +reg [63:0] min_dist_reg; +reg [NUM_CLUSTER_BITS:0] cluster_reg; + +always @ (posedge clk) begin + if(~rst_n_reg) begin + min_dist_valid_reg <= 1'b0; + end + else begin + min_dist_reg <= '0; + cluster_reg <= '0; + min_dist_valid_reg <= dist_valid; + if(dist_valid & min_dist_valid_i) begin //dist valid and min dist valid should be set at the same cycle + min_dist_reg <= ((distance<=min_dist_i) & enable_reg) ? distance : min_dist_i; + cluster_reg <= ((distance<=min_dist_i) & enable_reg) ? 
INDEX_PROCESSOR : cluster_i; + end + end +end + +assign min_dist_valid_o = min_dist_valid_reg; +assign min_dist_o = min_dist_reg; +assign cluster_o = cluster_reg; + + +endmodule diff --git a/hw/hdl/operators/kmeans/div_buffer.sv b/hw/hdl/operators/kmeans/div_buffer.sv new file mode 100755 index 00000000..1871f83a --- /dev/null +++ b/hw/hdl/operators/kmeans/div_buffer.sv @@ -0,0 +1,68 @@ +`default_nettype none +import kmeansTypes::*; + +module div_buffer +( + input wire clk, // Clock + input wire rst_n, // Asynchronous reset active low + + //interface with divider + input wire [MAX_DIM_WIDTH-1:0] div_dout, + input wire div_dout_valid, + input wire div_dout_last_dim, + input wire div_dout_last, + + + //input wire update_ready, //assume update and wr_engine will not be over flowed + output reg [MAX_DIM_WIDTH-1:0] update, + output reg update_valid, + output reg update_last, + output reg update_last_dim +); + + logic div_buffer_re, div_buffer_valid; + logic div_buffer_we; + logic [MAX_DIM_WIDTH+1:0] div_buffer_data_din; + logic [MAX_DIM_WIDTH+1:0] div_buffer_data_dout; + reg rst_n_reg; + + always @ (posedge clk) begin + rst_n_reg <= rst_n; + if(~rst_n_reg) begin + update_valid <= 1'b0; + update_last <= 1'b0; + update_last_dim <= 1'b0; + end + else begin + update <= div_buffer_data_dout[MAX_DIM_WIDTH-1:0]; + update_last <= div_buffer_data_dout[MAX_DIM_WIDTH]; + update_last_dim <= div_buffer_data_dout[MAX_DIM_WIDTH+1]; + update_valid <= /*update_ready &*/ div_buffer_valid; + end + end + + + + + quick_fifo #(.FIFO_WIDTH(MAX_DIM_WIDTH+2), .FIFO_DEPTH_BITS(9)) + div_buffer + ( + .clk, + .reset_n(rst_n_reg), + .we(div_buffer_we), + .din(div_buffer_data_din), + .re(div_buffer_re), + .valid(div_buffer_valid), + .dout(div_buffer_data_dout), + .count(), + .empty(), + .full(), + .almostfull() + ); + + assign div_buffer_we = div_dout_valid ; + assign div_buffer_data_din = {div_dout_last_dim,div_dout_last,div_dout}; + assign div_buffer_re = /*update_ready &*/ div_buffer_valid; + 
+endmodule +`default_nettype wire diff --git a/hw/hdl/operators/kmeans/dual_port_ram.sv b/hw/hdl/operators/kmeans/dual_port_ram.sv new file mode 100755 index 00000000..5524b46b --- /dev/null +++ b/hw/hdl/operators/kmeans/dual_port_ram.sv @@ -0,0 +1,68 @@ +// Copyright (c) 2013-2015, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+ + +module dual_port_ram #( + parameter DATA_WIDTH = 32, + parameter ADDR_WIDTH = 8 +) +( + input wire clk, + input wire we, + input wire re, + input wire [ADDR_WIDTH-1:0] raddr, + input wire [ADDR_WIDTH-1:0] waddr, + input wire [DATA_WIDTH-1:0] din, + output reg [DATA_WIDTH-1:0] dout +); + + +`ifdef VENDOR_XILINX + (* ram_extract = "yes", ram_style = "block" *) + reg [DATA_WIDTH-1:0] mem[0:2**ADDR_WIDTH-1]; +`else +/*(* ramstyle = "no_rw_check" *)*/ reg [DATA_WIDTH-1:0] mem[0:2**ADDR_WIDTH-1]; +`endif + + initial + begin + for (int i = 0; i < 2**ADDR_WIDTH; i++) begin + mem [i] = '0; + end + end + + always @(posedge clk) begin + + if (we) + mem[waddr] <= din; + + if (re) + dout <= mem[raddr]; + end + + + +endmodule \ No newline at end of file diff --git a/hw/hdl/operators/kmeans/fetch_engine.sv b/hw/hdl/operators/kmeans/fetch_engine.sv new file mode 100755 index 00000000..05ea2e81 --- /dev/null +++ b/hw/hdl/operators/kmeans/fetch_engine.sv @@ -0,0 +1,219 @@ +// `default_nettype none +import kmeansTypes::*; + +module fetch_engine +( + input wire clk, // Clock + input wire rst_n, // Asynchronous reset active low + input wire start_operator, + + input RuntimeParam rp, + // TX RD + output wire [57:0] um_tx_rd_addr, + output reg [7:0] um_tx_rd_tag, + output reg um_tx_rd_valid, + input wire um_tx_rd_ready, + + // RX RD + input wire [7:0] um_rx_rd_tag, + input wire [511:0] um_rx_data, + input wire um_rx_rd_valid, + output reg um_rx_rd_ready, + + //output to kmeans module + output wire [511:0] tuple_cl, + output wire tuple_cl_valid, + output wire tuple_cl_last, + input wire tuple_cl_ready, + + output wire [511:0] centroid_cl, + output wire centroid_cl_valid, + output wire centroid_cl_last, + input wire centroid_cl_ready, + + + output reg [1:0][31:0] fetch_engine_debug_cnt + +); + + RuntimeParam rp_reg; + reg rst_n_reg; + reg start_operator_reg; + + wire req_sent, req_received; + reg potential_overflow; + reg [31:0] inflight_cnt; + + wire rx_fifo_valid; + wire [511:0] 
rx_fifo_dout; + wire rx_fifo_re; + wire rx_fifo_full; + wire rx_fifo_almostfull; + wire [BUFFER_DEPTH_BITS-1:0] rx_fifo_count; + + always @ (posedge clk) begin + rp_reg <= rp; + rst_n_reg <= rst_n; + start_operator_reg <= start_operator; + end + + + //generate the read address + rd_addr_gen rd_addr_gen ( + .clk (clk), + .rst_n (rst_n_reg), + .start_operator (start_operator_reg), + .um_tx_rd_addr (um_tx_rd_addr), + .um_tx_rd_tag (um_tx_rd_tag), + .um_tx_rd_valid (um_tx_rd_valid), + .um_tx_rd_ready (um_tx_rd_ready), + .potential_overflow(potential_overflow), + .rp (rp_reg)); + + quick_fifo #(.FIFO_WIDTH(512), + .FIFO_DEPTH_BITS(BUFFER_DEPTH_BITS)) + rx_fifo + ( + .clk (clk), + .reset_n (rst_n_reg), + .we (um_rx_rd_ready & um_rx_rd_valid), + .din (um_rx_data), + .re (rx_fifo_re), + .valid (rx_fifo_valid), + .dout (rx_fifo_dout), + .count (rx_fifo_count), + .empty (), + .full (rx_fifo_full), + .almostfull(rx_fifo_almostfull) ); + + assign um_rx_rd_ready = ~rx_fifo_full; + + fetch_engine_output_lane output_lane( + .clk (clk), + .rst_n (rst_n_reg), + .start_operator (start_operator_reg), + .rx_fifo_valid (rx_fifo_valid), + .rx_fifo_dout (rx_fifo_dout), + .rx_fifo_re (rx_fifo_re), + .rp (rp_reg), + .tuple_cl (tuple_cl), + .tuple_cl_valid (tuple_cl_valid), + .tuple_cl_last (tuple_cl_last), + .tuple_cl_ready (tuple_cl_ready), + .centroid_cl (centroid_cl), + .centroid_cl_valid(centroid_cl_valid), + .centroid_cl_last (centroid_cl_last), + .centroid_cl_ready(centroid_cl_ready) + ); + + + //flow control to avoid overflow + assign req_sent = um_tx_rd_ready & um_tx_rd_valid; + assign req_received = um_rx_rd_ready & um_rx_rd_valid; + + always @ (posedge clk) begin + if(~rst_n_reg ) begin + inflight_cnt <= '0; + potential_overflow <= '0; + end + else begin + if(req_sent & !req_received) begin + inflight_cnt <= inflight_cnt + 1'b1; + end + else if((!req_sent & req_received) & (inflight_cnt>0)) begin + inflight_cnt <= inflight_cnt - 1'b1; + end + + potential_overflow <= 
((inflight_cnt + rx_fifo_count )< 2**(BUFFER_DEPTH_BITS-1)) ? 0: 1; + end + end + + + //debug counters + reg [31:0] tuple_cl_cnt, centroid_cl_cnt; + reg [31:0] cl_rec_cnt, cl_req_cnt; + + always @ (posedge clk) begin + if(start_operator_reg ) begin + tuple_cl_cnt <= '0; + centroid_cl_cnt <= '0; + cl_req_cnt <= '0; + cl_rec_cnt <= '0; + end + else begin + if(tuple_cl_valid & tuple_cl_ready) begin + tuple_cl_cnt <= tuple_cl_cnt + 1'b1; + end + if(centroid_cl_valid & centroid_cl_ready) begin + centroid_cl_cnt <= centroid_cl_cnt + 1'b1; + end + if(um_rx_rd_ready & um_rx_rd_valid) begin + cl_rec_cnt <= cl_rec_cnt + 1'b1; + end + if(um_tx_rd_valid & um_tx_rd_ready) begin + cl_req_cnt <= cl_req_cnt + 1'b1; + end + end + end + + always @ (posedge clk) begin + fetch_engine_debug_cnt[0] <= tuple_cl_cnt; + fetch_engine_debug_cnt[1] <= centroid_cl_cnt; + end + +////////////////////////////////////////////////////////////////////////////////////////////////////// +//---------------------------------log file print--------------------------------------------------// +//////////////////////////////////////////////////////////////////////////////////////////////////// +`define LOG_NULL +`ifdef LOG_FILE + int file; + reg file_finished; + initial begin + file = $fopen("/home/harpdev/doppiodb/fpga/operators/k_means_v2/sim_log/fetch_engine.txt","w"); + + if(file) begin + $display("fetch_engine file open successfully\n"); + // $display("output to divider",); + end + else + $display("Failed to open fetch_engine file\n"); + end + + always @ (posedge clk) begin + if(~rst_n_reg) begin + + end + else begin + if(um_rx_rd_valid & um_rx_rd_ready) begin + $fwrite(file,"Rec cacheline%d:",cl_rec_cnt); + $fwrite(file,"%d ", um_rx_data[31:0]); + $fwrite(file,"%d ", um_rx_data[63:32]); + $fwrite(file,"%d ", um_rx_data[95:64]); + $fwrite(file,"%d ", um_rx_data[127:96]); + $fwrite(file,"%d ", um_rx_data[159:128]); + $fwrite(file,"%d ", um_rx_data[191:160]); + $fwrite(file,"%d ", um_rx_data[223:192]); + 
$fwrite(file,"%d ", um_rx_data[255:224]); + $fwrite(file,"%d ", um_rx_data[287:256]); + $fwrite(file,"%d ", um_rx_data[319:288]); + $fwrite(file,"%d ", um_rx_data[351:320]); + $fwrite(file,"%d ", um_rx_data[383:352]); + $fwrite(file,"%d ", um_rx_data[415:384]); + $fwrite(file,"%d ", um_rx_data[447:416]); + $fwrite(file,"%d ", um_rx_data[479:448]); + $fwrite(file,"%d ", um_rx_data[511:480]); + + $fwrite(file,"\n"); + + end + end + end +`endif +//////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////// + + + + +endmodule +// `default_nettype wire diff --git a/hw/hdl/operators/kmeans/fetch_engine_output_lane.sv b/hw/hdl/operators/kmeans/fetch_engine_output_lane.sv new file mode 100755 index 00000000..ff3807f0 --- /dev/null +++ b/hw/hdl/operators/kmeans/fetch_engine_output_lane.sv @@ -0,0 +1,163 @@ +import kmeansTypes::*; + +module fetch_engine_output_lane ( + input wire clk, // Clock + input wire rst_n, // Asynchronous reset active low + + input wire start_operator, + + input RuntimeParam rp, + input wire rx_fifo_valid, + input wire [511:0] rx_fifo_dout, + output wire rx_fifo_re, + + //interface to kmeans module + + output wire [511:0] tuple_cl, + output wire tuple_cl_valid, + output reg tuple_cl_last, + input wire tuple_cl_ready, + + + output wire [511:0] centroid_cl, + output wire centroid_cl_valid, + output reg centroid_cl_last, + input wire centroid_cl_ready + +); + + reg is_centroid, is_tuple; + + //read counter + reg [31:0] rd_cnt; + reg rd_cnt_en, rd_cnt_clr; + + //count how many iterations of data have been sent + reg [15:0] rd_iteration_cnt; + reg rd_iteration_cnt_en, rd_iteration_cnt_clr; + + assign rx_fifo_re = rx_fifo_valid & ((is_centroid & centroid_cl_ready) | (is_tuple & tuple_cl_ready)); + + //data path for tuple + assign tuple_cl_valid = is_tuple & rx_fifo_valid & tuple_cl_ready; + assign tuple_cl = is_tuple ? 
rx_fifo_dout : '0; + + //data path for centroid + assign centroid_cl_valid = is_centroid & rx_fifo_valid & centroid_cl_ready; + assign centroid_cl = is_centroid ? rx_fifo_dout : '0; + + //fsm to count the number of centroid/tuple cachelines and set last bit + typedef enum logic[1:0] {IDLE, CENTROID, TUPLE} state; + state currentState, nextState; + + always_comb begin : proc_fsm + //default + rd_cnt_en = 0; + rd_cnt_clr = 0; + + is_centroid = 0; + is_tuple = 0; + + rd_iteration_cnt_en = 0; + rd_iteration_cnt_clr = 0; + + tuple_cl_last = 0; + centroid_cl_last = 0; + + nextState = currentState; + + case (currentState) + + IDLE: begin + if(start_operator) begin + nextState = CENTROID; + end + end + + //sequentially read the centroids cacheline from the memory + CENTROID: begin + is_centroid = 1; + if(rx_fifo_valid && centroid_cl_ready ) begin + rd_cnt_en = 1; + if(rd_cnt == (rp.num_cl_centroid-1)) begin + rd_cnt_clr = 1; + centroid_cl_last = 1; + nextState = TUPLE; + end + end + end + + //read the first few lines of every 32 cachelines from the memory + TUPLE: begin + is_tuple = 1; + if(rx_fifo_valid && tuple_cl_ready) begin + rd_cnt_en = 1; + if(rd_cnt == rp.num_cl_tuple-1) begin + rd_cnt_clr = 1; + rd_iteration_cnt_en = 1; + tuple_cl_last = 1; + if(rd_iteration_cnt == rp.num_iteration-1) begin + rd_iteration_cnt_clr = 1; + nextState = IDLE; + end + end + end + end + + //default : /* default */; + endcase + end + + always @ (posedge clk) begin + if(~rst_n) begin + rd_cnt <= '0; + rd_iteration_cnt <= '0; + currentState <= IDLE; + end + else begin + currentState <= nextState; + rd_cnt <= rd_cnt_clr?'0:(rd_cnt_en? (rd_cnt+1) : rd_cnt); + rd_iteration_cnt <= rd_iteration_cnt_clr? '0: (rd_iteration_cnt_en? 
(rd_iteration_cnt+1): rd_iteration_cnt); + end + end + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +//---------------------------------log file print--------------------------------------------------// +//////////////////////////////////////////////////////////////////////////////////////////////////// +`define LOG_NULL +`ifdef LOG_FILE + int file; + reg file_finished; + initial begin + file = $fopen("/home/harpdev/doppiodb/fpga/operators/k_means_v2/sim_log/fetch_engine_output_lane.txt","w"); + if(file) + $display("fetch_engine_output_lane file open successfully\n"); + else + $display("Failed to open fetch_engine_output_lane file\n"); + end + + always @ (posedge clk) begin + if(~rst_n) begin + file_finished <= 1'b0; + end + else begin + if(centroid_cl_valid & centroid_cl_last) begin + $fwrite(file,"Iteration:%d, centroid_cl_cnt:%d\n",rd_iteration_cnt+1, rd_cnt+1); + end + if(tuple_cl_valid& tuple_cl_last) begin + $fwrite(file,"Iteration:%d, tuple_cl_cnt:%d\n",rd_iteration_cnt+1, rd_cnt+1); + end + if(tuple_cl_valid & rd_cnt==(rp.num_cl_tuple-1) & rd_iteration_cnt==(rp.num_iteration-1)) begin + file_finished <= 1'b1; + end + if(file_finished) begin + $fclose(file); + end + end + end +`endif +//////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////// + +endmodule \ No newline at end of file diff --git a/hw/hdl/operators/kmeans/formatter_c_lane_splitter.sv b/hw/hdl/operators/kmeans/formatter_c_lane_splitter.sv new file mode 100755 index 00000000..219e1989 --- /dev/null +++ b/hw/hdl/operators/kmeans/formatter_c_lane_splitter.sv @@ -0,0 +1,162 @@ +import kmeansTypes::*; + +//split the 512 bit cacheline to chunks +module c_lane_splitter ( + input wire clk, // Clock + input wire rst_n, // Asynchronous reset active low + + input wire [NUM_CLUSTER_BITS:0] num_cluster,// 
the actual number of cluster that will be used + input wire [MAX_DEPTH_BITS:0] data_dim, //input the actual dimension of the data + + //interface to fetch engine + input wire [511:0] centroid_cl, + input wire centroid_cl_valid, + + output wire c_lane_ready, + + //interface to next module + // input wire next_module_ready, + + output reg [32-1:0] centroid, //32b*NUM_BANK + output reg centroid_valid, + output reg last_dim_of_all_centroid, + output reg last_dim_of_one_centroid + +); + + +wire c_fifo_valid; +wire [511 : 0] c_fifo_dout; +wire c_fifo_re; +wire c_fifo_full; +wire c_fifo_almostfull; + +reg [31:0] sel_cnt; +reg [31:0] sent_cnt; +reg [16:0] dim_cnt; + +wire is_last_dim_of_all, is_cl_last, is_last_dim_of_one; + +reg [31:0] total_send_amount; + + +localparam SPLIT_RATIO = 16; //number of splits of each cacheline + +//buffer the initial centroid cacheline +quick_fifo #(.FIFO_WIDTH(512), + .FIFO_DEPTH_BITS(BUFFER_DEPTH_BITS-3), + .FIFO_ALMOSTFULL_THRESHOLD(2**(BUFFER_DEPTH_BITS-3) -8)) + c_fifo + ( + .clk (clk), + .reset_n (rst_n), + .we (~c_fifo_full & centroid_cl_valid), + .din (centroid_cl), + .re (c_fifo_re), + .valid (c_fifo_valid), + .dout (c_fifo_dout), + .count (), + .empty (), + .full (c_fifo_full), + .almostfull(c_fifo_almostfull) ); + +//last of the feature of all the initial centroids +assign is_last_dim_of_all = (sent_cnt == (total_send_amount-1)) & c_fifo_valid; +//last chunk of the cacheline +assign is_cl_last = (sel_cnt == SPLIT_RATIO -1) & c_fifo_valid; +assign is_last_dim_of_one = (dim_cnt == data_dim-1) & c_fifo_valid; + +assign c_lane_ready = ~c_fifo_almostfull; +assign c_fifo_re = is_last_dim_of_all | is_cl_last; + + +always @ (posedge clk) begin + if(~rst_n) begin + sel_cnt <= '0; + sent_cnt <= '0; + dim_cnt <= '0; + total_send_amount <= '0; + end + else begin + total_send_amount <= num_cluster*data_dim; + + //counter to set the last bit + if(is_last_dim_of_all ) begin + sent_cnt <= '0; + end + else if( c_fifo_valid) begin + sent_cnt <= 
sent_cnt + 1'b1; + end + + //counter to select portion of the cacheline + if(is_cl_last | is_last_dim_of_all) begin + sel_cnt <= '0; + end + else if( c_fifo_valid) begin + sel_cnt <= sel_cnt + 1'b1; + end + + //counter to set the last chunk of one centroid flag + if(is_last_dim_of_one) begin + dim_cnt <= '0; + end + else if(c_fifo_valid) begin + dim_cnt <= dim_cnt + 1'b1; + end + + end +end + + +always @ (posedge clk) begin + if(~rst_n) begin + centroid_valid <= 1'b0; + end + else begin + centroid_valid <= c_fifo_valid; + centroid <= c_fifo_dout[(32)*sel_cnt +: (32)]; + last_dim_of_all_centroid <= is_last_dim_of_all; + last_dim_of_one_centroid <= is_last_dim_of_one; + end +end + +////////////////////////////////////////////////////////////////////////////////////////////////////// +//---------------------------------log file print--------------------------------------------------// +//////////////////////////////////////////////////////////////////////////////////////////////////// +`define LOG_NULL +`ifdef LOG_FILE + int file; + reg file_finished; + initial begin + file = $fopen("/home/harpdev/doppiodb/fpga/operators/k_means_v2/sim_log/formatter_c_lane.txt","w"); + if(file) + $display("formatter_c_lane file open successfully\n"); + else + $display("Failed to open formatter_c_lane file\n"); + end + + always @ (posedge clk) begin + if(~rst_n) begin + file_finished <= 1'b0; + end + else begin + if(centroid_valid) begin + + $fwrite(file,"%d ", centroid); + + if(last_dim_of_one_centroid) begin + $fwrite(file,"\n"); + end + if(last_dim_of_all_centroid) begin + $fwrite(file,"\n\n"); + end + end + + end + end +`endif +//////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////// + + +endmodule \ No newline at end of file diff --git a/hw/hdl/operators/kmeans/k_means_accumulation.sv b/hw/hdl/operators/kmeans/k_means_accumulation.sv new 
file mode 100755 index 00000000..4f068d97 --- /dev/null +++ b/hw/hdl/operators/kmeans/k_means_accumulation.sv @@ -0,0 +1,643 @@ + +import kmeansTypes::*; + + +module k_means_accumulation #(parameter PIPELINE_INDEX=0) ( + input clk, // Clock + input wire rst_n, // active-low reset, registered into rst_n_reg before it is used + + //pipeline processor interface + input logic data_valid_accu_i, + input logic [32-1:0] data_accu_i, + + input logic min_dist_accu_valid_i, + input logic [63:0] min_dist_accu_i, + input logic [NUM_CLUSTER_BITS:0] cluster_accu_i, + + input logic terminate_accu_i, // latched internally; aggregation starts once both FIFOs are empty + + //runtime configuration parameters + input logic [MAX_DEPTH_BITS:0] data_dim_accu_i, //input the actual dimension of the data + input logic [NUM_CLUSTER_BITS:0] num_cluster_accu_i, //input the actual number of clusters + + + //aggregator interface + input logic agg_ready_i, + output logic accu_finish_o, // asserted while the FSM waits in WAIT_AGG for agg_ready_i + output logic [63:0] agg_data_o, + output logic agg_valid_o + + // output wire [NUM_BANK-1:0][MAX_DIM_WIDTH-1:0] debug_output, + // output wire debug_output_valid + + //output reg [7:0][31:0] accu_debug_cnt + +); + + //dual port BRAM signal + logic [64-1:0] bram_din, bram_dout; + logic [NUM_CLUSTER_BITS+MAX_DEPTH_BITS-1:0] bram_addr_re, bram_addr_wr; + logic bram_we; + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//----------------------------------------Accumulation-------------------------------------------------------// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////First cycle//////////////////////////////////////////////// + +//-------------------------input register----------------------------------// + //runtime param reg + logic [MAX_DEPTH_BITS:0] data_dim_accu_DP; + logic [NUM_CLUSTER_BITS:0] num_cluster_accu_DP; + + //input reg + logic rst_n_reg, min_dist_accu_valid_DP; + logic
[63:0]min_dist_accu_DP; + logic [NUM_CLUSTER_BITS:0]cluster_accu_DP; + // logic [4:0] numBits_minus_1_DP; + + //register the input signals; rst_n is delayed one cycle into rst_n_reg, which is the reset used by the rest of the module + always_ff @(posedge clk) begin : proc_rst_delay + rst_n_reg <= rst_n; + min_dist_accu_DP <= min_dist_accu_i; + cluster_accu_DP <= cluster_accu_i; + data_dim_accu_DP <= data_dim_accu_i; + num_cluster_accu_DP <= num_cluster_accu_i; + // numBits_minus_1_DP <= numBits_minus_1; + if (~rst_n_reg) begin + min_dist_accu_valid_DP <= 1'b0; + end + else begin + min_dist_accu_valid_DP <= min_dist_accu_valid_i; + end + end + + +//////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////Second Cycle////////////////////////////////////////////// + +//----------------------Cluster FIFO and DATA FIFO------------------// + + //data FIFO signals + logic data_fifo_we, data_fifo_re, data_fifo_valid,data_fifo_empty, data_fifo_almostfull, data_fifo_full; + logic [MAX_DIM_WIDTH-1:0] data_fifo_dout, data_fifo_din; + + //cluster fifo signals + logic cluster_fifo_we, cluster_fifo_re, cluster_fifo_valid, cluster_fifo_empty, cluster_fifo_almostfull, cluster_fifo_full; + logic [NUM_CLUSTER_BITS:0] cluster_fifo_dout; + reg [MAX_DEPTH_BITS:0] dim_cnt_accu; + + quick_fifo #(.FIFO_WIDTH(MAX_DIM_WIDTH), .FIFO_DEPTH_BITS(BUFFER_DEPTH_BITS), .FIFO_ALMOSTFULL_THRESHOLD(2**(BUFFER_DEPTH_BITS-1))) + DATA_FIFO + ( + .clk(clk), + .reset_n(rst_n_reg), + .we(data_fifo_we), + .din(data_fifo_din), + .re(data_fifo_re), + .valid(data_fifo_valid), + .dout(data_fifo_dout), + .count(), + .empty(data_fifo_empty), + .full(data_fifo_full), + .almostfull(data_fifo_almostfull) + ); + + + + assign data_fifo_din = data_accu_i; + assign data_fifo_we = data_valid_accu_i & ~data_fifo_full ; //write when the input data is valid and the FIFO is not full (a word arriving while full is not stored) + + quick_fifo #(.FIFO_WIDTH(NUM_CLUSTER_BITS+1), .FIFO_DEPTH_BITS(BUFFER_DEPTH_BITS), .FIFO_ALMOSTFULL_THRESHOLD(2**(BUFFER_DEPTH_BITS-1))) + CLUSTER_FIFO + ( + .clk (clk), + .reset_n (rst_n_reg), + .we
(cluster_fifo_we), + .din (cluster_accu_DP), + .re (cluster_fifo_re), + .valid (cluster_fifo_valid), + .dout (cluster_fifo_dout), + .count (), + .empty (cluster_fifo_empty), + .full (cluster_fifo_full), + .almostfull(cluster_fifo_almostfull) ); + + + + assign cluster_fifo_we = min_dist_accu_valid_DP & ~cluster_fifo_full; //store the registered cluster index when it is valid and the FIFO is not full + assign cluster_fifo_re = data_fifo_valid & cluster_fifo_valid & (dim_cnt_accu ==data_dim_accu_DP -1 ) ; //pop the cluster index when dim_cnt_accu is at the last dimension + + always @ (posedge clk) begin //dimension counter: advances while both FIFOs are valid and wraps to 0 after data_dim_accu_DP counts + if(~rst_n_reg) begin + dim_cnt_accu <= '0; + end + else begin + if(data_fifo_valid & cluster_fifo_valid) begin + dim_cnt_accu <= dim_cnt_accu + 1'b1; + if(dim_cnt_accu + 1'b1 == data_dim_accu_DP ) begin + dim_cnt_accu <= '0; + end + end + end + end + + //---------------------------accu counter and sse----------------------------------// + + + //accumulation counter signals + logic [NUM_CLUSTER-1:0][63:0] accu_counter; + logic accu_counter_clr; + + //signals for accumulating the squared error + logic [63:0] sse; + logic sse_clr; + + //count the number of samples in each cluster (one increment per cluster FIFO pop) + always @ (posedge clk) begin + if(~rst_n_reg) begin + accu_counter <= '0; + end + else begin + if(accu_counter_clr) begin + accu_counter <= '0; + end + else if(cluster_fifo_re) begin + accu_counter[cluster_fifo_dout] <= accu_counter[cluster_fifo_dout] + 1'b1; + end + end + end + + //accumulate the sse from every registered valid minimum distance (independent of cluster_fifo_full) + always @ (posedge clk) begin + if(~rst_n_reg) begin + sse <= '0; + end + else begin + if(sse_clr) begin + sse <= '0; + end + else if(min_dist_accu_valid_DP) begin + sse <= sse + min_dist_accu_DP; + end + end + end + + + //////////////////////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////Third cycle////////////////////////////////////////////////////// + + //-------------------------------Accu read addr generation---------------------------------// + reg [NUM_CLUSTER_BITS+MAX_DEPTH_BITS-1:0] re_addr_accu; + reg re_addr_accu_valid; + always @ (posedge clk ) begin +
if(~rst_n_reg) begin + re_addr_accu <= '0; + re_addr_accu_valid <= 1'b0; + end + else begin + re_addr_accu_valid <= data_fifo_valid & cluster_fifo_valid; + re_addr_accu <= (cluster_fifo_dout << (MAX_DEPTH_BITS)) + dim_cnt_accu; + end + end + + + //////////////////////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////Fourth cycle////////////////////////////////////////////////////// + + //------------------------read the data from bram need one cycle-------------------// + //---------------------------register the wr addr-------------------------------// + reg [NUM_CLUSTER_BITS+MAX_DEPTH_BITS-1:0] wr_addr_accu_reg; + reg re_addr_accu_valid_reg; + + always @ (posedge clk) begin + re_addr_accu_valid_reg <= re_addr_accu_valid; + wr_addr_accu_reg <= re_addr_accu; + end + + assign data_fifo_re = re_addr_accu_valid_reg; + + //////////////////////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////Fifth cycle////////////////////////////////////////////////////// + + //----------------------------accumulation-------------------------------------// + //---------------------------register the wr addr accu and wr_en---------------// + + wire [63:0] add_operand; + reg [64-1:0] add_result; + reg [NUM_CLUSTER_BITS+MAX_DEPTH_BITS-1:0] wr_addr_accu_reg2; + reg wr_en_accu; + + assign add_operand = {32'b0, data_fifo_dout}; + + always @ (posedge clk) begin + add_result <= add_operand + bram_dout; + end + + + always @ (posedge clk) begin + if(~rst_n_reg) begin + wr_en_accu <= 1'b0; + end + else begin + wr_addr_accu_reg2 <= wr_addr_accu_reg; + wr_en_accu <= re_addr_accu_valid_reg; + end + end + + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//----------------------------------------Aggregation-------------------------------------------------------// 
+////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + //-------------------------------check condition to go to aggregation stage-------------------------// + reg terminate_accu_reg, goto_agg, agg_ready_reg; + //terminate flag: latch terminate_accu_i, then pulse goto_agg for one cycle once both FIFOs are empty + always @ (posedge clk) begin + + if(~rst_n_reg) begin + terminate_accu_reg <= 0; goto_agg <= 1'b0; //goto_agg is cleared too, so a pulse caught by reset cannot start the FSM right after reset + agg_ready_reg <= 1'b0; + end + else begin + goto_agg <= 1'b0; + if(terminate_accu_reg & data_fifo_empty & cluster_fifo_empty) begin + terminate_accu_reg <= 0; + goto_agg <= 1'b1; + end + else if(terminate_accu_i) begin + terminate_accu_reg <= terminate_accu_i; + end + + agg_ready_reg <= agg_ready_i; + end + end + + //-------------------FSM to control the sending sequences of the data---------------------------------// + //order of the words sent out: per-cluster counts (AGG_CLUSTER), per-cluster/per-dimension sums (AGG_DATA), then the sse (AGG_SSE) + reg accu_finish, is_agg_data_re, is_agg; + reg [MAX_DEPTH_BITS:0] dim_cnt_agg; + reg [NUM_CLUSTER_BITS:0] cluster_cnt_agg; + reg [NUM_CLUSTER_BITS+MAX_DEPTH_BITS-1:0] re_addr_agg, wr_addr_agg; + + typedef enum reg [2:0]{AGG_IDLE, WAIT_AGG, AGG_DATA, WAIT_ONE_CYCLE, WAIT_SECOND_CYCLE,AGG_CLUSTER, AGG_SSE, TERMINATE} agg_state; + + agg_state state; + + always @ (posedge clk) begin + + if(~rst_n_reg) begin + state <= AGG_IDLE; + dim_cnt_agg <= '0; + cluster_cnt_agg <= '0; + re_addr_agg <= '0; accu_finish <= 1'b0; accu_counter_clr <= 1'b0; sse_clr <= 1'b0; //control pulses are cleared in reset instead of holding their last value + wr_addr_agg <= '0; is_agg_data_re <= 1'b0; is_agg <= 1'b0; + end + else begin + accu_finish <= 1'b0; + accu_counter_clr <= 1'b0; + + is_agg_data_re <= 1'b0; + is_agg <= 1'b0; + + sse_clr <= 1'b0; + + wr_addr_agg <= re_addr_agg; + + case (state) + + AGG_IDLE:begin + if(goto_agg == 1) begin + state <= WAIT_AGG; + end + end + + WAIT_AGG: begin + accu_finish <= 1'b1; + if(agg_ready_reg) begin + state <= AGG_CLUSTER; + is_agg <= 1'b1; + end + end + + //issue one BRAM read address per cycle (cluster-major, dimension-minor) and flag the read with is_agg_data_re + AGG_DATA: begin + is_agg_data_re <= 1; + is_agg <= 1'b1; + dim_cnt_agg <= dim_cnt_agg + 1'b1; + if(dim_cnt_agg == data_dim_accu_DP -1 ) begin + dim_cnt_agg <= '0; + cluster_cnt_agg <=
cluster_cnt_agg + 1'b1; + if(cluster_cnt_agg == num_cluster_accu_DP-1) begin + cluster_cnt_agg <= '0; + state <= WAIT_ONE_CYCLE; + end + end + + re_addr_agg <= (cluster_cnt_agg << MAX_DEPTH_BITS) + (dim_cnt_agg); + + end + + //wait one cycle for the last data word to be read from memory + WAIT_ONE_CYCLE: begin + is_agg <= 1'b1; + state <= WAIT_SECOND_CYCLE; + re_addr_agg <= '0; + end + + WAIT_SECOND_CYCLE: begin //second wait cycle before the sse is sent + state <= AGG_SSE; + is_agg <= 1'b1; + end + + AGG_CLUSTER: begin //one accu_counter entry is sent per cycle, then the FSM moves on to the data sums + is_agg <= 1'b1; + cluster_cnt_agg <= cluster_cnt_agg + 1'b1; + if(cluster_cnt_agg == num_cluster_accu_DP-1) begin + cluster_cnt_agg <= '0; + state <= AGG_DATA; + end + end + + AGG_SSE: begin //the output data path selects sse while the FSM is in this state + is_agg <= 1'b1; + state <= TERMINATE; + end + + TERMINATE:begin //clear the per-cluster counters and the sse, then return to idle + is_agg <= 1'b1; + state <= AGG_IDLE; + accu_counter_clr <= 1'b1; + sse_clr <= 1'b1; + end + endcase + end + end + + + //----------------output data path-----------------------// + reg agg_data_re_valid; + + always @ (posedge clk) begin + if(~rst_n_reg) begin + agg_valid_o <= 1'b0; + agg_data_re_valid <= 1'b0; + end + else begin + agg_data_re_valid <= is_agg_data_re; //takes one cycle to read from bram + + agg_data_o <= agg_data_re_valid ? bram_dout : (state == AGG_CLUSTER? accu_counter[cluster_cnt_agg] : (state == AGG_SSE? sse : '0 ) ); + agg_valid_o <= agg_data_re_valid | (state == AGG_CLUSTER) | (state == AGG_SSE); + + end + end + + assign accu_finish_o = accu_finish; + + + +//-------------------------------ACCU BRAM ---------------------------------------// + + + + dual_port_ram #(.DATA_WIDTH(64), .ADDR_WIDTH(MAX_DEPTH_BITS+NUM_CLUSTER_BITS)) + accu_ram + ( + .clk (clk), + .we (bram_we), + .re (1'b1), + .raddr(bram_addr_re), + .waddr(bram_addr_wr), + .din (bram_din), + .dout (bram_dout) + ); + + + assign bram_addr_wr = is_agg ? wr_addr_agg : wr_addr_accu_reg2; + assign bram_din = is_agg? '0: add_result; //zero is written while is_agg is high, which clears the sums that were read out for aggregation + assign bram_addr_re = is_agg?
re_addr_agg : re_addr_accu; + assign bram_we = wr_en_accu | (agg_data_re_valid ); + + + //------------------------debug counters----------------------------------// + + // reg [31:0] accu_ram_we_cnt; + // // reg [31:0][7:0] accu_bram_data; + // reg [31:0] bram_accu_cycle; + // reg [31:0] back_pressure_cnt; + // reg [15:0] goto_agg_cnt; + // reg [15:0] cross_state_cnt; + // // reg [15:0] accu_bram_cnt_indx; + // reg [15:0] addr_offset_overflow_cnt; + // reg [15:0] rdw_check; + + always @ (posedge clk) begin + if(~rst_n_reg) begin + // accu_ram_we_cnt <= '0; + // bram_accu_cycle <= '0; + // back_pressure_cnt <= '0; + // goto_agg_cnt <= '0; + // cross_state_cnt <= '0; + // accu_bram_cnt_indx <= '0; + // addr_offset_overflow_cnt <= '0; + // rdw_check <= '0; + end + else begin + // if(bram_we_accu) begin + // accu_ram_we_cnt <= accu_ram_we_cnt + 1'b1; + // end + + // if(bram_we_accu & accu_ram_we_cnt<4 & accu_ram_we_cnt>=0) begin //first setosa + // accu_bram_data[accu_bram_cnt_indx] <= bram_din; + // accu_bram_cnt_indx <= accu_bram_cnt_indx+ 1'b1; + // end + // else if(bram_we_accu & accu_ram_we_cnt<204 & accu_ram_we_cnt>=192) begin //last 2 setosa and first versicolor + // accu_bram_data[accu_bram_cnt_indx] <= bram_din; + // accu_bram_cnt_indx <= accu_bram_cnt_indx+ 1'b1; + // end + // else if(bram_we_accu & accu_ram_we_cnt<404 & accu_ram_we_cnt>=392) begin //last 2 versicolor, first virginica + // accu_bram_data[accu_bram_cnt_indx] <= bram_din; + // accu_bram_cnt_indx <= accu_bram_cnt_indx+ 1'b1; + // end + // else if(bram_we_accu & accu_ram_we_cnt<600 & accu_ram_we_cnt>=596) begin + // accu_bram_data[accu_bram_cnt_indx] <= bram_din; + // accu_bram_cnt_indx <= accu_bram_cnt_indx+ 1'b1; + // end + + // if(addr_offset>=4) begin + // addr_offset_overflow_cnt <= addr_offset_overflow_cnt + 1'b1; + // end + + // if(bram_we & (bram_addr_re == bram_addr_wr)) begin + // rdw_check <= rdw_check+1'b1; + // end + + // if(currentState == BRAM_ACCU) begin + // bram_accu_cycle <= 
bram_accu_cycle + 1'b1; + // end + // if(cluster_fifo_almostfull | data_fifo_almostfull ) begin + // back_pressure_cnt <= back_pressure_cnt + 1'b1; + // end + // if(goto_agg) begin + // goto_agg_cnt <= goto_agg_cnt + 1'b1; + // end + // if((currentState == BRAM_ACCU) & is_agg) begin + // cross_state_cnt <= 1'b1; + // end + end + + // accu_debug_cnt[0] = bram_accu_cycle; + // accu_debug_cnt[1] = accu_ram_we_cnt; + // accu_debug_cnt[2] = back_pressure_cnt; + // accu_debug_cnt[3] = {cross_state_cnt, goto_agg_cnt}; + // accu_debug_cnt[0] = {accu_bram_data[3],accu_bram_data[2],accu_bram_data[1],accu_bram_data[0]}; + // accu_debug_cnt[1] = {accu_bram_data[7],accu_bram_data[6],accu_bram_data[5],accu_bram_data[4]}; + // accu_debug_cnt[2] = {accu_bram_data[11],accu_bram_data[10],accu_bram_data[9],accu_bram_data[8]}; + // accu_debug_cnt[3] = {accu_bram_data[15],accu_bram_data[14],accu_bram_data[13],accu_bram_data[12]}; + // accu_debug_cnt[4] = {accu_bram_data[19],accu_bram_data[18],accu_bram_data[17],accu_bram_data[16]}; + // accu_debug_cnt[5] = {accu_bram_data[23],accu_bram_data[22],accu_bram_data[21],accu_bram_data[20]}; + // accu_debug_cnt[6] = {accu_bram_data[27],accu_bram_data[26],accu_bram_data[25],accu_bram_data[24]}; + // accu_debug_cnt[7] = {accu_bram_data[31],accu_bram_data[30],accu_bram_data[29],accu_bram_data[28]}; + + + end + + // reg [31:0] agg_valid_cnt; + // reg [15:0][15:0] sent_agg_data; //don't need to reset + + // always @ (posedge clk) begin + // if(~rst_n_reg) begin + // agg_valid_cnt <= '0; + // end + // else begin + // if(agg_valid_o) begin + // agg_valid_cnt <= agg_valid_cnt + 1'b1; + // end + // if(agg_valid_o) begin + // sent_agg_data[agg_valid_cnt] <= agg_data_o[15:0]; + // end + + // accu_debug_cnt[0] <= {sent_agg_data[1], sent_agg_data[0]}; + // accu_debug_cnt[1] <= {sent_agg_data[3], sent_agg_data[2]}; + // accu_debug_cnt[2] <= {sent_agg_data[5], sent_agg_data[4]}; + // accu_debug_cnt[3] <= {sent_agg_data[7], sent_agg_data[6]}; + // 
accu_debug_cnt[4] <= {sent_agg_data[9], sent_agg_data[8]}; + // accu_debug_cnt[5] <= {sent_agg_data[11], sent_agg_data[10]}; + // accu_debug_cnt[6] <= {sent_agg_data[13], sent_agg_data[12]}; + // accu_debug_cnt[7] <= {sent_agg_data[15], rdw_check}; + + // end + // end + + + + + +// `define LOG_NULL +// ////////////////////////////////////////////////////////////////////////////////////////////////////// +// //---------------------------------log file print--------------------------------------------------// +// //////////////////////////////////////////////////////////////////////////////////////////////////// +// `ifdef LOG_FILE +// int file; +// reg [31:0] is_agg_data_re_dim_cnt_accu, is_agg_data_re_sample_cnt; +// initial begin +// file = $fopen($sformatf("/home/harpdev/doppiodb/fpga/operators/bit_serial_kmeans/sim_log/k_means_accumulation%d.txt", PIPELINE_INDEX ),"w"); + +// if(file) begin +// $display("k_means_accumulation file open successfully\n"); +// $fwrite(file,"PIPELINE_INDEX:%d\n", PIPELINE_INDEX); +// $fwrite(file,"Data Sum\n"); +// end +// else +// $display("Failed to open k_means_accumulation file\n"); +// end + +// always @ (posedge clk) begin +// if(~rst_n) begin +// is_agg_data_re_dim_cnt_accu <= '0; +// is_agg_data_re_sample_cnt <= '0; +// end +// else begin +// if(is_agg_data_re & (is_agg_data_re_dim_cnt_accu == data_dim_accu_DP-1)) begin +// is_agg_data_re_dim_cnt_accu <= '0; +// end +// else if(is_agg_data_re) begin +// is_agg_data_re_dim_cnt_accu <= is_agg_data_re_dim_cnt_accu + 1'b1; +// end + +// if(is_agg_data_re & (is_agg_data_re_dim_cnt_accu == data_dim_accu_DP-1) &(is_agg_data_re_sample_cnt==num_cluster_accu_DP-1)) begin +// is_agg_data_re_sample_cnt <= '0; +// end +// else if(is_agg_data_re & (is_agg_data_re_dim_cnt_accu == data_dim_accu_DP-1)) begin +// is_agg_data_re_sample_cnt <= is_agg_data_re_sample_cnt + 1'b1; +// end + + +// if (is_agg_data_re &(is_agg_data_re_dim_cnt_accu == data_dim_accu_DP-1) & 
(is_agg_data_re_sample_cnt==num_cluster_accu_DP-1)) begin +// $fwrite(file,"%d\nCluster Cnt\n",bram_dout); +// end +// else if(is_agg_data_re &(is_agg_data_re_dim_cnt_accu == data_dim_accu_DP-1)) begin +// $fwrite(file,"%d\n", bram_dout); +// end +// else if(is_agg_data_re ) begin +// $fwrite(file,"%d ", bram_dout); +// end + +// if(is_agg_cluster_re & ~(agg_cnt==num_cluster_accu_DP-1)) begin +// $fwrite(file,"%d ", accu_counter[agg_cnt]); +// end +// else if(is_agg_cluster_re & (agg_cnt == num_cluster_accu_DP -1)) begin +// $fwrite(file,"%d\n", accu_counter[agg_cnt]); +// end + +// if(is_agg_sse_re) begin +// $fwrite(file,"\nSSE:%d\n\n\n",sse); +// end +// end +// end +// `endif +//////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////// + + +// ////////////////////////////////////////////////////////////////////////////////////////////////////// +// //---------------------------------log file_accu print--------------------------------------------------// +// //////////////////////////////////////////////////////////////////////////////////////////////////// +// `ifdef LOG_FILE +// int file_accu; +// reg [NUM_CLUSTER_BITS:0] cluster_fifo_dout_reg; +// initial begin +// file_accu = $fopen($sformatf("/home/harpdev/doppiodb/fpga/operators/low_precision_kmeans/sim_log/k_means_accumulation_bram%d.txt", PIPELINE_INDEX ),"w"); + +// if(file_accu) begin +// $display("k_means_accumulation_bram file_accu open successfully\n"); +// $fwrite(file_accu,"PIPELINE_INDEX:%d\n", PIPELINE_INDEX); +// end +// else +// $display("Failed to open k_means_accumulation_bram file_accu\n"); +// end + +// always @ (posedge clk) begin +// if(~rst_n) begin +// cluster_fifo_dout_reg <= '0; +// end +// else begin +// cluster_fifo_dout_reg <= cluster_fifo_dout; + +// if(~is_agg & bram_we) begin +// $fwrite(file_accu,"Data:%d, Cluster%d, Accu:%d, 
Wr_Addr:%d\n", data_fifo_dout_reg, cluster_fifo_dout_reg, bram_din,bram_addr_wr); +// end + +// if(goto_agg) begin +// $fwrite(file_accu,"next iteration\n\n\n"); +// end +// end +// end +// `endif +// //////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////// + + + + +endmodule diff --git a/hw/hdl/operators/kmeans/k_means_aggregation.sv b/hw/hdl/operators/kmeans/k_means_aggregation.sv new file mode 100755 index 00000000..8c348384 --- /dev/null +++ b/hw/hdl/operators/kmeans/k_means_aggregation.sv @@ -0,0 +1,274 @@ +import kmeansTypes::*; + +module k_means_aggregation +( + input wire clk, // Clock + input wire rst_n, // Active-low reset; registered into rst_n_reg and applied synchronously + + input wire start_operator, + + input wire [MAX_DEPTH_BITS:0] data_dim, //input the actual dimension of the data + input wire [NUM_CLUSTER_BITS:0] num_cluster, //input the actual number of clusters + + //interface with pipelines + // output wire agg_ready, + // input wire [NUM_PIPELINE-1:0] accu_finish, + + input wire [63:0] agg_data, + input wire agg_valid, + + //interface with the divider + output reg [63:0] div_sum, + output reg [63:0] div_count, + output reg div_valid, + output reg div_last_dim, + output reg div_last, + + output reg [63:0] sse, + output reg sse_valid, + output wire sse_converge, // high when sse is non-zero and equal to previous_sse + + output reg [7:0][31:0] k_means_aggregation_debug_cnt + +); + + + //sum FIFO signal + wire sum_fifo_we, sum_fifo_re, sum_fifo_valid,sum_fifo_empty, sum_fifo_full; + wire [63:0] sum_fifo_dout, sum_fifo_din; + + //count FIFO signal + wire count_fifo_we, count_fifo_re, count_fifo_valid,count_fifo_empty, count_fifo_full; + wire [63:0] count_fifo_dout, count_fifo_din; + + reg [63:0] data_cnt; //counts valid agg words: counts first, then sums, then the sse word, after which it wraps to 0 + + reg [63:0] previous_sse; + + reg [16:0] dim_cnt; + reg [16:0] cluster_cnt; + + + // reg agg_ready_reg; + // reg [NUM_PIPELINE-1:0] accu_finish_reg; + + reg rst_n_reg; + + //fifo will not be
full + // assign agg_ready = agg_ready_reg; + + always @ (posedge clk) begin + rst_n_reg <= rst_n; + + // if(~rst_n_reg) begin + + // agg_ready_reg <= 1'b0; + // accu_finish_reg <= '0; + // end + // else begin + // accu_finish_reg <= accu_finish; + // agg_ready_reg <= (&accu_finish_reg); + // end + end + +//----------------------register the agg input---------------------// +reg [63:0] agg_data_reg1, agg_data_reg2; +reg agg_valid_reg1,agg_valid_reg2; + +always @ (posedge clk) begin + agg_data_reg1 <= agg_data; + agg_data_reg2 <= agg_data_reg1; + + if(~rst_n_reg) begin + agg_valid_reg1 <= 1'b0; + agg_valid_reg2 <= 1'b0; + end + else begin + agg_valid_reg1 <= agg_valid; + agg_valid_reg2 <= agg_valid_reg1; + end +end +//------------------------------------------------------------------// + +//set fifo depth such that it will not overflow +quick_fifo #(.FIFO_WIDTH(64), .FIFO_DEPTH_BITS(9)) + sum_fifo + ( + .clk, + .reset_n(rst_n_reg), + .we(sum_fifo_we), + .din(sum_fifo_din), + .re(sum_fifo_re), + .valid(sum_fifo_valid), + .dout(sum_fifo_dout), + .count(), + .empty(sum_fifo_empty), + .full(sum_fifo_full), + .almostfull()); + + assign sum_fifo_din = agg_data_reg2; + // assign sum_fifo_we = agg_valid_reg2 & (data_cnt < num_cluster*data_dim); + assign sum_fifo_we = agg_valid_reg2 & (num_cluster <= data_cnt) & ( data_cnt < num_cluster*data_dim + num_cluster ); + assign sum_fifo_re = sum_fifo_valid & count_fifo_valid; + + + quick_fifo #(.FIFO_WIDTH(64), .FIFO_DEPTH_BITS(NUM_CLUSTER_BITS+1)) + count_fifo + ( + .clk, + .reset_n(rst_n_reg), + .we(count_fifo_we), + .din(count_fifo_din), + .re(count_fifo_re), + .valid(count_fifo_valid), + .dout(count_fifo_dout), + .count(), + .empty(count_fifo_empty), + .full(count_fifo_full), + .almostfull() + ); + + assign count_fifo_din = agg_data_reg2; + // assign count_fifo_we = agg_valid_reg2 & ( num_cluster*data_dim <= data_cnt ) & ( data_cnt < num_cluster*data_dim + num_cluster ); + assign count_fifo_we = agg_valid_reg2 & ( data_cnt < 
num_cluster ); + assign count_fifo_re = count_fifo_valid & sum_fifo_valid & (dim_cnt == data_dim-1); + + always @ (posedge clk) begin + if(~rst_n_reg) begin + data_cnt <= '0; + sse <= '0; + sse_valid <= '0; + previous_sse <= '0; + dim_cnt <= '0; + cluster_cnt <= '0; + end + else begin + //multiplex input to different fifos + if((data_cnt == num_cluster * data_dim + num_cluster) & agg_valid_reg2) begin + data_cnt <= '0; + end + else if(agg_valid_reg2) begin + data_cnt <= data_cnt + 1'b1; + end + + //collect sse + if((data_cnt == num_cluster * data_dim + num_cluster) & agg_valid_reg2) begin + sse <= agg_data_reg2; + previous_sse <= sse; + end + + sse_valid <= (data_cnt == num_cluster * data_dim + num_cluster) & agg_valid_reg2; + + //dimension counter to set last dim flag + if(sum_fifo_valid & count_fifo_valid & (dim_cnt == data_dim-1)) begin + dim_cnt <= '0; + end + else if(sum_fifo_valid & count_fifo_valid) begin + dim_cnt <= dim_cnt + 1'b1; + end + + //cluster counter combined with dimension counter to set the last flag + if(sum_fifo_valid & count_fifo_valid & (dim_cnt == data_dim-1) & (cluster_cnt == num_cluster -1)) begin + cluster_cnt <= '0; + end + else if(sum_fifo_valid & count_fifo_valid & (dim_cnt == data_dim-1)) begin + cluster_cnt <= cluster_cnt + 1'b1; + end + + end + end + + //output data path + always @ (posedge clk) begin + if(~rst_n_reg) begin + div_valid <= 1'b0; + end + else begin + div_count <= count_fifo_dout; + div_sum <= sum_fifo_dout; + div_valid <= sum_fifo_valid & count_fifo_valid; + div_last_dim <= sum_fifo_valid & count_fifo_valid & (dim_cnt == data_dim-1); + div_last <= sum_fifo_valid & count_fifo_valid & (dim_cnt == data_dim-1) & (cluster_cnt == num_cluster -1); + end + end + + + + assign sse_converge = (sse!= 0) & (previous_sse == sse); + + + //debug counters + reg [31:0] agg_input_valid_cnt; + // reg agg_ready_flag; + reg [15:0][15:0] received_agg_data; //don't need to reset + + always @ (posedge clk) begin + if(start_operator) begin + 
agg_input_valid_cnt <= '0; + // agg_ready_flag <= 1'b0; + end + else begin + if(agg_valid_reg2) begin + agg_input_valid_cnt <= agg_input_valid_cnt + 1'b1; + end + // if(agg_ready_reg) begin + // agg_ready_flag <= 1'b1; + // end + if(agg_valid_reg2) begin + received_agg_data[agg_input_valid_cnt] <= agg_data_reg2[15:0]; + end + end + + // k_means_aggregation_debug_cnt[0] <= agg_input_valid_cnt; + k_means_aggregation_debug_cnt[0] <= {received_agg_data[1], received_agg_data[0]}; + k_means_aggregation_debug_cnt[1] <= {received_agg_data[3], received_agg_data[2]}; + k_means_aggregation_debug_cnt[2] <= {received_agg_data[5], received_agg_data[4]}; + k_means_aggregation_debug_cnt[3] <= {received_agg_data[7], received_agg_data[6]}; + k_means_aggregation_debug_cnt[4] <= {received_agg_data[9], received_agg_data[8]}; + k_means_aggregation_debug_cnt[5] <= {received_agg_data[11], received_agg_data[10]}; + k_means_aggregation_debug_cnt[6] <= {received_agg_data[13], received_agg_data[12]}; + k_means_aggregation_debug_cnt[7] <= {received_agg_data[15], received_agg_data[14]}; + end + + + +`define LOG_NULL +////////////////////////////////////////////////////////////////////////////////////////////////////// +//---------------------------------log file print--------------------------------------------------// +//////////////////////////////////////////////////////////////////////////////////////////////////// +`ifdef LOG_FILE + int file; + reg file_finished; + initial begin + file = $fopen("/home/harpdev/doppiodb/fpga/operators/k_means_v2/sim_log/k_means_aggregation.txt","w"); + + if(file) begin + $display("k_means_aggregation file open successfully\n"); + $display("output to divider",); + end + else + $display("Failed to open k_means_aggregation file\n"); + end + + always @ (posedge clk) begin + if(~rst_n_reg) begin + + end + else begin + if(div_valid & ~div_last_dim & ~div_last) begin + $fwrite(file,"%d ",div_sum); + end + else if(div_valid & div_last_dim & ~div_last) begin + 
$fwrite(file,"%d; count:%d\n", div_sum, div_count); + end + else if(div_valid & div_last_dim & div_last) begin + $fwrite(file,"%d; count%d\nSSE:%d\n\n\n", div_sum, div_count, sse); + end + end + end +`endif +//////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////// + + +endmodule diff --git a/hw/hdl/operators/kmeans/k_means_division.sv b/hw/hdl/operators/kmeans/k_means_division.sv new file mode 100755 index 00000000..56fea786 --- /dev/null +++ b/hw/hdl/operators/kmeans/k_means_division.sv @@ -0,0 +1,161 @@ +`default_nettype none +import kmeansTypes::*; + +module k_means_division +( + input wire clk, // Clock + input wire rst_n, // Active-low reset; registered into rst_delay_n and applied synchronously + + input wire start_operator, + + input wire [63:0] div_sum, + input wire [63:0] div_count, + input wire div_valid, + input wire div_last_dim, + input wire div_last, + + output wire div_dout_last_dim, + output wire div_dout_last, + output reg [MAX_DIM_WIDTH-1:0] div_dout, //the center data for updating the cluster in dist processors + output wire div_dout_valid, //valid flag for div_dout: div_valid delayed through the div_valid_sr shift register + + //debug counter + output reg [31:0] k_means_division_debug_cnt +); + + localparam LPM_WIDTHN = 48; + localparam LPM_PIPELINE = 20; + + reg rst_delay_n; + wire [LPM_WIDTHN-1:0] divider_quotient; + logic [63:0] divider_denummer; + //shift registers delaying the input flags; new values enter at the top bit and bit 0 drives the outputs + reg[LPM_PIPELINE:0] div_valid_sr; + reg[LPM_PIPELINE:0] div_last_dim_sr; + reg[LPM_PIPELINE:0] div_last_sr; + + + + always_ff @(posedge clk)begin + rst_delay_n <= rst_n; + end + + + always_comb begin : proc_denumer // a zero count on a valid input is replaced by 1 so the divisor is never zero + if(div_valid && (div_count==0)) begin + divider_denummer = 1; + end + else begin + divider_denummer = div_count; + end + end + +/* + lpm_divide #( + .lpm_widthn(LPM_WIDTHN), + .lpm_widthd(32), + .lpm_pipeline(LPM_PIPELINE), + .lpm_nrepresentation("UNSIGNED"), +
.lpm_drepresentation("UNSIGNED") + // .LPM_NREPRESENTATION("unsigned"), + // .LPM_DREPRESENTATION("unsigned") + ) + divider + ( + .clock(clk), + .clken(1'b1), + .aclr(1'b0), + .quotient(divider_quotient), + .numer(div_sum[LPM_WIDTHN-1:0]), + .denom(divider_denummer[31:0]), + .remain() + ); +*/ + + logic [95:0] tmp; + div_gen_0 inst_div_gen ( + .aclk(clk), + .s_axis_divisor_tvalid(1'b1), + .s_axis_divisor_tdata(divider_denummer[31:0]), + .s_axis_dividend_tvalid(1'b1), + .s_axis_dividend_tdata(div_sum[LPM_WIDTHN-1:0]), + .m_axis_dout_tvalid(), + .m_axis_dout_tdata(tmp) + ); + + + assign divider_quotient = tmp[79:48]; + + assign div_dout_valid = div_valid_sr[0]; + assign div_dout_last_dim = div_last_dim_sr[0]; + assign div_dout_last = div_last_sr[0]; + always @(posedge clk) begin + div_dout <= divider_quotient[31:0]; + if (~rst_delay_n) begin + div_valid_sr <= 0; + div_last_dim_sr <= 0; + div_last_sr <= 0; + end + else begin + div_valid_sr <= {div_valid, div_valid_sr[LPM_PIPELINE:1]}; + div_last_dim_sr <= {div_last_dim, div_last_dim_sr[LPM_PIPELINE:1]}; + div_last_sr <= {div_last, div_last_sr[LPM_PIPELINE:1]}; + end + end + + + + //debug counters + reg [31:0] division_output_cnt; + always @ (posedge clk) begin + if(start_operator) begin + division_output_cnt <= '0; + end + else if(div_dout_valid) begin + division_output_cnt <= division_output_cnt + 1'b1; + end + + k_means_division_debug_cnt <= division_output_cnt; + end + +`define LOG_NULL +////////////////////////////////////////////////////////////////////////////////////////////////////// +//---------------------------------log file print--------------------------------------------------// +//////////////////////////////////////////////////////////////////////////////////////////////////// +`ifdef LOG_FILE + int file; + reg file_finished; + initial begin + file = $fopen("/home/harpdev/doppiodb/fpga/operators/k_means_v2/sim_log/k_means_division.txt","w"); + + if(file) begin + $display("k_means_division file open 
import kmeansTypes::*;

// k_means_layer
// -------------
// One stage of the chained k-means assignment pipeline, responsible for the
// centroid whose index equals CLUSTER_ID.
//
//  * Centroid stream: the words arriving on centroid_i are counted per
//    centroid (cluster_cnt). While cluster_cnt == CLUSTER_ID the words are
//    written into a local RAM. The complete stream is forwarded to the next
//    layer through one register stage.
//  * Tuple stream: tuple_i is forwarded to the next layer through one register
//    stage. A second register stage feeds NUM_PIPELINE dist_processor
//    instances, together with the registered RAM output.
//  * min_dist_* / cluster_* travel through the dist_processor instances.
//    dist_processor is defined elsewhere, so its update rule is not visible
//    here -- presumably it keeps the smaller distance; confirm in that module.
//
// data_dim_minus_1 is part of the interface but is not referenced in this
// module.
module k_means_layer #(parameter CLUSTER_ID = 0)(
    input  wire                                       clk,   // Clock
    // Active-low reset. It is registered into rst_n_reg before being used by
    // the logic in this module and is forwarded unregistered to dist_processor.
    input  wire                                       rst_n,

    input  wire [MAX_DEPTH_BITS:0]                    data_dim_minus_1,
    input  wire [NUM_CLUSTER_BITS:0]                  num_cluster_minus_1,

    // centroid stream from the previous k-means layer
    input  wire [32-1:0]                              centroid_i,
    input  wire                                       centroid_valid_i,
    input  wire                                       last_dim_of_one_centroid_i,
    input  wire                                       last_dim_of_all_centroid_i,

    // centroid stream to the next k-means layer (one clock later)
    output reg  [32-1:0]                              centroid_o,
    output reg                                        centroid_valid_o,
    output reg                                        last_dim_of_one_centroid_o,
    output reg                                        last_dim_of_all_centroid_o,

    // tuple stream from the previous k-means layer
    input  wire [NUM_PIPELINE-1:0][32-1:0]            tuple_i,
    input  wire                                       tuple_valid_i,
    input  wire                                       last_dim_of_one_tuple_i,

    // tuple stream to the next k-means layer (one clock later)
    output reg  [NUM_PIPELINE-1:0][32-1:0]            tuple_o,
    output wire                                       tuple_valid_o,
    output wire                                       last_dim_of_one_tuple_o,

    // assignment result computed by the previous layers
    input  wire [NUM_PIPELINE-1:0]                    min_dist_valid_i,
    input  wire [63:0]                                min_dist_i[NUM_PIPELINE-1:0],
    input  wire [NUM_PIPELINE-1:0][NUM_CLUSTER_BITS:0] cluster_i,

    // assignment result including this layer
    output wire [NUM_PIPELINE-1:0]                    min_dist_valid_o,
    output wire [63:0]                                min_dist_o[NUM_PIPELINE-1:0],
    output wire [NUM_PIPELINE-1:0][NUM_CLUSTER_BITS:0] cluster_o
);

    // ------------------------------------------------------------------
    // Declarations
    // ------------------------------------------------------------------
    logic                        rst_n_reg;     // registered copy of rst_n
    logic                        enable;        // CLUSTER_ID <= num_cluster_minus_1
    logic [NUM_CLUSTER_BITS:0]   cluster_cnt;   // index of the centroid currently streaming in
    logic [MAX_DEPTH_BITS-1:0]   waddr;         // RAM write pointer
    logic [MAX_DEPTH_BITS-1:0]   raddr;         // RAM read pointer

    wire                         write_en;
    wire                         read_en;
    wire  [32-1:0]               centroid_from_bram;

    // stage 1 registers (also drive the outputs towards the next layer)
    logic [NUM_PIPELINE-1:0][32-1:0] tuple_reg;
    logic                            tuple_valid_reg;
    logic                            last_dim_of_one_tuple_reg;

    // stage 2 registers (inputs of the distance processors)
    logic [32-1:0]                   centroid_from_bram_reg;
    logic [NUM_PIPELINE-1:0][32-1:0] tuple_reg2;
    logic                            tuple_valid_reg2;
    logic                            last_dim_of_one_tuple_reg2;

    // ------------------------------------------------------------------
    // Local centroid storage
    // ------------------------------------------------------------------
    // Only the words of centroid number CLUSTER_ID are captured.
    assign write_en = (cluster_cnt == CLUSTER_ID) & centroid_valid_i;
    // read_en only advances the read pointer; the RAM read port itself is
    // permanently enabled (.re is tied to 1'b1 below).
    assign read_en  = tuple_valid_i;

    dual_port_ram #(.DATA_WIDTH(32), .ADDR_WIDTH(MAX_DEPTH_BITS))
    processor_mem
    (
        .clk   (clk),
        .we    (write_en),
        .re    (1'b1),
        .raddr (raddr),
        .waddr (waddr),
        .din   (centroid_i),
        .dout  (centroid_from_bram)
    );

    // ------------------------------------------------------------------
    // Centroid counter and RAM address generation
    // ------------------------------------------------------------------
    always_ff @(posedge clk) begin : proc_addr
        rst_n_reg <= rst_n;

        if (!rst_n_reg) begin
            enable      <= 1'b0;
            cluster_cnt <= '0;
            waddr       <= '0;
            raddr       <= '0;
        end
        else begin
            enable <= (CLUSTER_ID <= num_cluster_minus_1);

            // Advance at the last word of every centroid, wrap after the last
            // word of the whole centroid set.
            if (centroid_valid_i && last_dim_of_one_centroid_i) begin
                if (last_dim_of_all_centroid_i) begin
                    cluster_cnt <= '0;
                end
                else begin
                    cluster_cnt <= cluster_cnt + 1'b1;
                end
            end

            // Write pointer restarts once the whole centroid has been stored.
            if (write_en) begin
                if (last_dim_of_one_centroid_i) begin
                    waddr <= '0;
                end
                else begin
                    waddr <= waddr + 1'b1;
                end
            end

            // Read pointer follows the tuple words and restarts per tuple.
            if (read_en) begin
                if (last_dim_of_one_tuple_i) begin
                    raddr <= '0;
                end
                else begin
                    raddr <= raddr + 1'b1;
                end
            end
        end
    end

    // ------------------------------------------------------------------
    // Stage 1: one register on both streams (no reset on these registers)
    // ------------------------------------------------------------------
    always_ff @(posedge clk) begin
        // centroid stream towards the next layer
        centroid_o                 <= centroid_i;
        centroid_valid_o           <= centroid_valid_i;
        last_dim_of_one_centroid_o <= last_dim_of_one_centroid_i;
        last_dim_of_all_centroid_o <= last_dim_of_all_centroid_i;

        // tuple stream
        tuple_reg                  <= tuple_i;
        tuple_valid_reg            <= tuple_valid_i;
        last_dim_of_one_tuple_reg  <= last_dim_of_one_tuple_i;
    end

    // the stage 1 tuple registers are what the next layer sees
    assign tuple_o                 = tuple_reg;
    assign tuple_valid_o           = tuple_valid_reg;
    assign last_dim_of_one_tuple_o = last_dim_of_one_tuple_reg;

    // ------------------------------------------------------------------
    // Stage 2: register the RAM output and delay the tuple by one more clock
    // ------------------------------------------------------------------
    // Only the valid flag is cleared by reset; the data registers simply keep
    // their value while reset is active.
    always_ff @(posedge clk) begin
        if (!rst_n_reg) begin
            tuple_valid_reg2 <= '0;
        end
        else begin
            tuple_valid_reg2           <= tuple_valid_reg;
            tuple_reg2                 <= tuple_reg;
            last_dim_of_one_tuple_reg2 <= last_dim_of_one_tuple_reg;
            centroid_from_bram_reg     <= centroid_from_bram;
        end
    end

    // ------------------------------------------------------------------
    // One distance processor per parallel tuple lane
    // ------------------------------------------------------------------
    genvar i;
    generate
        for (i = 0; i < NUM_PIPELINE; i++) begin: PARALLEL_DP
            dist_processor #(.INDEX_PROCESSOR(CLUSTER_ID)) dist_processor
            (
                .clk              (clk),
                .rst_n            (rst_n),
                .enable           (enable),
                .data_valid_i     (tuple_valid_reg2),
                .data_i           (tuple_reg2[i]),
                .data_last_dim    (last_dim_of_one_tuple_reg2),
                .centroid_i       (centroid_from_bram_reg),
                .min_dist_valid_i (min_dist_valid_i[i]),
                .min_dist_i       (min_dist_i[i]),
                .cluster_i        (cluster_i[i]),
                .min_dist_valid_o (min_dist_valid_o[i]),
                .min_dist_o       (min_dist_o[i]),
                .cluster_o        (cluster_o[i])
            );
        end
    endgenerate

endmodule // k_means_layer
+`default_nettype none +import kmeansTypes::*; + +module k_means_module +( + input wire clk, // Clock + input wire rst_n, // Asynchronous reset active low + + input wire start_operator, + input wire um_done, + + input wire [NUM_CLUSTER_BITS:0] num_cluster,// the actual number of cluster that will be used + input wire [MAX_DEPTH_BITS:0] data_dim, //input the actual dimension of the data + input wire [63:0] data_set_size, + + //interface to fetch engine + input wire [511:0] tuple_cl, + input wire tuple_cl_valid, + input wire tuple_cl_last, + output wire tuple_cl_ready, + + input wire [511:0] centroid_cl, //not in bit-weaving format + input wire centroid_cl_valid, + input wire centroid_cl_last, + output wire centroid_cl_ready, + + //update to write engine and formatter + output wire [511:0] updated_centroid, + output wire updated_centroid_valid, + output wire updated_centroid_last, + + //debug counter + output wire [7:0][31:0] agg_div_debug_cnt, + output wire [7:0][31:0] k_means_module_debug_cnt + +); + + + +reg start_operator_reg, um_done_reg, rst_n_reg, running_kmeans; +reg [MAX_DEPTH_BITS:0] data_dim_minus_1; +reg [NUM_CLUSTER_BITS:0] num_cluster_minus_1; + +always @ (posedge clk) begin + rst_n_reg <= rst_n; + + if(~rst_n_reg) begin + running_kmeans <= 1'b0; + start_operator_reg <= 1'b0; + um_done_reg <= 1'b0; + end + else begin + start_operator_reg <= start_operator; + um_done_reg <= um_done; + + data_dim_minus_1 <= '0; + num_cluster_minus_1 <= '0; + if(running_kmeans) begin + data_dim_minus_1 <= data_dim - 1; + num_cluster_minus_1 <= num_cluster -1; + end + + if(start_operator_reg) begin + running_kmeans <= 1'b1; + end + else if(um_done_reg) begin + running_kmeans <= 1'b0; + end + end +end + +//--------------------------split the centroid cachelines-----------------// +//--------------------------re-group the tuple cachelines------------------// +wire [32-1:0] centroid; +wire centroid_valid; +wire last_dim_of_all_centroid; +wire last_dim_of_one_centroid; + +wire 
[NUM_PIPELINE-1:0][32-1:0] tuple; +wire tuple_valid; +wire last_dim_of_one_tuple; + +wire [511:0] formatter_centroid_input; +wire formatter_centroid_input_valid; + +reg [511:0] update_cl; +reg update_cl_valid; +reg update_cl_last; + +//multiplex receiving centroid from initial ones and updated ones +assign formatter_centroid_input = centroid_cl_valid ? centroid_cl : update_cl; +assign formatter_centroid_input_valid = centroid_cl_valid ? 1'b1 : update_cl_valid; + +Formatter Formatter +( + .clk (clk), + .rst_n (rst_n), + .num_cluster (num_cluster), + .data_dim (data_dim), + .tuple_cl (tuple_cl), + .tuple_cl_valid (tuple_cl_valid), + .centroid_cl (formatter_centroid_input), + .centroid_cl_valid (formatter_centroid_input_valid), + .centroid_cl_ready (centroid_cl_ready), + .centroid (centroid), + .centroid_valid(centroid_valid), + .last_dim_of_all_centroid(last_dim_of_all_centroid), + .last_dim_of_one_centroid(last_dim_of_one_centroid), + .tuple (tuple), + .tuple_valid (tuple_valid), + .last_dim_of_one_tuple (last_dim_of_one_tuple), + .formatter_debug_cnt () + ); + + + +//request mem data when norm of all centroids have been calculated +reg request_mem_data; +always @ (posedge clk) begin + if(~rst_n_reg) begin + request_mem_data <= 1'b0; + end + else begin + if(last_dim_of_one_centroid) begin + request_mem_data <= 1'b1; + end + else if(tuple_cl_valid & tuple_cl_last) begin + request_mem_data <= 1'b0; + end + end +end + +assign tuple_cl_ready = request_mem_data; +//----------------------Assignment calculation---------------------------------// +wire [NUM_CLUSTER:0][32-1:0] centroid_pip; +wire [NUM_CLUSTER:0] centroid_valid_pip; +wire [NUM_CLUSTER:0] last_dim_of_one_centroid_pip; +wire [NUM_CLUSTER:0] last_dim_of_all_centroid_pip; + + +wire [NUM_PIPELINE-1:0][32-1:0] tuple_pip [NUM_CLUSTER:0]; //not sure the array type instantiation is valid +wire [NUM_CLUSTER:0] tuple_valid_pip; +wire [NUM_CLUSTER:0] last_dim_of_one_tuple_pip; + + +wire [63:0] 
min_dist_pip[NUM_CLUSTER:0][NUM_PIPELINE-1:0]; +wire [NUM_CLUSTER:0][NUM_PIPELINE-1:0] min_dist_valid_pip; +wire [NUM_PIPELINE-1:0][NUM_CLUSTER_BITS:0] assign_cluster_pip[NUM_CLUSTER:0]; + + +assign centroid_pip[0] = centroid; +assign centroid_valid_pip[0] = centroid_valid; +assign last_dim_of_one_centroid_pip[0] = last_dim_of_one_centroid; +assign last_dim_of_all_centroid_pip[0] = last_dim_of_all_centroid; + +assign tuple_pip[0] = tuple; +assign tuple_valid_pip[0] = tuple_valid; +assign last_dim_of_one_tuple_pip[0] = last_dim_of_one_tuple; + +generate + for (genvar m = 0; m < NUM_PIPELINE; m++) begin: min_dist_pip_assign + assign min_dist_pip[0][m] = 48'h7fffffffffff; + assign min_dist_valid_pip[0][m] = 1'b1; + end +endgenerate +assign assign_cluster_pip[0] = '0; + +genvar n; +generate + for ( n = 0; n < NUM_CLUSTER; n++) begin: k_means_layer + k_means_layer #(.CLUSTER_ID(n)) k_means_layer + ( + .clk (clk), + .rst_n (rst_n_reg), + .data_dim_minus_1 (data_dim_minus_1), + .num_cluster_minus_1 (num_cluster_minus_1), + + .centroid_i (centroid_pip[n]), + .centroid_valid_i (centroid_valid_pip[n]), + .last_dim_of_one_centroid_i (last_dim_of_one_centroid_pip[n]), + .last_dim_of_all_centroid_i (last_dim_of_all_centroid_pip[n]), + + + .centroid_o (centroid_pip[n+1]), + .centroid_valid_o (centroid_valid_pip[n+1]), + .last_dim_of_one_centroid_o (last_dim_of_one_centroid_pip[n+1]), + .last_dim_of_all_centroid_o (last_dim_of_all_centroid_pip[n+1]), + + .tuple_i (tuple_pip[n]), + .tuple_valid_i (tuple_valid_pip[n]), + .last_dim_of_one_tuple_i (last_dim_of_one_tuple_pip[n]), + + .tuple_o (tuple_pip[n+1]), + .tuple_valid_o (tuple_valid_pip[n+1]), + .last_dim_of_one_tuple_o (last_dim_of_one_tuple_pip[n+1]), + + .min_dist_valid_i (min_dist_valid_pip[n]), + .min_dist_i (min_dist_pip[n]), + .cluster_i (assign_cluster_pip[n]), + + .min_dist_valid_o (min_dist_valid_pip[n+1]), + .min_dist_o (min_dist_pip[n+1]), + .cluster_o (assign_cluster_pip[n+1]) + ); + end +endgenerate + +//check 
termination criteria +reg [47:0] expect_tuple_first_pip; +reg pipeline_finish; +reg [47:0] min_dist_valid_pip_cnt; + +always @ (posedge clk) begin + if(~rst_n_reg) begin + expect_tuple_first_pip <= '0; + pipeline_finish <= 1'b0; + min_dist_valid_pip_cnt <= '0; + end + else begin + expect_tuple_first_pip <= '0; + pipeline_finish <= 1'b0; + if(running_kmeans) begin + expect_tuple_first_pip <= (data_set_size + NUM_PIPELINE -1) >> NUM_PIPELINE_BITS; + if(min_dist_valid_pip[NUM_CLUSTER][0]) begin + min_dist_valid_pip_cnt <= min_dist_valid_pip_cnt + 1'b1; + if(min_dist_valid_pip_cnt == expect_tuple_first_pip-1) begin + min_dist_valid_pip_cnt <= '0; + pipeline_finish <= 1'b1; + end + end + end + end +end + + + +//-----------------------Accumulation-----------------------------------// +wire [NUM_PIPELINE-1:0] [63:0] agg_data_pip; +wire [NUM_PIPELINE-1:0] agg_valid_pip; +wire [NUM_PIPELINE-1:0] accu_finish; +reg first_pipe_finish, agg_ready; + + +genvar k; +generate + for (k = 0; k < NUM_PIPELINE; k++) begin: Accumulation + k_means_accumulation #(.PIPELINE_INDEX(k)) k_means_accumulation + ( + .clk (clk), + .rst_n (rst_n_reg), + .data_valid_accu_i (tuple_valid_pip[NUM_CLUSTER]), + .data_accu_i (tuple_pip[NUM_CLUSTER][k]), + .min_dist_accu_valid_i(min_dist_valid_pip[NUM_CLUSTER][k]), + .min_dist_accu_i (min_dist_pip[NUM_CLUSTER][k]), + .cluster_accu_i (assign_cluster_pip[NUM_CLUSTER][k]), + .terminate_accu_i (pipeline_finish), + .data_dim_accu_i (data_dim), + .num_cluster_accu_i (num_cluster), + .agg_ready_i (agg_ready), + .accu_finish_o (accu_finish[k]), + .agg_data_o (agg_data_pip[k]), + .agg_valid_o (agg_valid_pip[k]) + + ); + end +endgenerate + +// check if all pipelines finish accumulation + +always @ (posedge clk) begin + if(~rst_n) begin + first_pipe_finish <= 1'b0; + end + else begin + first_pipe_finish <= accu_finish[0]; + agg_ready <= first_pipe_finish; + end +end + +//-------------------------adder tree-----------------------// +wire [63:0] agg_div_din, 
adder_tree_dout; +wire agg_div_valid, adder_tree_dout_valid; + +//adder tree currently doesn't work for single pipeline +kmeans_adder_tree adder_tree +( + .clk (clk), + .rst_n (rst_n), + .v_input (agg_data_pip), + .v_input_valid (agg_valid_pip[0]), + .v_output (adder_tree_dout), + .v_output_valid(adder_tree_dout_valid)); //all the kmeans pipeline will send agg data synchoronously + +assign agg_div_din = (NUM_PIPELINE_BITS==0) ? agg_data_pip : adder_tree_dout; +assign agg_div_valid = (NUM_PIPELINE_BITS==0) ? (agg_valid_pip[0]) : adder_tree_dout_valid; + +//----------------------aggregation and division---------------// +wire [31:0] update; +wire update_last, update_valid, update_last_dim; + +agg_div agg_div +( + .clk (clk), + .rst_n (rst_n), + .start_operator (start_operator_reg), + .data_dim (data_dim), + .num_cluster (num_cluster), + .agg_data (agg_div_din), + .agg_valid (agg_div_valid), + .update (update), + .update_valid (update_valid), + .update_last (update_last), + .update_last_dim (update_last_dim), + .agg_div_debug_cnt (agg_div_debug_cnt) + ); + +//------------------------re-group 32bit update to 512bit cl-------------// + +reg [7:0] re_group_cnt; + +always @ (posedge clk) begin + if(~rst_n_reg) begin + re_group_cnt <= '0; + end + else begin + update_cl_valid <= 1'b0; + update_cl_last <= 1'b0; + + if(update_valid) begin + update_cl[re_group_cnt*32 +: 32] <= update; + re_group_cnt <= re_group_cnt + 1'b1; + if(re_group_cnt == 15 ) begin + re_group_cnt <= '0; + update_cl_valid <= 1'b1; + end + if(update_last) begin + re_group_cnt <= '0; + update_cl_valid <= 1'b1; + update_cl_last <= 1'b1; + end + end + end +end + +assign updated_centroid = update_cl; +assign updated_centroid_last = update_cl_last; +assign updated_centroid_valid = update_cl_valid; + + + + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +//---------------------------------log file print--------------------------------------------------// 
+//////////////////////////////////////////////////////////////////////////////////////////////////// +`define LOG_NULL +`ifdef LOG_FILE + int file; + reg file_finished; + initial begin + file = $fopen("/home/harpdev/doppiodb/fpga/operators/k_means_v2/sim_log/parallel_words.txt","w"); + if(file) + $display("parallel_words file open successfully\n"); + else + $display("Failed to open parallel_words file\n"); + end + + always @ (posedge clk) begin + if(~rst_n_reg) begin + file_finished <= 1'b0; + end + else begin + if(tuple_cl_valid) begin + $fwrite(file,"16 dimension\n"); + for(integer j=0; j a) begin + sub = x - a; + end + else begin + sub = a - x; + end +end + + +//takes 2 cycle +// logic_dsp_unsigned_27x27_atom mult_and_accu( +// .clk_i (clk), +// .clr (~rst_n), +// .ax (sub[26:0]), +// .ay (sub[26:0]), +// .accu_en(a_valid_reg2 & (~a_last_dim_reg2) ), +// .resulta(dist_euclidean) +// ); + +reg [53:0] mult_result; + +always @ (posedge clk) begin + if (~rst_n) begin + mult_result <= '0; + end + else begin + if (a_valid) begin + mult_result <= sub[26:0] * sub[26:0]; + end + else begin + mult_result <= '0; + end + end +end + + +always @ (posedge clk) begin + if (~rst_n) begin + dist_euclidean <= '0; + end + else begin + if (a_valid_reg2 & (~a_last_dim_reg2)) begin + dist_euclidean <= mult_result + dist_euclidean; + end + else begin + dist_euclidean <= mult_result; + end + end +end + + +always @ (posedge clk) begin + a_valid_reg1 <= a_valid; + a_valid_reg2 <= a_valid_reg1; + + a_last_dim_reg1 <= a_last_dim; + a_last_dim_reg2 <= a_last_dim_reg1; +end + +//Output of dot product module. 
+ assign result = {10'b0, dist_euclidean}; + assign result_valid = a_valid_reg2 & a_last_dim_reg2; + +endmodule \ No newline at end of file diff --git a/hw/hdl/operators/kmeans/quick_fifo.v b/hw/hdl/operators/kmeans/quick_fifo.v new file mode 100644 index 00000000..7d635fec --- /dev/null +++ b/hw/hdl/operators/kmeans/quick_fifo.v @@ -0,0 +1,153 @@ +// Copyright (c) 2013-2015, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
// ============================================================================
// quick_fifo
// ----------------------------------------------------------------------------
// Synchronous FIFO built on spl_sdp_mem_t with a registered output.
//   * we/din  : write port. A write is accepted only while `full` is low
//               (wemem = we & ~full); writes offered while full are dropped.
//   * re      : read/pop request from the consumer.
//   * valid   : high when `dout` carries a word.
//   * count / empty / full / almostfull : occupancy flags, all registered.
//               `count` is FIFO_DEPTH_BITS wide; `full` is raised when a write
//               without a read arrives at count == 2**FIFO_DEPTH_BITS-1.
//   * reset_n : active-low reset, sampled on the rising clock edge.
// The code of this module is unchanged; only comments were added.
// ============================================================================
module quick_fifo #(
    parameter FIFO_WIDTH = 32,
    parameter FIFO_DEPTH_BITS = 8,
    parameter FIFO_ALMOSTFULL_THRESHOLD = 2**FIFO_DEPTH_BITS - 4
) (
    input wire clk,
    input wire reset_n,

    input wire we, // input write enable
    input wire [FIFO_WIDTH - 1:0] din, // input write data with configurable width

    input wire re, // input read enable
    output reg valid, // dout valid
    output reg [FIFO_WIDTH - 1:0] dout, // output read data with configurable width

    output reg [FIFO_DEPTH_BITS - 1:0] count, // output FIFOcount
    output reg empty, // output FIFO empty
    output reg full, // output FIFO full
    output reg almostfull // output configurable programmable full/ almost full
);

    // read / write pointers into the memory
    reg [FIFO_DEPTH_BITS - 1:0] rp = 0;
    reg [FIFO_DEPTH_BITS - 1:0] wp = 0;

    // occupancy of the memory itself (words not yet read out of the RAM)
    reg [FIFO_DEPTH_BITS - 1:0] mem_count = 0; // output FIFOcount
    reg mem_empty = 1'b1;

    // valid_t1: the RAM output (dout_mem) holds a word
    // valid_t2: the output register (dout) holds a word
    reg valid_t1 = 0, valid_t2 = 0;
    // valid0 / valid use the same reset values and the same update equations
    // as valid_t1 / valid_t2, so they carry the same values.
    reg valid0 = 0;

    wire remem;
    wire wemem;
    wire remem_valid;

    wire [FIFO_WIDTH-1:0] dout_mem;

    // The RAM is read whenever the two output stages are not both occupied,
    // or when both are occupied and the consumer pops a word.
    assign remem = (re & valid_t1 & valid_t2) | ~(valid_t1 & valid_t2);
    // Writes are dropped while the FIFO reports full.
    assign wemem = we & ~full;

    // A RAM read only removes a word if the RAM actually holds one.
    assign remem_valid = remem & ~mem_empty;


    spl_sdp_mem_t #(.DATA_WIDTH(FIFO_WIDTH),
                  .ADDR_WIDTH(FIFO_DEPTH_BITS)) spl_fifo_mem(
        .clk (clk),
        .we (wemem),
        .re (remem),
        .raddr (rp),
        .waddr (wp),
        .din (din),
        .dout (dout_mem)
    );

    // data
    // dout keeps its word while it is valid and not popped; otherwise it
    // loads the RAM output. dout has no reset.
    always @(posedge clk) begin
        dout <= (valid_t2)? ((re)? dout_mem : dout) : dout_mem;
    end

    // valids, flags
    always @(posedge clk) begin
        if (~reset_n) begin
            empty <= 1'b1;
            full <= 1'b0;
            almostfull <= 1'b0;
            count <= 0; //32'b0;
            rp <= 0;
            wp <= 0;
            valid_t2 <= 1'b0;
            valid_t1 <= 1'b0;
            mem_empty <= 1'b1;
            mem_count <= 'b0;

            //dout <= 0;
            valid <= 0;
            valid0 <= 0;
        end

        else begin

            valid <= (valid)? ((re)? valid0 : 1'b1) : valid0;
            valid0 <= (remem)? ~mem_empty : valid0;

            valid_t2 <= (valid_t2)? ((re)? valid_t1 : 1'b1) : valid_t1;

            valid_t1 <= (remem)? ~mem_empty : valid_t1;
            rp <= (remem & ~mem_empty)? (rp + 1'b1) : rp;
            wp <= (wemem)? (wp + 1'b1) : wp;

            // mem_empty
            if (we) mem_empty <= 1'b0;
            else if(remem & (mem_count == 1'b1)) mem_empty <= 1'b1;

            // mem_count
            if( wemem & ~remem_valid) mem_count <= mem_count + 1'b1;
            else if (~wemem & remem_valid) mem_count <= mem_count - 1'b1;


            // empty
            if (we) empty <= 1'b0;
            else if((re & valid_t2 & ~valid_t1) & (count == 1'b1)) empty <= 1'b1;

            // count
            // (the condition reduces to wemem & ~(re & valid_t2))
            if( wemem & (~(re & valid_t2) | ~re) ) count <= count + 1'b1;
            else if (~wemem & (re & valid_t2)) count <= count - 1'b1;

            // flags raised by a write without a simultaneous read
            if (we & ~re) begin

                if (count == (2**FIFO_DEPTH_BITS-1))
                    full <= 1'b1;

                if (count == (FIFO_ALMOSTFULL_THRESHOLD-1))
                    almostfull <= 1'b1;
            end
            // flags released by a read without an accepted write
            if ((~we | full) & re) begin //
                full <= 1'b0;

                if (count == FIFO_ALMOSTFULL_THRESHOLD)
                    almostfull <= 1'b0;
            end
        end
    end

endmodule


import kmeansTypes::*;

// ============================================================================
// rd_addr_gen
// ----------------------------------------------------------------------------
// Generates the read requests of the k-means operator.
//   IDLE       : waits for start_operator.
//   INITIALIZE : requests rp.num_cl_centroid consecutive lines starting at
//                rp.addr_center.
//   DATA_FETCH : requests rp.num_cl_tuple consecutive lines starting at
//                rp.addr_data, and repeats that pass rp.num_iteration times
//                before returning to IDLE.
// A request is generated in a cycle where um_tx_rd_ready is high and
// potential_overflow is low. Address and valid are registered, so the request
// appears on the outputs one clock later and is qualified with um_tx_rd_ready
// again.
//
// Fix: addr_offset_centroid is now cleared when the last centroid line has
// been requested (addr_offset_tuple was already cleared at the end of every
// pass). Previously it was only cleared by reset, so a second start_operator
// without a reset fetched the centroids from a stale offset.
// ============================================================================
module rd_addr_gen (
    input  wire         clk,    // Clock
    input  wire         rst_n,  // Active-low reset, sampled on the rising clock edge

    input  wire         start_operator,

    output reg  [57:0]  um_tx_rd_addr,
    output reg  [7:0]   um_tx_rd_tag,   // always driven to zero
    output reg          um_tx_rd_valid,
    input  wire         um_tx_rd_ready,

    input  wire         potential_overflow, // back-pressure: no new request while high

    input  RuntimeParam rp
);

    reg is_initialize, is_read_tuple;

    // number of requests issued in the current phase / pass
    reg [31:0] rd_cnt;
    reg rd_cnt_en, rd_cnt_clr;

    // number of completed passes over the tuple data
    reg [15:0] rd_iteration_cnt;
    reg rd_iteration_cnt_en, rd_iteration_cnt_clr;

    reg [63:0] addr_offset_tuple, addr_offset_centroid;

    reg [57:0] um_tx_rd_addr_reg;
    reg um_tx_rd_valid_reg;

    typedef enum logic[1:0] {IDLE, INITIALIZE, DATA_FETCH} state;
    state currentState, nextState;

    // ------------------------------------------------------------------
    // Next-state and counter-control logic
    // ------------------------------------------------------------------
    always_comb begin : proc_fsm
        //default
        rd_cnt_en = 0;
        rd_cnt_clr = 0;

        is_initialize = 0;
        is_read_tuple = 0;

        rd_iteration_cnt_en = 0;
        rd_iteration_cnt_clr = 0;

        nextState = currentState;

        case (currentState)

            IDLE: begin
                if(start_operator) begin
                    nextState = INITIALIZE;
                end
            end

            //sequentially read the centroid cachelines from the memory
            INITIALIZE: begin
                is_initialize = 1;
                if(um_tx_rd_ready & ~potential_overflow) begin
                    rd_cnt_en = 1;
                    if(rd_cnt == (rp.num_cl_centroid-1)) begin
                        rd_cnt_clr = 1;
                        nextState = DATA_FETCH;
                    end
                end
            end

            //sequentially read the tuple cachelines, once per iteration
            DATA_FETCH: begin
                is_read_tuple = 1;
                if(um_tx_rd_ready & ~potential_overflow) begin
                    rd_cnt_en = 1;
                    if(rd_cnt == rp.num_cl_tuple-1) begin
                        rd_cnt_clr = 1;
                        rd_iteration_cnt_en = 1;
                        if(rd_iteration_cnt == rp.num_iteration-1) begin
                            rd_iteration_cnt_clr = 1;
                            nextState = IDLE;
                        end
                    end
                end
            end

            // The fourth encoding has no case item; nextState then keeps
            // its default value (currentState).
        endcase
    end

    // ------------------------------------------------------------------
    // Registered output path
    // ------------------------------------------------------------------
    // The registers only move while um_tx_rd_ready is high, so a pending
    // request is held while the receiver is not ready.
    always @ (posedge clk) begin
        if(~rst_n) begin
            um_tx_rd_valid_reg <= '0;
        end
        else begin
            if(um_tx_rd_ready) begin
                um_tx_rd_valid_reg <= (is_initialize | is_read_tuple)& !potential_overflow ;
                um_tx_rd_addr_reg <= is_initialize ? (rp.addr_center + addr_offset_centroid): (is_read_tuple ? (rp.addr_data + addr_offset_tuple) : '0);
            end
        end

        um_tx_rd_tag <= '0;
    end
    assign um_tx_rd_valid = um_tx_rd_valid_reg & um_tx_rd_ready;
    assign um_tx_rd_addr = um_tx_rd_addr_reg;

    // ------------------------------------------------------------------
    // State, counters and address offsets
    // ------------------------------------------------------------------
    always @ (posedge clk) begin
        if(~rst_n ) begin
            addr_offset_tuple <= '0;
            addr_offset_centroid <= '0;
            rd_cnt <= '0;
            rd_iteration_cnt <= '0;
            currentState <= IDLE;
        end
        else begin
            currentState <= nextState;

            rd_cnt <= rd_cnt_clr?'0:(rd_cnt_en? (rd_cnt+1) : rd_cnt);
            rd_iteration_cnt <= rd_iteration_cnt_clr? '0: (rd_iteration_cnt_en? (rd_iteration_cnt+1): rd_iteration_cnt);

            //centroid phase: next cacheline, restart after the last one so
            //that the next start_operator begins at rp.addr_center again
            if(is_initialize & um_tx_rd_ready & ~potential_overflow) begin
                if(rd_cnt == (rp.num_cl_centroid-1)) begin
                    addr_offset_centroid <= '0;
                end
                else begin
                    addr_offset_centroid <= addr_offset_centroid + 1'b1;
                end
            end

            //tuple phase: next cacheline, restart at the end of every pass
            if(is_read_tuple & um_tx_rd_ready & ~potential_overflow) begin
                if(rd_cnt == rp.num_cl_tuple-1) begin
                    addr_offset_tuple <= '0;
                end
                else begin
                    addr_offset_tuple <= addr_offset_tuple + 1'b1;
                end
            end

        end

    end

endmodule


import kmeansTypes::*;

// ============================================================================
// runtimeParam_Manager
// ----------------------------------------------------------------------------
// Latches the operator parameters out of the 512-bit um_params word while
// start_um is high, and raises start_operator for one clock after start_um
// has gone low again (flag follows start_um by one clock, start_operator is
// registered from ~start_um & flag).
// The three address fields take bits [63:6] of their 64-bit slots, i.e. the
// six low bits of each address are dropped.
//
// Fix (simulation-only LOG_FILE block): the log file used to be closed on
// every clock after the dump, and could be written again after a later reset
// although it was already closed. It is now closed exactly once and never
// written afterwards.
// ============================================================================
module runtimeParam_Manager (
    input  wire         clk,      // Clock
    input  wire         rst_n,    // Active-low reset; registered into rst_n_reg before use
    input  wire         start_um,
    input  wire [511:0] um_params,

    output RuntimeParam runtimeParam,
    output reg          start_operator
);

    reg flag;
    reg rst_n_reg;

    always_ff @(posedge clk) begin
        rst_n_reg <= rst_n;
        if(~rst_n_reg) begin
            flag <= '0;
            start_operator <= '0;
            runtimeParam <= '0;
        end
        else begin
            // flag is start_um delayed by one clock
            if(start_um & ~flag) begin
                flag <= 1'b1;
            end
            else if(flag & ~start_um) begin
                flag <= 1'b0;
            end

            // falling edge of start_um
            start_operator <= ~start_um & flag;

            // trailing numbers: width of the slot inside um_params
            if (start_um) begin
                runtimeParam.addr_center     <= um_params[63:6];    //64
                runtimeParam.addr_data       <= um_params[127:70];  //64
                runtimeParam.addr_result     <= um_params[191:134]; //64
                runtimeParam.data_set_size   <= um_params[255:192]; //64
                runtimeParam.num_cl_centroid <= um_params[287:256]; //32
                runtimeParam.num_cl_tuple    <= um_params[319:288]; //32
                runtimeParam.num_cluster     <= um_params[351:320]; //32
                runtimeParam.data_dim        <= um_params[383:352]; //32
                runtimeParam.num_iteration   <= um_params[399:384]; //16
                // runtimeParam.precision    <= um_params[407:400]; //8
            end

        end

    end


//////////////////////////////////////////////////////////////////////////////////////////////////////
//---------------------------------log file print--------------------------------------------------//
////////////////////////////////////////////////////////////////////////////////////////////////////
// Simulation aid; only compiled when LOG_FILE is defined.
`define LOG_NULL
`ifdef LOG_FILE
    int file;
    reg file_finished;
    reg file_closed = 1'b0; // set once the descriptor has been closed; not cleared by reset
    initial begin
        file = $fopen("/home/harpdev/doppiodb/fpga/operators/k_means_v2/sim_log/runtimeParam.txt","w");
        if(file)
            $display("RuntimeParam file open successfully\n");
        else
            $display("Failed to open runtimeParam file\n");
    end

    always @ (posedge clk) begin
        if(~rst_n) begin

            file_finished <= 1'b0;
        end
        else begin

            // dump the parameters once, and never into a closed file
            if(start_um & ~file_finished & ~file_closed) begin
                $fwrite(file,"addr_center:%d\n", um_params[63:6]);
                $fwrite(file,"addr_data:%d\n", um_params[127:70]);
                $fwrite(file,"addr_result:%d\n", um_params[191:134]);
                $fwrite(file,"data_set_size:%d\n", um_params[255:192]);
                $fwrite(file,"num_cl_centroid:%d\n", um_params[287:256]);
                $fwrite(file,"num_cl_tuple:%d\n", um_params[319:288]);
                $fwrite(file,"num_cluster:%d\n", um_params[351:320]);
                $fwrite(file,"data_dim:%d\n", um_params[383:352]);
                $fwrite(file,"num_iteration:%d\n", um_params[399:384]);
                // $fwrite(file,"precision:%d\n", um_params[407:400]);

                file_finished <= 1'b1;
            end
            // close exactly once
            else if(file_finished & ~file_closed) begin
                $fclose(file);
                file_closed <= 1'b1;
            end
        end
    end
`endif
////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////

endmodule
// spl_sdp_mem
// -----------
// Simple dual-port memory: one synchronous write port, one synchronous read
// port, 2**ADDR_WIDTH words of DATA_WIDTH bits.
//   we / waddr / din : din is stored at waddr on the rising clock edge when
//                      we is high.
//   raddr / dout     : dout is loaded from mem[raddr] on EVERY rising clock
//                      edge; `re` does not gate the read.
//   re / valid       : valid is simply `re` delayed by one clock.
// A read of the address that is written in the same clock returns the old
// word in simulation (non-blocking assignments).
//
// The never-driven, never-read register `raddr_d1` and its commented-out
// assignment were removed.
module spl_sdp_mem #(
    parameter DATA_WIDTH = 32,
    parameter ADDR_WIDTH = 8
) (
    input  wire                   clk,
    input  wire                   we,
    input  wire                   re,
    input  wire [ADDR_WIDTH-1:0]  raddr,
    input  wire [ADDR_WIDTH-1:0]  waddr,
    input  wire [DATA_WIDTH-1:0]  din,
    output reg  [DATA_WIDTH-1:0]  dout,
    output reg                    valid
);

// Vendor-specific synthesis attributes on the storage array.
`ifdef VENDOR_XILINX
    (* ram_extract = "yes", ram_style = "block" *)
    reg [DATA_WIDTH-1:0] mem[0:2**ADDR_WIDTH-1];
`else
(* ramstyle = "no_rw_check" *) reg [DATA_WIDTH-1:0] mem[0:2**ADDR_WIDTH-1];
`endif

    always @(posedge clk) begin

        if (we)
            mem[waddr] <= din;

        // unconditional read; `re` only produces the valid flag
        dout  <= mem[raddr];
        valid <= re;
    end

endmodule
retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+ + +module spl_sdp_mem_t #( + parameter DATA_WIDTH = 32, + parameter ADDR_WIDTH = 8 +) ( + input wire clk, + input wire we, + input wire re, + input wire [ADDR_WIDTH-1:0] raddr, + input wire [ADDR_WIDTH-1:0] waddr, + input wire [DATA_WIDTH-1:0] din, + output reg [DATA_WIDTH-1:0] dout +); + + +`ifdef VENDOR_XILINX + (* ram_extract = "yes", ram_style = "block" *) + reg [DATA_WIDTH-1:0] mem[0:2**ADDR_WIDTH-1]; +`else +(* ramstyle = "no_rw_check" *) reg [DATA_WIDTH-1:0] mem[0:2**ADDR_WIDTH-1]; +`endif + + + always @(posedge clk) begin + + if (we) + mem[waddr] <= din; + + if (re) + dout <= mem[raddr]; + end + + + +endmodule + diff --git a/hw/hdl/operators/kmeans/wr_engine.sv b/hw/hdl/operators/kmeans/wr_engine.sv new file mode 100755 index 00000000..86d41aad --- /dev/null +++ b/hw/hdl/operators/kmeans/wr_engine.sv @@ -0,0 +1,216 @@ +import kmeansTypes::*; + +module wr_engine ( + input wire clk, // Clock + input wire rst_n, // Asynchronous reset active low + + input RuntimeParam rp, + //memory interface + input wire start_operator, + + output reg [57:0] um_tx_wr_addr, + output reg [7:0] um_tx_wr_tag, + output reg um_tx_wr_valid, + output reg [511:0] um_tx_data, + input wire um_tx_wr_ready, + + output wire um_done, + + //kmeans module update interface + input wire [512-1:0] update, + input wire update_valid, + input wire update_last, + + //debug counter + output wire [31:0] wr_engine_debug_cnt + +); + + +/////////////////////////////---To avoid timeing--////////////////////////////////// +reg [511:0] update_reg, update_reg2; +reg update_valid_reg, update_valid_reg2; +reg update_last_reg, update_last_reg2; +reg rst_n_reg; +reg start_operator_reg1,start_operator_reg2; + +always @ (posedge clk) begin + rst_n_reg <= rst_n; + start_operator_reg1 <= start_operator; + start_operator_reg2 <= start_operator_reg1; + if(~rst_n_reg) begin + update_valid_reg <= '0; + end + else begin + //if(wr_engine_ready) begin + update_reg <= update; + update_reg2 <= update_reg; + + 
update_valid_reg <= update_valid; + update_valid_reg2 <= update_valid_reg; + + update_last_reg <= update_last; + update_last_reg2 <= update_last_reg; + + //end + end +end + +//////////////////////////////////////////////////////////////////////////////////////// + + +wire wr_fifo_valid; +wire [512 : 0] wr_fifo_dout; +wire wr_fifo_re; +wire wr_fifo_full; + +wire [511:0] wr_data; +wire wr_last; +reg [57:0] wr_addr_offset; +reg [31:0] iteration_cnt; +reg running_kmeans; +reg done; + +quick_fifo #(.FIFO_WIDTH(512+1), + .FIFO_DEPTH_BITS(BUFFER_DEPTH_BITS-3)) + wr_fifo + ( + .clk (clk), + .reset_n (rst_n_reg), + .we (~wr_fifo_full & update_valid_reg2), + .din ({update_last_reg2,update_reg2}), + .re (wr_fifo_re), + .valid (wr_fifo_valid), + .dout (wr_fifo_dout), + .count (), + .empty (), + .full (wr_fifo_full), + .almostfull() ); + +assign wr_data = wr_fifo_dout[511:0]; +assign wr_last = wr_fifo_dout[512]; +assign wr_fifo_re = wr_fifo_valid & um_tx_wr_ready; + + +always @ (posedge clk) begin + if(~rst_n_reg) begin + um_tx_wr_valid <= 1'b0; + end + else begin + if(um_tx_wr_ready) begin + um_tx_data <= wr_data; + um_tx_wr_valid <= wr_fifo_valid; + um_tx_wr_addr <= rp.addr_result + wr_addr_offset; + end + um_tx_wr_tag <= '0; + end +end + + + +assign um_done = done; + +always @ (posedge clk) begin + if(~rst_n_reg) begin + wr_addr_offset <= '0; + iteration_cnt <= '0; + running_kmeans <= 0; + done <= '0; + end + else begin + if(um_done) begin + wr_addr_offset <= '0; + end + else if(wr_fifo_valid & um_tx_wr_ready) begin + wr_addr_offset <= wr_addr_offset + 1'b1; + end + + if(um_done) begin + iteration_cnt <= '0; + end + else if(wr_last & wr_fifo_valid & um_tx_wr_ready) begin + iteration_cnt <= iteration_cnt + 1'b1; + end + + if(start_operator_reg2) begin + running_kmeans <= 1'b1; + end + else if(um_done) begin + running_kmeans <= 1'b0; + end + + //set the done signal, um_done is set one cycle after the last cl is sent + if(done) begin + done <= 1'b0; + end + else 
if(running_kmeans & (iteration_cnt == rp.num_iteration-1) & wr_last & um_tx_wr_ready & wr_fifo_valid ) begin + done <= 1'b1; + end + end +end + +//debug counter +reg [31:0] output_cl_cnt; +always @ (posedge clk) begin + if(start_operator_reg2) begin + output_cl_cnt <= '0; + end + else begin + if(um_tx_wr_ready & um_tx_wr_valid) begin + output_cl_cnt <= output_cl_cnt + 1'b1; + end + end +end + +assign wr_engine_debug_cnt = output_cl_cnt; + +`define LOG_NULL +////////////////////////////////////////////////////////////////////////////////////////////////////// +//---------------------------------log file print--------------------------------------------------// +//////////////////////////////////////////////////////////////////////////////////////////////////// +`ifdef LOG_FILE + int file; + reg file_finished; + initial begin + file = $fopen("/home/harpdev/doppiodb/fpga/operators/k_means_v2/sim_log/wr_engine.txt","w"); + + if(file) begin + $display("wr_engine file open successfully\n"); + $display("output to divider",); + end + else + $display("Failed to open wr_engine file\n"); + end + + always @ (posedge clk) begin + if(~rst_n_reg) begin + + end + else begin + if(um_tx_wr_ready & um_tx_wr_valid) begin + $fwrite(file,"%d ", um_tx_data[31:0]); + $fwrite(file,"%d ", um_tx_data[63:32]); + $fwrite(file,"%d ", um_tx_data[95:64]); + $fwrite(file,"%d ", um_tx_data[127:96]); + $fwrite(file,"%d ", um_tx_data[159:128]); + $fwrite(file,"%d ", um_tx_data[191:160]); + $fwrite(file,"%d ", um_tx_data[223:192]); + $fwrite(file,"%d ", um_tx_data[255:224]); + $fwrite(file,"%d ", um_tx_data[287:256]); + $fwrite(file,"%d ", um_tx_data[319:288]); + $fwrite(file,"%d ", um_tx_data[351:320]); + $fwrite(file,"%d ", um_tx_data[383:352]); + $fwrite(file,"%d ", um_tx_data[415:384]); + $fwrite(file,"%d ", um_tx_data[447:416]); + $fwrite(file,"%d ", um_tx_data[479:448]); + $fwrite(file,"%d ", um_tx_data[511:480]); + + $fwrite(file,"\n\n\n"); + end + end + end + `endif 
+////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+endmodule
\ No newline at end of file
diff --git a/hw/hdl/operators/lpn/fk_ctrl.vhd b/hw/hdl/operators/lpn/fk_ctrl.vhd
new file mode 100644
index 00000000..a8b5bbb3
--- /dev/null
+++ b/hw/hdl/operators/lpn/fk_ctrl.vhd
@@ -0,0 +1,352 @@
+--*************************************************************************--
+--! @file : fk_ctrl.vhd
+--! @project : Firekite, Dual port RAM implementation
+--!
+--! Controller unit. avs_mode selects: "00" base key load, "01" init vector load, "10" operation; "11" is ignored.
+--!
+--! @date : 10.10.2017.
+--*************************************************************************--
+
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+entity fk_ctrl is
+    generic(
+        NBITS : integer := 4096; -- Length of the basis vector in bits
+        DBITS : integer := 2816 -- Data set length in bits
+    );
+    port(
+        clk : in std_logic;
+        reset_n : in std_logic;
+        -- Control DP
+        dp_key_ld_init : out std_logic;
+        dp_key_ld_v_init : out std_logic;
+        dp_key_ld_v : out std_logic;
+        dp_key_start : out std_logic;
+        dp_key_done : in std_logic;
+        dp_ld_data : out std_logic;
+        dp_wr_data : out std_logic;
+        dp_rd_data : out std_logic;
+        dp_rst_pntr : out std_logic;
+        dp_ld_work : out std_logic;
+        dp_wr_work : out std_logic;
+        dp_rst_work : out std_logic;
+        dp_start_init_exp : out std_logic;
+        dp_start_exp : out std_logic;
+        dp_done_exp : in std_logic;
+        -- Avalon slave
+        avs_start : in std_logic;
+        avs_len : in std_logic_vector(15 downto 0);
+        avs_src : in std_logic_vector(31 downto 0);
+        avs_dest : in std_logic_vector(31 downto 0);
+        avs_mode : in std_logic_vector(1 downto 0);
+        avs_done : out std_logic;
+        avs_doneIrq : out std_logic;
+        -- Avalon master Read
+        avm_rd_start : out std_logic;
+        avm_rd_src : out std_logic_vector(31 downto 0);
+        avm_rd_len : out std_logic_vector(9 downto 0);
+        avm_rd_dVal : in std_logic;
+        avm_rd_done : in std_logic;
+        -- Avalon master Write
+        avm_wr_start : out std_logic;
+        avm_wr_dest : out std_logic_vector(31 downto 0);
+        avm_wr_len : out std_logic_vector(9 downto 0);
+        avm_wr_dVal : in std_logic;
+        avm_wr_done : in std_logic
+    );
+end entity fk_ctrl;
+
+architecture RTL of fk_ctrl is
+
+    -- Constants
+    constant INIT_BYTES : integer := NBITS/8;
+    constant DATA_BYTES : integer := DBITS/8;
+
+    -- FSM
+    type state_type is (ST_IDLE,
+                        ST_KEY_FETCH, ST_KEY_STORE,
+                        ST_INITV_FETCH, ST_INITV_STORE, ST_INITV_EXP,
+                        ST_OPER_FETCH, ST_OPER_EXEC, ST_OPER_STORE,
+                        ST_OPER_STORE_AND_FETCH);
+    signal state_reg, state_next : state_type;
+
+    -- Internal registers
+    signal sadd_reg, sadd_next : std_logic_vector(31 downto 0);
+    signal dadd_reg, dadd_next : std_logic_vector(31 downto 0);
+    signal slen_reg, slen_next : std_logic_vector(15 downto 0);
+    signal dlen_reg, dlen_next : std_logic_vector(15 downto 0);
+    signal d_reg, d_next : std_logic;
+
+    -- Status signals
+    signal d_oper : std_logic;
+
+begin
+
+    -- Register process
+    --
+    CR_REG: process (clk, reset_n) is
+    begin
+        if reset_n = '0' then
+            state_reg <= ST_IDLE;
+            sadd_reg <= (others => '0');
+            dadd_reg <= (others => '0');
+            slen_reg <= (others => '0');
+            dlen_reg <= (others => '0');
+            d_reg <= '0';
+        elsif rising_edge(clk) then
+            state_reg <= state_next;
+            sadd_reg <= sadd_next;
+            dadd_reg <= dadd_next;
+            slen_reg <= slen_next;
+            dlen_reg <= dlen_next;
+            d_reg <= d_next;
+        end if;
+    end process CR_REG;
+
+    -- Next state logic
+    --
+    NSL: process (state_reg,
+                  avs_start, avs_mode, avm_rd_done, avm_wr_done, dp_done_exp, dp_key_done, d_oper) is
+    begin
+        state_next <= state_reg;
+
+        case state_reg is
+
+            -- IDLE
+            -- When start is asserted by the slave the state changes to either
+            -- fetching the base key, fetching the initial vector or
+            -- starting the operation. Mode signal determines which of
+            -- these will materialise.
+            when ST_IDLE =>
+                if avs_start = '1' then
+                    case avs_mode is
+                        when "00" =>
+                            state_next <= ST_KEY_FETCH;
+                        when "01" =>
+                            state_next <= ST_INITV_FETCH;
+                        when "10" =>
+                            state_next <= ST_OPER_FETCH;
+                        when others => null;
+                    end case;
+                end if;
+
+            -- KEY FETCH
+            -- If DMA is done fetching the base key, the base key can be
+            -- stored in the key register.
+            when ST_KEY_FETCH =>
+                if avm_rd_done = '1' then
+                    state_next <= ST_KEY_STORE;
+                end if;
+
+            -- KEY STORE
+            -- Store the fetched key in the key register.
+            when ST_KEY_STORE =>
+                state_next <= ST_IDLE;
+
+            -- INITV_FETCH
+            -- If DMA is done fetching the init vector, the V vector can
+            -- be stored and the expansion of the noise bits can be started.
+            when ST_INITV_FETCH =>
+                if avm_rd_done = '1' then
+                    state_next <= ST_INITV_STORE;
+                end if;
+
+            -- INITV_STORE
+            -- Store V vector and start the expansion of the noise bits.
+            when ST_INITV_STORE =>
+                state_next <= ST_INITV_EXP;
+
+            -- INITV_EXP
+            -- Check whether the expansion of the noise bits is done.
+            when ST_INITV_EXP =>
+                if dp_done_exp = '1' then
+                    state_next <= ST_IDLE;
+                end if;
+
+            -- OPER FETCH
+            -- Check whether the data has been fetched and whether
+            -- the work vector has been updated.
+            when ST_OPER_FETCH =>
+                if avm_rd_done = '1' and dp_key_done = '1' then
+                    state_next <= ST_OPER_EXEC;
+                end if;
+
+            -- OPER EXEC
+            -- XOR the fetched data with the work vector.
+            when ST_OPER_EXEC =>
+                if d_oper = '1' then
+                    state_next <= ST_OPER_STORE;
+                else
+                    state_next <= ST_OPER_STORE_AND_FETCH;
+                end if;
+
+            -- OPER STORE
+            -- Final state.
+            when ST_OPER_STORE =>
+                if avm_wr_done = '1' then
+                    state_next <= ST_IDLE;
+                end if;
+
+            -- OPER STORE AND FETCH
+            -- Store and fetch at the same time. While doing that
+            -- also expand the noise bits and update work vector.
+            -- In short parallelize everything.
+            when ST_OPER_STORE_AND_FETCH =>
+                if avm_rd_done = '1' and avm_wr_done = '1' and
+                   dp_key_done = '1' and dp_done_exp = '1' then
+                    state_next <= ST_OPER_EXEC;
+                end if;
+
+        end case;
+    end process NSL;
+
+    -- Datapath process
+    --
+    DP: process (state_reg, sadd_reg, dadd_reg, slen_reg, dlen_reg, d_reg,
+                 avs_start, avs_src, avs_dest, avs_mode, avs_len,
+                 dp_done_exp, avm_rd_done, avm_wr_done, d_oper, dp_key_done,
+                 avm_rd_dVal, avm_wr_dVal) is
+    begin
+        sadd_next <= sadd_reg;
+        dadd_next <= dadd_reg;
+        slen_next <= slen_reg;
+        dlen_next <= dlen_reg;
+        d_next <= '0';
+
+        dp_key_start <= '0';
+        dp_start_exp <= '0';
+        dp_rst_work <= '0';
+        dp_rst_pntr <= '0';
+        dp_rd_data <= '0';
+        dp_wr_data <= '0';
+        dp_ld_work <= '0';
+        avm_rd_start <= '0';
+        avm_rd_src <= (others => '0');
+        avm_rd_len <= (others => '0');
+        avm_wr_start <= '0';
+        avm_wr_dest <= (others => '0');
+        avm_wr_len <= (others => '0');
+
+        case state_reg is
+
+            -- Latch the length, destination and source when started.
+            when ST_IDLE =>
+                dp_rst_pntr <= '1';
+                dp_rst_work <= '1';
+                if avs_start = '1' and avs_mode /= "11" then -- "11" is not decoded by NSL: never launch a zero-length read for it
+                    avm_rd_start <= '1';
+                    avm_rd_src <= avs_src;
+                    case avs_mode is
+                        when "00" | "01" =>
+                            avm_rd_len <= std_logic_vector(to_unsigned(INIT_BYTES, 10));
+                        when "10" =>
+                            dp_key_start <= '1';
+                            sadd_next <= std_logic_vector(unsigned(avs_src) + to_unsigned(DATA_BYTES, 32));
+                            dadd_next <= avs_dest;
+                            slen_next <= std_logic_vector(unsigned(avs_len) - to_unsigned(DATA_BYTES, 16));
+                            dlen_next <= avs_len;
+                            if unsigned(avs_len) > to_unsigned(DATA_BYTES, 16) then
+                                avm_rd_len <= std_logic_vector(to_unsigned(DATA_BYTES, 10));
+                            else
+                                avm_rd_len <= avs_len(9 downto 0);
+                            end if;
+                        when others => null;
+                    end case;
+                end if;
+
+            -- Signal to the Avalon slave that the operation is done.
+            when ST_KEY_STORE =>
+                d_next <= '1';
+
+            -- Signal to the Avalon slave that the operation is done.
+            when ST_INITV_EXP =>
+                if dp_done_exp = '1' then
+                    d_next <= '1';
+                end if;
+
+            -- Control the work vector and the serial write of
+            -- the data input register.
+            when ST_OPER_FETCH =>
+                if avm_rd_done /= '1' then
+                    dp_wr_data <= avm_rd_dVal;
+                end if;
+                if dp_key_done /= '1' then
+                    dp_ld_work <= '1';
+                end if;
+
+            -- Prepare the Write DMA for memory Write sequence.
+            -- If operation is not done start fetching data,
+            -- expanding noise bits and updating key vector in
+            -- parallel.
+            when ST_OPER_EXEC =>
+                dp_rst_pntr <= '1';
+                dp_rst_work <= '1';
+                avm_wr_start <= '1';
+                avm_wr_dest <= dadd_reg;
+                dadd_next <= std_logic_vector(unsigned(dadd_reg) + to_unsigned(DATA_BYTES, 32));
+                if d_oper = '1' then
+                    avm_wr_len <= dlen_reg(9 downto 0);
+                    dlen_next <= (others => '0');
+                else
+                    avm_wr_len <= std_logic_vector(to_unsigned(DATA_BYTES, 10));
+                    slen_next <= std_logic_vector(unsigned(slen_reg) - to_unsigned(DATA_BYTES, 16));
+                    dlen_next <= slen_reg;
+                    dp_key_start <= '1';
+                    dp_start_exp <= '1';
+                    avm_rd_start <= '1';
+                    avm_rd_src <= sadd_reg;
+                    sadd_next <= std_logic_vector(unsigned(sadd_reg) + to_unsigned(DATA_BYTES, 32));
+                    if unsigned(slen_reg) > to_unsigned(DATA_BYTES, 16) then
+                        avm_rd_len <= std_logic_vector(to_unsigned(DATA_BYTES, 10));
+                    else
+                        avm_rd_len <= slen_reg(9 downto 0);
+                    end if;
+                end if;
+
+            -- Store the data. If DMA Write is done signal the
+            -- end of operation.
+            when ST_OPER_STORE =>
+                if avm_wr_done = '1' then
+                    d_next <= '1';
+                else
+                    dp_rd_data <= avm_wr_dVal;
+                end if;
+
+            -- Control serial write, read, and the work
+            -- vector update.
+            when ST_OPER_STORE_AND_FETCH =>
+                if avm_wr_done /= '1' then
+                    dp_rd_data <= avm_wr_dVal;
+                end if;
+                if avm_rd_done /= '1' then
+                    dp_wr_data <= avm_rd_dVal;
+                end if;
+                if dp_key_done /= '1' then
+                    dp_ld_work <= '1';
+                end if;
+
+            when others => null;
+
+        end case;
+    end process DP;
+
+    -- Status signals
+    d_oper <= '1' when unsigned(dlen_reg) <= to_unsigned(DATA_BYTES, 16) else '0';
+
+    -- Output
+    dp_key_ld_init <= '1' when state_reg = ST_KEY_STORE else '0';
+    dp_key_ld_v_init <= '1' when state_reg = ST_INITV_STORE else '0';
+    dp_key_ld_v <= '1' when state_reg = ST_OPER_EXEC else '0';
+
+    dp_ld_data <= '1' when state_reg = ST_OPER_EXEC else '0';
+
+    dp_wr_work <= avm_rd_dVal when state_reg = ST_KEY_FETCH or state_reg = ST_INITV_FETCH else '0';
+
+    dp_start_init_exp <= '1' when state_reg = ST_INITV_STORE else '0';
+
+    avs_done <= '1' when state_reg = ST_IDLE else '0';
+    avs_doneIrq <= d_reg;
+
+end architecture RTL;
diff --git a/hw/hdl/operators/lpn/fk_data_reg.vhd b/hw/hdl/operators/lpn/fk_data_reg.vhd
new file mode 100644
index 00000000..5bdbbf8c
--- /dev/null
+++ b/hw/hdl/operators/lpn/fk_data_reg.vhd
@@ -0,0 +1,111 @@
+--*************************************************************************--
+--! @file : fk_data_reg.vhd
+--! @project : Firekite, Dual port RAM implementation.
+--!
+--! Input and output data registers
+--!
+--! @date : 10.10.2017.
+--*************************************************************************--
+
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+use work.utils.all;
+
+entity fk_data_reg is
+    generic(
+        NBITS : integer := 4096; -- Length of the basis vectors in bits
+        DBITS : integer := 2816; -- Data set length in bits
+        BUS_SIZE : integer := 32 -- Avalon bus size
+    );
+    port(
+        clk : in std_logic;
+        reset_n : in std_logic;
+        -- Control
+        ld_data : in std_logic; -- Parallel load reg.
+        wr_data : in std_logic; -- Serial write reg.
+        rd_data : in std_logic; -- Serial read reg.
+ rst_pntr : in std_logic; -- Reset reg. pointers + -- Data + sink_data_p : in std_logic_vector(DBITS-1 downto 0); + sink_data_s : in std_logic_vector(BUS_SIZE-1 downto 0); + src_data_s : out std_logic_vector(BUS_SIZE-1 downto 0) + ); +end entity fk_data_reg; + +architecture RTL of fk_data_reg is + + -- Constants + constant PNT_BITS : integer := bitlength(DBITS/BUS_SIZE-1); + + -- Internal regsiters + signal data_in_reg, data_in_next : std_logic_vector(DBITS-1 downto 0); + signal data_out_reg, data_out_next : std_logic_vector(DBITS-1 downto 0); + + -- Counters and pointers + signal prd_reg, prd_next : unsigned(PNT_BITS-1 downto 0); + signal pwr_reg, pwr_next : unsigned(PNT_BITS-1 downto 0); + +begin + + -- Register process + -- + CR_REG: process (clk, reset_n) is + begin + if reset_n = '0' then + data_in_reg <= (others => '0'); + data_out_reg <= (others => '0'); + prd_reg <= to_unsigned(0, PNT_BITS); + pwr_reg <= to_unsigned(0, PNT_BITS); + elsif rising_edge(clk) then + data_in_reg <= data_in_next; + data_out_reg <= data_out_next; + prd_reg <= prd_next; + pwr_reg <= pwr_next; + end if; + end process CR_REG; + + -- Datapath process + -- + DP: process (data_in_reg, data_out_reg, prd_reg, pwr_reg, + ld_data, wr_data, rd_data, rst_pntr, sink_data_p, sink_data_s) is + variable idx : integer; + begin + data_in_next <= data_in_reg; + data_out_next <= data_out_reg; + prd_next <= prd_reg; + pwr_next <= pwr_reg; + + -- Parallel load output data register. During the execution stage + -- when work vector is XOR-ed with the data to produce the + -- result. + if ld_data = '1' then + data_out_next <= data_in_reg xor sink_data_p; + end if; + + -- Serial load data in register. During the phase where the data + -- is fetched from the memory. + if wr_data = '1' then + idx := to_integer(pwr_reg); + data_in_next(idx*BUS_SIZE+BUS_SIZE-1 downto idx*BUS_SIZE) <= sink_data_s; + pwr_next <= pwr_reg + 1; + end if; + + -- Serial read data register. 
During the phase where the result
+        -- is stored in the memory.
+        if rd_data = '1' then
+            prd_next <= prd_reg + 1;
+        end if;
+
+        -- Reset pointers.
+        if rst_pntr = '1' then
+            prd_next <= to_unsigned(0, PNT_BITS);
+            pwr_next <= to_unsigned(0, PNT_BITS);
+        end if;
+
+    end process DP;
+
+    -- Output
+    src_data_s <= data_out_reg(to_integer(prd_reg)*BUS_SIZE+BUS_SIZE-1 downto to_integer(prd_reg)*BUS_SIZE);
+
+end architecture RTL;
diff --git a/hw/hdl/operators/lpn/fk_dma_rd.vhd b/hw/hdl/operators/lpn/fk_dma_rd.vhd
new file mode 100644
index 00000000..3cd83f57
--- /dev/null
+++ b/hw/hdl/operators/lpn/fk_dma_rd.vhd
@@ -0,0 +1,162 @@
+--*************************************************************************--
+--! @file : fk_dma_rd.vhd
+--! @project : Firekite, Dual port RAM implementation.
+--!
+--! DMA read unit. Generic bus size. Burst operation. One burst of len/BUS_SIZE_BYTES beats per start pulse.
+--!
+--! @date : 10.10.2017.
+--*************************************************************************--
+
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+use work.utils.all;
+
+entity fk_dma_rd is
+    generic(
+        BUS_SIZE : integer := 32 -- Avalon bus size in bits
+    );
+    port(
+        clk : in std_logic;
+        reset_n : in std_logic;
+        -- Control
+        start : in std_logic; -- Sampled in ST_IDLE only
+        src : in std_logic_vector(31 downto 0); -- Byte address of the first beat
+        len : in std_logic_vector(9 downto 0); -- Transfer length in bytes
+        dVal : out std_logic; -- High for every accepted read beat
+        done : out std_logic; -- High while the unit is idle
+        -- Data
+        src_data : out std_logic_vector(BUS_SIZE-1 downto 0);
+        -- Avalon master
+        avm_WaitRequest : in std_logic;
+        avm_Read : out std_logic;
+        avm_ReadDataValid : in std_logic;
+        avm_ReadData : in std_logic_vector(BUS_SIZE-1 downto 0);
+        avm_Address : out std_logic_vector(31 downto 0);
+        avm_BurstCount : out std_logic_vector(7 downto 0)
+    );
+end entity fk_dma_rd;
+
+architecture RTL of fk_dma_rd is
+
+    -- Constants
+    constant MAX_BURST_SIZE : integer := 128; -- Max burst size on the bus
+    constant BUS_SIZE_BYTES : integer := BUS_SIZE/8; -- Bus size in bytes
+    constant CNT_BURST : integer := bitlength(MAX_BURST_SIZE-1);
+
+    -- FSM
+    type state_type is (ST_IDLE, ST_READ_BURST_START, ST_READ_BURST);
+    signal state_reg, state_next : state_type;
+
+    -- Internal registers
+    signal sadd_reg, sadd_next : std_logic_vector(31 downto 0);
+    signal len_reg, len_next : std_logic_vector(7 downto 0);
+
+    -- Counters
+    signal cnt_b_reg, cnt_b_next : unsigned(CNT_BURST-1 downto 0);
+
+    -- Status signals
+    signal d_cnt_burst : std_logic;
+
+begin
+
+    -- Register process
+    --
+    CR_REG: process (clk, reset_n) is
+    begin
+        if reset_n = '0' then
+            state_reg <= ST_IDLE;
+            cnt_b_reg <= to_unsigned(0, CNT_BURST);
+            sadd_reg <= (others => '0');
+            len_reg <= (others => '0');
+        elsif rising_edge(clk) then
+            state_reg <= state_next;
+            cnt_b_reg <= cnt_b_next;
+            sadd_reg <= sadd_next;
+            len_reg <= len_next;
+        end if;
+    end process CR_REG;
+
+    -- Next state logic
+    --
+    NSL: process (state_reg, start, avm_WaitRequest, avm_ReadDataValid, d_cnt_burst) is
+    begin
+        state_next <= state_reg;
+
+        case state_reg is
+
+            -- IDLE
+            -- When start is asserted go to Read state.
+            when ST_IDLE =>
+                if start = '1' then
+                    state_next <= ST_READ_BURST_START;
+                end if;
+
+            -- READ BURST START
+            -- Signal the start of the burst read sequence.
+            when ST_READ_BURST_START =>
+                if avm_WaitRequest /= '1' then
+                    state_next <= ST_READ_BURST;
+                end if;
+
+            -- READ BURST
+            -- Burst read the data from the memory. Leave only on the final valid beat, so a late last word is not lost.
+            when ST_READ_BURST =>
+                if avm_ReadDataValid = '1' and d_cnt_burst = '1' then
+                    state_next <= ST_IDLE;
+                end if;
+
+        end case;
+    end process NSL;
+
+    -- Datapath process
+    --
+    DP: process (state_reg, cnt_b_reg, sadd_reg, len_reg,
+                 start, len, src, avm_ReadDataValid, d_cnt_burst, avm_WaitRequest) is
+    begin
+        cnt_b_next <= cnt_b_reg;
+        sadd_next <= sadd_reg;
+        len_next <= len_reg;
+
+        dVal <= '0';
+
+        case state_reg is
+
+            -- Latch source and the length of the data in
+            -- the dedicated registers.
+            when ST_IDLE =>
+                if start = '1' then
+                    sadd_next <= src;
+                    len_next <= std_logic_vector(to_unsigned((to_integer(unsigned(len))/BUS_SIZE_BYTES), 8));
+                end if;
+
+            -- Increment counters and address when read data is valid and signal to the
+            -- controller that the data is valid.
+            when ST_READ_BURST =>
+                if avm_ReadDataValid = '1' then
+                    dVal <= '1';
+                    sadd_next <= std_logic_vector(unsigned(sadd_reg) + to_unsigned(BUS_SIZE_BYTES, 32));
+                    if d_cnt_burst = '1' then
+                        cnt_b_next <= to_unsigned(0, CNT_BURST);
+                    else
+                        cnt_b_next <= cnt_b_reg + 1;
+                    end if;
+                end if;
+
+            when others => null;
+
+        end case;
+    end process DP;
+
+    -- Status signals
+    d_cnt_burst <= '1' when cnt_b_reg = unsigned(len_reg)-1 else '0';
+
+    -- Output
+    done <= '1' when state_reg = ST_IDLE else '0';
+    src_data <= avm_ReadData;
+
+    avm_Read <= '1' when state_reg = ST_READ_BURST_START else '0';
+    avm_BurstCount <= len_reg;
+    avm_Address <= sadd_reg;
+
+end architecture RTL;
diff --git a/hw/hdl/operators/lpn/fk_dma_wr.vhd b/hw/hdl/operators/lpn/fk_dma_wr.vhd
new file mode 100644
index 00000000..b6b22ee8
--- /dev/null
+++ b/hw/hdl/operators/lpn/fk_dma_wr.vhd
@@ -0,0 +1,157 @@
+--*************************************************************************--
+--! @file : fk_dma_wr.vhd
+--! @project : Firekite, Dual port RAM implementation.
+--!
+--! DMA write/read unit. Generic bus size. Burst operation.
+--!
+--! @date : 10.10.2017.
+--*************************************************************************-- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use work.utils.all; + +entity fk_dma_wr is + generic( + BUS_SIZE : integer := 32 -- Avalon bus size in bits + ); + port( + clk : in std_logic; + reset_n : in std_logic; + -- Control + start : in std_logic; + dest : in std_logic_vector(31 downto 0); + len : in std_logic_vector(9 downto 0); + dVal : out std_logic; + done : out std_logic; + -- Data + sink_data : in std_logic_vector(BUS_SIZE-1 downto 0); + -- Avalon master + avm_WaitRequest : in std_logic; + avm_Write : out std_logic; + avm_WriteData : out std_logic_vector(BUS_SIZE-1 downto 0); + avm_Address : out std_logic_vector(31 downto 0); + avm_BurstCount : out std_logic_vector(7 downto 0) + ); +end entity fk_dma_wr; + +architecture RTL of fk_dma_wr is + + -- Constants + constant MAX_BURST_SIZE : integer := 128; -- Max burst size on the bus + constant BUS_SIZE_BYTES : integer := BUS_SIZE/8; -- Bus size in bytes + constant CNT_BURST : integer := bitlength(MAX_BURST_SIZE-1); + + -- FSM + type state_type is (ST_IDLE, ST_WRITE_BURST); + signal state_reg, state_next : state_type; + + -- Internal registers + signal dadd_reg, dadd_next : std_logic_vector(31 downto 0); + signal len_reg, len_next : std_logic_vector(7 downto 0); + signal wr_reg, wr_next : std_logic; + + -- Counters + signal cnt_b_reg, cnt_b_next : unsigned(CNT_BURST-1 downto 0); + + -- Status signals + signal d_cnt_burst : std_logic; + +begin + + -- Register process + -- + CR_REG: process (clk, reset_n) is + begin + if reset_n = '0' then + state_reg <= ST_IDLE; + cnt_b_reg <= to_unsigned(0, CNT_BURST); + dadd_reg <= (others => '0'); + len_reg <= (others => '0'); + elsif rising_edge(clk) then + state_reg <= state_next; + cnt_b_reg <= cnt_b_next; + dadd_reg <= dadd_next; + len_reg <= len_next; + end if; + end process CR_REG; + + -- Next state logic + -- + NSL: process (state_reg, start, avm_WaitRequest, 
d_cnt_burst) is + begin + state_next <= state_reg; + + case state_reg is + + -- IDLE + -- When start is asserted go to either Write state. + when ST_IDLE => + if start = '1' then + state_next <= ST_WRITE_BURST; + end if; + + -- WRITE BURST + -- Burst write the data to the memory. + when ST_WRITE_BURST => + if avm_WaitRequest /= '1' then + if d_cnt_burst = '1' then + state_next <= ST_IDLE; + end if; + end if; + + end case; + end process NSL; + + -- Datapath process + -- + DP: process (state_reg, cnt_b_reg, dadd_reg, len_reg, + start, len, dest, d_cnt_burst, avm_WaitRequest) is + begin + cnt_b_next <= cnt_b_reg; + dadd_next <= dadd_reg; + len_next <= len_reg; + + dVal <= '0'; + + case state_reg is + + -- Latch destination and the length of the data in + -- the dedicated registers. + when ST_IDLE => + if start = '1' then + dadd_next <= dest; + len_next <= std_logic_vector(to_unsigned((to_integer(unsigned(len))/BUS_SIZE_BYTES), 8)); + end if; + + -- Increment counters and address when data is written and signal to the + -- controller that the data has been written. 
+ when ST_WRITE_BURST => + if avm_WaitRequest /= '1' then + dVal <= '1'; + dadd_next <= std_logic_vector(unsigned(dadd_reg) + to_unsigned(BUS_SIZE_BYTES, 32)); + if d_cnt_burst = '1' then + cnt_b_next <= to_unsigned(0, CNT_BURST); + else + cnt_b_next <= cnt_b_reg + 1; + end if; + end if; + + when others => null; + + end case; + end process DP; + + -- Status signals + d_cnt_burst <= '1' when cnt_b_reg = unsigned(len_reg)-1 else '0'; + + -- Output + done <= '1' when state_reg = ST_IDLE else '0'; + + avm_Write <= '1' when state_reg = ST_WRITE_BURST else '0'; + avm_BurstCount <= len_reg; + avm_WriteData <= sink_Data; + avm_Address <= dadd_reg; + +end architecture RTL; diff --git a/hw/hdl/operators/lpn/fk_dp.vhd b/hw/hdl/operators/lpn/fk_dp.vhd new file mode 100644 index 00000000..5539a8f1 --- /dev/null +++ b/hw/hdl/operators/lpn/fk_dp.vhd @@ -0,0 +1,144 @@ +--*************************************************************************-- +--! @file : fk_dp.vhd +--! @project : Firekite, Single port RAM implementation. +--! +--! Datapath registers consisting of work, key and data registers. +--! +--! @date : 10.10.2017. +--*************************************************************************-- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use work.utils.all; + +entity fk_dp is + generic( + NBITS : integer := 4096; -- Length of the basis vector in bits + DBITS : integer := 2816; -- Data set length in bits + EBITS : integer := 768; -- Length of the compressed noise bits + VBITS : integer := 512; -- Number of basis vectors + BUS_SIZE : integer := 32; -- Avalon bus size in bits + NCYC : integer := 32 -- Cycl. to exp. 
noise bits and update work vctr + ); + port( + clk : in std_logic; + reset_n : in std_logic; + -- Control + ld_key_init : in std_logic; + ld_v_init : in std_logic; + ld_v : in std_logic; + start_key : in std_logic; + done_key : out std_logic; + ld_data : in std_logic; + wr_data : in std_logic; + rd_data : in std_logic; + rst_pntr : in std_logic; + ld_work : in std_logic; + wr_work : in std_logic; + rst_work : in std_logic; + start_init_exp : in std_logic; + start_exp : in std_logic; + done_exp : out std_logic; + -- Data + sink_data_s : in std_logic_vector(BUS_SIZE-1 downto 0); + src_data_s : out std_logic_vector(BUS_SIZE-1 downto 0) + ); +end entity fk_dp; + +architecture RTL of fk_dp is + + -- Internal signals + signal data_work_in : std_logic_vector(NBITS-1 downto 0); + signal data_work_out : std_logic_vector(NBITS-1 downto 0); + signal data_noise_out : std_logic_vector(NBITS-1 downto 0); + signal data_xor_work_noise : std_logic_vector(NBITS-1 downto 0); + +begin + + -- Key register instantiation. + GEN_KEY_REG: entity work.fk_key_reg + generic map( + NBITS => NBITS, + VBITS => VBITS, + NCYC => NCYC + ) + port map( + clk => clk, + reset_n => reset_n, + -- Control + ld_key_init => ld_key_init, + ld_v_init => ld_v_init, + ld_v => ld_v, + start => start_key, + done => done_key, + -- Data + sink_data_init => data_work_out, + sink_data => data_xor_work_noise(VBITS-1 downto 0), + src_data => data_work_in + ); + + -- Work register instantiation. + GEN_WORK_REG: entity work.fk_work_reg + generic map( + NBITS => NBITS, + BUS_SIZE => BUS_SIZE + ) + port map( + clk => clk, + reset_n => reset_n, + -- Control + ld_work => ld_work, + wr_work => wr_work, + rst_work => rst_work, + -- Data + sink_data_p => data_work_in, + src_data_p => data_work_out, + sink_data_s => sink_data_s + ); + + -- Data register instantiation. 
+ GEN_DATA_REG: entity work.fk_data_reg + generic map( + NBITS => NBITS, + DBITS => DBITS, + BUS_SIZE => BUS_SIZE + ) + port map( + clk => clk, + reset_n => reset_n, + -- Control + ld_data => ld_data, + wr_data => wr_data, + rd_data => rd_data, + rst_pntr => rst_pntr, + -- Data + sink_data_p => data_xor_work_noise(NBITS-1 downto NBITS-DBITS), + sink_data_s => sink_data_s, + src_data_s => src_data_s + ); + + -- Noise register instantiation + GEN_NOISE_REG: entity work.fk_noise_reg + generic map( + NBITS => NBITS, + EBITS => EBITS, + NCYC => NCYC + ) + port map( + clk => clk, + reset_n => reset_n, + -- Control + start_init => start_init_exp, + start => start_exp, + done => done_exp, + -- Data + sink_data_init => data_work_out((NBITS-DBITS)-1 downto VBITS), + sink_data => data_xor_work_noise((NBITS-DBITS)-1 downto VBITS), + src_data => data_noise_out + ); + + -- XOR noise and work + data_xor_work_noise <= data_work_out xor data_noise_out; + +end architecture RTL; diff --git a/hw/hdl/operators/lpn/fk_key_reg.vhd b/hw/hdl/operators/lpn/fk_key_reg.vhd new file mode 100644 index 00000000..a5998393 --- /dev/null +++ b/hw/hdl/operators/lpn/fk_key_reg.vhd @@ -0,0 +1,206 @@ +--*************************************************************************-- +--! @file : fk_key_reg.vhd +--! @project : Firekite, Dual port RAM implementation. +--! +--! Key register. Rotating key implementation. +--! +--! @date : 10.10.2017. +--*************************************************************************-- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use work.utils.all; + +entity fk_key_reg is + generic( + NBITS : integer := 4096; -- Length of the basis vector + VBITS : integer := 512; -- Length of the V vector + NCYC : integer := 32 -- Number of cycles needed for update + ); + port( + clk : in std_logic; + reset_n : in std_logic; + -- Control + ld_key_init : in std_logic; -- Load parallel reg. 
+ ld_v_init : in std_logic; + ld_v : in std_logic; + start : in std_logic; -- Start the update of the work vctr + done : out std_logic; + -- Data + sink_data_init : in std_logic_vector(NBITS-1 downto 0); + sink_data : in std_logic_vector(VBITS-1 downto 0); + src_data : out std_logic_vector(NBITS-1 downto 0) + ); +end entity fk_key_reg; + +architecture RTL of fk_key_reg is + + -- Constants + constant CNT_BITS : integer := bitlength(NCYC-1); + constant ROT_BITS : integer := VBITS/NCYC; -- Keys updated in one cycle + + -- FSM + type state_type is (ST_IDLE, ST_OPER); + signal state_reg, state_next : state_type; + + -- Internal regsiters + signal key_reg, key_next : std_logic_vector(NBITS-1 downto 0); + signal v_reg, v_next : std_logic_vector(VBITS-1 downto 0); + + -- Counters + signal cnt_reg, cnt_next : unsigned(CNT_BITS-1 downto 0); + + -- Types + type key_array_s0 is array (ROT_BITS-1 downto 0) of std_logic_vector(NBITS-1 downto 0); + + -- Internal signals + signal key_data : key_array_s0; + signal key_data_s0 : key_array_s0; + signal v_curr : std_logic_vector(ROT_BITS-1 downto 0); + + -- Status signals + signal d_cnt : std_logic; + +begin + + -- Register process + -- + CR_REG: process (clk, reset_n) is + begin + if reset_n = '0' then + state_reg <= ST_IDLE; + cnt_reg <= to_unsigned(0, CNT_BITS); + key_reg <= (others => '0'); + v_reg <= (others => '0'); + elsif rising_edge(clk) then + state_reg <= state_next; + cnt_reg <= cnt_next; + key_reg <= key_next; + v_reg <= v_next; + end if; + end process CR_REG; + + -- Next state logic + -- + NSL: process (state_reg, start, d_cnt) is + begin + state_next <= state_reg; + + case state_reg is + + -- IDLE + -- When start is asserted, start updating the work vector. + when ST_IDLE => + if start = '1' then + state_next <= ST_OPER; + end if; + + -- OPER + -- If NCYC has passed return to idle. Work vector updated. 
+ when ST_OPER => + if d_cnt = '1' then + state_next <= ST_IDLE; + end if; + + end case; + end process NSL; + + -- Datapath process + -- + DP: process (state_reg, cnt_reg, key_reg, v_reg, ld_key_init, + ld_v_init, ld_v, d_cnt, sink_data, sink_data_init) is + begin + cnt_next <= cnt_reg; + key_next <= key_reg; + v_next <= v_reg; + + -- Load key parallel. During the initial key storage. + if ld_key_init = '1' then + key_next <= sink_data_init; + end if; + + -- Load v parallel during the initial init vector storage. + if ld_v_init = '1' then + v_next <= sink_data_init(VBITS-1 downto 0); + end if; + + -- Load v vector parallel. + if ld_v = '1' then + v_next <= sink_data; + end if; + + -- Updating of the working vector. + case state_reg is + + -- Counter reset. + when ST_IDLE => + cnt_next <= to_unsigned(0, CNT_BITS); + + -- If NCYC has passed end the operation. During the operation and + -- during each cycle rotate the base key and V vectors. + when ST_OPER => + if d_cnt = '1' then + cnt_next <= to_unsigned(0, CNT_BITS); + else + cnt_next <= cnt_reg + 1; + end if; + -- Key rotation + for i in 0 to NBITS/VBITS-1 loop + key_next(i*VBITS+VBITS-1 downto i*VBITS) <= + key_reg(i*VBITS+VBITS-1-ROT_BITS downto i*VBITS) & + key_reg(i*VBITS+VBITS-1 downto i*VBITS+VBITS-ROT_BITS); + end loop; + -- V rotation + v_next <= v_reg(ROT_BITS-1 downto 0) & v_reg(VBITS-1 downto ROT_BITS); + + end case; + end process DP; + + -- Combinational logic. Keys derived from the base key by pure + -- wiring. + PS0: process (key_reg) is + begin + for i in 1 to ROT_BITS-1 loop + for j in 0 to NBITS/VBITS-1 loop + key_data(i)(j*VBITS+VBITS-1 downto j*VBITS) <= + key_reg(j*VBITS+VBITS-1-i downto j*VBITS) & + key_reg(j*VBITS+VBITS-1 downto j*VBITS+VBITS-i); + end loop; + end loop; + key_data(0) <= key_reg; + end process PS0; + + -- Multiplication of the generated keys with the lowest + -- ROT_BITS of the V vector. 
+ v_curr <= v_reg(ROT_BITS-1 downto 0); + PS0M: process (key_data, v_curr) is + begin + for i in 0 to ROT_BITS-1 loop + if v_curr(i) = '1' then + key_data_s0(i) <= key_data(i); + else + key_data_s0(i) <= (others => '0'); + end if; + end loop; + end process PS0M; + + -- Generation of the current update vector by XOR-ing all + -- generated intermediate keys. + PS1: process (key_data_s0) is + variable tmp : std_logic_vector(NBITS-1 downto 0); + begin + tmp := (others => '0'); + for i in 0 to ROT_BITS-1 loop + tmp := tmp xor key_data_s0(i); + end loop; + src_data <= tmp; + end process PS1; + + -- Status signals + d_cnt <= '1' when cnt_reg = NCYC-1 else '0'; + + -- Output + done <= '1' when state_reg = ST_IDLE else '0'; + +end architecture RTL; diff --git a/hw/hdl/operators/lpn/fk_noise_reg.vhd b/hw/hdl/operators/lpn/fk_noise_reg.vhd new file mode 100644 index 00000000..15d1279a --- /dev/null +++ b/hw/hdl/operators/lpn/fk_noise_reg.vhd @@ -0,0 +1,168 @@ +--*************************************************************************-- +--! @file : fk_noise_reg.vhd +--! @project : Firekite, Dual port RAM implementation. +--! +--! Expansion of the compressed noise bits. +--! +--! @date : 10.10.2017. +--*************************************************************************-- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use work.utils.all; + +entity fk_noise_reg is + generic( + NBITS : integer := 4096; -- Length of the basis vector + EBITS : integer := 768; -- Length of the compressed noise bits + NCYC : integer := 32 -- Number of cycles for the exp. 
of noise bits + ); + port( + clk : in std_logic; + reset_n : in std_logic; + -- Control + start_init : in std_logic; -- Initial expansion + start : in std_logic; -- Expansion of the noise bits + done : out std_logic; + -- Data + sink_data_init : in std_logic_vector(EBITS-1 downto 0); + sink_data : in std_logic_vector(EBITS-1 downto 0); + src_data : out std_logic_vector(NBITS-1 downto 0) + ); +end entity fk_noise_reg; + +architecture RTL of fk_noise_reg is + + -- Constants + constant CNT_BITS : integer := bitlength(NCYC-1); + constant ROT_BITS : integer := EBITS/NCYC; -- Noise bits in one cycle + constant IDX_LEN : integer := bitlength(NBITS-1); -- Encoded length + constant NCYC_FLIP : integer := (EBITS/IDX_LEN)/NCYC; -- Flips in one cycle + + -- FSM + type state_type is (ST_IDLE, ST_EXP); + signal state_reg, state_next : state_type; + + -- Internal regsiters + signal noise_reg, noise_next : std_logic_vector(NBITS-1 downto 0); + signal exp_reg, exp_next : std_logic_vector(EBITS-1 downto 0); + + -- Counters and pointers + signal cnt_reg, cnt_next : unsigned(CNT_BITS-1 downto 0); + + -- Internal signals + signal cmpr_data : std_logic_vector(ROT_BITS-1 downto 0); + signal uncmpr_data : std_logic_vector(NBITS-1 downto 0); + + -- Status signals + signal d_cnt : std_logic; + +begin + + -- Register process + -- + CR_REG: process (clk, reset_n) is + begin + if reset_n = '0' then + state_reg <= ST_IDLE; + noise_reg <= (others => '0'); + exp_reg <= (others => '0'); + cnt_reg <= to_unsigned(0, CNT_BITS); + elsif rising_edge(clk) then + state_reg <= state_next; + noise_reg <= noise_next; + exp_reg <= exp_next; + cnt_reg <= cnt_next; + end if; + end process CR_REG; + + -- Next state logic + -- + NSL: process (state_reg, start_init, start, d_cnt) is + begin + state_next <= state_reg; + + case state_reg is + + -- IDLE + -- When start is asserted start expanding the noise bits + -- from the expansion register. 
+ when ST_IDLE => + if start = '1' or start_init = '1' then + state_next <= ST_EXP; + end if; + + -- EXP + -- Wait until the counter counts NCYC signalling the end + -- of the expansion and return to idle. + when ST_EXP => + if d_cnt = '1' then + state_next <= ST_IDLE; + end if; + + end case; + end process NSL; + + -- Datapath process + -- + DP: process (state_reg, noise_reg, exp_reg, cnt_reg, + d_cnt, sink_data, sink_data_init, uncmpr_data, start_init, start) is + variable idx : integer; + begin + noise_next <= noise_reg; + exp_next <= exp_reg; + cnt_next <= cnt_reg; + + -- Expand the noise bits. + case state_reg is + + -- When started load expansion register with data from the + -- work register and reset the noise register. + when ST_IDLE => + cnt_next <= to_unsigned(0, CNT_BITS); + if start = '1' then + exp_next <= sink_data; + noise_next <= (others => '0'); + elsif start_init = '1' then + exp_next <= sink_data_init; + noise_next <= (others => '0'); + end if; + + -- If NCYC has passed, end the operation. During the operation and + -- during each cycle rotate expansion register and update the + -- noise vector with the expanded data(OR operation?). + when ST_EXP => + if d_cnt = '1' then + cnt_next <= to_unsigned(0, CNT_BITS); + else + cnt_next <= cnt_reg + 1; + end if; + -- E vector rotation. + exp_next <= exp_reg(ROT_BITS-1 downto 0) & exp_reg(EBITS-1 downto ROT_BITS); + -- Noise register update with the uncompressed noise bits. + noise_next <= noise_reg or uncmpr_data; + + end case; + end process DP; + + -- Combinational logic present in the expansion of the noise bits. + -- Data to be expanded in the current cycle is represented by the lowest + -- ROT_BITS of the expansion register. 
+ cmpr_data <= exp_reg(ROT_BITS-1 downto 0); + DCDR: process (cmpr_data) is + begin + uncmpr_data <= (others => '0'); + for i in 0 to NCYC_FLIP-1 loop + uncmpr_data(to_integer(unsigned(cmpr_data(i*IDX_LEN+IDX_LEN-1 downto i*IDX_LEN)))) <= '1'; + end loop; + end process DCDR; + + -- Status signals + d_cnt <= '1' when cnt_reg = NCYC-1 else '0'; + + -- Output + done <= '1' when state_reg = ST_IDLE else '0'; + src_data <= noise_reg; + +end architecture RTL; diff --git a/hw/hdl/operators/lpn/fk_slave.vhd b/hw/hdl/operators/lpn/fk_slave.vhd new file mode 100644 index 00000000..18941363 --- /dev/null +++ b/hw/hdl/operators/lpn/fk_slave.vhd @@ -0,0 +1,133 @@ +--*************************************************************************-- +--! @file : fk_slave.vhd +--! @project : Firekite, Dual port RAM implementation. +--! +--! Avalon slave. 6 Interface registers. +--! +--! @date : 10.10.2017. +--*************************************************************************-- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +entity fk_slave is + port ( + clk : in std_logic; + reset_n : in std_logic; + -- Avalon slave + avs_Address : in std_logic_vector(2 downto 0); + avs_ChipSelect : in std_logic; + avs_Read : in std_logic; + avs_Write : in std_logic; + avs_WriteData : in std_logic_vector(31 downto 0); + avs_ReadData : out std_logic_vector(31 downto 0); + -- Control + start : out std_logic; + len : out std_logic_vector(15 downto 0); + src : out std_logic_vector(31 downto 0); + dest : out std_logic_vector(31 downto 0); + mode : out std_logic_vector(1 downto 0); + done : in std_logic; + doneIrq : in std_logic; + -- Irq + irq : out std_logic + ); +end entity fk_slave; + +architecture rtl of fk_slave is + + -- Internal interface registers + signal ireg_command : std_logic_vector(3 downto 0); + signal ireg_src : std_logic_vector(31 downto 0); + signal ireg_dest : std_logic_vector(31 downto 0); + signal ireg_len : std_logic_vector(15 downto 0); + signal 
ireg_status : std_logic_vector(4 downto 0); + signal ireg_irqP : std_logic; + + -- Internal signals + signal irqEn : std_logic; + +begin + + -- Write Avalon slave + -- + WR_CR_REG : process (clk, reset_n) is + begin + if reset_n = '0' then + ireg_command <= (others => '0'); + ireg_src <= (others => '0'); + ireg_dest <= (others => '0'); + ireg_len <= (others => '0'); + ireg_irqP <= '0'; + elsif rising_edge(clk) then + ireg_command(0) <= '0'; + if doneIrq = '1' then + ireg_irqP <= '1'; + end if; + if avs_chipSelect = '1' and avs_write = '1' then + case avs_address is + -- Command + when "000" => + ireg_command <= avs_writeData(3 downto 0); + -- Source address + when "001" => + ireg_src <= avs_writeData(31 downto 0); + -- Destination address + when "010" => + ireg_dest <= avs_writeData(31 downto 0); + -- Data length + when "011" => + ireg_len <= avs_writeData(15 downto 0); + -- Clear IRQ + when "100" => + if avs_writeData(0) = '1' then + ireg_irqP <= '0'; + end if; + when others => null; + end case; + end if; + end if; + end process WR_CR_REG; + + -- Read Avalon slave + -- + RD_C_REG : process (clk) is + begin + if rising_edge(clk) then + if avs_chipSelect = '1' and avs_read = '1' then + avs_readData <= (others => '0'); + case avs_address is + -- Command + when "000" => avs_readData(3 downto 0) <= ireg_command; + -- Source address + when "001" => avs_readData(31 downto 0) <= ireg_src; + -- Destination address + when "010" => avs_readData(31 downto 0) <= ireg_dest; + -- Length of the data + when "011" => avs_readData(15 downto 0) <= ireg_len; + -- Status + when "101" => avs_readData(4 downto 0) <= ireg_status; + when others => null; + end case; + end if; + end if; + end process RD_C_REG; + + -- Datapath + ireg_status(2 downto 0) <= ireg_command(3 downto 1); + ireg_status(3) <= ireg_irqP; + ireg_status(4) <= done; + irqEn <= ireg_command(1); + + -- Output + start <= ireg_command(0); + src <= ireg_src; + dest <= ireg_dest; + len <= ireg_len; + mode <= ireg_command(3 
downto 2); + + -- Irq + irq <= '1' when ireg_irqP = '1' and irqEn = '1' else '0'; + +end architecture rtl; diff --git a/hw/hdl/operators/lpn/fk_top.vhd b/hw/hdl/operators/lpn/fk_top.vhd new file mode 100644 index 00000000..8693f339 --- /dev/null +++ b/hw/hdl/operators/lpn/fk_top.vhd @@ -0,0 +1,253 @@ +--*************************************************************************-- +--! @file : fk_top.vhd +--! @project : Firekite, Single port RAM implementation. +--! +--! Top level component +--! +--! @date : 10.10.2017. +--*************************************************************************-- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use work.utils.all; + +entity fk_top is + generic( + BUS_SIZE : integer := 32; -- Avalon bus size + NCYC : integer := 32 -- Cycl. to exp. noise bits and upd. work vctr. + ); + port( + clk : in std_logic; + reset_n : in std_logic; + -- Avalon slave + avs_Address : in std_logic_vector(2 downto 0); + avs_ChipSelect : in std_logic; + avs_Read : in std_logic; + avs_Write : in std_logic; + avs_WriteData : in std_logic_vector(31 downto 0); + avs_ReadData : out std_logic_vector(31 downto 0); + -- Avalon master Read + avm_rd_WaitRequest : in std_logic; + avm_rd_Read : out std_logic; + avm_rd_ReadDataValid : in std_logic; + avm_rd_ReadData : in std_logic_vector(BUS_SIZE-1 downto 0); + avm_rd_Address : out std_logic_vector(31 downto 0); + avm_rd_BurstCount : out std_logic_vector(7 downto 0); + -- Avalon master Write + avm_wr_WaitRequest : in std_logic; + avm_wr_Write : out std_logic; + avm_wr_WriteData : out std_logic_vector(BUS_SIZE-1 downto 0); + avm_wr_Address : out std_logic_vector(31 downto 0); + avm_wr_BurstCount : out std_logic_vector(7 downto 0); + -- Irq + irq : out std_logic + ); +end entity fk_top; + +architecture RTL of fk_top is + + -- Constants + constant NBITS : integer := 4096; + constant DBITS : integer := 2816; + constant EBITS : integer := 768; + constant VBITS : integer := 512; + + -- 
Internal signals + + signal start_slv_ctrl : std_logic; + signal len_slv_ctrl : std_logic_vector(15 downto 0); + signal src_slv_ctrl : std_logic_vector(31 downto 0); + signal dest_slv_ctrl : std_logic_vector(31 downto 0); + signal mode_slv_ctrl : std_logic_vector(1 downto 0); + signal done_ctrl_slv : std_logic; + signal doneIrq_ctrl_slv : std_logic; + + signal start_ctrl_dma_rd : std_logic; + signal src_ctrl_dma_rd : std_logic_vector(31 downto 0); + signal len_ctrl_dma_rd : std_logic_vector(9 downto 0); + signal dVal_dma_rd_ctrl : std_logic; + signal done_dma_rd_ctrl : std_logic; + signal data_dma_rd_dp : std_logic_vector(BUS_SIZE-1 downto 0); + + signal start_ctrl_dma_wr : std_logic; + signal dest_ctrl_dma_wr : std_logic_vector(31 downto 0); + signal len_ctrl_dma_wr : std_logic_vector(9 downto 0); + signal dVal_dma_wr_ctrl : std_logic; + signal done_dma_wr_ctrl : std_logic; + signal data_dp_dma_wr : std_logic_vector(BUS_SIZE-1 downto 0); + + + signal ld_key_init_ctrl_dp : std_logic; + signal ld_v_init_ctrl_dp : std_logic; + signal ld_v_ctrl_dp : std_logic; + signal start_key_ctrl_dp : std_logic; + signal done_key_dp_ctrl : std_logic; + signal ld_data_ctrl_dp : std_logic; + signal wr_data_ctrl_dp : std_logic; + signal rd_data_ctrl_dp : std_logic; + signal rst_pntr_ctrl_dp : std_logic; + signal ld_work_ctrl_dp : std_logic; + signal wr_work_ctrl_dp : std_logic; + signal rst_work_ctrl_dp : std_logic; + signal start_init_exp_ctrl_dp : std_logic; + signal start_exp_ctrl_dp : std_logic; + signal done_exp_dp_ctrl : std_logic; + +begin + + -- Instantiate Avalon slave unit. Interface. 
+ SLV: entity work.fk_slave + port map( + clk => clk, + reset_n => reset_n, + -- Avalon slave + avs_Address => avs_Address, + avs_ChipSelect => avs_ChipSelect, + avs_Read => avs_Read, + avs_Write => avs_Write, + avs_WriteData => avs_WriteData, + avs_ReadData => avs_ReadData, + -- Control + start => start_slv_ctrl, + len => len_slv_ctrl, + src => src_slv_ctrl, + dest => dest_slv_ctrl, + mode => mode_slv_ctrl, + done => done_ctrl_slv, + doneIrq => doneIrq_ctrl_slv, + -- Irq + irq => irq + ); + + -- Instantiate DMA Read unit. + DMA_RD: entity work.fk_dma_rd + generic map( + BUS_SIZE => BUS_SIZE + ) + port map( + clk => clk, + reset_n => reset_n, + -- Control + start => start_ctrl_dma_rd, + src => src_ctrl_dma_rd, + len => len_ctrl_dma_rd, + dVal => dVal_dma_rd_ctrl, + done => done_dma_rd_ctrl, + -- Data + src_data => data_dma_rd_dp, + -- Avalon master + avm_WaitRequest => avm_rd_WaitRequest, + avm_Read => avm_rd_Read, + avm_ReadDataValid => avm_rd_ReadDataValid, + avm_ReadData => avm_rd_ReadData, + avm_Address => avm_rd_Address, + avm_BurstCount => avm_rd_BurstCount + ); + + -- Instantiate DMA Write unit. + DMA_WR: entity work.fk_dma_wr + generic map( + BUS_SIZE => BUS_SIZE + ) + port map( + clk => clk, + reset_n => reset_n, + -- Control + start => start_ctrl_dma_wr, + dest => dest_ctrl_dma_wr, + len => len_ctrl_dma_wr, + dVal => dVal_dma_wr_ctrl, + done => done_dma_wr_ctrl, + -- Data + sink_data => data_dp_dma_wr, + -- Avalon master + avm_WaitRequest => avm_wr_WaitRequest, + avm_Write => avm_wr_Write, + avm_WriteData => avm_wr_WriteData, + avm_Address => avm_wr_Address, + avm_BurstCount => avm_wr_BurstCount + ); + + -- Instantiate Datapath with registers. 
+ DP: entity work.fk_dp + generic map( + NBITS => NBITS, + DBITS => DBITS, + EBITS => EBITS, + VBITS => VBITS, + BUS_SIZE => BUS_SIZE, + NCYC => NCYC + ) + port map( + clk => clk, + reset_n => reset_n, + -- Control + ld_key_init => ld_key_init_ctrl_dp, + ld_v_init => ld_v_init_ctrl_dp, + ld_v => ld_v_ctrl_dp, + start_key => start_key_ctrl_dp, + done_key => done_key_dp_ctrl, + ld_data => ld_data_ctrl_dp, + wr_data => wr_data_ctrl_dp, + rd_data => rd_data_ctrl_dp, + rst_pntr => rst_pntr_ctrl_dp, + ld_work => ld_work_ctrl_dp, + wr_work => wr_work_ctrl_dp, + rst_work => rst_work_ctrl_dp, + start_init_exp => start_init_exp_ctrl_dp, + start_exp => start_exp_ctrl_dp, + done_exp => done_exp_dp_ctrl, + -- Data + sink_data_s => data_dma_rd_dp, + src_data_s => data_dp_dma_wr + ); + + -- Instantiate Controller. + CTRL: entity work.fk_ctrl + generic map( + NBITS => NBITS, + DBITS => DBITS + ) + port map( + clk => clk, + reset_n => reset_n, + -- Control DP + dp_key_ld_init => ld_key_init_ctrl_dp, + dp_key_ld_v_init => ld_v_init_ctrl_dp, + dp_key_ld_v => ld_v_ctrl_dp, + dp_key_start => start_key_ctrl_dp, + dp_key_done => done_key_dp_ctrl, + dp_ld_data => ld_data_ctrl_dp, + dp_wr_data => wr_data_ctrl_dp, + dp_rd_data => rd_data_ctrl_dp, + dp_rst_pntr => rst_pntr_ctrl_dp, + dp_ld_work => ld_work_ctrl_dp, + dp_wr_work => wr_work_ctrl_dp, + dp_rst_work => rst_work_ctrl_dp, + dp_start_init_exp => start_init_exp_ctrl_dp, + dp_start_exp => start_exp_ctrl_dp, + dp_done_exp => done_exp_dp_ctrl, + -- Avalon slave + avs_start => start_slv_ctrl, + avs_len => len_slv_ctrl, + avs_src => src_slv_ctrl, + avs_dest => dest_slv_ctrl, + avs_mode => mode_slv_ctrl, + avs_done => done_ctrl_slv, + avs_doneIrq => doneIrq_ctrl_slv, + -- Avalon master Read + avm_rd_start => start_ctrl_dma_rd, + avm_rd_src => src_ctrl_dma_rd, + avm_rd_len => len_ctrl_dma_rd, + avm_rd_dVal => dVal_dma_rd_ctrl, + avm_rd_done => done_dma_rd_ctrl, + -- Avalon master Write + avm_wr_start => start_ctrl_dma_wr, + avm_wr_dest => 
dest_ctrl_dma_wr, + avm_wr_len => len_ctrl_dma_wr, + avm_wr_dVal => dVal_dma_wr_ctrl, + avm_wr_done => done_dma_wr_ctrl + ); + +end architecture RTL; diff --git a/hw/hdl/operators/lpn/fk_work_reg.vhd b/hw/hdl/operators/lpn/fk_work_reg.vhd new file mode 100644 index 00000000..3546f970 --- /dev/null +++ b/hw/hdl/operators/lpn/fk_work_reg.vhd @@ -0,0 +1,81 @@ +--*************************************************************************-- +--! @file : fk_work_reg.vhd +--! @project : Firekite, Dual port RAM implementation. +--! +--! Work register. +--! +--! @date : 10.10.2017. +--*************************************************************************-- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use work.utils.all; + +entity fk_work_reg is + generic( + NBITS : integer := 4096; -- Length of the basis vector + BUS_SIZE : integer := 32 -- Avalon bus size + ); + port( + clk : in std_logic; + reset_n : in std_logic; + -- Control + ld_work : in std_logic; -- Parallel load reg. + wr_work : in std_logic; -- Serial write reg. + rst_work : in std_logic; -- Reset reg. + -- Data + sink_data_p : in std_logic_vector(NBITS-1 downto 0); -- Parallel + src_data_p : out std_logic_vector(NBITS-1 downto 0); + sink_data_s : in std_logic_vector(BUS_SIZE-1 downto 0) -- Serial + ); +end entity fk_work_reg; + +architecture RTL of fk_work_reg is + + -- Internal regsiters + signal work_reg, work_next : std_logic_vector(NBITS-1 downto 0); + +begin + + -- Register process + -- + CR_REG: process (clk, reset_n) is + begin + if reset_n = '0' then + work_reg <= (others => '0'); + elsif rising_edge(clk) then + work_reg <= work_next; + end if; + end process CR_REG; + + -- Datapath process + -- + DP: process (work_reg, + ld_work, wr_work, rst_work, sink_data_p, sink_data_s) is + variable idx : integer; + begin + work_next <= work_reg; + + -- Parallel load work register. XOR-s its contents with + -- data from the key register(update of the work vector). 
+ if ld_work = '1' then + work_next <= work_reg xor sink_data_p; + end if; + + -- Serial write work register. Shift register. + if wr_work = '1' then + work_next <= sink_data_s & work_reg(NBITS-1 downto BUS_SIZE); + end if; + + -- Resets the register with all zeros. + if rst_work = '1' then + work_next <= (others => '0'); + end if; + + end process DP; + + -- Output + src_data_p <= work_reg; + +end architecture RTL; diff --git a/hw/hdl/operators/lpn/utils.vhd b/hw/hdl/operators/lpn/utils.vhd new file mode 100644 index 00000000..bba368a4 --- /dev/null +++ b/hw/hdl/operators/lpn/utils.vhd @@ -0,0 +1,31 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +package utils is + function bitlength(number : natural) return positive; + +end package utils; + +package body utils is + + -- purpose: returns the minimum # of bits needed to represent the input number + function bitlength(number : natural) return positive is + variable acc : positive := 1; + variable i : natural := 0; + begin + if number = 0 then + return 1; + else + while True loop + if acc > number then + return i; + end if; + + acc := acc * 2; + i := i + 1; + end loop; + end if; + end function bitlength; + +end package body utils; diff --git a/hw/hdl/operators/minmaxsum/minmaxsum.vhd b/hw/hdl/operators/minmaxsum/minmaxsum.vhd new file mode 100644 index 00000000..40bdb0ba --- /dev/null +++ b/hw/hdl/operators/minmaxsum/minmaxsum.vhd @@ -0,0 +1,131 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +entity minmaxsum is +port ( + clk : in std_logic; + rst_n : in std_logic; + + clr : in std_logic; + done : out std_logic; + + min_out : out std_logic_vector(31 downto 0); + max_out : out std_logic_vector(31 downto 0); + sum_out : out std_logic_vector(31 downto 0); + + axis_in_tvalid : in std_logic; + axis_in_tdata : in std_logic_vector(511 downto 0); + axis_in_tlast : in std_logic +); +end minmaxsum; + +architecture behavioral of minmaxsum is + +type CLIntegersType is 
array (15 downto 0) of signed(31 downto 0); +signal CLIntegers : CLIntegersType; + +type OddEvenSortIntegersType is array(1 to 16) of CLIntegersType; +signal OddEvenSortIntegers : OddEvenSortIntegersType; +signal sortingSteps : std_logic_vector(1 to 16); +signal lastSteps : std_logic_vector(1 to 16); + +signal minimum : signed(31 downto 0); +signal maximum : signed(31 downto 0); +signal summation : signed(31 downto 0); + +begin + +-- Generate input data +gen_CLIntegers: for i in 0 to 15 generate + CLIntegers(i) <= signed(axis_in_tdata(i*32+31 downto i*32)); +end generate gen_CLIntegers; + +min_out <= std_logic_vector(minimum); +max_out <= std_logic_vector(maximum); +sum_out <= std_logic_vector(summation); + +process(clk) +begin +if clk'event and clk = '1' then + if rst_n = '0' then + minimum <= X"7FFFFFFF"; + maximum <= X"80000000"; + summation <= (others => '0'); + + done <= '0'; + else + done <= '0'; + + if clr = '1' then + minimum <= X"7FFFFFFF"; + maximum <= X"80000000"; + summation <= (others => '0'); + end if; + + -- Read CLs + sortingSteps(1) <= axis_in_tvalid; + lastSteps(1) <= axis_in_tlast; + for j in 1 to 15 loop + sortingSteps(j+1) <= sortingSteps(j); + lastSteps(j+1) <= lastSteps(j); + end loop; + + -- 1. Cycle + for i in 0 to 7 loop + if CLIntegers(2*i) > CLIntegers(2*i+1) then + OddEvenSortIntegers(1)(2*i) <= CLIntegers(2*i+1); OddEvenSortIntegers(1)(2*i+1) <= CLIntegers(2*i); + else + OddEvenSortIntegers(1)(2*i) <= CLIntegers(2*i); OddEvenSortIntegers(1)(2*i+1) <= CLIntegers(2*i+1); + end if; + end loop; + + -- 3.5.7.9.11.13.15. 
Cycle + for j in 1 to 7 loop + for i in 0 to 7 loop + if OddEvenSortIntegers(2*j)(2*i) > OddEvenSortIntegers(2*j)(2*i+1) then + OddEvenSortIntegers(2*j+1)(2*i) <= OddEvenSortIntegers(2*j)(2*i+1); OddEvenSortIntegers(2*j+1)(2*i+1) <= OddEvenSortIntegers(2*j)(2*i); + else + OddEvenSortIntegers(2*j+1)(2*i) <= OddEvenSortIntegers(2*j)(2*i); OddEvenSortIntegers(2*j+1)(2*i+1) <= OddEvenSortIntegers(2*j)(2*i+1); + end if; + end loop; + end loop; + + -- 2.4.6.8.10.12.14.16. Cycle + for j in 1 to 8 loop + OddEvenSortIntegers(2*j)(0) <= OddEvenSortIntegers(2*j-1)(0); + OddEvenSortIntegers(2*j)(15) <= OddEvenSortIntegers(2*j-1)(15); + for i in 1 to 7 loop + if OddEvenSortIntegers(2*j-1)(2*i-1) > OddEvenSortIntegers(2*j-1)(2*i) then + OddEvenSortIntegers(2*j)(2*i-1) <= OddEvenSortIntegers(2*j-1)(2*i); OddEvenSortIntegers(2*j)(2*i) <= OddEvenSortIntegers(2*j-1)(2*i-1); + else + OddEvenSortIntegers(2*j)(2*i-1) <= OddEvenSortIntegers(2*j-1)(2*i-1); OddEvenSortIntegers(2*j)(2*i) <= OddEvenSortIntegers(2*j-1)(2*i); + end if; + end loop; + end loop; + + -- Results + if sortingSteps(16) = '1' then + if OddEvenSortIntegers(16)(0) < minimum then + minimum <= OddEvenSortIntegers(16)(0); + end if; + if OddEvenSortIntegers(16)(15) > maximum then + maximum <= OddEvenSortIntegers(16)(15); + end if; + summation <= summation + OddEvenSortIntegers(16)(0) + OddEvenSortIntegers(16)(1) + OddEvenSortIntegers(16)(2) + OddEvenSortIntegers(16)(3) + + OddEvenSortIntegers(16)(4) + OddEvenSortIntegers(16)(5) + OddEvenSortIntegers(16)(6) + OddEvenSortIntegers(16)(7) + + OddEvenSortIntegers(16)(8) + OddEvenSortIntegers(16)(9) + OddEvenSortIntegers(16)(10) + OddEvenSortIntegers(16)(11) + + OddEvenSortIntegers(16)(12) + OddEvenSortIntegers(16)(13) + OddEvenSortIntegers(16)(14) + OddEvenSortIntegers(16)(15); + end if; + + -- Done + if lastSteps(16) = '1' then + done <= '1'; + end if; + + end if; +end if; +end process; + + +end architecture; \ No newline at end of file diff --git 
a/hw/hdl/operators/minmaxsum/minmaxsum_slave.sv b/hw/hdl/operators/minmaxsum/minmaxsum_slave.sv new file mode 100644 index 00000000..ee2db8b1 --- /dev/null +++ b/hw/hdl/operators/minmaxsum/minmaxsum_slave.sv @@ -0,0 +1,236 @@ +import lynxTypes::*; + +module minmaxsum_slave ( + input logic aclk, + input logic aresetn, + + AXI4L.s axi_ctrl, + + output logic clr, + input logic done, + + input logic [31:0] minimum, + input logic [31:0] maximum, + input logic [31:0] summation +); + +//`define DEBUG_CNFG_SLAVE + +// -- Decl ---------------------------------------------------------- +// ------------------------------------------------------------------ + +// Constants +localparam integer N_REGS = 5; +localparam integer ADDR_LSB = (AXIL_DATA_BITS/32) + 1; +localparam integer ADDR_MSB = $clog2(N_REGS); +localparam integer AXI_ADDR_BITS = ADDR_LSB + ADDR_MSB; + +// Internal registers +logic [AXI_ADDR_BITS-1:0] axi_awaddr; +logic axi_awready; +logic [AXI_ADDR_BITS-1:0] axi_araddr; +logic axi_arready; +logic [1:0] axi_bresp; +logic axi_bvalid; +logic axi_wready; +logic [AXIL_DATA_BITS-1:0] axi_rdata; +logic [1:0] axi_rresp; +logic axi_rvalid; + +// Registers +logic [N_REGS-1:0][AXIL_DATA_BITS-1:0] slv_reg; +logic slv_reg_rden; +logic slv_reg_wren; +logic aw_en; + +logic done_op; + +// -- Def ----------------------------------------------------------- +// ------------------------------------------------------------------ + +// -- Register map ----------------------------------------------------------------------- +localparam integer CTRL_REG = 0; +localparam integer STAT_REG = 1; +localparam integer MIN_REG = 2; +localparam integer MAX_REG = 3; +localparam integer SUM_REG = 4; + +// Write process +assign slv_reg_wren = axi_wready && axi_ctrl.wvalid && axi_awready && axi_ctrl.awvalid; + +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) begin + slv_reg <= 0; + + done_op <= 1'b0; + end + else begin + slv_reg[CTRL_REG] <= 0; + + done_op <= 
slv_reg[CTRL_REG][0] ? 1'b0 : done ? 1'b1 : done_op; + + if(slv_reg_wren) begin + case (axi_awaddr[ADDR_LSB+ADDR_MSB-1:ADDR_LSB]) + CTRL_REG: + for (int i = 0; i < 1; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[CTRL_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + default : ; + endcase + end + end +end + +assign clr = slv_reg[CTRL_REG][0]; + +// Read process +assign slv_reg_rden = axi_arready & axi_ctrl.arvalid & ~axi_rvalid; + +always_ff @(posedge aclk, negedge aresetn) begin + if( aresetn == 1'b0 ) begin + axi_rdata <= 0; + end + else begin + axi_rdata <= 0; + if(slv_reg_rden) begin + case (axi_araddr[ADDR_LSB+ADDR_MSB-1:ADDR_LSB]) + STAT_REG: // Status + axi_rdata[0] <= done_op; + MIN_REG: // Min + axi_rdata[31:0] <= minimum; + MAX_REG: // Max + axi_rdata[31:0] <= maximum; + SUM_REG: // Sum + axi_rdata[31:0] <= summation; + default: ; + endcase + end + end +end + +// I/O +assign axi_ctrl.awready = axi_awready; +assign axi_ctrl.arready = axi_arready; +assign axi_ctrl.bresp = axi_bresp; +assign axi_ctrl.bvalid = axi_bvalid; +assign axi_ctrl.wready = axi_wready; +assign axi_ctrl.rdata = axi_rdata; +assign axi_ctrl.rresp = axi_rresp; +assign axi_ctrl.rvalid = axi_rvalid; + +// awready and awaddr +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_awready <= 1'b0; + axi_awaddr <= 0; + aw_en <= 1'b1; + end + else + begin + if (~axi_awready && axi_ctrl.awvalid && axi_ctrl.wvalid && aw_en) + begin + axi_awready <= 1'b1; + aw_en <= 1'b0; + axi_awaddr <= axi_ctrl.awaddr; + end + else if (axi_ctrl.bready && axi_bvalid) + begin + aw_en <= 1'b1; + axi_awready <= 1'b0; + end + else + begin + axi_awready <= 1'b0; + end + end +end + +// arready and araddr +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_arready <= 1'b0; + axi_araddr <= 0; + end + else + begin + if (~axi_arready && axi_ctrl.arvalid) + begin + axi_arready <= 1'b1; + axi_araddr <= axi_ctrl.araddr; + end + else + begin + 
axi_arready <= 1'b0; + end + end +end + +// bvalid and bresp +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_bvalid <= 0; + axi_bresp <= 2'b0; + end + else + begin + if (axi_awready && axi_ctrl.awvalid && ~axi_bvalid && axi_wready && axi_ctrl.wvalid) + begin + axi_bvalid <= 1'b1; + axi_bresp <= 2'b0; + end + else + begin + if (axi_ctrl.bready && axi_bvalid) + begin + axi_bvalid <= 1'b0; + end + end + end +end + +// wready +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_wready <= 1'b0; + end + else + begin + if (~axi_wready && axi_ctrl.wvalid && axi_ctrl.awvalid && aw_en ) + begin + axi_wready <= 1'b1; + end + else + begin + axi_wready <= 1'b0; + end + end +end + +// rvalid and rresp (1Del?) +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_rvalid <= 0; + axi_rresp <= 0; + end + else + begin + if (axi_arready && axi_ctrl.arvalid && ~axi_rvalid) + begin + axi_rvalid <= 1'b1; + axi_rresp <= 2'b0; + end + else if (axi_rvalid && axi_ctrl.rready) + begin + axi_rvalid <= 1'b0; + end + end +end + +endmodule // cnfg_slave \ No newline at end of file diff --git a/hw/hdl/operators/minmaxsum/minmaxsum_user_logic.sv b/hw/hdl/operators/minmaxsum/minmaxsum_user_logic.sv new file mode 100644 index 00000000..c89d5543 --- /dev/null +++ b/hw/hdl/operators/minmaxsum/minmaxsum_user_logic.sv @@ -0,0 +1,68 @@ +`timescale 1ns / 1ps + +import lynxTypes::*; + +/** + * User logic + * + */ +module design_user_logic_0 ( + // Clock and reset + input wire aclk, + input wire[0:0] aresetn, + + // AXI4 control + AXI4L.s axi_ctrl, + + // AXI4S + AXI4S.m axis_src, + AXI4S.s axis_sink +); + +/* -- Tie-off unused interfaces and signals ----------------------------- */ +//always_comb axi_ctrl.tie_off_s(); +always_comb axis_src.tie_off_m(); +//always_comb axis_sink.tie_off_s(); + +/* -- USER LOGIC -------------------------------------------------------- */ +// Reg input +AXI4S 
axis_sink_r (); +//AXI4S axis_src_r (); +axis_reg_rtl inst_reg_sink (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_sink), .axis_out(axis_sink_r)); +//axis_reg_rtl inst_reg_src (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_src_r), .axis_out(axis_src)); + +logic clr; +logic done; +logic [31:0] minimum; +logic [31:0] maximum; +logic [31:0] summation; + +// Slave (AXI4-Lite registers: drives clr, exposes done/minimum/maximum/summation for read-back) +minmaxsum_slave inst_slave ( + .aclk(aclk), + .aresetn(aresetn), + .axi_ctrl(axi_ctrl), + .clr(clr), + .done(done), + .minimum(minimum), + .maximum(maximum), + .summation(summation) +); + +// Minmaxsum +minmaxsum inst_top ( + .clk(aclk), + .rst_n(aresetn), + .clr(clr), + .done(done), + .min_out(minimum), + .max_out(maximum), + .sum_out(summation), + .axis_in_tvalid(axis_sink_r.tvalid), + .axis_in_tdata(axis_sink_r.tdata), + .axis_in_tlast(axis_sink_r.tlast) +); + +assign axis_sink_r.tready = 1'b1; + +endmodule diff --git a/hw/hdl/operators/percentage/minmaxsum_user_logic.sv b/hw/hdl/operators/percentage/minmaxsum_user_logic.sv new file mode 100644 index 00000000..e10e8e6e --- /dev/null +++ b/hw/hdl/operators/percentage/minmaxsum_user_logic.sv @@ -0,0 +1,103 @@ +`timescale 1ns / 1ps + +import lynxTypes::*; + +/** + * User logic + * + */ +module design_user_logic_0 ( + // Clock and reset + input wire aclk, + input wire[0:0] aresetn, + + // AXI4 control + AXI4L.s axi_ctrl, + + // AXI4S + AXI4S.m axis_src, + AXI4S.s axis_sink +); + +/* -- Tie-off unused interfaces and signals ----------------------------- */ +//always_comb axi_ctrl.tie_off_s(); +always_comb axis_src.tie_off_m(); +//always_comb axis_sink.tie_off_s(); + +/* -- USER LOGIC -------------------------------------------------------- */ +// Reg input +AXI4S axis_sink_r (); +//AXI4S axis_src_r (); +axis_reg_rtl inst_reg_sink (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_sink), .axis_out(axis_sink_r)); +//axis_reg_rtl inst_reg_src (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_src_r), .axis_out(axis_src)); + +logic clr; +logic done; +logic select; +logic
[39:0] total_sum; +logic [39:0] selected_sum; +logic [31:0] selected_count; + +AXI4S axis_data (); +AXI4S axis_predicates (); + +// Slave -- NOTE(review): minimum/maximum/summation are not declared in this module; confirm percentage_slave's result ports +percentage_slave inst_slave ( + .aclk(aclk), + .aresetn(aresetn), + .axi_ctrl(axi_ctrl), + .clr(clr), + .done(done), + .select(select), + .minimum(minimum), + .maximum(maximum), + .summation(summation) +); + +// Mux input: select=1 routes the registered sink to axis_data, select=0 routes it to axis_predicates +always_comb begin + axis_data.tdata = axis_sink_r.tdata; + axis_data.tkeep = axis_sink_r.tkeep; + axis_data.tlast = axis_sink_r.tlast; + + axis_predicates.tdata = axis_sink_r.tdata; + axis_predicates.tkeep = axis_sink_r.tkeep; + axis_predicates.tlast = axis_sink_r.tlast; + + if(select) begin + axis_data.tvalid = axis_sink_r.tvalid; + axis_predicates.tvalid = 1'b0; + + axis_sink_r.tready = axis_data.tready; + end + else begin + axis_data.tvalid = 1'b0; + axis_predicates.tvalid = axis_sink_r.tvalid; // forward the sink valid (axis_data.tvalid is 0 in this branch) + + axis_sink_r.tready = axis_predicates.tready; + end +end + +// FIFO predicates + +// FIFO data + +// Percentage (fed directly from the mux outputs; the FIFOs above are not instantiated) +percentage inst_top ( + .clk(aclk), + .rst_n(aresetn), + .predicates_line(axis_predicates.tdata), + .predicates_valid(axis_predicates.tvalid), + .predicates_last(axis_predicates.tlast), + .predicates_in_ready(axis_predicates.tready), + .data_line(axis_data.tdata), + .data_valid(axis_data.tvalid), + .data_last(axis_data.tlast), + .data_in_ready(axis_data.tready), + .total_sum(total_sum), + .selected_sum(selected_sum), + .selected_count(selected_count), + .output_valid(done) +); + +endmodule diff --git a/hw/hdl/operators/percentage/onesCounter.v b/hw/hdl/operators/percentage/onesCounter.v new file mode 100644 index 00000000..b47d56eb --- /dev/null +++ b/hw/hdl/operators/percentage/onesCounter.v @@ -0,0 +1,83 @@ +1 +/* + * Copyright 2016 - 2017 Systems Group, ETH Zurich + * + * This hardware operator is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +// Counts the set bits of data_in in three registered stages (groups of 4, two halves, total); count_valid follows data_in_valid by three clocks +module onesCounter #(parameter WIDTH = 32) +( + input wire clk, + input wire rst_n, + + input wire data_in_valid, + input wire [31:0] data_in, + + output reg count_valid, + output reg [5:0] count + ); + +reg [2:0] sum_4_1, sum_4_2, sum_4_3, sum_4_4, sum_4_5, sum_4_6, sum_4_7, sum_4_8; + +reg [4:0] sum_2_1, sum_2_2; + +reg sum_4_valid, sum_2_valid; + + +always @(posedge clk) begin + if (~rst_n) begin + // reset + sum_4_1 <= 0; + sum_4_2 <= 0; + sum_4_3 <= 0; + sum_4_4 <= 0; + sum_4_5 <= 0; + sum_4_6 <= 0; + sum_4_7 <= 0; + sum_4_8 <= 0; + + sum_4_valid <= 0; // clear like the other valid flags so no valid leaks out of reset + + sum_2_1 <= 0; + sum_2_2 <= 0; + + sum_2_valid <= 0; + + count <= 0; + count_valid <= 0; + end + else begin + sum_4_1 <= data_in[0] + data_in[1] + data_in[2] + data_in[3]; + sum_4_2 <= data_in[4] + data_in[5] + data_in[6] + data_in[7]; + sum_4_3 <= data_in[8] + data_in[9] + data_in[10] + data_in[11]; + sum_4_4 <= data_in[12] + data_in[13] + data_in[14] + data_in[15]; + sum_4_5 <= data_in[16] + data_in[17] + data_in[18] + data_in[19]; + sum_4_6 <= data_in[20] + data_in[21] + data_in[22] + data_in[23]; + sum_4_7 <= data_in[24] + data_in[25] + data_in[26] + data_in[27]; + sum_4_8 <= data_in[28] + data_in[29] + data_in[30] + data_in[31]; + + sum_4_valid <= data_in_valid; + + sum_2_1 <= sum_4_1 + sum_4_2 + sum_4_3 + sum_4_4; + sum_2_2 <= sum_4_5 + sum_4_6 + sum_4_7 + sum_4_8; + + sum_2_valid <= sum_4_valid; + + count <= sum_2_1 + sum_2_2; + count_valid <= sum_2_valid; + end +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/operators/percentage/percentage.sv b/hw/hdl/operators/percentage/percentage.sv new file mode
100644 index 00000000..c9bb80be --- /dev/null +++ b/hw/hdl/operators/percentage/percentage.sv @@ -0,0 +1,187 @@ +module percentage( + input wire clk, + input wire rst_n, + + input wire [511:0] predicates_line, + input wire predicates_valid, + input wire predicates_last, + output wire predicates_in_ready, + + input wire [511:0] data_line, + input wire data_valid, + input wire data_last, + output wire data_in_ready, + + output reg [39:0] total_sum, + output reg [39:0] selected_sum, + output reg [31:0] selected_count, + output reg output_valid +); + +reg [511:0] pred_line; +reg pred_valid; +reg pred_last; +reg part; + +reg [511:0] d_line; +reg d_valid; +reg d_last; + +wire [255:0] pred_half; + +wire [15:0] notequal_0; + +wire matches_valid; +wire [5:0] matches_count; + +wire [35:0] full_accm; +wire full_accm_valid; +wire full_accm_last; + +wire [35:0] selective_accm; +wire selective_accm_valid; + +/////////////////////////////////////////////////////////////////////// +/// Keep Predicate line for two successive data lines +/////////////////////////////////////////////////////////////////////////////////// +always @(posedge clk) begin + if (~rst_n) begin + // reset + pred_line <= 0; + pred_valid <= 0; + pred_last <= 0; + end + else if(~pred_valid | (part & d_valid)) begin + pred_line <= predicates_line; + pred_valid <= predicates_valid; + pred_last <= predicates_last; + end +end + +always @(posedge clk) begin + if (~rst_n) begin + part <= 0; + end + else begin + case (part) + 1'b0: begin + if(pred_valid & d_valid) begin + part <= 1'b1; + end + end + 1'b1: begin + if(d_valid) begin + part <= 1'b0; + end + end + endcase + end +end +// +assign predicates_in_ready = ~pred_valid | (part & d_valid); +/////////////////////////////////////////////////////////////////////////////////// +always @(posedge clk) begin + if (~rst_n) begin + // reset + d_line <= 0; + d_valid <= 0; + d_last <= 0; + end + else if(~d_valid | pred_valid) begin + d_line <= data_line; + d_valid <= 
data_valid; + d_last <= data_last; + end +end +assign data_in_ready = (~d_valid | pred_valid); +///////////////////////////////////////////////////////////////////////////////////// + +////////////////////////// test_count module instance /// +assign pred_half = (part)? pred_line[511:256] : pred_line[255:0]; +// count non zero predicates +genvar i; + +generate + for(i = 0; i < 16; i = i + 1) begin + assign notequal_0[i] = ~(pred_half[(i+1)*16-1 : i*16] == 0); + end +endgenerate + +onesCounter onesCounter +( + .clk (clk), + .rst_n (rst_n), + + .data_in_valid (d_valid & pred_valid), + .data_in ({16'b0, notequal_0}), + + .count_valid (matches_valid), + .count (matches_count) +); + + +always@(posedge clk) begin + if(~rst_n) begin + selected_count <= 0; + end + else if(matches_valid) begin + selected_count <= selected_count + matches_count; + end +end + +////// + +reduction_tree reduce_full( + + .clk (clk), + .rst_n (rst_n), + .stall_pipeline (1'b0), + + .data_line (d_line), + .data_mask (16'hFFFF), + .data_valid (d_valid & pred_valid), + .data_last (d_last), + .reduce_result (full_accm), + .result_valid (full_accm_valid), + .result_last (full_accm_last) + ); + +always @(posedge clk) begin + if (~rst_n) begin + // reset + total_sum <= 0; + output_valid <= 0; + end + else if (full_accm_valid) begin + total_sum <= total_sum + {4'b0, full_accm}; + output_valid <= full_accm_last; + end +end + +reduction_tree reduce_selective( + + .clk (clk), + .rst_n (rst_n), + .stall_pipeline (1'b0), + + .data_line (d_line), + .data_mask (notequal_0), + .data_valid (d_valid & pred_valid), + .data_last (1'b0), + .reduce_result (selective_accm), + .result_valid (selective_accm_valid), + .result_last () + ); + +always @(posedge clk) begin + if (~rst_n) begin + // reset + selected_sum <= 0; + end + else if (selective_accm_valid) begin + selected_sum <= selected_sum + {4'b0, selective_accm}; + end +end + + +endmodule \ No newline at end of file diff --git 
a/hw/hdl/operators/percentage/reduction_tree.v b/hw/hdl/operators/percentage/reduction_tree.v new file mode 100644 index 00000000..720f82b8 --- /dev/null +++ b/hw/hdl/operators/percentage/reduction_tree.v @@ -0,0 +1,119 @@ +/* + * Copyright 2016 - 2017 Systems Group, ETH Zurich + * + * This hardware operator is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + + +module reduction_tree( + + input wire clk, + input wire rst_n, + input wire stall_pipeline, + + input wire [511:0] data_line, + input wire [15:0] data_mask, + input wire data_valid, + input wire data_last, + output wire [35:0] reduce_result, + output wire result_valid, + output wire result_last + ); + +wire [31:0] in[15:0]; + +reg [32:0] sum1[7:0]; +reg [33:0] sum2[3:0]; +reg [34:0] sum3[1:0]; +reg [35:0] sum4; + +reg valid_in_d1; +reg valid_in_d2; +reg valid_in_d3; +reg valid_in_d4; + +reg last_in_d1; +reg last_in_d2; +reg last_in_d3; +reg last_in_d4; + +genvar j; +generate + + for(j = 0; j < 16; j = j + 1) begin: ins + assign in[j] = (data_mask[j])? 
data_line[(j+1)*32 - 1 : j*32] : 0; + end +endgenerate + +// +integer i; +always @(posedge clk) begin + // + if(~rst_n) begin + valid_in_d1 <= 0; + last_in_d1 <= 0; + + for(i = 0; i < 8; i = i + 1) begin + sum1[i] <= 0; + end + // + valid_in_d2 <= 0; + last_in_d2 <= 0; + for(i = 0; i < 4; i = i + 1) begin + sum2[i] <= 0; + end + // + valid_in_d3 <= 0; + last_in_d3 <= 0; + for(i = 0; i < 2; i = i + 1) begin + sum3[i] <= 0; + end + // + valid_in_d4 <= 0; + last_in_d4 <= 0; + sum4 <= 0; + end + else /*if(~stall_pipeline)*/ begin + valid_in_d1 <= data_valid; + last_in_d1 <= data_last; + + for(i = 0; i < 8; i = i + 1) begin + sum1[i] <= {1'b0, in[i*2]} + {1'b0, in[i*2+1]}; + end + // + valid_in_d2 <= valid_in_d1; + last_in_d2 <= last_in_d1; + for(i = 0; i < 4; i = i + 1) begin + sum2[i] <= {1'b0, sum1[i*2]} + {1'b0, sum1[i*2+1]}; + end + // + valid_in_d3 <= valid_in_d2; + last_in_d3 <= last_in_d2; + for(i = 0; i < 2; i = i + 1) begin + sum3[i] <= {1'b0, sum2[i*2]} + {1'b0, sum2[i*2+1]}; + end + // + valid_in_d4 <= valid_in_d3; + last_in_d4 <= last_in_d3; + sum4 <= {1'b0, sum3[0]} + {1'b0, sum3[1]}; + end + +end + +assign reduce_result = sum4; +assign result_last = last_in_d4; +assign result_valid = valid_in_d4; + +endmodule \ No newline at end of file diff --git a/hw/hdl/operators/regex/regex.sv b/hw/hdl/operators/regex/regex.sv new file mode 100644 index 00000000..040ef8b3 --- /dev/null +++ b/hw/hdl/operators/regex/regex.sv @@ -0,0 +1,108 @@ +`timescale 1ns / 1ps + +`include "axi_macros.svh" +`include "lynx_macros.svh" + +import lynxTypes::*; + +/** + * User logic + * + */ +module design_user_logic_0 ( + // AXI4L CONTROL + // Slave control. Utilize this interface for any kind of CSR implementation. 
+ AXI4L.s axi_ctrl, + + // AXI4S HOST + AXI4S.m axis_host_src, + AXI4S.s axis_host_sink, + + // AXI4S RDMA + AXI4S.m axis_rdma_src, + AXI4S.s axis_rdma_sink, + + // FV + metaIntf.s fv_sink, + metaIntf.m fv_src, + + // Requests + reqIntf.m rd_req_user, + reqIntf.m wr_req_user, + + // RDMA + reqIntf.s rd_req_rdma, + reqIntf.s wr_req_rdma, + + // Clock and reset + input wire aclk, + input wire[0:0] aresetn +); + +/* -- Tie-off unused interfaces and signals ----------------------------- */ +always_comb axi_ctrl.tie_off_s(); +//always_comb axis_rdma_src.tie_off_m(); +//always_comb axis_rdma_sink.tie_off_s(); +//always_comb axis_card_src.tie_off_m(); +//always_comb axis_card_sink.tie_off_s(); +//always_comb fv_sink.tie_off_s(); +//always_comb fv_src.tie_off_m(); +//always_comb rd_req_user.tie_off_m(); +//always_comb wr_req_user.tie_off_m(); +always_comb rd_req_rdma.tie_off_s(); +//always_comb wr_req_rdma.tie_off_s(); + +/* -- USER LOGIC -------------------------------------------------------- */ + +localparam integer QP_BITS = 24; +localparam integer PARAMS_BITS = VADDR_BITS + LEN_BITS + QP_BITS; + +// Write - RDMA +`AXIS_ASSIGN(axis_rdma_sink, axis_card_src) +`REQ_ASSIGN(wr_req_rdma, wr_req_user) + +// Read - Farview +metaIntf #(.DATA_BITS(PARAMS_BITS)) params_sink (); +metaIntf #(.DATA_BITS(PARAMS_BITS)) params_src (); + +metaIntf #(.DATA_BITS(AXI_DATA_BITS)) cnfg (); + +// Request handler +regex_req inst_regex_req ( + .aclk(aclk), + .aresetn(aresetn), + .fv_sink(fv_sink), + .rd_req_user(rd_req_user), + .params(params_sink), + .cnfg(cnfg) +); + +// Data handler +regex_data inst_regex_data ( + .aclk(aclk), + .aresetn(aresetn), + .axis_card_sink(axis_card_sink), + .axis_rdma_src(axis_rdma_src), + .fv_src(fv_src), + .params(params_src), + .cnfg(cnfg) +); + +// Sequence +queue_meta inst_seq ( + .aclk(aclk), + .aresetn(aresetn), + .sink(params_sink), + .src(params_src) +); + +//create_ip -name fifo_generator -vendor xilinx.com -library ip -version 13.2 -module_name 
fifo_generator_512_shallow_sync +//set_property -dict [list CONFIG.Component_Name {fifo_generator_512_shallow_sync} CONFIG.INTERFACE_TYPE {AXI_STREAM} CONFIG.Reset_Type {Asynchronous_Reset} CONFIG.Full_Flags_Reset_Value {1} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TSTRB_WIDTH {64} CONFIG.TKEEP_WIDTH {64} CONFIG.FIFO_Implementation_wach {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_wach {15} CONFIG.Empty_Threshold_Assert_Value_wach {14} CONFIG.FIFO_Implementation_wrch {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_wrch {15} CONFIG.Empty_Threshold_Assert_Value_wrch {14} CONFIG.FIFO_Implementation_rach {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_rach {15} CONFIG.Empty_Threshold_Assert_Value_rach {14} CONFIG.Programmable_Full_Type_axis {Single_Programmable_Full_Threshold_Constant} CONFIG.Full_Threshold_Assert_Value_axis {126} CONFIG.Enable_Safety_Circuit {true}] [get_ips fifo_generator_512_shallow_sync] + +//create_ip -name fifo_generator -vendor xilinx.com -library ip -version 13.2 -module_name fifo_generator_1byte_sync +//set_property -dict [list CONFIG.Component_Name {fifo_generator_1byte_sync} CONFIG.INTERFACE_TYPE {AXI_STREAM} CONFIG.Reset_Type {Asynchronous_Reset} CONFIG.Full_Flags_Reset_Value {1} CONFIG.TUSER_WIDTH {0} CONFIG.FIFO_Implementation_wach {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_wach {15} CONFIG.Empty_Threshold_Assert_Value_wach {14} CONFIG.FIFO_Implementation_wrch {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_wrch {15} CONFIG.Empty_Threshold_Assert_Value_wrch {14} CONFIG.FIFO_Implementation_rach {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_rach {15} CONFIG.Empty_Threshold_Assert_Value_rach {14} CONFIG.Enable_Safety_Circuit {true}] [get_ips fifo_generator_1byte_sync] + +//create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_512_1kD +//set_property -dict [list CONFIG.TDATA_NUM_BYTES 
{64} CONFIG.FIFO_DEPTH {1024} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.Component_Name {axis_data_fifo_512_1kD}] [get_ips axis_data_fifo_512_1kD] + +endmodule \ No newline at end of file diff --git a/hw/hdl/operators/regex/regex_data.sv b/hw/hdl/operators/regex/regex_data.sv new file mode 100644 index 00000000..8325f0b1 --- /dev/null +++ b/hw/hdl/operators/regex/regex_data.sv @@ -0,0 +1,258 @@ +import lynxTypes::*; + +module regex_data #( + parameter integer DBG_ILA = 0 +) ( + input logic aclk, + input logic aresetn, + + // RDMA + AXI4S.s axis_card_sink, + AXI4S.m axis_rdma_src, + + // Command + metaIntf.m fv_src, + + // Sequence + metaIntf.s params, + + // Config + metaIntf.s cnfg +); + +localparam integer APP_WRITE = 1; +localparam integer BEAT_LOG_BYTES = AXI_DATA_BITS/8; +localparam integer BEAT_LOG_BITS = $clog2(BEAT_LOG_BYTES); + +// -- FSM +typedef enum logic[1:0] {ST_IDLE, ST_SEND, ST_DROP} state_t; +logic [1:0] state_C, state_N; + +logic [31:0] cnt_C, cnt_N; +logic drop_sent_C, drop_sent_N; +logic drop_read_C, drop_read_N; + +logic [VADDR_BITS-1:0] params_raddr; +logic [LEN_BITS-1:0] params_len; +logic [23:0] params_qp; + +AXI4S axis_regex_in (); +logic regex_out_valid; +logic regex_out_ready; +logic regex_match; + +AXI4S axis_que_in (); +AXI4S axis_que_out (); +/* +if(DBG_ILA == 1) begin + ila_regex_data inst_data ( + .clk(aclk), + .probe0(state_C), // 2 + .probe1(cnt_C), // 31 + .probe2(drop_sent_C), + .probe3(drop_read_C), + .probe4(params_raddr), // 48 + .probe5(params_len), // 28 + .probe6(params_qp[5:0]), // 6 + .probe7(axis_regex_in.tvalid), + .probe8(axis_regex_in.tready), + .probe9(axis_regex_in.tlast), + .probe10(regex_out_valid), + .probe11(regex_out_ready), + .probe12(regex_match), + .probe13(axis_que_out.tvalid), + .probe14(axis_que_out.tready), + .probe15(axis_que_out.tlast), + .probe16(params.valid), + .probe17(params.ready), + .probe18(cnfg.valid), + .probe19(cnfg.ready), + .probe20(fv_src.valid), + .probe21(fv_src.ready), + 
.probe22(fv_src.data[160+:28]), // 28 + .probe23(axis_card_sink.tvalid), + .probe24(axis_card_sink.tready), + .probe25(axis_card_sink.tlast), + .probe26(axis_rdma_src.tvalid), + .probe27(axis_rdma_src.tready), + .probe28(axis_rdma_src.tlast), + .probe29(cnfg.data), // 512 + .probe30(axis_card_sink.tdata) // 512 + ); +end +*/ +// -- REG (only state_C has a reset value; the counters are loaded in ST_IDLE before they are used) +always_ff @(posedge aclk, negedge aresetn) begin: PROC_REG +if (aresetn == 1'b0) begin + state_C <= ST_IDLE; +end else begin + state_C <= state_N; + cnt_C <= cnt_N; + drop_sent_C <= drop_sent_N; + drop_read_C <= drop_read_N; + end +end + +// -- NSL +always_comb begin: NSL + state_N = state_C; + + case(state_C) + ST_IDLE: + if(regex_out_valid && params.valid && fv_src.ready) begin + if(regex_match) begin + state_N = ST_SEND; + end + else begin + state_N = ST_DROP; + end + end + + ST_SEND: + if((cnt_C == 0) && (axis_que_out.tready & axis_que_out.tvalid)) + state_N = ST_IDLE; + + ST_DROP: + if(drop_read_C && drop_sent_C) + state_N = ST_IDLE; + + endcase // state_C +end + +// -- DP +always_comb begin: DP + cnt_N = cnt_C; + drop_sent_N = drop_sent_C; + drop_read_N = drop_read_C; + + // Params + params.ready = 1'b0; + params_raddr = params.data[0+:VADDR_BITS]; + params_len = params.data[VADDR_BITS+:LEN_BITS]; + params_qp = params.data[VADDR_BITS+LEN_BITS+:24]; + + // Regex + regex_out_ready = 1'b0; + + // FV + fv_src.valid = 1'b0; + fv_src.data = 0; + fv_src.data[0+:5] = APP_WRITE; + fv_src.data[5+:24] = params_qp; + fv_src.data[64+:48] = 0; + fv_src.data[112+:48] = params_raddr; + fv_src.data[160+:32] = 0; + + // Data in + axis_card_sink.tready = axis_que_in.tready & axis_regex_in.tready; + + axis_que_in.tvalid = axis_card_sink.tvalid & axis_card_sink.tready; + axis_regex_in.tvalid = axis_card_sink.tvalid & axis_card_sink.tready; + + axis_que_in.tdata = axis_card_sink.tdata; + axis_que_in.tkeep = axis_card_sink.tkeep; + axis_que_in.tlast = axis_card_sink.tlast; + axis_regex_in.tdata = axis_card_sink.tdata; + axis_regex_in.tkeep =
axis_card_sink.tkeep; + axis_regex_in.tlast = axis_card_sink.tlast; + + // Data out + axis_que_out.tready = 1'b0; + + axis_rdma_src.tvalid = 1'b0; + axis_rdma_src.tdata = axis_que_out.tdata; + axis_rdma_src.tkeep = axis_que_out.tkeep; + axis_rdma_src.tlast = axis_que_out.tlast; + + case(state_C) + ST_IDLE: begin + if(regex_out_valid && params.valid && fv_src.ready) begin + regex_out_ready = 1'b1; + params.ready = 1'b1; + fv_src.valid = 1'b1; + + if(regex_match) begin + fv_src.data[160+:32] = params_len; + end + else begin + fv_src.data[160+:32] = 64; + end + + cnt_N = (params_len - 1) >> BEAT_LOG_BITS; + drop_sent_N = 1'b0; + drop_read_N = 1'b0; + end + end + + ST_SEND: begin + axis_que_out.tready = axis_rdma_src.tready; + axis_rdma_src.tvalid = axis_que_out.tvalid; + + if(axis_que_out.tready & axis_que_out.tvalid) begin + cnt_N = cnt_C - 1; + end + end + + ST_DROP: begin + axis_que_out.tready = ~drop_read_C; + + if(axis_que_out.tready & axis_que_out.tvalid) begin + cnt_N = cnt_C - 1; + end + + if((cnt_C == 0) && (axis_que_out.tready & axis_que_out.tvalid)) begin + drop_read_N = 1'b1; + end + + if(~drop_sent_C) begin + axis_rdma_src.tvalid = 1'b1; + axis_rdma_src.tdata[15:0] = 0; + axis_rdma_src.tkeep = 64'hF; + axis_rdma_src.tlast = 1'b1; + end + + if(axis_rdma_src.tvalid & axis_rdma_src.tready) begin + drop_sent_N = 1'b1; + end + end + + endcase // state_C + +end + +// Matcher +regex_top inst_regex ( + .clk(aclk), + .rst(~aresetn), + + .config_data(cnfg.data), + .config_valid(cnfg.valid), + .config_ready(cnfg.ready), + + .input_data(axis_regex_in.tdata), + .input_valid(axis_regex_in.tvalid), + .input_last(axis_regex_in.tlast), + .input_ready(axis_regex_in.tready), + + .found_loc(regex_match), + .found_valid(regex_out_valid), + .found_ready(regex_out_ready) +); + +// Data queue +axis_data_fifo_512_1kD inst_data_que ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(axis_que_in.tvalid), + .s_axis_tready(axis_que_in.tready), + 
.s_axis_tdata(axis_que_in.tdata), + .s_axis_tkeep(axis_que_in.tkeep), + .s_axis_tlast(axis_que_in.tlast), + .m_axis_tvalid(axis_que_out.tvalid), + .m_axis_tready(axis_que_out.tready), + .m_axis_tdata(axis_que_out.tdata), + .m_axis_tkeep(axis_que_out.tkeep), + .m_axis_tlast(axis_que_out.tlast) +); + +endmodule \ No newline at end of file diff --git a/hw/hdl/operators/regex/regex_ip.tcl b/hw/hdl/operators/regex/regex_ip.tcl new file mode 100644 index 00000000..37869ba0 --- /dev/null +++ b/hw/hdl/operators/regex/regex_ip.tcl @@ -0,0 +1,5 @@ +create_ip -name fifo_generator -vendor xilinx.com -library ip -version 13.2 -module_name fifo_generator_512_shallow_sync +set_property -dict [list CONFIG.Component_Name {fifo_generator_512_shallow_sync} CONFIG.Programmable_Full_Type_axis {Single_Programmable_Full_Threshold_Constant} CONFIG.Full_Threshold_Assert_Value_axis {126} CONFIG.INTERFACE_TYPE {AXI_STREAM} CONFIG.Reset_Type {Asynchronous_Reset} CONFIG.Full_Flags_Reset_Value {1} CONFIG.Clock_Type_AXI {Common_Clock} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TUSER_WIDTH {4} CONFIG.TSTRB_WIDTH {64} CONFIG.TKEEP_WIDTH {64} CONFIG.FIFO_Implementation_wach {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_wach {15} CONFIG.Empty_Threshold_Assert_Value_wach {14} CONFIG.FIFO_Implementation_wrch {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_wrch {15} CONFIG.Empty_Threshold_Assert_Value_wrch {14} CONFIG.FIFO_Implementation_rach {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_rach {15} CONFIG.Empty_Threshold_Assert_Value_rach {14} CONFIG.Enable_Safety_Circuit {true}] [get_ips fifo_generator_512_shallow_sync] + +create_ip -name fifo_generator -vendor xilinx.com -library ip -version 13.2 -module_name fifo_generator_1byte_sync +set_property -dict [list CONFIG.Component_Name {fifo_generator_1byte_sync} CONFIG.INTERFACE_TYPE {AXI_STREAM} CONFIG.Reset_Type {Asynchronous_Reset} CONFIG.Full_Flags_Reset_Value {1} CONFIG.TUSER_WIDTH {0} 
CONFIG.FIFO_Implementation_wach {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_wach {15} CONFIG.Empty_Threshold_Assert_Value_wach {14} CONFIG.FIFO_Implementation_wrch {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_wrch {15} CONFIG.Empty_Threshold_Assert_Value_wrch {14} CONFIG.FIFO_Implementation_rach {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_rach {15} CONFIG.Empty_Threshold_Assert_Value_rach {14} CONFIG.Enable_Safety_Circuit {true}] [get_ips fifo_generator_1byte_sync] \ No newline at end of file diff --git a/hw/hdl/operators/regex/regex_req.sv b/hw/hdl/operators/regex/regex_req.sv new file mode 100644 index 00000000..f152b401 --- /dev/null +++ b/hw/hdl/operators/regex/regex_req.sv @@ -0,0 +1,181 @@ +import lynxTypes::*; + +module regex_req #( + parameter integer DBG_ILA = 0 +) ( + input logic aclk, + input logic aresetn, + + // RDMA + metaIntf.s fv_sink, + + // Host + reqIntf.m rd_req_user, + + // Sequence + metaIntf.m params, + + // Config + metaIntf.m cnfg +); + +localparam integer BEAT_LOG_BYTES = AXI_DATA_BITS/8; +localparam integer BEAT_LOG_BITS = $clog2(BEAT_LOG_BYTES); + +// -- FSM +typedef enum logic[1:0] {ST_IDLE, ST_CONFIG_1, ST_CONFIG_2, ST_READ} state_t; + +// Regs +logic [1:0] state_C, state_N; + +logic [511:0] regex_cnfg_C = 0, regex_cnfg_N; +logic [LEN_BITS-1:0] len_C, len_N; +logic [VADDR_BITS-1:0] laddr_C, laddr_N; +logic [VADDR_BITS-1:0] raddr_C, raddr_N; +logic [23:0] qp_C, qp_N; + +// Int +logic [23:0] fv_qp; +logic [VADDR_BITS-1:0] fv_raddr; +logic [VADDR_BITS-1:0] fv_laddr; +logic [31:0] fv_len; +logic [191:0] fv_raw; +/* +if(DBG_ILA == 1) begin + ila_regex_req inst_req ( + .clk(aclk), + .probe0(state_C), // 2 + .probe1(raddr_C), // 48 + .probe2(laddr_C), // 48 + .probe3(len_C), // 28 + .probe4(params.valid), + .probe5(params.ready), + .probe6(cnfg.valid), + .probe7(cnfg.ready), + .probe8(rd_req_user.valid), + .probe9(rd_req_user.ready), + .probe10(fv_sink.valid), + 
.probe11(fv_sink.ready), + .probe12(qp_C[5:0]) // 6 + ); +end +*/ +// -- REG (state_C and regex_cnfg_C are reset; the remaining registers are loaded in ST_IDLE) +always_ff @(posedge aclk, negedge aresetn) begin: PROC_REG +if (aresetn == 1'b0) begin + state_C <= ST_IDLE; + regex_cnfg_C <= 0; +end else begin + state_C <= state_N; + + regex_cnfg_C <= regex_cnfg_N; + len_C <= len_N; + laddr_C <= laddr_N; + raddr_C <= raddr_N; + qp_C <= qp_N; + end +end + +// -- NSL +always_comb begin: NSL + state_N = state_C; + + case(state_C) + ST_IDLE: + if(fv_sink.valid) + if(fv_sink.data[255-:8] != 255) + state_N = ST_CONFIG_1; + else + state_N = ST_READ; + + ST_CONFIG_1: + if(fv_sink.valid) + state_N = ST_CONFIG_2; + + ST_CONFIG_2: + if(cnfg.ready) + state_N = ST_IDLE; + + ST_READ: + if(rd_req_user.ready && params.ready) + state_N = ST_IDLE; + + endcase // state_C +end + +// -- DP +always_comb begin: DP + regex_cnfg_N = regex_cnfg_C; + len_N = len_C; + laddr_N = laddr_C; + raddr_N = raddr_C; + qp_N = qp_C; + + // Incoming + fv_qp = fv_sink.data[5+:24]; + fv_raddr = fv_sink.data[64+:48]; + fv_laddr = fv_sink.data[112+:48]; + fv_len = fv_sink.data[160+:32]; + fv_raw = fv_sink.data[64+:192]; + + // FV sink + fv_sink.ready = 1'b0; + + // RD host + rd_req_user.valid = 1'b0; + rd_req_user.req = 0; + rd_req_user.req.vaddr = laddr_C; + rd_req_user.req.len = len_C; + rd_req_user.req.ctl = 1'b1; + + // Params + params.valid = 1'b0; + params.data = {qp_C, len_C, raddr_C}; + + // Config intf + cnfg.valid = 1'b0; + cnfg.data = regex_cnfg_C; + + // DP fsm + case(state_C) + ST_IDLE: begin + if(fv_sink.valid) begin + fv_sink.ready = 1'b1; + + if(fv_sink.data[255-:8] != 255) begin + regex_cnfg_N[511] = 1'b1; + regex_cnfg_N[192+:192] = fv_raw; + end + else begin + len_N = fv_len[LEN_BITS-1:0]; + laddr_N = fv_laddr; + raddr_N = fv_raddr; + qp_N = fv_qp; + end + end + end + + ST_CONFIG_1: begin + if(fv_sink.valid) begin + fv_sink.ready = 1'b1; + + regex_cnfg_N[0+:192] = fv_raw; + end + end + + ST_CONFIG_2: begin + cnfg.valid = 1'b1; + end + + ST_READ: begin + if(rd_req_user.ready &&
params.ready) begin + rd_req_user.valid = 1'b1; + params.valid = 1'b1; + end + end + + endcase // state_C + +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/operators/regex/regex_top.v b/hw/hdl/operators/regex/regex_top.v new file mode 100644 index 00000000..336b5b5d --- /dev/null +++ b/hw/hdl/operators/regex/regex_top.v @@ -0,0 +1,226 @@ +//--------------------------------------------------------------------------- +//-- Copyright 2015 - 2017 Systems Group, ETH Zurich +//-- +//-- This hardware module is free software: you can redistribute it and/or +//-- modify it under the terms of the GNU General Public License as published +//-- by the Free Software Foundation, either version 3 of the License, or +//-- (at your option) any later version. +//-- +//-- This program is distributed in the hope that it will be useful, +//-- but WITHOUT ANY WARRANTY; without even the implied warranty of +//-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//-- GNU General Public License for more details. +//-- +//-- You should have received a copy of the GNU General Public License +//-- along with this program. If not, see . 
+//--------------------------------------------------------------------------- + + +module regex_top +( + input clk, + input rst, + + input [511:0] input_data, + input input_valid, + input input_last, + output input_ready, + + input [511:0] config_data, + input config_valid, + output config_ready, + + output found_loc, + output found_valid, + input found_ready +); + +parameter REGEX_COUNT_BITS = 3; +parameter MAX_REGEX_ENGINES = 4; + +wire [511:0] regex_input_data [MAX_REGEX_ENGINES-1:0]; +reg [511:0] regex_input_prebuf [MAX_REGEX_ENGINES-1:0]; +wire [MAX_REGEX_ENGINES-1:0] regex_input_hasdata; +wire [MAX_REGEX_ENGINES-1:0] regex_input_almfull; +wire [MAX_REGEX_ENGINES-1:0] regex_input_notfull; +wire [MAX_REGEX_ENGINES-1:0] regex_input_ready; +reg [MAX_REGEX_ENGINES-1:0] regex_input_enable; +reg [MAX_REGEX_ENGINES-1:0] regex_input_type; + +wire [MAX_REGEX_ENGINES*16-1:0] regex_output_index ; +wire [MAX_REGEX_ENGINES-1:0] regex_output_match; +wire [MAX_REGEX_ENGINES-1:0] regex_output_valid; + +wire [MAX_REGEX_ENGINES-1:0] outfifo_valid; +wire [MAX_REGEX_ENGINES-1:0] outfifo_ready; +wire [MAX_REGEX_ENGINES-1:0] outfifo_data; + +reg [REGEX_COUNT_BITS-1:0] outfifo_pos; + +reg [REGEX_COUNT_BITS-1:0] current_regex_engine; +reg [REGEX_COUNT_BITS-1:0] config_regex_engine; +reg [REGEX_COUNT_BITS-1:0] output_regex_engine; + +reg config_wait; +reg regex_inputbuffer_ok; +reg regex_inputbuffer_pre; + +assign input_ready = (regex_inputbuffer_ok); +assign config_ready = ~regex_input_enable[config_regex_engine] && (regex_inputbuffer_ok); + +reg rstBuf; + +integer x; + +always @(posedge clk) begin + rstBuf <= rst; + + if (rst) begin + current_regex_engine <= 0; + config_regex_engine <= 0; + regex_input_enable <= 0; + output_regex_engine <= 0; + config_wait <= 0; + regex_inputbuffer_ok <= 0; + regex_inputbuffer_pre <= 0; + end + else begin + regex_input_enable <= 0; + + regex_inputbuffer_pre <= (regex_input_notfull == {MAX_REGEX_ENGINES{1'b1}} ? 1 : 0) && (regex_input_almfull == 0 ? 
1 : 0); + regex_inputbuffer_ok <= regex_inputbuffer_pre; + + + if (config_ready==1 && config_valid==1) begin + $display("Config entered"); + + regex_input_prebuf[config_regex_engine] <= config_data; + regex_input_enable[config_regex_engine] <= 1; + regex_input_type[config_regex_engine] <= 1; + + if (config_regex_engine==MAX_REGEX_ENGINES-1) begin + config_regex_engine <= 0; + end else begin + config_regex_engine <= config_regex_engine +1; + end + + if (config_data[511]==1) begin + $display("Config all"); + for (x=0; x. +//--------------------------------------------------------------------------- + + +module rem_charmatch + ( + clk, + rst, + config_valid, + config_char, + config_chained, + input_valid, + input_char, + prev_matched, + this_matched + ); + + + input clk; + input rst; + + input config_valid; + input [7:0] config_char; + input config_chained; + + input input_valid; + input [7:0] input_char; + + input prev_matched; + output this_matched; + + reg char_match; + reg [7:0] char_data; + reg is_chained; + + + assign this_matched = char_match; + + always @(posedge clk) + begin + + if(rst) begin + char_data <= 0; + char_match <= 0; + end + else begin + + if (input_valid==1) begin + + if (char_data==input_char) + char_match <= is_chained ? 
prev_matched : 1; + else + char_match <= 0; + + end + + if (config_valid==1) begin + char_data <= config_char; + is_chained <= config_chained; + char_match <= 0; + end + + + end + end + +endmodule \ No newline at end of file diff --git a/hw/hdl/operators/regex/rem_decoder.v b/hw/hdl/operators/regex/rem_decoder.v new file mode 100644 index 00000000..7dbff4f6 --- /dev/null +++ b/hw/hdl/operators/regex/rem_decoder.v @@ -0,0 +1,137 @@ +//--------------------------------------------------------------------------- +//-- Copyright 2015 - 2017 Systems Group, ETH Zurich +//-- +//-- This hardware module is free software: you can redistribute it and/or +//-- modify it under the terms of the GNU General Public License as published +//-- by the Free Software Foundation, either version 3 of the License, or +//-- (at your option) any later version. +//-- +//-- This program is distributed in the hope that it will be useful, +//-- but WITHOUT ANY WARRANTY; without even the implied warranty of +//-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//-- GNU General Public License for more details. +//-- +//-- You should have received a copy of the GNU General Public License +//-- along with this program. If not, see . +//--------------------------------------------------------------------------- + + +module rem_decoder #(parameter CHAR_COUNT=16, DELIMITER=0) + ( + clk, + rst, //active high + config_valid, + config_chars, // an eight bit character for each checker + config_ranges, // two eight bit characters for each range checker (>=LOW, . 
+//--------------------------------------------------------------------------- + + +module rem_halfrange #(parameter HIGH_HALF=0) + ( + clk, + rst, + config_valid, + config_char, + config_chained, + config_range_en, + input_valid, + input_char, + prev_matched, + this_matched, + + low_smaller, + this_smaller + ); + + + input clk; + input rst; + + input config_valid; + input [7:0] config_char; + input config_chained; + input config_range_en; // only relevant if LOW_PART=0 + + input input_valid; + input [7:0] input_char; + + input prev_matched; + input low_smaller; // only relevant if LOW_PART=0 + output this_matched; + output this_smaller; // only relevant if LOW_PART=1 + + reg char_match; + reg [7:0] char_data; + reg is_chained; + reg is_ranged; + + + assign this_matched = char_match; + + assign this_smaller = (HIGH_HALF==0 && input_valid==1) ? input_char>char_data-1 : 0; + + always @(posedge clk) + begin + + if(rst) begin + char_data <= 0; + char_match <= 0; + end + else begin + + if (input_valid==1) begin + + if (char_data==input_char) begin + char_match <= is_chained ? 
prev_matched : 1; + end + else begin + if (HIGH_HALF==1 && is_ranged==1 && char_data>input_char && low_smaller==1) begin + char_match <= 1; + end + else begin + char_match <= 0; + end + end + end + + if (config_valid==1) begin + char_data <= config_char; + is_chained <= config_chained; + is_ranged <= config_range_en; + char_match <= 0; + end + + + end + end + +endmodule \ No newline at end of file diff --git a/hw/hdl/operators/regex/rem_onestate.v b/hw/hdl/operators/regex/rem_onestate.v new file mode 100644 index 00000000..d40e8cb7 --- /dev/null +++ b/hw/hdl/operators/regex/rem_onestate.v @@ -0,0 +1,94 @@ +//--------------------------------------------------------------------------- +//-- Copyright 2015 - 2017 Systems Group, ETH Zurich +//-- +//-- This hardware module is free software: you can redistribute it and/or +//-- modify it under the terms of the GNU General Public License as published +//-- by the Free Software Foundation, either version 3 of the License, or +//-- (at your option) any later version. +//-- +//-- This program is distributed in the hope that it will be useful, +//-- but WITHOUT ANY WARRANTY; without even the implied warranty of +//-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//-- GNU General Public License for more details. +//-- +//-- You should have received a copy of the GNU General Public License +//-- along with this program. If not, see . 
+//---------------------------------------------------------------------------
+// rem_onestate: presumably one state of the regex matcher (inferred from the name; confirm in rem_top_ff).
+// Sets act_output from pred_match when the (optionally delayed) activation is present on a pred_valid cycle.
+module rem_onestate
+    (
+    clk,
+    rst,
+
+    is_sticky,
+
+    delay_valid,
+    delay_cycles,
+
+    pred_valid,
+    pred_match,
+    pred_index,
+
+    act_input,
+    act_output,
+    act_index
+    );
+
+    input clk;
+    input rst;                    // synchronous, active high; clears only act_output and activated
+
+    input is_sticky;              // 1: act_output is held when the activation condition fails
+
+    input pred_valid;             // outside reset, act_output / act_index change only on pred_valid cycles
+    input pred_match;
+    input [15:0] pred_index;
+
+    input delay_valid;            // loads delay_cycles_reg, even while rst is asserted
+    input [3:0] delay_cycles;
+
+    input act_input;
+    output reg act_output;
+    output reg [15:0] act_index;  // not reset
+
+    reg activated;                // act_input delayed by delay_cycles_reg clocks (for values >= 1)
+
+    reg [3:0] delay_cycles_reg;   // not reset
+    // 18 bits declared, but only [15:2] carry history: [1:0] are written as 0 and [17:16] get the zero extension.
+    reg [2+15:0] delay_shift;
+
+    always @(posedge clk ) begin
+        // Sits outside the reset branch, so the delay can be loaded while rst is high.
+        if (delay_valid==1) delay_cycles_reg <= delay_cycles;
+
+        if (rst) begin
+            act_output <= 0;
+            activated <= 0;
+        end
+        else
+        begin
+            // Bit 2 takes the newest act_input; older samples move towards bit 15.
+            delay_shift <= {delay_shift[14:2],act_input,2'b00};
+            // Delays of 2..15 tap the shift register; 0 and 1 use act_input directly.
+            activated <= (delay_cycles_reg>1) ? delay_shift[delay_cycles_reg] : act_input;
+
+            if (pred_valid) begin
+                // NOTE(review): && binds tighter than ||, so pred_match==1 only gates the delayed term.
+                if ((delay_cycles_reg==0 && act_input==1) || (delay_cycles_reg!=0 && activated==1) && pred_match==1) begin
+                    act_output <= pred_match;
+                    // With delay 0 this branch also runs on a non-match; confirm overwriting act_index then is intended.
+                    if (act_output==0) act_index <= pred_index;
+                end
+                else begin
+                    if (is_sticky) begin
+                        act_output <= act_output;
+                    end else begin
+                        act_output <= 0;
+                    end
+                end
+            end
+
+        end
+    end
+
+endmodule
\ No newline at end of file
diff --git a/hw/hdl/operators/regex/rem_rangematch.v b/hw/hdl/operators/regex/rem_rangematch.v
new file mode 100644
index 00000000..d8e07a59
--- /dev/null
+++ b/hw/hdl/operators/regex/rem_rangematch.v
@@ -0,0 +1,86 @@
+//---------------------------------------------------------------------------
+//-- Copyright 2015 - 2017 Systems Group, ETH Zurich
+//--
+//-- This hardware module is free software: you can redistribute it and/or
+//-- modify it under the terms of the GNU General Public License as published
+//-- by the Free Software Foundation, either version 3 of the License, or
+//-- (at your option) any later version.
+//--
+//-- This program is distributed in the hope that it will be useful,
+//-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+//-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//-- GNU General Public License for more details.
+//--
+//-- You should have received a copy of the GNU General Public License
+//-- along with this program. If not, see <http://www.gnu.org/licenses/>.
+//---------------------------------------------------------------------------
+// rem_rangematch: registered range comparator. this_matched reports whether the last valid
+// input_char satisfied config_low <= input_char < config_high (gated by prev_matched when chained).
+module rem_rangematch
+    (
+    clk,
+    rst,
+    config_valid,
+    config_low,
+    config_high,
+    config_chained,
+    input_valid,
+    input_char,
+    prev_matched,
+    this_matched
+    );
+
+
+    input clk;
+    input rst;                    // synchronous, active high
+
+    input config_valid;           // loads the range; wins over input_valid in the same cycle
+    input [7:0] config_low;       // inclusive lower bound
+    input [7:0] config_high;      // exclusive upper bound
+    input [1:0] config_chained;   // NOTE(review): 2 bits wide but stored in 1-bit is_chained, so only bit 0 is kept
+
+    input input_valid;
+    input [7:0] input_char;
+
+    input prev_matched;           // only consulted when is_chained is set
+    output this_matched;
+
+    reg char_match;               // holds its value while input_valid and config_valid are both low
+    reg [7:0] char_low;
+    reg [7:0] char_high;
+    reg is_chained;               // not cleared by rst; only written on config_valid
+
+
+    assign this_matched = char_match;
+
+    always @(posedge clk)
+    begin
+
+        if(rst) begin
+            char_low <= 0;
+            char_high <= 0;
+            char_match <= 0;
+        end
+        else begin
+
+            if (input_valid==1) begin
+                // Half-open range: a character equal to char_high does not match.
+                if (char_low<=input_char && char_high>input_char)
+                    char_match <= is_chained ? prev_matched : 1;
+                else
+                    char_match <= 0;
+
+            end
+            // Placed after the input branch so its char_match <= 0 is the assignment that takes effect.
+            if (config_valid==1) begin
+                char_low <= config_low;
+                char_high <= config_high;
+                is_chained <= config_chained;
+                char_match <= 0;
+            end
+
+
+        end
+    end
+
+endmodule
\ No newline at end of file
diff --git a/hw/hdl/operators/regex/rem_top_ff.v b/hw/hdl/operators/regex/rem_top_ff.v
new file mode 100644
index 00000000..0791b082
--- /dev/null
+++ b/hw/hdl/operators/regex/rem_top_ff.v
@@ -0,0 +1,351 @@
+//---------------------------------------------------------------------------
+//-- Copyright 2015 - 2017 Systems Group, ETH Zurich
+//--
+//-- This hardware module is free software: you can redistribute it and/or
+//-- modify it under the terms of the GNU General Public License as published
+//-- by the Free Software Foundation, either version 3 of the License, or
+//-- (at your option) any later version.
+//--
+//-- This program is distributed in the hope that it will be useful,
+//-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+//-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//-- GNU General Public License for more details.
+//--
+//-- You should have received a copy of the GNU General Public License
+//-- along with this program. If not, see <http://www.gnu.org/licenses/>.
+//--------------------------------------------------------------------------- + +module rem_top_ff #(parameter CHAR_COUNT=16, DELIMITER=0, STATE_COUNT=8) + ( + clk, + rst, //active high + softRst, + + input_valid, + input_data, + + input_ready, + + output_valid, + output_match, + output_index + ); + + + input clk; + input rst; + input softRst; + + input input_valid; + input [511:0] input_data; + + output reg input_ready; + + output reg output_valid; + output reg output_match; + output reg [15:0] output_index; + + reg scan_mode; + + reg input_wasvalid; + reg input_wasready; + reg input_hasdata; + reg [511:0] input_datareg; + + reg config_valid; + reg [CHAR_COUNT*8-1:0] config_chars; + reg [CHAR_COUNT/2-1:0] config_ranges; + reg [CHAR_COUNT-1:0] config_conds; + + reg [STATE_COUNT*(CHAR_COUNT)-1:0] config_state_pred; + reg [STATE_COUNT*STATE_COUNT-1:0] config_state_act; + + reg restart; + reg wait_new; + reg wait_conf; + + wire pred_valid; + wire [CHAR_COUNT-1:0] pred_bits; + wire [15:0] pred_index; + wire pred_last; + + reg need_purge; + + reg pred_valid_D; + reg pred_last_D; + reg [15:0] pred_index_D; + + reg [STATE_COUNT*(CHAR_COUNT)-1:0] state_pred_masks; + reg [STATE_COUNT*STATE_COUNT-1:0] state_act_masks; + + wire [STATE_COUNT-1:0] state_match_bits; + wire [STATE_COUNT-1:0] state_inact_bits; + wire [STATE_COUNT-1:0] state_outact_bits; + + reg [STATE_COUNT*4-1:0] state_inact_delays; + + reg [STATE_COUNT-1:0] always_activated; + reg [STATE_COUNT-1:0] state_act_sticky; + + reg [15:0] string_length; + reg [7:0] length_remaining ; + reg [5:0] byte_addr; + + reg waiting_pred; + + reg dec_valid; + reg dec_last; + reg [7:0] dec_char; + + reg rstBuf; + + localparam STATE_ACT_SIZE = (STATE_COUNT*STATE_COUNT % 8 ==0) ? 
STATE_COUNT*STATE_COUNT : STATE_COUNT*STATE_COUNT+8-(STATE_COUNT*STATE_COUNT%8); + + + rem_decoder #( + .CHAR_COUNT(CHAR_COUNT), + .DELIMITER(DELIMITER) + ) decoder_inst ( + .clk(clk), + .rst(rstBuf), + .config_valid(config_valid), + .config_chars(config_chars), + .config_ranges(config_ranges), + .config_conds(config_conds), + .input_valid(dec_valid), + .input_last(dec_last), + .input_char(dec_char), + .index_rewind(wait_new), + .output_valid(pred_valid), + .output_data(pred_bits), + .output_index(pred_index), + .output_last(pred_last) + ); + + + genvar X; + generate + for (X=0; X1) begin + byte_addr <= 0; + input_ready <= 1; + length_remaining <= length_remaining-1; + end + else if (byte_addr==63 && length_remaining==1 && !need_purge) begin + byte_addr <= 0; + input_hasdata <= 0; + waiting_pred <= 1; + length_remaining <= 0; + end + + if (need_purge==1) begin + if (length_remaining>1) begin + byte_addr <= 64; + length_remaining <= length_remaining-1; + input_ready <= 1; + end + else begin + byte_addr <= 0; + restart <= 1; + input_ready <= 1; + need_purge <= 0; + end + end + + end + + + if (!need_purge && !wait_new && pred_valid_D==1 && (state_outact_bits[STATE_COUNT-1]==1 || pred_last_D==1)) begin + output_valid <= 1; + output_match <= state_outact_bits[STATE_COUNT-1]==1; + output_index <= pred_index_D; + + if (!waiting_pred) begin + need_purge<=1; + end + else begin + waiting_pred <= 0; + byte_addr <= 0; + restart <= 1; + input_ready <= 1; + need_purge <= 0; + end + end + + if (!input_hasdata && output_valid==1 && waiting_pred==1) begin + waiting_pred <= 0; + byte_addr <= 0; + restart <= 1; + input_ready <= 1; + need_purge <= 0; + end + + if (!need_purge && waiting_pred==1 && pred_valid_D==0 && length_remaining==0) begin + output_valid <= 1; + output_match <= 0; + output_index <= 0; + + waiting_pred <= 0; + byte_addr <= 0; + restart <= 1; + input_ready <= 1; + need_purge <= 0; + end + end + + end + end + + +endmodule \ No newline at end of file diff --git 
a/hw/hdl/operators/selection/selection.vhd b/hw/hdl/operators/selection/selection.vhd
new file mode 100644
index 00000000..897452e0
--- /dev/null
+++ b/hw/hdl/operators/selection/selection.vhd
@@ -0,0 +1,153 @@
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+-- selection: one-stage AXI4-Stream filter; each 32-bit lane of the 512-bit word is kept or zeroed by a signed compare.
+entity selection is
+port (
+    clk : in std_logic;
+    rst_n : in std_logic; -- synchronous, active low
+
+    selType : in std_logic_vector(31 downto 0); -- predicate select; only bits 2..0 are decoded
+    lowThr : in std_logic_vector(31 downto 0); -- signed operand for =, /=, > and >=
+    uppThr : in std_logic_vector(31 downto 0); -- signed operand for < and <=
+
+    axis_in_tvalid : in std_logic;
+    axis_in_tready : out std_logic; -- combinational copy of axis_out_tready
+    axis_in_tdata : in std_logic_vector(511 downto 0); -- 16 lanes of 32 bits, lane i at bits i*32+31..i*32
+    axis_in_tkeep : in std_logic_vector(63 downto 0); -- forwarded unchanged: rejected lanes are zeroed, not removed
+    axis_in_tlast : in std_logic;
+
+    axis_out_tvalid : out std_logic;
+    axis_out_tready : in std_logic;
+    axis_out_tdata : out std_logic_vector(511 downto 0);
+    axis_out_tkeep : out std_logic_vector(63 downto 0);
+    axis_out_tlast : out std_logic
+);
+end selection;
+
+architecture behavioral of selection is
+-- Range covers every 3-bit code so an unexpected CSR value cannot trip a range check in simulation.
+signal SelectionType : integer range 0 to 7;
+signal LowerThreshold : signed(31 downto 0);
+signal UpperThreshold : signed(31 downto 0);
+
+type CLIntegersType is array (15 downto 0) of signed(31 downto 0);
+signal CLIntegers : CLIntegersType;
+signal predicatesInCL : signed(31 downto 0); -- never referenced in this architecture
+signal equalCLIntegers : CLIntegersType;
+signal nonequalCLIntegers : CLIntegersType;
+signal smallerCLIntegers : CLIntegersType;
+signal smallerequalCLIntegers : CLIntegersType;
+signal largerCLIntegers : CLIntegersType;
+signal largerequalCLIntegers : CLIntegersType;
+
+signal outputReg : std_logic_vector(511 downto 0);
+signal keepReg : std_logic_vector(63 downto 0);
+signal lastReg : std_logic;
+signal valReg : std_logic;
+
+begin
+
+-- Params (selType is sliced to 3 bits before conversion: the full 32-bit value can exceed the integer range)
+SelectionType <= to_integer(unsigned(selType(2 downto 0)));
+LowerThreshold <= signed(lowThr);
+UpperThreshold <= signed(uppThr);
+
+-- Gen CLs: every predicate is evaluated for every lane in parallel; the case below picks one result set
+gen_CLIntegers: for i in 0 to 15 generate
+    CLIntegers(i) <= signed(axis_in_tdata(i*32+31 downto i*32));
+
+    equalCLIntegers(i) <= CLIntegers(i) when CLIntegers(i) = LowerThreshold else (others => '0');
+    nonequalCLIntegers(i) <= CLIntegers(i) when CLIntegers(i) /= LowerThreshold else (others => '0');
+    smallerCLIntegers(i) <= CLIntegers(i) when CLIntegers(i) < UpperThreshold else (others => '0');
+    smallerequalCLIntegers(i) <= CLIntegers(i) when CLIntegers(i) <= UpperThreshold else (others => '0');
+    largerCLIntegers(i) <= CLIntegers(i) when CLIntegers(i) > LowerThreshold else (others => '0');
+    largerequalCLIntegers(i) <= CLIntegers(i) when CLIntegers(i) >= LowerThreshold else (others => '0');
+end generate gen_CLIntegers;
+
+-- Gen output
+axis_out_tdata <= outputReg;
+axis_out_tkeep <= keepReg;
+axis_out_tlast <= lastReg;
+axis_out_tvalid <= valReg;
+
+axis_in_tready <= axis_out_tready;
+
+-- REG P: output stage, advances only while the consumer is ready
+process(clk)
+begin
+if clk'event and clk = '1' then
+    if rst_n = '0' then
+        outputReg <= (others => '0');
+        keepReg <= (others => '0');
+        lastReg <= '0';
+        valReg <= '0';
+    else
+        if axis_out_tready = '1' then
+            keepReg <= axis_in_tkeep;
+            lastReg <= axis_in_tlast;
+            valReg <= axis_in_tvalid;
+            case SelectionType is -- 0: =low, 1: /=low, 2: <upp, 3: <=upp, 4: >low, 5: >=low
+                when 0 => outputReg <= std_logic_vector(equalCLIntegers(15)) & std_logic_vector(equalCLIntegers(14)) &
+                    std_logic_vector(equalCLIntegers(13)) & std_logic_vector(equalCLIntegers(12)) &
+                    std_logic_vector(equalCLIntegers(11)) & std_logic_vector(equalCLIntegers(10)) &
+                    std_logic_vector(equalCLIntegers(9)) & std_logic_vector(equalCLIntegers(8)) &
+                    std_logic_vector(equalCLIntegers(7)) & std_logic_vector(equalCLIntegers(6)) &
+                    std_logic_vector(equalCLIntegers(5)) & std_logic_vector(equalCLIntegers(4)) &
+                    std_logic_vector(equalCLIntegers(3)) & std_logic_vector(equalCLIntegers(2)) &
+                    std_logic_vector(equalCLIntegers(1)) & std_logic_vector(equalCLIntegers(0));
+
+                when 1 => outputReg <= std_logic_vector(nonequalCLIntegers(15)) & std_logic_vector(nonequalCLIntegers(14)) &
+                    std_logic_vector(nonequalCLIntegers(13)) & std_logic_vector(nonequalCLIntegers(12)) &
+                    std_logic_vector(nonequalCLIntegers(11)) & std_logic_vector(nonequalCLIntegers(10)) &
+                    std_logic_vector(nonequalCLIntegers(9)) & std_logic_vector(nonequalCLIntegers(8)) &
+                    std_logic_vector(nonequalCLIntegers(7)) & std_logic_vector(nonequalCLIntegers(6)) &
+                    std_logic_vector(nonequalCLIntegers(5)) & std_logic_vector(nonequalCLIntegers(4)) &
+                    std_logic_vector(nonequalCLIntegers(3)) & std_logic_vector(nonequalCLIntegers(2)) &
+                    std_logic_vector(nonequalCLIntegers(1)) & std_logic_vector(nonequalCLIntegers(0));
+
+                when 2 => outputReg <= std_logic_vector(smallerCLIntegers(15)) & std_logic_vector(smallerCLIntegers(14)) &
+                    std_logic_vector(smallerCLIntegers(13)) & std_logic_vector(smallerCLIntegers(12)) &
+                    std_logic_vector(smallerCLIntegers(11)) & std_logic_vector(smallerCLIntegers(10)) &
+                    std_logic_vector(smallerCLIntegers(9)) & std_logic_vector(smallerCLIntegers(8)) &
+                    std_logic_vector(smallerCLIntegers(7)) & std_logic_vector(smallerCLIntegers(6)) &
+                    std_logic_vector(smallerCLIntegers(5)) & std_logic_vector(smallerCLIntegers(4)) &
+                    std_logic_vector(smallerCLIntegers(3)) & std_logic_vector(smallerCLIntegers(2)) &
+                    std_logic_vector(smallerCLIntegers(1)) & std_logic_vector(smallerCLIntegers(0));
+
+                when 3 => outputReg <= std_logic_vector(smallerequalCLIntegers(15)) & std_logic_vector(smallerequalCLIntegers(14)) &
+                    std_logic_vector(smallerequalCLIntegers(13)) & std_logic_vector(smallerequalCLIntegers(12)) &
+                    std_logic_vector(smallerequalCLIntegers(11)) & std_logic_vector(smallerequalCLIntegers(10)) &
+                    std_logic_vector(smallerequalCLIntegers(9)) & std_logic_vector(smallerequalCLIntegers(8)) &
+                    std_logic_vector(smallerequalCLIntegers(7)) & std_logic_vector(smallerequalCLIntegers(6)) &
+                    std_logic_vector(smallerequalCLIntegers(5)) & std_logic_vector(smallerequalCLIntegers(4)) &
+                    std_logic_vector(smallerequalCLIntegers(3)) & std_logic_vector(smallerequalCLIntegers(2)) &
+                    std_logic_vector(smallerequalCLIntegers(1)) & std_logic_vector(smallerequalCLIntegers(0));
+
+                when 4 => outputReg <= std_logic_vector(largerCLIntegers(15)) & std_logic_vector(largerCLIntegers(14)) &
+                    std_logic_vector(largerCLIntegers(13)) & std_logic_vector(largerCLIntegers(12)) &
+                    std_logic_vector(largerCLIntegers(11)) & std_logic_vector(largerCLIntegers(10)) &
+                    std_logic_vector(largerCLIntegers(9)) & std_logic_vector(largerCLIntegers(8)) &
+                    std_logic_vector(largerCLIntegers(7)) & std_logic_vector(largerCLIntegers(6)) &
+                    std_logic_vector(largerCLIntegers(5)) & std_logic_vector(largerCLIntegers(4)) &
+                    std_logic_vector(largerCLIntegers(3)) & std_logic_vector(largerCLIntegers(2)) &
+                    std_logic_vector(largerCLIntegers(1)) & std_logic_vector(largerCLIntegers(0));
+
+                when 5 => outputReg <= std_logic_vector(largerequalCLIntegers(15)) & std_logic_vector(largerequalCLIntegers(14)) &
+                    std_logic_vector(largerequalCLIntegers(13)) & std_logic_vector(largerequalCLIntegers(12)) &
+                    std_logic_vector(largerequalCLIntegers(11)) & std_logic_vector(largerequalCLIntegers(10)) &
+                    std_logic_vector(largerequalCLIntegers(9)) & std_logic_vector(largerequalCLIntegers(8)) &
+                    std_logic_vector(largerequalCLIntegers(7)) & std_logic_vector(largerequalCLIntegers(6)) &
+                    std_logic_vector(largerequalCLIntegers(5)) & std_logic_vector(largerequalCLIntegers(4)) &
+                    std_logic_vector(largerequalCLIntegers(3)) & std_logic_vector(largerequalCLIntegers(2)) &
+                    std_logic_vector(largerequalCLIntegers(1)) & std_logic_vector(largerequalCLIntegers(0));
+
+                when others => null; -- codes 6 and 7: outputReg keeps its previous value
+            end case;
+        end if;
+
+    end if;
+end if;
+end process;
+
+end architecture;
\ No newline at end of file
diff --git a/hw/hdl/operators/selection/selection_slave.sv b/hw/hdl/operators/selection/selection_slave.sv
new file mode 100644
index 00000000..969a7076
--- /dev/null
+++ b/hw/hdl/operators/selection/selection_slave.sv
@@ -0,0 +1,239 @@
+import lynxTypes::*;
+// selection_slave: AXI4-Lite register file with three registers that drive selType, lowThr and uppThr.
+module selection_slave (
+    input logic aclk,
+    input logic aresetn, // active low, used asynchronously by every always_ff below
+
+    AXI4L.s axi_ctrl,
+
+    output logic [31:0] selType,
+    output logic [31:0] lowThr,
+    output logic [31:0] uppThr
+);
+
+//`define DEBUG_CNFG_SLAVE
+
+// -- Decl ----------------------------------------------------------
+// ------------------------------------------------------------------
+
+// Constants
+localparam integer N_REGS = 3;
+localparam integer ADDR_LSB = (AXIL_DATA_BITS/32) + 1; // 2 for 32-bit data, 3 for 64-bit data
+localparam integer ADDR_MSB = $clog2(N_REGS); // number of register-index bits
+localparam integer AXI_ADDR_BITS = ADDR_LSB + ADDR_MSB;
+
+// Internal registers
+logic [AXI_ADDR_BITS-1:0] axi_awaddr;
+logic axi_awready;
+logic [AXI_ADDR_BITS-1:0] axi_araddr;
+logic axi_arready;
+logic [1:0] axi_bresp;
+logic axi_bvalid;
+logic axi_wready;
+logic [AXIL_DATA_BITS-1:0] axi_rdata;
+logic [1:0] axi_rresp;
+logic axi_rvalid;
+
+// Registers
+logic [N_REGS-1:0][AXIL_DATA_BITS-1:0] slv_reg;
+logic slv_reg_rden;
+logic slv_reg_wren;
+logic aw_en;
+
+logic done_op; // cleared on reset, otherwise unused in this module
+
+// -- Def -----------------------------------------------------------
+// ------------------------------------------------------------------
+
+// -- Register map -----------------------------------------------------------------------
+localparam integer TYPE_REG = 0;
+localparam integer LOW_THR_REG = 1;
+localparam integer UPP_THR_REG = 2;
+
+// Write process: byte lanes are updated individually according to wstrb
+assign slv_reg_wren = axi_wready && axi_ctrl.wvalid && axi_awready && axi_ctrl.awvalid;
+
+always_ff @(posedge aclk, negedge aresetn) begin
+    if ( aresetn == 1'b0 ) begin
+        slv_reg <= 0;
+
+        done_op <= 1'b0;
+    end
+    else begin
+        if(slv_reg_wren) begin
+            case (axi_awaddr[ADDR_LSB+ADDR_MSB-1:ADDR_LSB])
+                TYPE_REG: // Type
+                    for (int i = 0; i < (AXIL_DATA_BITS/8); i++) begin
+                        if(axi_ctrl.wstrb[i]) begin
+                            slv_reg[TYPE_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8];
+                        end
+                    end
+                LOW_THR_REG: // Low thr
+                    for (int i = 0; i < (AXIL_DATA_BITS/8); i++) begin
+                        if(axi_ctrl.wstrb[i]) begin
+                            slv_reg[LOW_THR_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8];
+                        end
+                    end
+                UPP_THR_REG: // Upp thr
+                    for (int i = 0; i < (AXIL_DATA_BITS/8); i++) begin
+                        if(axi_ctrl.wstrb[i]) begin
+                            slv_reg[UPP_THR_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8];
+                        end
+                    end
+                default : ;
+            endcase
+        end
+    end
+end
+
+assign selType = slv_reg[TYPE_REG];
+assign lowThr = slv_reg[LOW_THR_REG];
+assign uppThr = slv_reg[UPP_THR_REG];
+
+// Read process
+assign slv_reg_rden = axi_arready & axi_ctrl.arvalid & ~axi_rvalid;
+
+always_ff @(posedge aclk, negedge aresetn) begin
+    if( aresetn == 1'b0 ) begin
+        axi_rdata <= 0;
+    end
+    else begin
+        if(slv_reg_rden) begin
+            axi_rdata <= 0; // cleared only when a new read is latched, so the data stays stable until rready
+            case (axi_araddr[ADDR_LSB+ADDR_MSB-1:ADDR_LSB])
+                TYPE_REG:
+                    axi_rdata[31:0] <= slv_reg[TYPE_REG];
+                LOW_THR_REG:
+                    axi_rdata[31:0] <= slv_reg[LOW_THR_REG];
+                UPP_THR_REG:
+                    axi_rdata[31:0] <= slv_reg[UPP_THR_REG];
+                default: ;
+            endcase
+        end
+    end
+end
+
+// I/O
+assign axi_ctrl.awready = axi_awready;
+assign axi_ctrl.arready = axi_arready;
+assign axi_ctrl.bresp = axi_bresp;
+assign axi_ctrl.bvalid = axi_bvalid;
+assign axi_ctrl.wready = axi_wready;
+assign axi_ctrl.rdata = axi_rdata;
+assign axi_ctrl.rresp = axi_rresp;
+assign axi_ctrl.rvalid = axi_rvalid;
+
+// awready and awaddr (aw_en blocks a new address until the pending write response is accepted)
+always_ff @(posedge aclk, negedge aresetn) begin
+    if ( aresetn == 1'b0 )
+    begin
+        axi_awready <= 1'b0;
+        axi_awaddr <= 0;
+        aw_en <= 1'b1;
+    end
+    else
+    begin
+        if (~axi_awready && axi_ctrl.awvalid && axi_ctrl.wvalid && aw_en)
+        begin
+            axi_awready <= 1'b1;
+            aw_en <= 1'b0;
+            axi_awaddr <= axi_ctrl.awaddr;
+        end
+        else if (axi_ctrl.bready && axi_bvalid)
+        begin
+            aw_en <= 1'b1;
+            axi_awready <= 1'b0;
+        end
+        else
+        begin
+            axi_awready <= 1'b0;
+        end
+    end
+end
+
+// arready and araddr
+always_ff @(posedge aclk, negedge aresetn) begin
+    if ( aresetn == 1'b0 )
+    begin
+        axi_arready <= 1'b0;
+        axi_araddr <= 0;
+    end
+    else
+    begin
+        if (~axi_arready && axi_ctrl.arvalid)
+        begin
+            axi_arready <= 1'b1;
+            axi_araddr <= axi_ctrl.araddr;
+        end
+        else
+        begin
+            axi_arready <= 1'b0;
+        end
+    end
+end
+
+// bvalid and bresp
+always_ff @(posedge aclk, negedge aresetn) begin
+    if ( aresetn == 1'b0 )
+    begin
+        axi_bvalid <= 0;
+        axi_bresp <= 2'b0;
+    end
+    else
+    begin
+        if (axi_awready && axi_ctrl.awvalid && ~axi_bvalid && axi_wready && axi_ctrl.wvalid)
+        begin
+            axi_bvalid <= 1'b1;
+            axi_bresp <= 2'b0;
+        end
+        else
+        begin
+            if (axi_ctrl.bready && axi_bvalid)
+            begin
+                axi_bvalid <= 1'b0;
+            end
+        end
+    end
+end
+
+// wready
+always_ff @(posedge aclk, negedge aresetn) begin
+    if ( aresetn == 1'b0 )
+    begin
+        axi_wready <= 1'b0;
+    end
+    else
+    begin
+        if (~axi_wready && axi_ctrl.wvalid && axi_ctrl.awvalid && aw_en )
+        begin
+            axi_wready <= 1'b1;
+        end
+        else
+        begin
+            axi_wready <= 1'b0;
+        end
+    end
+end
+
+// rvalid and rresp (1Del?)
+always_ff @(posedge aclk, negedge aresetn) begin
+    if ( aresetn == 1'b0 )
+    begin
+        axi_rvalid <= 0;
+        axi_rresp <= 0;
+    end
+    else
+    begin
+        if (axi_arready && axi_ctrl.arvalid && ~axi_rvalid)
+        begin
+            axi_rvalid <= 1'b1;
+            axi_rresp <= 2'b0;
+        end
+        else if (axi_rvalid && axi_ctrl.rready)
+        begin
+            axi_rvalid <= 1'b0;
+        end
+    end
+end
+
+endmodule // selection_slave
\ No newline at end of file
diff --git a/hw/hdl/operators/selection/selection_user_logic.sv b/hw/hdl/operators/selection/selection_user_logic.sv
new file mode 100644
index 00000000..4e080580
--- /dev/null
+++ b/hw/hdl/operators/selection/selection_user_logic.sv
@@ -0,0 +1,67 @@
+`timescale 1ns / 1ps
+
+import lynxTypes::*;
+
+/**
+ * User logic
+ * Selection operator: CSR slave plus the VHDL selection core, placed between two axis_reg_rtl stages.
+ */
+module design_user_logic_0 (
+    // Clock and reset
+    input wire aclk,
+    input wire[0:0] aresetn,
+
+    // AXI4 control
+    AXI4L.s axi_ctrl,
+
+    // AXI4S
+    AXI4S.m axis_src,
+    AXI4S.s axis_sink
+);
+
+/* -- Tie-off unused interfaces and signals ----------------------------- */
+//always_comb axi_ctrl.tie_off_s();
+//always_comb axis_src.tie_off_m();
+//always_comb axis_sink.tie_off_s();
+
+/* -- USER LOGIC -------------------------------------------------------- */
+// Reg input and output (axis_reg_rtl is defined elsewhere; presumably a register slice)
+AXI4S axis_sink_r ();
+AXI4S axis_src_r ();
+axis_reg_rtl inst_reg_sink (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_sink), .axis_out(axis_sink_r));
+axis_reg_rtl inst_reg_src (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_src_r), .axis_out(axis_src));
+
+logic [31:0] selType;
+logic [31:0] lowThr;
+logic [31:0] uppThr;
+
+// Slave
+selection_slave inst_slave (
+    .aclk(aclk),
+    .aresetn(aresetn),
+    .axi_ctrl(axi_ctrl),
+    .selType(selType),
+    .lowThr(lowThr),
+    .uppThr(uppThr)
+);
+
+// Selection
+selection inst_top (
+    .clk(aclk),
+    .rst_n(aresetn),
+    .selType(selType),
+    .lowThr(lowThr),
+    .uppThr(uppThr),
+    .axis_in_tvalid(axis_sink_r.tvalid),
+    .axis_in_tready(axis_sink_r.tready),
+    .axis_in_tdata(axis_sink_r.tdata),
+    .axis_in_tkeep(axis_sink_r.tkeep),
+    .axis_in_tlast(axis_sink_r.tlast),
+    .axis_out_tvalid(axis_src_r.tvalid),
+    .axis_out_tready(axis_src_r.tready),
+    .axis_out_tdata(axis_src_r.tdata),
+    .axis_out_tkeep(axis_src_r.tkeep),
+    .axis_out_tlast(axis_src_r.tlast) // last named connection: a trailing comma here is a syntax error
+);
+
+endmodule
diff --git a/hw/hdl/operators/stride/stride.sv b/hw/hdl/operators/stride/stride.sv
new file mode 100644
index 00000000..028ca5ed
--- /dev/null
+++ b/hw/hdl/operators/stride/stride.sv
@@ -0,0 +1,96 @@
+`timescale 1ns / 1ps
+
+`include "axi_macros.svh"
+`include "lynx_macros.svh"
+
+import lynxTypes::*;
+
+/**
+ * User logic
+ * Stride operator top level: stride_req -> queue_meta -> stride_data on the read side, pass-through macros on the write side.
+ */
+module design_user_logic_0 (
+    // AXI4L CONTROL
+    // Slave control. Utilize this interface for any kind of CSR implementation.
+    AXI4L.s axi_ctrl,
+
+    // AXI4S HOST
+    AXI4S.m axis_card_src,
+    AXI4S.s axis_card_sink,
+
+    // AXI4S RDMA
+    AXI4S.m axis_rdma_src,
+    AXI4S.s axis_rdma_sink,
+
+    // FV
+    metaIntf.s fv_sink,
+    metaIntf.m fv_src,
+
+    // Requests
+    reqIntf.m rd_req_user,
+    reqIntf.m wr_req_user,
+
+    // RDMA
+    reqIntf.s rd_req_rdma,
+    reqIntf.s wr_req_rdma,
+
+    // Clock and reset
+    input wire aclk,
+    input wire[0:0] aresetn
+);
+
+/* -- Tie-off unused interfaces and signals ----------------------------- */
+always_comb axi_ctrl.tie_off_s();
+//always_comb axis_card_src.tie_off_m();
+//always_comb axis_card_sink.tie_off_s();
+//always_comb axis_rdma_src.tie_off_m();
+//always_comb axis_rdma_sink.tie_off_s();
+//always_comb fv_sink.tie_off_s();
+//always_comb fv_src.tie_off_m();
+//always_comb rd_req_user.tie_off_m();
+//always_comb wr_req_user.tie_off_m();
+always_comb rd_req_rdma.tie_off_s();
+//always_comb wr_req_rdma.tie_off_s();
+
+
+/* -- USER LOGIC -------------------------------------------------------- */
+
+localparam integer PARAMS_BITS = 64; // width of the params word handed from stride_req to stride_data
+
+// Write - RDMA (macros come from the included headers; presumably plain interface-to-interface assigns)
+`AXIS_ASSIGN(axis_rdma_sink, axis_card_src)
+`REQ_ASSIGN(wr_req_rdma, wr_req_user)
+
+// Read - Farview
+metaIntf #(.DATA_BITS(PARAMS_BITS)) params_sink ();
+metaIntf #(.DATA_BITS(PARAMS_BITS)) params_src ();
+
+// Request handler
+stride_req inst_stride_req (
+    .aclk(aclk),
+    .aresetn(aresetn),
+    .fv_sink(fv_sink),
+    .fv_src(fv_src),
+    .rd_req_user(rd_req_user),
+    .params(params_sink)
+);
+
+// Data handler
+stride_data inst_stride_data (
+    .aclk(aclk),
+    .aresetn(aresetn),
+    .params(params_src),
+    .axis_sink(axis_card_sink),
+    .axis_src(axis_rdma_src)
+);
+
+// Sequence (queue_meta carries params_sink to params_src; depth and ordering are defined in that module)
+queue_meta inst_seq (
+    .aclk(aclk),
+    .aresetn(aresetn),
+    .sink(params_sink),
+    .src(params_src)
+);
+
+
+endmodule
\ No newline at end of file
diff --git a/hw/hdl/operators/stride/stride_data.sv b/hw/hdl/operators/stride/stride_data.sv
new file mode 100644
index 00000000..0c64dbeb
--- /dev/null
+++ b/hw/hdl/operators/stride/stride_data.sv
@@ -0,0 
+1,182 @@ +import lynxTypes::*; + +module stride_data #( + parameter integer STR_DATA_BITS = AXI_DATA_BITS +) ( + input logic aclk, + input logic aresetn, + + // RDMA + AXI4S.s axis_sink, + AXI4S.m axis_src, + + // Sequence + metaIntf.s params +); + +localparam integer BEAT_LOG_BYTES = STR_DATA_BITS/8; +localparam integer BEAT_LOG_BITS = $clog2(BEAT_LOG_BYTES); +localparam integer WORD_SIZE = 3; + +// -- FSM +typedef enum logic[0:0] {ST_IDLE, ST_READ} state_t; +logic [0:0] state_C, state_N; + +logic [31:0] cnt_C, cnt_N; +logic [31:0] dwidth_C, dwidth_N; + +logic [2*STR_DATA_BITS-1:0] data_C, data_N; +logic [2*STR_DATA_BITS/8-1:0] keep_C, keep_N; +logic last_C, last_N; +logic val_C, val_N; +logic [31:0] dwidth_r_C, dwidth_r_N; +logic [9:0] pntr_out_C = 0, pntr_out_N; +logic [9:0] pntr_in_C = 0, pntr_in_N; + +logic [31:0] params_ntr; +logic [31:0] params_dwidth; + +// -- REG +always_ff @(posedge aclk, negedge aresetn) begin: PROC_REG +if (aresetn == 1'b0) begin + state_C <= ST_IDLE; + val_C <= 1'b0; + pntr_out_C <= 0; + pntr_in_C <= 0; +end +else + state_C <= state_N; + cnt_C <= cnt_N; + dwidth_C <= dwidth_N; + dwidth_r_C <= dwidth_r_N; + pntr_out_C <= pntr_out_N; + pntr_in_C <= pntr_in_N; + + val_C <= val_N; + last_C <= last_N; + data_C <= data_N; + keep_C <= keep_N; +end + +// -- NSL +always_comb begin: NSL + state_N = state_C; + + case(state_C) + ST_IDLE: + state_N = (params.ready) ? ST_READ : ST_IDLE; + + ST_READ: + state_N = ((cnt_C == 0) && (axis_sink.tready & axis_sink.tvalid)) ? 
ST_IDLE : ST_READ; + + endcase // state_C +end + +// -- DP +always_comb begin: DP + cnt_N = cnt_C; + dwidth_N = dwidth_C; + dwidth_r_N = dwidth_r_C; + pntr_out_N = pntr_out_C; + pntr_in_N = pntr_in_C; + + val_N = 1'b0; + last_N = last_C; + data_N = data_C; + keep_N = keep_C; + + // Params + params.ready = 1'b0; + + params_ntr = params.data[0+:32]; + params_dwidth = params.data[32+:32]; + + // Data in + axis_sink.tready = 1'b0; + + // Data out + axis_src.tvalid = 1'b0; + axis_src.tdata = data_C; + axis_src.tkeep = keep_C; + axis_src.tlast = last_C; + + case(state_C) + ST_IDLE: begin + if(params.valid) begin + params.ready = 1'b1; + + cnt_N = params_ntr - 1; + dwidth_N = params_dwidth; + end + end + + ST_READ: begin + axis_sink.tready = axis_src.tready; + + if(axis_src.tready) begin + // input + if(axis_sink.tready & axis_sink.tvalid) begin + val_N = 1'b1; + last_N = axis_sink.tlast; + + /*if(dwidth_C > BEAT_LOG_BITS) + data_N = axis_sink.tdata; + else + data_N = {axis_sink.tdata, data_C} >> ((1 << dwidth_C) << WORD_SIZE);*/ + data_N[((pntr_in_C<= BEAT_LOG_BITS)) begin + pntr_in_N = 0; + end + else begin + pntr_in_N = ((pntr_in_C + 1) << dwidth_C) == BEAT_LOG_BYTES ? 0 : pntr_in_C + 1; + end + + cnt_N = cnt_C - 1; + dwidth_r_N = dwidth_C; + end + end + else begin + val_N = val_C; + end + end + + endcase // state_C + + // output + if(val_C) begin + if(last_C || (dwidth_r_C >= BEAT_LOG_BITS)) begin + pntr_out_N = 0; + axis_src.tvalid = 1'b1; + end + else begin + pntr_out_N = ((pntr_out_C + 1) << dwidth_r_C) == BEAT_LOG_BYTES ? 
0 : pntr_out_C + 1;
+            axis_src.tvalid = ((pntr_out_C + 1) << dwidth_r_C) == BEAT_LOG_BYTES;
+        end
+    end
+
+end
+
+ila_stride_data inst_str_data (
+    .clk(aclk),
+    .probe0(state_C),
+    .probe1(cnt_C),
+    .probe2(dwidth_C),
+    .probe3(dwidth_r_C),
+    .probe4(pntr_in_C),
+    .probe5(pntr_out_C),
+    .probe6(last_C),
+    .probe7(val_C),
+    .probe8(params.valid),
+    .probe9(axis_sink.tvalid),
+    .probe10(axis_sink.tready),
+    .probe11(axis_sink.tlast),
+    .probe12(axis_src.tvalid),
+    .probe13(axis_src.tready),
+    .probe14(axis_src.tlast),
+    .probe15(axis_src.tdata),
+    .probe16(axis_src.tkeep)
+);
+
+endmodule
\ No newline at end of file
diff --git a/hw/hdl/operators/stride/stride_req.sv b/hw/hdl/operators/stride/stride_req.sv
new file mode 100644
index 00000000..ac8ae214
--- /dev/null
+++ b/hw/hdl/operators/stride/stride_req.sv
@@ -0,0 +1,146 @@
+import lynxTypes::*;
+
+// Request handler of the stride operator.
+// Accepts a descriptor on `fv_sink`, forwards a descriptor on `fv_src`,
+// publishes {dwidth, number of transfers} on `params` and then issues one
+// read request per element on `rd_req_user`, advancing the address by the
+// stride after every request.
+module stride_req #(
+    parameter integer STR_DATA_BITS = AXI_DATA_BITS
+) (
+    input  logic                aclk,
+    input  logic                aresetn,
+
+    // RDMA
+    metaIntf.s                  fv_sink,
+    metaIntf.m                  fv_src,
+
+    // Host
+    reqIntf.m                   rd_req_user,
+
+    // Sequence
+    metaIntf.m                  params
+);
+
+localparam integer APP_WRITE = 1;
+// NOTE: BEAT_LOG_BYTES is the number of bytes per beat, BEAT_LOG_BITS its log2.
+localparam integer BEAT_LOG_BYTES = STR_DATA_BITS/8;
+localparam integer BEAT_LOG_BITS = $clog2(BEAT_LOG_BYTES);
+
+// -- FSM
+typedef enum logic[0:0] {ST_IDLE, ST_READ} state_t;
+logic [0:0] state_C, state_N;
+
+// Regs
+logic [31:0] cnt_C, cnt_N;
+logic [VADDR_BITS-1:0] laddr_C, laddr_N;
+logic [31:0] stride_C, stride_N;
+logic [31:0] dwidth_C, dwidth_N;
+logic ctl_C, ctl_N;
+
+// Int
+logic [VADDR_BITS-1:0] fv_raddr;
+logic [VADDR_BITS-1:0] fv_laddr;
+logic [31:0] fv_dwidth;
+logic [31:0] fv_stride;
+logic [31:0] fv_nbytes;
+
+logic [31:0] params_ntr;
+logic [31:0] params_dwidth;
+
+// -- REG
+// Only state_C has a reset value. The else branch is an explicit begin/end
+// block so that the data registers are written on the clock edge only and
+// not on the asynchronous reset edge as well.
+always_ff @(posedge aclk, negedge aresetn) begin: PROC_REG
+if (aresetn == 1'b0) begin
+    state_C <= ST_IDLE;
+end
+else begin
+    state_C <= state_N;
+    cnt_C <= cnt_N;
+    laddr_C <= laddr_N;
+    stride_C <= stride_N;
+    dwidth_C <= dwidth_N;
+    ctl_C <= ctl_N;
+end
+end
+
+// -- NSL
+// IDLE -> READ when a descriptor is accepted, READ -> IDLE after the last
+// read request has been taken.
+always_comb begin: NSL
+    state_N = state_C;
+
+    case(state_C)
+        ST_IDLE:
+            state_N = (fv_sink.valid && fv_src.ready && params.ready) ? ST_READ : ST_IDLE;
+
+        ST_READ:
+            state_N = rd_req_user.ready ? ((cnt_C == 0) ? ST_IDLE : ST_READ) : ST_READ;
+
+    endcase // state_C
+end
+
+// -- DP
+always_comb begin: DP
+    cnt_N = cnt_C;
+    laddr_N = laddr_C;
+    stride_N = stride_C;
+    dwidth_N = dwidth_C;
+    ctl_N = ctl_C;
+
+    // Incoming
+    fv_raddr = fv_sink.data[64+:48];
+    fv_laddr = fv_sink.data[112+:48];
+    fv_dwidth = fv_sink.data[160+:32];
+    fv_stride = fv_sink.data[192+:32];
+    fv_nbytes = fv_sink.data[224+:32];
+
+    // FV sink
+    fv_sink.ready = 1'b0;
+
+    // FV src
+    fv_src.valid = 1'b0;
+
+    fv_src.data = 0;
+    fv_src.data[0+:5] = APP_WRITE;
+    fv_src.data[5+:24] = fv_sink.data[5+:24];
+    fv_src.data[64+:48] = 0;
+    fv_src.data[112+:48] = fv_raddr;
+    fv_src.data[160+:32] = fv_nbytes;
+
+    // RD host
+    rd_req_user.valid = 1'b0;
+
+    rd_req_user.req = 0;
+    rd_req_user.req.vaddr = laddr_C;
+    rd_req_user.req.len = (1 << dwidth_C);
+    rd_req_user.req.ctl = ctl_C;
+
+    // Params
+    params.valid = 1'b0;
+
+    params_dwidth = fv_dwidth;
+    params_ntr = (fv_dwidth >= BEAT_LOG_BITS) ? 
fv_nbytes >> BEAT_LOG_BITS : fv_nbytes >> fv_dwidth; + params.data = {params_dwidth, params_ntr}; + + // DP fsm + case(state_C) + ST_IDLE: begin + if(fv_sink.valid && fv_src.ready && params.ready) begin + fv_sink.ready = 1'b1; + fv_src.valid = 1'b1; + params.valid = 1'b1; + + cnt_N = (fv_nbytes >> fv_dwidth) - 1; + laddr_N = fv_laddr; + stride_N = fv_stride; + dwidth_N = fv_dwidth; + ctl_N = (cnt_N == 0); + end + end + + ST_READ: begin + if(rd_req_user.ready) begin + rd_req_user.valid = 1'b1; + + cnt_N = cnt_C - 1; + laddr_N = laddr_C + stride_C; + ctl_N = (cnt_N == 0); + end + end + + endcase // state_C + +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/operators/testcount/minmaxsum_user_logic.sv b/hw/hdl/operators/testcount/minmaxsum_user_logic.sv new file mode 100644 index 00000000..f4067c90 --- /dev/null +++ b/hw/hdl/operators/testcount/minmaxsum_user_logic.sv @@ -0,0 +1,64 @@ +`timescale 1ns / 1ps + +import lynxTypes::*; + +/** + * User logic + * + */ +module design_user_logic_0 ( + // Clock and reset + input wire aclk, + input wire[0:0] aresetn, + + // AXI4 control + AXI4L.s axi_ctrl, + + // AXI4S + AXI4S.m axis_src, + AXI4S.s axis_sink +); + +/* -- Tie-off unused interfaces and signals ----------------------------- */ +//always_comb axi_ctrl.tie_off_s(); +always_comb axis_src.tie_off_m(); +//always_comb axis_sink.tie_off_s(); + +/* -- USER LOGIC -------------------------------------------------------- */ +// Reg input +AXI4S axis_sink_r (); +//AXI4S axis_src_r (); +axis_reg_rtl inst_reg_sink (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_sink), .axis_out(axis_sink_r)); +//axis_reg_rtl inst_reg_src (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_src_r), .axis_out(axis_src)); + +logic clr; +logic done; +logic [3:0] test_type; +logic [31:0] test_condition; +logic [31:0] result_count; + +// Slave +testcount_slave inst_slave ( + .aclk(aclk), + .aresetn(aresetn), + .axi_ctrl(axi_ctrl), + .clr(clr), + .done(done), + .test_type(test_type), + 
.test_condition(test_condition),
+    .result_count(result_count)
+);  // terminating semicolon was missing, which is a syntax error
+
+// Minmaxsum
+// Consumes the registered sink stream and drives done / result_count,
+// which the control slave above exposes to the host.
+minmaxsum inst_top (
+    .clk(aclk),
+    .rst_n(aresetn),
+    .clr(clr),
+    .done(done),
+    .test_type(test_type),
+    .test_condition(test_condition),
+    .result_count(result_count),
+    .axis_in(axis_sink_r)
+);
+
+endmodule
diff --git a/hw/hdl/operators/testcount/onesCounter.v b/hw/hdl/operators/testcount/onesCounter.v
new file mode 100644
index 00000000..af5c2b7e
--- /dev/null
+++ b/hw/hdl/operators/testcount/onesCounter.v
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2016 - 2017 Systems Group, ETH Zurich
+ *
+ * This hardware operator is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+// Pipelined population count of a 32-bit word.
+// Three register stages: eight 4-bit group sums, two 16-bit half sums and
+// the final count. count_valid is data_in_valid delayed by the same three
+// stages. The WIDTH parameter is not used by the implementation; the data
+// path is fixed at 32 bits.
+module onesCounter #(parameter WIDTH = 32)
+(
+    input wire clk,
+    input wire rst_n,
+
+    input wire data_in_valid,
+    input wire [31:0] data_in,
+
+    output reg count_valid,
+    output reg [5:0] count
+ );
+
+reg [2:0] sum_4_1, sum_4_2, sum_4_3, sum_4_4, sum_4_5, sum_4_6, sum_4_7, sum_4_8;
+
+reg [4:0] sum_2_1, sum_2_2;
+
+reg sum_4_valid, sum_2_valid;
+
+
+always @(posedge clk) begin
+    if (~rst_n) begin
+        // reset
+        sum_4_1 <= 0;
+        sum_4_2 <= 0;
+        sum_4_3 <= 0;
+        sum_4_4 <= 0;
+        sum_4_5 <= 0;
+        sum_4_6 <= 0;
+        sum_4_7 <= 0;
+        sum_4_8 <= 0;
+
+        // The valid pipeline must be cleared in reset. Sampling
+        // data_in_valid here would let a valid leak out of reset.
+        sum_4_valid <= 0;
+
+        sum_2_1 <= 0;
+        sum_2_2 <= 0;
+
+        sum_2_valid <= 0;
+
+        count <= 0;
+        count_valid <= 0;
+    end
+    else begin
+        // stage 1: ones per 4-bit group
+        sum_4_1 <= data_in[0] + data_in[1] + data_in[2] + data_in[3];
+        sum_4_2 <= data_in[4] + data_in[5] + data_in[6] + data_in[7];
+        sum_4_3 <= data_in[8] + data_in[9] + data_in[10] + data_in[11];
+        sum_4_4 <= data_in[12] + data_in[13] + data_in[14] + data_in[15];
+        sum_4_5 <= data_in[16] + data_in[17] + data_in[18] + data_in[19];
+        sum_4_6 <= data_in[20] + data_in[21] + data_in[22] + data_in[23];
+        sum_4_7 <= data_in[24] + data_in[25] + data_in[26] + data_in[27];
+        sum_4_8 <= data_in[28] + data_in[29] + data_in[30] + data_in[31];
+
+        sum_4_valid <= data_in_valid;
+
+        // stage 2: ones per 16-bit half
+        sum_2_1 <= sum_4_1 + sum_4_2 + sum_4_3 + sum_4_4;
+        sum_2_2 <= sum_4_5 + sum_4_6 + sum_4_7 + sum_4_8;
+
+        sum_2_valid <= sum_4_valid;
+
+        // stage 3: total
+        count <= sum_2_1 + sum_2_2;
+        count_valid <= sum_2_valid;
+    end
+end
+
+endmodule
\ No newline at end of file
diff --git a/hw/hdl/operators/testcount/testcount.sv b/hw/hdl/operators/testcount/testcount.sv
new file mode 100644
index 00000000..b9f84d64
--- /dev/null
+++ b/hw/hdl/operators/testcount/testcount.sv
@@ -0,0 +1,183 @@
+import lynxTypes::*;
+
+module testcount (
+    input logic clk,
+    input logic rst_n,
+
+    input logic clr,
+    output logic done,
+
+    input logic [3:0] test_type,
+    input logic [31:0] test_condition,
+    output logic [31:0] result_count,
+
+    AXI4S.s axis_in
+);
+
+logic less_w[15:0];
+logic equal_w[15:0];
+
+logic less[15:0];
+logic equal[15:0];
+logic notEqual[15:0];
+logic greater[15:0];
+logic greaterEqual[15:0];
+logic lessEqual[15:0];
+
+reg [15:0] condition_test_result;
+
+reg [31:0] temp_count;
+
+reg data_in_valid_d1;
+reg data_in_valid_d2;
+
+reg data_in_last_d1;
+reg data_in_last_d2;
+reg data_in_last_d3;
+reg data_in_last_d4;
+reg data_in_last_d5;
+
+wire matches_valid;
+wire [5:0] matches_count;
+
+reg [3:0] test_type_d1;
+reg data_valid;
+reg data_last;
+
+// Encoding of test_type (compared against the 4-bit test_type_d1).
+localparam [2:0]
+    EQUAL = 3'b000,
+    NOT_EQUAL = 3'b001,
+    LESS_THAN = 3'b010,
+    LESS_EQUAL = 3'b011,
+    GREATER_THAN = 3'b100,
+    GREATER_EQUAL = 3'b101;
+
+/////////////////////////////////// cycle 0: buffer input signals /////////////////////////////////
+always@(posedge clk) begin
+    if(~rst_n) begin
+        data_in_valid_d1 <= 0;
+        data_in_valid_d2 <= 0;
+        data_in_last_d1 <= 0;
+        data_in_last_d2 <= 0;
+        data_in_last_d3 <= 0;
+        data_in_last_d4 <= 0;
+        data_in_last_d5 <= 0;
+
+        test_type_d1 <= 0;
+
+        data_valid <= 0;
+        data_last <= 0;
+    end
+    else begin
+        data_in_valid_d1 <= axis_in.tvalid;
+        data_in_valid_d2 <= data_in_valid_d1;
+
+        // tlast is delayed by five stages so that it lines up with the
+        // count of the last beat leaving the ones counter.
+        data_in_last_d1 <= axis_in.tlast;
+        data_in_last_d2 <= data_in_last_d1;
+        data_in_last_d3 <= data_in_last_d2;
+        data_in_last_d4 <= data_in_last_d3;
+        data_in_last_d5 <= data_in_last_d4;
+
+        test_type_d1 <= test_type;
+
+        // Sticky completion flags. They are cleared by clr; otherwise
+        // they would stay set after the first stream and done could never
+        // signal completion of a later run.
+        if(clr) begin
+            data_valid <= 1'b0;
+            data_last <= 1'b0;
+        end
+        else begin
+            if(matches_valid) begin
+                data_valid <= 1'b1;
+            end
+            if(data_in_last_d5) begin
+                data_last <= 1'b1;
+            end
+        end
+    end
+end
+
+/////////////////////////////////// cycle 1: evaluate conditions /////////////////////////////////
+// test for == and less than
+generate
+    genvar i;
+    for(i = 0; i < 16; i = i + 1) begin
+        assign less_w[i] = (axis_in.tdata[(i+1)*32-1 : i*32] < test_condition);
+        assign equal_w[i] = (axis_in.tdata[(i+1)*32-1 : i*32] == test_condition);
+
+        // produce the rest of all other condition tests
+        always@(posedge clk) begin
+            if( ~ rst_n ) begin
+                greater[i] <= 0;
+                greaterEqual[i] <= 0;
+                lessEqual[i] <= 0;
+                notEqual[i] <= 0;
+                less[i] <= 0;
+                equal[i] <= 0;
+            end
+            else if(axis_in.tvalid) begin
+                greater[i] <= ~less_w[i] & ~equal_w[i];
+                greaterEqual[i] <= ~less_w[i];
+                lessEqual[i] <= less_w[i] | equal_w[i];
+                notEqual[i] <= ~equal_w[i];
+                less[i] <= less_w[i];
+                equal[i] <= equal_w[i];
+            end
+            else begin
+                greater[i] <= 0;
+                greaterEqual[i] <= 0;
+                lessEqual[i] <= 0;
+                notEqual[i] <= 0;
+                less[i] <= 0;
+                equal[i] <= 0;
+            end
+        end
+
+        /////////////////////////////////// cycle 1: get the right condition test /////////////////////////////////
+        always@(posedge clk) begin
+            case(test_type_d1)
+                LESS_THAN: condition_test_result[i] <= less[i];
+                LESS_EQUAL: condition_test_result[i] <= lessEqual[i];
+                GREATER_THAN: condition_test_result[i] <= greater[i];
+                GREATER_EQUAL: condition_test_result[i] <= greaterEqual[i];
+                EQUAL: condition_test_result[i] <= equal[i];
+                NOT_EQUAL: condition_test_result[i] <= notEqual[i];
+                default: condition_test_result[i] <= 0;
+            endcase
+        end
+    end
+endgenerate
+
+
+// Counts the matching words of one beat (16 result bits, zero-extended to
+// the 32-bit counter input).
+onesCounter onesCounter (
+    .clk (clk),
+    .rst_n (rst_n),
+
+    .data_in_valid (data_in_valid_d2),
+    .data_in (condition_test_result),
+
+    .count_valid (matches_valid),
+    .count (matches_count)
+);
+
+// Running total of matches; clr restarts it, a simultaneous valid count
+// takes precedence over clr.
+always@(posedge clk) begin
+    if(~rst_n) begin
+        temp_count <= 0;
+    end
+    else begin
+        if(clr)
+            temp_count <= 0;
+        if(matches_valid) begin
+            temp_count <= temp_count + matches_count;
+        end
+    end
+end
+
+/////////////////////////////////// cycle 2: Assign output signals /////////////////////////////////
+always @(posedge clk) begin
+    if (~rst_n) begin
+        // reset
+        result_count <= 0;
+        done <= 0;
+    end
+    else begin
+        result_count <= temp_count;
+        // Gate with clr: the sticky flags are cleared in the same cycle,
+        // so done must not re-register their old value.
+        done <= data_last & data_valid & ~clr;
+    end
+end
+
+assign axis_in.tready = 1'b1;
+
+endmodule
\ No newline at end of file
diff --git a/hw/hdl/operators/testcount/testcount_slave.sv b/hw/hdl/operators/testcount/testcount_slave.sv
new file mode 100644
index 00000000..077552f4
--- /dev/null
+++ b/hw/hdl/operators/testcount/testcount_slave.sv
@@ -0,0 
+1,250 @@ +import lynxTypes::*; + +module testcount_slave ( + input logic aclk, + input logic aresetn, + + AXI4L.s axi_ctrl, + + output logic clr, + input logic done, + + output logic [3:0] test_type, + output logic [31:0] test_condition, + input logic [31:0] result_count +); + +//`define DEBUG_CNFG_SLAVE + +// -- Decl ---------------------------------------------------------- +// ------------------------------------------------------------------ + +// Constants +localparam integer N_REGS = 5; +localparam integer ADDR_LSB = (AXIL_DATA_BITS/32) + 1; +localparam integer ADDR_MSB = $clog2(N_REGS); +localparam integer AXI_ADDR_BITS = ADDR_LSB + ADDR_MSB; + +// Internal registers +logic [AXI_ADDR_BITS-1:0] axi_awaddr; +logic axi_awready; +logic [AXI_ADDR_BITS-1:0] axi_araddr; +logic axi_arready; +logic [1:0] axi_bresp; +logic axi_bvalid; +logic axi_wready; +logic [AXIL_DATA_BITS-1:0] axi_rdata; +logic [1:0] axi_rresp; +logic axi_rvalid; + +// Registers +logic [N_REGS-1:0][AXIL_DATA_BITS-1:0] slv_reg; +logic slv_reg_rden; +logic slv_reg_wren; +logic aw_en; + +logic done_op; + +// -- Def ----------------------------------------------------------- +// ------------------------------------------------------------------ + +// -- Register map ----------------------------------------------------------------------- +localparam integer CTRL_REG = 0; +localparam integer STAT_REG = 1; +localparam integer TYPE_REG = 2; +localparam integer COND_REG = 3; +localparam integer RSLT_REG = 4; + +// Write process +assign slv_reg_wren = axi_wready && axi_ctrl.wvalid && axi_awready && axi_ctrl.awvalid; + +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) begin + slv_reg <= 0; + + done_op <= 1'b0; + end + else begin + slv_reg[CTRL_REG] <= 0; + + done_op <= slv_reg[CTRL_REG][0] ? 1'b0 : done ? 
1'b1 : done_op;
+
+    if(slv_reg_wren) begin
+      case (axi_awaddr[ADDR_LSB+ADDR_MSB-1:ADDR_LSB])
+        CTRL_REG:
+          // only the lowest byte of the control register is writable
+          for (int i = 0; i < 1; i++) begin
+            if(axi_ctrl.wstrb[i]) begin
+              slv_reg[CTRL_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8];
+            end
+          end
+        TYPE_REG:
+          for (int i = 0; i < (AXIL_DATA_BITS/8); i++) begin
+            if(axi_ctrl.wstrb[i]) begin
+              slv_reg[TYPE_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8];
+            end
+          end
+        COND_REG:
+          for (int i = 0; i < (AXIL_DATA_BITS/8); i++) begin
+            if(axi_ctrl.wstrb[i]) begin
+              slv_reg[COND_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8];
+            end
+          end
+        default : ;
+      endcase
+    end
+  end
+end
+
+assign clr = slv_reg[CTRL_REG][0];
+assign test_type = slv_reg[TYPE_REG][3:0];
+assign test_condition = slv_reg[COND_REG][31:0];
+
+// Read process
+assign slv_reg_rden = axi_arready & axi_ctrl.arvalid & ~axi_rvalid;
+
+// axi_rdata is only updated when a read is accepted. It must hold its value
+// while rvalid is high and the master has not yet asserted rready, so it is
+// not cleared on the idle cycles in between.
+always_ff @(posedge aclk, negedge aresetn) begin
+  if( aresetn == 1'b0 ) begin
+    axi_rdata <= 0;
+  end
+  else begin
+    if(slv_reg_rden) begin
+      // unused bits and unmapped addresses read as zero
+      axi_rdata <= 0;
+      case (axi_araddr[ADDR_LSB+ADDR_MSB-1:ADDR_LSB])
+        STAT_REG:
+          axi_rdata[0] <= done_op;
+        TYPE_REG:
+          axi_rdata[3:0] <= slv_reg[TYPE_REG][3:0];
+        COND_REG:
+          axi_rdata[31:0] <= slv_reg[COND_REG][31:0];
+        RSLT_REG:
+          axi_rdata[31:0] <= result_count;
+        default: ;
+      endcase
+    end
+  end
+end
+
+// I/O
+assign axi_ctrl.awready = axi_awready;
+assign axi_ctrl.arready = axi_arready;
+assign axi_ctrl.bresp = axi_bresp;
+assign axi_ctrl.bvalid = axi_bvalid;
+assign axi_ctrl.wready = axi_wready;
+assign axi_ctrl.rdata = axi_rdata;
+assign axi_ctrl.rresp = axi_rresp;
+assign axi_ctrl.rvalid = axi_rvalid;
+
+// awready and awaddr
+// The address is accepted for one cycle when both awvalid and wvalid are
+// present; aw_en blocks a new address until the write response is taken.
+always_ff @(posedge aclk, negedge aresetn) begin
+  if ( aresetn == 1'b0 )
+    begin
+      axi_awready <= 1'b0;
+      axi_awaddr <= 0;
+      aw_en <= 1'b1;
+    end
+  else
+    begin
+      if (~axi_awready && axi_ctrl.awvalid && axi_ctrl.wvalid && aw_en)
+        begin
+          axi_awready <= 1'b1;
+          aw_en <= 1'b0;
+          axi_awaddr <= axi_ctrl.awaddr;
+        end
+      else if (axi_ctrl.bready && axi_bvalid)
+        begin
+          aw_en 
<= 1'b1; + axi_awready <= 1'b0; + end + else + begin + axi_awready <= 1'b0; + end + end +end + +// arready and araddr +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_arready <= 1'b0; + axi_araddr <= 0; + end + else + begin + if (~axi_arready && axi_ctrl.arvalid) + begin + axi_arready <= 1'b1; + axi_araddr <= axi_ctrl.araddr; + end + else + begin + axi_arready <= 1'b0; + end + end +end + +// bvalid and bresp +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_bvalid <= 0; + axi_bresp <= 2'b0; + end + else + begin + if (axi_awready && axi_ctrl.awvalid && ~axi_bvalid && axi_wready && axi_ctrl.wvalid) + begin + axi_bvalid <= 1'b1; + axi_bresp <= 2'b0; + end + else + begin + if (axi_ctrl.bready && axi_bvalid) + begin + axi_bvalid <= 1'b0; + end + end + end +end + +// wready +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_wready <= 1'b0; + end + else + begin + if (~axi_wready && axi_ctrl.wvalid && axi_ctrl.awvalid && aw_en ) + begin + axi_wready <= 1'b1; + end + else + begin + axi_wready <= 1'b0; + end + end +end + +// rvalid and rresp (1Del?) 
+always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_rvalid <= 0; + axi_rresp <= 0; + end + else + begin + if (axi_arready && axi_ctrl.arvalid && ~axi_rvalid) + begin + axi_rvalid <= 1'b1; + axi_rresp <= 2'b0; + end + else if (axi_rvalid && axi_ctrl.rready) + begin + axi_rvalid <= 1'b0; + end + end +end + +endmodule // cnfg_slave \ No newline at end of file diff --git a/hw/hdl/pkg/axi_intf.sv b/hw/hdl/pkg/axi_intf.sv new file mode 100644 index 00000000..38108ad3 --- /dev/null +++ b/hw/hdl/pkg/axi_intf.sv @@ -0,0 +1,384 @@ +`ifndef AXI_INTF_SV_ +`define AXI_INTF_SV_ + +import lynxTypes::*; + +// ---------------------------------------------------------------------------- +// AXI4 +// ---------------------------------------------------------------------------- +interface AXI4 #( + parameter AXI4_ADDR_BITS = AXI_ADDR_BITS, + parameter AXI4_DATA_BITS = AXI_DATA_BITS +) ( + input logic aclk +); + +localparam AXI4_STRB_BITS = AXI4_DATA_BITS / 8; + +typedef logic [AXI4_ADDR_BITS-1:0] addr_t; +typedef logic [AXI4_DATA_BITS-1:0] data_t; +typedef logic [AXI4_STRB_BITS-1:0] strb_t; + +// AR channel +addr_t araddr; +logic[1:0] arburst; +logic[3:0] arcache; +logic[0:0] arid; +logic[7:0] arlen; +logic[0:0] arlock; +logic[2:0] arprot; +logic[3:0] arqos; +logic[3:0] arregion; +logic[2:0] arsize; +logic arready; +logic arvalid; + +// AW channel +addr_t awaddr; +logic[1:0] awburst; +logic[3:0] awcache; +logic[0:0] awid; +logic[7:0] awlen; +logic[0:0] awlock; +logic[2:0] awprot; +logic[3:0] awqos; +logic[3:0] awregion; +logic[2:0] awsize; +logic awready; +logic awvalid; + +// R channel +data_t rdata; +logic[0:0] rid; +logic rlast; +logic[1:0] rresp; +logic rready; +logic rvalid; + +// W channel +data_t wdata; +logic wlast; +strb_t wstrb; +logic wready; +logic wvalid; + +// B channel +logic[0:0] bid; +logic[1:0] bresp; +logic bready; +logic bvalid; + +// Tie off unused master signals +task tie_off_m (); + araddr = 0; + arburst = 2'b01; + arcache = 
4'b0;
+    arid = 0;
+    arlen = 8'b0;
+    arlock = 1'b0;
+    arprot = 3'b0;
+    arqos = 4'b0;
+    arregion = 4'b0;
+    arsize = 3'b0;
+    arvalid = 1'b0;
+    awaddr = 0;
+    awburst = 2'b01;
+    awcache = 4'b0;
+    awid = 0;
+    awlen = 8'b0;
+    awlock = 1'b0;
+    awprot = 3'b0;
+    awqos = 4'b0;
+    awregion = 4'b0;
+    awsize = 3'b0;
+    awvalid = 1'b0;
+    bready = 1'b0;
+    rready = 1'b0;
+    wdata = 0;
+    wlast = 1'b0;
+    wstrb = 0;
+    wvalid = 1'b0;
+endtask
+
+// Tie off unused slave signals
+task tie_off_s ();
+    arready = 1'b0;
+    awready = 1'b0;
+    bresp = 2'b0;
+    bvalid = 1'b0;
+    bid = 0;
+    rdata = 0;
+    rid = 0;
+    rlast = 1'b0;
+    rresp = 2'b0;
+    rvalid = 1'b0;
+    wready = 1'b0;
+endtask
+
+// Master: drives the address, write-data and response-ready signals
+modport m (
+    import tie_off_m,
+    // AW
+    input awready,
+    output awaddr, awburst, awcache, awlen, awlock, awprot, awqos, awregion, awsize, awvalid, awid,
+    // AR
+    input arready,
+    output araddr, arburst, arcache, arlen, arlock, arprot, arqos, arregion, arsize, arvalid, arid,
+    // R
+    input rlast, rresp, rdata, rvalid, rid,
+    output rready,
+    // W
+    input wready,
+    output wdata, wlast, wstrb, wvalid,
+    // B
+    input bresp, bvalid, bid,
+    output bready
+);
+
+// Slave: mirror image of the master modport
+modport s (
+    import tie_off_s,
+    // AW
+    input awaddr, awburst, awcache, awlen, awlock, awprot, awqos, awregion, awsize, awvalid, awid,
+    output awready,
+    // AR
+    input araddr, arburst, arcache, arlen, arlock, arprot, arqos, arregion, arsize, arvalid, arid,
+    output arready,
+    // R
+    input rready,
+    output rlast, rresp, rdata, rvalid, rid,
+    // W
+    input wdata, wlast, wstrb, wvalid,
+    output wready,
+    // B
+    input bready,
+    output bresp, bvalid, bid
+);
+
+endinterface
+
+// ----------------------------------------------------------------------------
+// AXI4 lite
+// ----------------------------------------------------------------------------
+interface AXI4L #(
+    parameter AXI4L_ADDR_BITS = AXI_ADDR_BITS,
+    parameter AXI4L_DATA_BITS = AXIL_DATA_BITS
+) (
+    input logic aclk
+);
+
+localparam AXI4L_STRB_BITS = AXI4L_DATA_BITS / 8;
+
+typedef 
logic [AXI4L_ADDR_BITS-1:0] addr_t;
+typedef logic [AXI4L_DATA_BITS-1:0] data_t;
+typedef logic [AXI4L_STRB_BITS-1:0] strb_t;
+
+// AR channel
+addr_t araddr;
+logic[2:0] arprot;
+logic[3:0] arqos;
+logic[3:0] arregion;
+logic arready;
+logic arvalid;
+
+// AW channel
+addr_t awaddr;
+logic[2:0] awprot;
+logic[3:0] awqos;
+logic[3:0] awregion;
+logic awready;
+logic awvalid;
+
+// R channel
+data_t rdata;
+logic[1:0] rresp;
+logic rready;
+logic rvalid;
+
+// W channel
+data_t wdata;
+strb_t wstrb;
+logic wready;
+logic wvalid;
+
+// B channel
+logic[1:0] bresp;
+logic bready;
+logic bvalid;
+
+// Tie off unused master signals
+task tie_off_m ();
+    araddr = 0;
+    arprot = 3'b0;
+    arqos = 4'b0;
+    arregion = 4'b0;
+    arvalid = 1'b0;
+    awaddr = 0;
+    awprot = 3'b0;
+    awqos = 4'b0;
+    awregion = 4'b0;
+    awvalid = 1'b0;
+    bready = 1'b0;
+    rready = 1'b0;
+    wdata = 0;
+    wstrb = 0;
+    wvalid = 1'b0;
+endtask
+
+// Tie off unused slave signals
+task tie_off_s ();
+    arready = 1'b0;
+    awready = 1'b0;
+    bresp = 2'b0;
+    bvalid = 1'b0;
+    rdata = 0;
+    rresp = 2'b0;
+    rvalid = 1'b0;
+    wready = 1'b0;
+endtask
+
+// Master: drives the address, write-data and response-ready signals
+modport m (
+    import tie_off_m,
+    // AW
+    input awready,
+    output awaddr, awprot, awqos, awregion, awvalid,
+    // AR
+    input arready,
+    output araddr, arprot, arqos, arregion, arvalid,
+    // R
+    input rresp, rdata, rvalid,
+    output rready,
+    // W
+    input wready,
+    output wdata, wstrb, wvalid,
+    // B
+    input bresp, bvalid,
+    output bready
+);
+
+// Slave: mirror image of the master modport
+modport s (
+    import tie_off_s,
+    // AW
+    input awaddr, awprot, awqos, awregion, awvalid,
+    output awready,
+    // AR
+    input araddr, arprot, arqos, arregion, arvalid,
+    output arready,
+    // R
+    input rready,
+    output rresp, rdata, rvalid,
+    // W
+    input wdata, wstrb, wvalid,
+    output wready,
+    // B
+    input bready,
+    output bresp, bvalid
+);
+
+endinterface
+
+// ----------------------------------------------------------------------------
+// AXI4 stream
+// 
---------------------------------------------------------------------------- +interface AXI4S #( + parameter AXI4S_DATA_BITS = AXI_DATA_BITS +) ( + input logic aclk +); + +localparam AXI4S_KEEP_BITS = AXI4S_DATA_BITS / 8; + +typedef logic [AXI4S_DATA_BITS-1:0] data_t; +typedef logic [AXI4S_KEEP_BITS-1:0] keep_t; + +data_t tdata; +keep_t tkeep; +logic tlast; +logic tready; +logic tvalid; + +// Tie off unused master signals +task tie_off_m (); + tdata = 0; + tkeep = 0; + tlast = 1'b0; + tvalid = 1'b0; +endtask + +// Tie off unused slave signals +task tie_off_s (); + tready = 1'b0; +endtask + +// Master +modport m ( + import tie_off_m, + input tready, + output tdata, tkeep, tlast, tvalid +); + +// Slave +modport s ( + import tie_off_s, + input tdata, tkeep, tlast, tvalid, + output tready +); + +endinterface + + +// ---------------------------------------------------------------------------- +// AXI4 stream routed +// ---------------------------------------------------------------------------- +interface AXI4SR #( + parameter AXI4S_DATA_BITS = AXI_DATA_BITS +) ( + input logic aclk +); + +localparam AXI4S_KEEP_BITS = AXI4S_DATA_BITS / 8; + +typedef logic [AXI4S_DATA_BITS-1:0] data_t; +typedef logic [AXI4S_KEEP_BITS-1:0] keep_t; +typedef logic [3:0] dest_t; + +data_t tdata; +keep_t tkeep; +dest_t tdest; +logic tlast; +logic tready; +logic tvalid; + +// Tie off unused master signals +task tie_off_m (); + tdata = 0; + tkeep = 0; + tlast = 1'b0; + tdest = 0; + tvalid = 1'b0; +endtask + +// Tie off unused slave signals +task tie_off_s (); + tready = 1'b0; +endtask + +// Master +modport m ( + import tie_off_m, + input tready, + output tdata, tkeep, tlast, tvalid, tdest +); + +// Slave +modport s ( + import tie_off_s, + input tdata, tkeep, tlast, tvalid, tdest, + output tready +); + +endinterface + +`endif \ No newline at end of file diff --git a/hw/hdl/pkg/axi_macros.svh b/hw/hdl/pkg/axi_macros.svh new file mode 100644 index 00000000..9a0fc514 --- /dev/null +++ 
b/hw/hdl/pkg/axi_macros.svh @@ -0,0 +1,40 @@ +`ifndef AXI_ASSIGN_SVH_ +`define AXI_ASSIGN_SVH_ + +`define AXIS_ASSIGN(s, m) \ + assign m.tdata = s.tdata; \ + assign m.tkeep = s.tkeep; \ + assign m.tlast = s.tlast; \ + assign m.tvalid = s.tvalid; \ + assign s.tready = m.tready; + +`define AXISR_ASSIGN(s, m) \ + assign m.tdata = s.tdata; \ + assign m.tkeep = s.tkeep; \ + assign m.tlast = s.tlast; \ + assign m.tvalid = s.tvalid; \ + assign s.tready = m.tready; \ + assign m.tdest = s.tdest; + +`define AXIL_ASSIGN(s, m) \ + assign m.araddr = s.araddr; \ + assign m.arprot = s.arprot; \ + assign m.arvalid = s.arvalid; \ + assign m.awaddr = s.awaddr; \ + assign m.awprot = s.awprot; \ + assign m.awvalid = s.awvalid; \ + assign m.bready = s.bready; \ + assign m.rready = s.rready; \ + assign m.wdata = s.wdata; \ + assign m.wstrb = s.wstrb; \ + assign m.wvalid = s.wvalid; \ + assign s.arready = m.arready; \ + assign s.awready = m.awready; \ + assign s.bresp = m.bresp; \ + assign s.bvalid = m.bvalid; \ + assign s.rdata = m.rdata; \ + assign s.rresp = m.rresp; \ + assign s.rvalid = m.rvalid; \ + assign s.wready = m.wready; + +`endif \ No newline at end of file diff --git a/hw/hdl/pkg/lynx_intf.sv b/hw/hdl/pkg/lynx_intf.sv new file mode 100644 index 00000000..4d3ab54e --- /dev/null +++ b/hw/hdl/pkg/lynx_intf.sv @@ -0,0 +1,378 @@ +`ifndef LYNX_INTF_SV_ +`define LYNX_INTF_SV_ + +import lynxTypes::*; + +// ---------------------------------------------------------------------------- +// Config +// ---------------------------------------------------------------------------- +interface tlbIntf #( + parameter N_ASSOC = 4 +); + +typedef logic [VADDR_BITS-1:0] addr_t; +typedef logic [N_ASSOC-1:0][TLB_DATA_BITS-1:0] data_t; + +addr_t addr; +data_t data; + +// Slave +modport s ( + input addr, + output data +); + +// Master +modport m ( + output addr, + input data +); + +endinterface + +// ---------------------------------------------------------------------------- +// Config interface +// 
---------------------------------------------------------------------------- +interface cnfgIntf (); + +logic done_host; +logic done_card; +logic done_sync; +logic restart; +pf_t pf; + +// Slave +modport s ( + output done_host, + output done_card, + output done_sync, + input restart, + output pf +); + +// Master +modport m ( + input done_host, + input done_card, + input done_sync, + output restart, + input pf +); + +endinterface + +// ---------------------------------------------------------------------------- +// DMA interface +// ---------------------------------------------------------------------------- +interface dmaIntf ( + input logic aclk +); + +dma_req_t req; +logic valid; +logic ready; + +logic done; + +// Tie off unused slave signals +task tie_off_s (); + ready = 1'b0; + done = 1'b0; +endtask + +// Tie off unused master signals +task tie_off_m (); + req = 0; + valid = 1'b0; +endtask + +// Slave +modport s ( + import tie_off_s, + input req, + input valid, + output ready, + output done +); + +// Master +modport m ( + import tie_off_m, + output req, + output valid, + input ready, + input done +); + +endinterface + +// ---------------------------------------------------------------------------- +// DMA ISR interface +// ---------------------------------------------------------------------------- +interface dmaIsrIntf (); + +dma_isr_req_t req; +logic valid; +logic ready; + +logic done; +logic isr_return; + +// Tie off unused slave signals +task tie_off_s (); + ready = 1'b0; + done = 1'b0; + isr_return = 1'b0; +endtask + +// Tie off unused master signals +task tie_off_m (); + req = 0; + valid = 1'b0; +endtask + +// Slave +modport s ( + import tie_off_s, + input req, + input valid, + output ready, + output done, + output isr_return +); + +// Master +modport m ( + import tie_off_m, + output req, + output valid, + input ready, + input done, + input isr_return +); + +endinterface + +// ---------------------------------------------------------------------------- 
+// Request interface: req_t payload with a valid/ready handshake +// ---------------------------------------------------------------------------- +interface reqIntf( + input logic aclk +); + +req_t req; +logic valid; +logic ready; + +// Tie off unused: each task drives the outputs of its own modport to idle values +task tie_off_s (); + ready = 1'b0; +endtask + +task tie_off_m (); + req = 0; + valid = 1'b0; +endtask + +// Slave: receives req/valid, drives ready +modport s ( + import tie_off_s, + input req, + input valid, + output ready +); + +// Master: drives req/valid, receives ready +modport m ( + import tie_off_m, + output req, + output valid, + input ready +); + +endinterface + +// ---------------------------------------------------------------------------- +// Farview Request interface: same handshake as reqIntf, carrying an rdma_req_t payload +// ---------------------------------------------------------------------------- +interface rdmaIntf( + input logic aclk +); + +rdma_req_t req; +logic valid; +logic ready; + +// Tie off unused: each task drives the outputs of its own modport to idle values +task tie_off_s (); + ready = 1'b0; +endtask + +task tie_off_m (); + req = 0; + valid = 1'b0; +endtask + +// Slave: receives req/valid, drives ready +modport s ( + import tie_off_s, + input req, + input valid, + output ready +); + +// Master: drives req/valid, receives ready +modport m ( + import tie_off_m, + output req, + output valid, + input ready +); + +endinterface + +// ---------------------------------------------------------------------------- +// Meta interface: DATA_BITS wide data word with a valid/ready handshake +// ---------------------------------------------------------------------------- +interface metaIntf #( + parameter DATA_BITS = 96 +) ( + input logic aclk +); + +logic valid; +logic ready; +logic [DATA_BITS-1:0] data; + +// Tie off unused: each task drives the outputs of its own modport to idle values +task tie_off_s (); + ready = 1'b0; +endtask + +task tie_off_m (); + data = 0; + valid = 1'b0; +endtask + +// Slave: receives valid/data, drives ready +modport s ( + import tie_off_s, + input valid, + output ready, + input data +); + +// Master: drives valid/data, receives ready +modport m ( + import tie_off_m, + output valid, + input ready, + output data +); + +endinterface + +// ---------------------------------------------------------------------------- +// Mux user interface +// ---------------------------------------------------------------------------- +interface muxUserIntf #( + parameter 
integer N_ID_BITS = N_REGIONS_BITS, + parameter integer ARB_DATA_BITS = AXI_DATA_BITS +); + +localparam integer BEAT_LOG_BITS = $clog2(ARB_DATA_BITS/8); // log2 of the number of bytes in one ARB_DATA_BITS wide beat + +logic [N_ID_BITS-1:0] id; +logic [LEN_BITS-BEAT_LOG_BITS-1:0] len; + +logic ready; +logic valid; + +// Tie off unused: note that here the slave side is the one driving id/len/ready +task tie_off_s (); + id = 0; + len = 0; + ready = 1'b0; +endtask + +task tie_off_m (); + valid = 1'b0; +endtask + +// Slave: drives id/len/ready, receives valid +modport s ( + import tie_off_s, + output id, + output len, + output ready, + input valid +); + +// Master: receives id/len/ready, drives valid +modport m ( + import tie_off_m, + input id, + input len, + input ready, + output valid +); + +endinterface + +// ---------------------------------------------------------------------------- +// XDMA bypass +// ---------------------------------------------------------------------------- +interface xdmaIntf (); + +logic [63:0] h2c_addr; +logic [27:0] h2c_len; +logic [15:0] h2c_ctl; +logic h2c_valid; +logic h2c_ready; + +logic [63:0] c2h_addr; +logic [27:0] c2h_len; +logic [15:0] c2h_ctl; +logic c2h_valid; +logic c2h_ready; + +logic [7:0] h2c_status; +logic [7:0] c2h_status; + +// Slave: receives h2c/c2h addr, len, ctl and valid; drives ready and status +modport s ( + input h2c_addr, + input h2c_len, + input h2c_ctl, + input h2c_valid, + output h2c_ready, + input c2h_addr, + input c2h_len, + input c2h_ctl, + input c2h_valid, + output c2h_ready, + output h2c_status, + output c2h_status +); + +// Master: drives h2c/c2h addr, len, ctl and valid; receives ready and status +modport m ( + output h2c_addr, + output h2c_len, + output h2c_ctl, + output h2c_valid, + input h2c_ready, + output c2h_addr, + output c2h_len, + output c2h_ctl, + output c2h_valid, + input c2h_ready, + input h2c_status, + input c2h_status +); + + +endinterface + + +`endif diff --git a/hw/hdl/pkg/lynx_macros.svh b/hw/hdl/pkg/lynx_macros.svh new file mode 100644 index 00000000..1ea48e1b --- /dev/null +++ b/hw/hdl/pkg/lynx_macros.svh @@ -0,0 +1,27 @@ +`ifndef REQ_ASSIGN_SVH_ +`define REQ_ASSIGN_SVH_ + +`define REQ_ASSIGN(s, m) \ + assign m.req = s.req; \ + assign m.valid = s.valid; \ + assign s.ready = m.ready; + +`define DMA_REQ_ASSIGN(s, m) 
\ + assign m.req = s.req; \ + assign m.valid = s.valid; \ + assign s.ready = m.ready; \ + assign s.done = m.done; + +`define DMA_ISR_REQ_ASSIGN(s, m) \ + assign m.req = s.req; \ + assign m.valid = s.valid; \ + assign s.ready = m.ready; \ + assign s.done = m.done; \ + assign s.isr_return = m.isr_return; + +`define META_ASSIGN(s, m) \ + assign m.data = s.data; \ + assign m.valid = s.valid; \ + assign s.ready = m.ready; + +`endif \ No newline at end of file diff --git a/hw/hdl/regs/axi_reg.sv b/hw/hdl/regs/axi_reg.sv new file mode 100644 index 00000000..2f625d32 --- /dev/null +++ b/hw/hdl/regs/axi_reg.sv @@ -0,0 +1,86 @@ +import lynxTypes::*; + +module axi_reg ( // Single AXI4 register stage: connects axi_in to the s_axi_* ports and axi_out to the m_axi_* ports of one axi_register_slice_0 instance + input logic aclk, + input logic aresetn, + + AXI4.s axi_in, + AXI4.m axi_out +); + +axi_register_slice_0 inst_reg_slice ( // module instances need an instance name; same name as used in axis_reg + .aclk(aclk), + .aresetn(aresetn), + .s_axi_awaddr(axi_in.awaddr), + .s_axi_awlen(axi_in.awlen), + .s_axi_awsize(axi_in.awsize), + .s_axi_awburst(axi_in.awburst), + .s_axi_awlock(axi_in.awlock), + .s_axi_awcache(axi_in.awcache), + .s_axi_awprot(axi_in.awprot), + .s_axi_awregion(axi_in.awregion), + .s_axi_awqos(axi_in.awqos), + .s_axi_awvalid(axi_in.awvalid), + .s_axi_awready(axi_in.awready), + .s_axi_araddr(axi_in.araddr), + .s_axi_arlen(axi_in.arlen), + .s_axi_arsize(axi_in.arsize), + .s_axi_arburst(axi_in.arburst), + .s_axi_arlock(axi_in.arlock), + .s_axi_arcache(axi_in.arcache), + .s_axi_arprot(axi_in.arprot), + .s_axi_arregion(axi_in.arregion), + .s_axi_arqos(axi_in.arqos), + .s_axi_arvalid(axi_in.arvalid), + .s_axi_arready(axi_in.arready), + .s_axi_wdata(axi_in.wdata), + .s_axi_wstrb(axi_in.wstrb), + .s_axi_wlast(axi_in.wlast), + .s_axi_wvalid(axi_in.wvalid), + .s_axi_wready(axi_in.wready), + .s_axi_bresp(axi_in.bresp), + .s_axi_bvalid(axi_in.bvalid), + .s_axi_bready(axi_in.bready), + .s_axi_rdata(axi_in.rdata), + .s_axi_rresp(axi_in.rresp), + .s_axi_rlast(axi_in.rlast), + .s_axi_rvalid(axi_in.rvalid), + .s_axi_rready(axi_in.rready), + .m_axi_awaddr(axi_out.awaddr), + .m_axi_awlen(axi_out.awlen), 
+ .m_axi_awsize(axi_out.awsize), + .m_axi_awburst(axi_out.awburst), + .m_axi_awlock(axi_out.awlock), + .m_axi_awcache(axi_out.awcache), + .m_axi_awprot(axi_out.awprot), + .m_axi_awregion(axi_out.awregion), + .m_axi_awqos(axi_out.awqos), + .m_axi_awvalid(axi_out.awvalid), + .m_axi_awready(axi_out.awready), + .m_axi_araddr(axi_out.araddr), + .m_axi_arlen(axi_out.arlen), + .m_axi_arsize(axi_out.arsize), + .m_axi_arburst(axi_out.arburst), + .m_axi_arlock(axi_out.arlock), + .m_axi_arcache(axi_out.arcache), + .m_axi_arprot(axi_out.arprot), + .m_axi_arregion(axi_out.arregion), + .m_axi_arqos(axi_out.arqos), + .m_axi_arvalid(axi_out.arvalid), + .m_axi_arready(axi_out.arready), + .m_axi_wdata(axi_out.wdata), + .m_axi_wstrb(axi_out.wstrb), + .m_axi_wlast(axi_out.wlast), + .m_axi_wvalid(axi_out.wvalid), + .m_axi_wready(axi_out.wready), + .m_axi_bresp(axi_out.bresp), + .m_axi_bvalid(axi_out.bvalid), + .m_axi_bready(axi_out.bready), + .m_axi_rdata(axi_out.rdata), + .m_axi_rresp(axi_out.rresp), + .m_axi_rlast(axi_out.rlast), + .m_axi_rvalid(axi_out.rvalid), + .m_axi_rready(axi_out.rready) +); + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/axi_reg_array.sv b/hw/hdl/regs/axi_reg_array.sv new file mode 100644 index 00000000..8cff3312 --- /dev/null +++ b/hw/hdl/regs/axi_reg_array.sv @@ -0,0 +1,27 @@ +import lynxTypes::*; + +`include "axi_macros.svh" + +module axi_reg_array #( + parameter integer N_STAGES = 2 +) ( + input logic aclk, + input logic aresetn, + + AXI4.s axi_in, + AXI4.m axi_out +); + +// ----------------------------------------------------------------------------------------------------------------------- +// -- Register slices ---------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------------------------------------------------- +AXI4 axi_s [N_STAGES+1] (); + +`AXIS_ASSIGN(axi_in, axi_s[0]) // NOTE(review): axi_s is an AXI4 array, but AXIS_ASSIGN is the macro used for AXI4S interfaces elsewhere - confirm against axi_macros.svh +`AXIS_ASSIGN(axi_s[N_STAGES], axi_out) + 
+for(genvar i = 0; i < N_STAGES; i++) begin + axi_reg inst_reg (.aclk(aclk), .aresetn(aresetn), .axi_in(axi_s[i]), .axi_out(axi_s[i+1])); +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/axil_decoupler.sv b/hw/hdl/regs/axil_decoupler.sv new file mode 100644 index 00000000..5d00e04a --- /dev/null +++ b/hw/hdl/regs/axil_decoupler.sv @@ -0,0 +1,165 @@ +import lynxTypes::*; + +module axil_decoupler ( // Per-region AXI4-Lite decoupler: while decouple[i] is set, every valid/ready of region i is forced low in both directions + input logic [N_REGIONS-1:0] decouple, + + AXI4L.s axi_in [N_REGIONS], + AXI4L.m axi_out [N_REGIONS] +); + +// ----------------------------------------------------------------------------------------------------------------------- +// -- Decoupling --------------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------------------------------------------------- +logic [N_REGIONS-1:0][AXI_ADDR_BITS-1:0] axi_in_araddr; // every flattened signal holds one packed entry per region, indexed by [i] below +logic [N_REGIONS-1:0][2:0] axi_in_arprot; +logic [N_REGIONS-1:0][3:0] axi_in_arqos; +logic [N_REGIONS-1:0][3:0] axi_in_arregion; +logic [N_REGIONS-1:0] axi_in_arready; +logic [N_REGIONS-1:0] axi_in_arvalid; +logic [N_REGIONS-1:0][AXI_ADDR_BITS-1:0] axi_in_awaddr; +logic [N_REGIONS-1:0][2:0] axi_in_awprot; +logic [N_REGIONS-1:0][3:0] axi_in_awqos; +logic [N_REGIONS-1:0][3:0] axi_in_awregion; +logic [N_REGIONS-1:0] axi_in_awready; +logic [N_REGIONS-1:0] axi_in_awvalid; +logic [N_REGIONS-1:0][AXIL_DATA_BITS-1:0] axi_in_rdata; +logic [N_REGIONS-1:0][1:0] axi_in_rresp; +logic [N_REGIONS-1:0] axi_in_rready; +logic [N_REGIONS-1:0] axi_in_rvalid; +logic [N_REGIONS-1:0][AXIL_DATA_BITS-1:0] axi_in_wdata; +logic [N_REGIONS-1:0][AXIL_DATA_BITS/8-1:0] axi_in_wstrb; +logic [N_REGIONS-1:0] axi_in_wready; +logic [N_REGIONS-1:0] axi_in_wvalid; +logic [N_REGIONS-1:0][1:0] axi_in_bresp; +logic [N_REGIONS-1:0] axi_in_bready; +logic [N_REGIONS-1:0] axi_in_bvalid; + +logic [N_REGIONS-1:0][AXI_ADDR_BITS-1:0] axi_out_araddr; +logic [N_REGIONS-1:0][2:0] axi_out_arprot; +logic [N_REGIONS-1:0][3:0] axi_out_arqos; +logic [N_REGIONS-1:0][3:0] axi_out_arregion; +logic [N_REGIONS-1:0] axi_out_arready; +logic [N_REGIONS-1:0] axi_out_arvalid; +logic [N_REGIONS-1:0][AXI_ADDR_BITS-1:0] axi_out_awaddr; +logic [N_REGIONS-1:0][2:0] axi_out_awprot; +logic [N_REGIONS-1:0][3:0] axi_out_awqos; +logic [N_REGIONS-1:0][3:0] axi_out_awregion; +logic [N_REGIONS-1:0] axi_out_awready; +logic [N_REGIONS-1:0] axi_out_awvalid; +logic [N_REGIONS-1:0][AXIL_DATA_BITS-1:0] axi_out_rdata; +logic [N_REGIONS-1:0][1:0] axi_out_rresp; +logic [N_REGIONS-1:0] axi_out_rready; +logic [N_REGIONS-1:0] 
axi_out_rvalid; +logic [N_REGIONS-1:0][AXIL_DATA_BITS-1:0] axi_out_wdata; +logic [N_REGIONS-1:0][AXIL_DATA_BITS/8-1:0] axi_out_wstrb; +logic [N_REGIONS-1:0] axi_out_wready; +logic [N_REGIONS-1:0] axi_out_wvalid; +logic [N_REGIONS-1:0][1:0] axi_out_bresp; +logic [N_REGIONS-1:0] axi_out_bready; +logic [N_REGIONS-1:0] axi_out_bvalid; + +// Assign: flatten the interface arrays into the per-region packed vectors +for(genvar i = 0; i < N_REGIONS; i++) begin + // In + assign axi_in_araddr[i] = axi_in[i].araddr; + assign axi_in_arprot[i] = axi_in[i].arprot; + assign axi_in_arqos[i] = axi_in[i].arqos; + assign axi_in_arregion[i] = axi_in[i].arregion; + assign axi_in_arvalid[i] = axi_in[i].arvalid; + assign axi_in[i].arready = axi_in_arready[i]; + + assign axi_in_awaddr[i] = axi_in[i].awaddr; + assign axi_in_awprot[i] = axi_in[i].awprot; + assign axi_in_awqos[i] = axi_in[i].awqos; + assign axi_in_awregion[i] = axi_in[i].awregion; + assign axi_in_awvalid[i] = axi_in[i].awvalid; + assign axi_in[i].awready = axi_in_awready[i]; + + assign axi_in[i].rdata = axi_in_rdata[i]; + assign axi_in[i].rresp = axi_in_rresp[i]; + assign axi_in[i].rvalid = axi_in_rvalid[i]; + assign axi_in_rready[i] = axi_in[i].rready; + + assign axi_in_wdata[i] = axi_in[i].wdata; + assign axi_in_wstrb[i] = axi_in[i].wstrb; + assign axi_in_wvalid[i] = axi_in[i].wvalid; + assign axi_in[i].wready = axi_in_wready[i]; + + assign axi_in[i].bresp = axi_in_bresp[i]; + assign axi_in[i].bvalid = axi_in_bvalid[i]; + assign axi_in_bready[i] = axi_in[i].bready; + + // Out + assign axi_out[i].araddr = axi_out_araddr[i]; + assign axi_out[i].arprot = axi_out_arprot[i]; + assign axi_out[i].arqos = axi_out_arqos[i]; + assign axi_out[i].arregion = axi_out_arregion[i]; + assign axi_out[i].arvalid = axi_out_arvalid[i]; + assign axi_out_arready[i] = axi_out[i].arready; + + assign axi_out[i].awaddr = axi_out_awaddr[i]; + assign axi_out[i].awprot = axi_out_awprot[i]; + assign axi_out[i].awqos = axi_out_awqos[i]; + assign axi_out[i].awregion = axi_out_awregion[i]; + assign axi_out[i].awvalid = axi_out_awvalid[i]; + assign axi_out_awready[i] = axi_out[i].awready; + + assign axi_out_rdata[i] = 
axi_out[i].rdata; + assign axi_out_rresp[i] = axi_out[i].rresp; + assign axi_out_rvalid[i] = axi_out[i].rvalid; + assign axi_out[i].rready = axi_out_rready[i]; + + assign axi_out[i].wdata = axi_out_wdata[i]; + assign axi_out[i].wstrb = axi_out_wstrb[i]; + assign axi_out[i].wvalid = axi_out_wvalid[i]; + assign axi_out_wready[i] = axi_out[i].wready; + + assign axi_out_bresp[i] = axi_out[i].bresp; + assign axi_out_bvalid[i] = axi_out[i].bvalid; + assign axi_out[i].bready = axi_out_bready[i]; +end + +genvar i; +generate +for(i = 0; i < N_REGIONS; i++) begin + // ar + assign axi_out_arvalid[i] = decouple[i] ? 1'b0 : axi_in_arvalid[i]; + assign axi_in_arready[i] = decouple[i] ? 1'b0 : axi_out_arready[i]; + + assign axi_out_araddr[i] = axi_in_araddr[i]; + assign axi_out_arprot[i] = axi_in_arprot[i]; + assign axi_out_arqos[i] = axi_in_arqos[i]; + assign axi_out_arregion[i] = axi_in_arregion[i]; + + // aw - the write address handshake must be gated on its own awvalid/awready pair + assign axi_out_awvalid[i] = decouple[i] ? 1'b0 : axi_in_awvalid[i]; + assign axi_in_awready[i] = decouple[i] ? 1'b0 : axi_out_awready[i]; + + assign axi_out_awaddr[i] = axi_in_awaddr[i]; + assign axi_out_awprot[i] = axi_in_awprot[i]; + assign axi_out_awqos[i] = axi_in_awqos[i]; + assign axi_out_awregion[i] = axi_in_awregion[i]; + + // b + assign axi_in_bvalid[i] = decouple[i] ? 1'b0 : axi_out_bvalid[i]; + assign axi_out_bready[i] = decouple[i] ? 1'b0 : axi_in_bready[i]; + + assign axi_in_bresp[i] = axi_out_bresp[i]; + + // r + assign axi_in_rvalid[i] = decouple[i] ? 1'b0 : axi_out_rvalid[i]; + assign axi_out_rready[i] = decouple[i] ? 1'b0 : axi_in_rready[i]; + + assign axi_in_rdata[i] = axi_out_rdata[i]; + assign axi_in_rresp[i] = axi_out_rresp[i]; + + // w + assign axi_out_wvalid[i] = decouple[i] ? 1'b0 : axi_in_wvalid[i]; + assign axi_in_wready[i] = decouple[i] ? 
1'b0 : axi_out_wready[i]; + + assign axi_out_wdata[i] = axi_in_wdata[i]; + assign axi_out_wstrb[i] = axi_in_wstrb[i]; +end +endgenerate + +endmodule diff --git a/hw/hdl/regs/axil_reg.sv b/hw/hdl/regs/axil_reg.sv new file mode 100644 index 00000000..8334f2d1 --- /dev/null +++ b/hw/hdl/regs/axil_reg.sv @@ -0,0 +1,54 @@ +import lynxTypes::*; + +module axil_reg ( // Single AXI4-Lite register stage built from one axil_register_slice_0 instance + input logic aclk, + input logic aresetn, + + AXI4L.s axi_in, + AXI4L.m axi_out +); + +axil_register_slice_0 inst_reg_slice ( // module instances need an instance name; same name as used in axis_reg + .aclk(aclk), + .aresetn(aresetn), + .s_axi_awaddr(axi_in.awaddr), + .s_axi_awprot(axi_in.awprot), + .s_axi_awvalid(axi_in.awvalid), + .s_axi_awready(axi_in.awready), + .s_axi_araddr(axi_in.araddr), + .s_axi_arprot(axi_in.arprot), + .s_axi_arvalid(axi_in.arvalid), + .s_axi_arready(axi_in.arready), + .s_axi_wdata(axi_in.wdata), + .s_axi_wstrb(axi_in.wstrb), + .s_axi_wvalid(axi_in.wvalid), + .s_axi_wready(axi_in.wready), + .s_axi_bresp(axi_in.bresp), + .s_axi_bvalid(axi_in.bvalid), + .s_axi_bready(axi_in.bready), + .s_axi_rdata(axi_in.rdata), + .s_axi_rresp(axi_in.rresp), + .s_axi_rvalid(axi_in.rvalid), + .s_axi_rready(axi_in.rready), + .m_axi_awaddr(axi_out.awaddr), + .m_axi_awprot(axi_out.awprot), + .m_axi_awvalid(axi_out.awvalid), + .m_axi_awready(axi_out.awready), + .m_axi_araddr(axi_out.araddr), + .m_axi_arprot(axi_out.arprot), + .m_axi_arvalid(axi_out.arvalid), + .m_axi_arready(axi_out.arready), + .m_axi_wdata(axi_out.wdata), + .m_axi_wstrb(axi_out.wstrb), + .m_axi_wvalid(axi_out.wvalid), + .m_axi_wready(axi_out.wready), + .m_axi_bresp(axi_out.bresp), + .m_axi_bvalid(axi_out.bvalid), + .m_axi_bready(axi_out.bready), + .m_axi_rdata(axi_out.rdata), + .m_axi_rresp(axi_out.rresp), + .m_axi_rvalid(axi_out.rvalid), + .m_axi_rready(axi_out.rready) +); + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/axis_decoupler.sv b/hw/hdl/regs/axis_decoupler.sv new file mode 100644 index 00000000..46b794f4 --- /dev/null +++ b/hw/hdl/regs/axis_decoupler.sv @@ -0,0 +1,50 @@ +import lynxTypes::*; + 
+module axis_decoupler ( // Per-region AXI4-Stream decoupler: while decouple[i] is set, tvalid and tready of region i are forced low + input logic [N_REGIONS-1:0] decouple, + + AXI4S.s axis_in [N_REGIONS], + AXI4S.m axis_out [N_REGIONS] +); + +// ----------------------------------------------------------------------------------------------------------------------- +// -- Decoupling --------------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------------------------------------------------- +logic [N_REGIONS-1:0] axis_in_tvalid; +logic [N_REGIONS-1:0] axis_in_tready; +logic [N_REGIONS-1:0][AXI_DATA_BITS-1:0] axis_in_tdata; +logic [N_REGIONS-1:0][AXI_DATA_BITS/8-1:0] axis_in_tkeep; +logic [N_REGIONS-1:0] axis_in_tlast; + +logic [N_REGIONS-1:0] axis_out_tvalid; +logic [N_REGIONS-1:0] axis_out_tready; +logic [N_REGIONS-1:0][AXI_DATA_BITS-1:0] axis_out_tdata; +logic [N_REGIONS-1:0][AXI_DATA_BITS/8-1:0] axis_out_tkeep; +logic [N_REGIONS-1:0] axis_out_tlast; + +// Assign: flatten the interface arrays into the per-region packed vectors +for(genvar i = 0; i < N_REGIONS; i++) begin + assign axis_in_tvalid[i] = axis_in[i].tvalid; + assign axis_in_tdata[i] = axis_in[i].tdata; + assign axis_in_tkeep[i] = axis_in[i].tkeep; + assign axis_in_tlast[i] = axis_in[i].tlast; + assign axis_in[i].tready = axis_in_tready[i]; + + assign axis_out[i].tvalid = axis_out_tvalid[i]; + assign axis_out[i].tdata = axis_out_tdata[i]; + assign axis_out[i].tkeep = axis_out_tkeep[i]; + assign axis_out[i].tlast = axis_out_tlast[i]; + assign axis_out_tready[i] = axis_out[i].tready; +end + +// Decoupler: only the tvalid/tready handshake is gated; tdata, tlast and tkeep pass through unchanged +for(genvar i = 0; i < N_REGIONS; i++) begin + assign axis_out_tvalid[i] = decouple[i] ? 1'b0 : axis_in_tvalid[i]; + assign axis_in_tready[i] = decouple[i] ? 
1'b0 : axis_out_tready[i]; + + assign axis_out_tdata[i] = axis_in_tdata[i]; + assign axis_out_tlast[i] = axis_in_tlast[i]; + assign axis_out_tkeep[i] = axis_in_tkeep[i]; +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/axis_reg.sv b/hw/hdl/regs/axis_reg.sv new file mode 100644 index 00000000..0846949a --- /dev/null +++ b/hw/hdl/regs/axis_reg.sv @@ -0,0 +1,62 @@ +import lynxTypes::*; + +module axis_reg #( // Single AXI4-Stream register stage; the instantiated register slice module is selected by DATA_BITS + parameter integer DATA_BITS = AXI_DATA_BITS +) ( + input logic aclk, + input logic aresetn, + + AXI4S.s axis_in, + AXI4S.m axis_out +); + +if(DATA_BITS == 512) begin // only 512, 1024 and 2048 are handled; any other DATA_BITS value instantiates nothing + axis_register_slice_512_0 inst_reg_slice ( + .aclk(aclk), + .aresetn(aresetn), + .s_axis_tvalid(axis_in.tvalid), + .s_axis_tready(axis_in.tready), + .s_axis_tdata(axis_in.tdata), + .s_axis_tkeep(axis_in.tkeep), + .s_axis_tlast(axis_in.tlast), + .m_axis_tvalid(axis_out.tvalid), + .m_axis_tready(axis_out.tready), + .m_axis_tdata(axis_out.tdata), + .m_axis_tkeep(axis_out.tkeep), + .m_axis_tlast(axis_out.tlast) + ); +end +else if(DATA_BITS == 1024) begin + axis_register_slice_1k_0 inst_reg_slice ( + .aclk(aclk), + .aresetn(aresetn), + .s_axis_tvalid(axis_in.tvalid), + .s_axis_tready(axis_in.tready), + .s_axis_tdata(axis_in.tdata), + .s_axis_tkeep(axis_in.tkeep), + .s_axis_tlast(axis_in.tlast), + .m_axis_tvalid(axis_out.tvalid), + .m_axis_tready(axis_out.tready), + .m_axis_tdata(axis_out.tdata), + .m_axis_tkeep(axis_out.tkeep), + .m_axis_tlast(axis_out.tlast) + ); +end +else if(DATA_BITS == 2048) begin + axis_register_slice_2k_0 inst_reg_slice ( + .aclk(aclk), + .aresetn(aresetn), + .s_axis_tvalid(axis_in.tvalid), + .s_axis_tready(axis_in.tready), + .s_axis_tdata(axis_in.tdata), + .s_axis_tkeep(axis_in.tkeep), + .s_axis_tlast(axis_in.tlast), + .m_axis_tvalid(axis_out.tvalid), + .m_axis_tready(axis_out.tready), + .m_axis_tdata(axis_out.tdata), + .m_axis_tkeep(axis_out.tkeep), + .m_axis_tlast(axis_out.tlast) + ); +end + +endmodule \ No newline at end of file diff --git 
a/hw/hdl/regs/axis_reg_array.sv b/hw/hdl/regs/axis_reg_array.sv new file mode 100644 index 00000000..029baa6e --- /dev/null +++ b/hw/hdl/regs/axis_reg_array.sv @@ -0,0 +1,28 @@ +import lynxTypes::*; + +`include "axi_macros.svh" + +module axis_reg_array #( // Chain of N_STAGES axis_reg stages between axis_in and axis_out + parameter integer N_STAGES = 2, + parameter integer DATA_BITS = AXI_DATA_BITS +) ( + input logic aclk, + input logic aresetn, + + AXI4S.s axis_in, + AXI4S.m axis_out +); + +// ----------------------------------------------------------------------------------------------------------------------- +// -- Register slices ---------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------------------------------------------------- +AXI4S #(.AXI4S_DATA_BITS(DATA_BITS)) axis_s [N_STAGES+1] (); // N_STAGES+1 links: axis_s[0] carries the input, axis_s[N_STAGES] the output + +`AXIS_ASSIGN(axis_in, axis_s[0]) +`AXIS_ASSIGN(axis_s[N_STAGES], axis_out) + +for(genvar i = 0; i < N_STAGES; i++) begin + axis_reg #(.DATA_BITS(DATA_BITS)) inst_reg (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_s[i]), .axis_out(axis_s[i+1])); +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/axis_reg_array_rtl.sv b/hw/hdl/regs/axis_reg_array_rtl.sv new file mode 100644 index 00000000..5c7345e8 --- /dev/null +++ b/hw/hdl/regs/axis_reg_array_rtl.sv @@ -0,0 +1,27 @@ +import lynxTypes::*; + +`include "axi_macros.svh" + +module axis_reg_array_rtl #( // Chain of N_STAGES axis_reg_rtl stages between axis_in and axis_out + parameter integer N_STAGES = 2 +) ( + input logic aclk, + input logic aresetn, + + AXI4S.s axis_in, + AXI4S.m axis_out +); + +// ----------------------------------------------------------------------------------------------------------------------- +// -- Register slices ---------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------------------------------------------------- +AXI4S axis_s [N_STAGES+1] (); // N_STAGES+1 links: axis_s[0] carries the input, axis_s[N_STAGES] the output + +`AXIS_ASSIGN(axis_in, 
axis_s[0]) +`AXIS_ASSIGN(axis_s[N_STAGES], axis_out) + +for(genvar i = 0; i < N_STAGES; i++) begin + axis_reg_rtl inst_reg (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_s[i]), .axis_out(axis_s[i+1])); // REG_DATA_BITS is left at its default here +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/axis_reg_rtl.sv b/hw/hdl/regs/axis_reg_rtl.sv new file mode 100644 index 00000000..f82ebba9 --- /dev/null +++ b/hw/hdl/regs/axis_reg_rtl.sv @@ -0,0 +1,100 @@ +import lynxTypes::*; + +//`define XILINX_REG + +module axis_reg_rtl #( // RTL AXI4-Stream register stage: registered tready plus an output register and one temporary holding register + parameter integer REG_DATA_BITS = AXI_DATA_BITS +) ( + input logic aclk, + input logic aresetn, + + AXI4S.s axis_in, + AXI4S.m axis_out +); + +// Internal registers (_C is the current registered value, _N the next value computed combinationally) +logic axis_in_tready_C, axis_in_tready_N; + +logic [REG_DATA_BITS-1:0] axis_out_tdata_C, axis_out_tdata_N; +logic [(REG_DATA_BITS/8)-1:0] axis_out_tkeep_C, axis_out_tkeep_N; +logic axis_out_tvalid_C, axis_out_tvalid_N; +logic axis_out_tlast_C, axis_out_tlast_N; + +logic [REG_DATA_BITS-1:0] tmp_tdata_C, tmp_tdata_N; +logic [(REG_DATA_BITS/8)-1:0] tmp_tkeep_C, tmp_tkeep_N; +logic tmp_tvalid_C, tmp_tvalid_N; +logic tmp_tlast_C, tmp_tlast_N; + +// Comb: ready next cycle if the output is being accepted, or if tmp is empty and the output register is empty or no input beat is arriving +assign axis_in_tready_N = axis_out.tready || (!tmp_tvalid_C && (!axis_out_tvalid_C || !axis_in.tvalid)); + +always_comb begin + axis_out_tvalid_N = axis_out_tvalid_C; + axis_out_tdata_N = axis_out_tdata_C; + axis_out_tkeep_N = axis_out_tkeep_C; + axis_out_tlast_N = axis_out_tlast_C; + + tmp_tvalid_N = tmp_tvalid_C; + tmp_tdata_N = tmp_tdata_C; + tmp_tkeep_N = tmp_tkeep_C; + tmp_tlast_N = tmp_tlast_C; + + if(axis_in_tready_C) begin // the input side is presenting tready this cycle + if(axis_out.tready || !axis_out_tvalid_C) begin // output register is free or being drained: load it from the input + axis_out_tvalid_N = axis_in.tvalid; + axis_out_tdata_N = axis_in.tdata; + axis_out_tkeep_N = axis_in.tkeep; + axis_out_tlast_N = axis_in.tlast; + end + else begin // output register is occupied and stalled: park the input in tmp + tmp_tvalid_N = axis_in.tvalid; + tmp_tdata_N = axis_in.tdata; + tmp_tkeep_N = axis_in.tkeep; + tmp_tlast_N = axis_in.tlast; + end + end + else if(axis_out.tready) begin // input not ready but output accepted: move tmp into the output register + axis_out_tvalid_N = tmp_tvalid_C; + 
axis_out_tdata_N = tmp_tdata_C; + axis_out_tkeep_N = tmp_tkeep_C; + axis_out_tlast_N = tmp_tlast_C; + + tmp_tvalid_N = 1'b0; + end +end + +// Reg process (asynchronous, active-low reset clears every register including the registered tready) +always_ff @(posedge aclk, negedge aresetn) begin + if(aresetn == 1'b0) begin + axis_out_tdata_C <= 0; + axis_out_tkeep_C <= 0; + axis_out_tlast_C <= 0; + axis_out_tvalid_C <= 0; + tmp_tdata_C <= 0; + tmp_tkeep_C <= 0; + tmp_tlast_C <= 0; + tmp_tvalid_C <= 0; + axis_in_tready_C <= 0; + end + else begin + axis_out_tdata_C <= axis_out_tdata_N; + axis_out_tkeep_C <= axis_out_tkeep_N; + axis_out_tlast_C <= axis_out_tlast_N; + axis_out_tvalid_C <= axis_out_tvalid_N; + tmp_tdata_C <= tmp_tdata_N; + tmp_tkeep_C <= tmp_tkeep_N; + tmp_tlast_C <= tmp_tlast_N; + tmp_tvalid_C <= tmp_tvalid_N; + axis_in_tready_C <= axis_in_tready_N; + end +end + +// Outputs: all driven directly from registers +assign axis_in.tready = axis_in_tready_C; + +assign axis_out.tdata = axis_out_tdata_C; +assign axis_out.tkeep = axis_out_tkeep_C; +assign axis_out.tlast = axis_out_tlast_C; +assign axis_out.tvalid = axis_out_tvalid_C; + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/axisr_decoupler.sv b/hw/hdl/regs/axisr_decoupler.sv new file mode 100644 index 00000000..47e0f06b --- /dev/null +++ b/hw/hdl/regs/axisr_decoupler.sv @@ -0,0 +1,55 @@ +import lynxTypes::*; + +module axisr_decoupler ( // Per-region decoupler for AXI4SR streams: while decouple[i] is set, tvalid and tready of region i are forced low + input logic [N_REGIONS-1:0] decouple, + + AXI4SR.s axis_in [N_REGIONS], + AXI4SR.m axis_out [N_REGIONS] +); + +// ----------------------------------------------------------------------------------------------------------------------- +// -- Decoupling --------------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------------------------------------------------- +logic [N_REGIONS-1:0] axis_in_tvalid; +logic [N_REGIONS-1:0] axis_in_tready; +logic [N_REGIONS-1:0][AXI_DATA_BITS-1:0] axis_in_tdata; +logic [N_REGIONS-1:0][AXI_DATA_BITS/8-1:0] axis_in_tkeep; +logic 
[N_REGIONS-1:0] axis_in_tlast; +logic [N_REGIONS-1:0][3:0] axis_in_tdest; + +logic [N_REGIONS-1:0] axis_out_tvalid; +logic [N_REGIONS-1:0] axis_out_tready; +logic [N_REGIONS-1:0][AXI_DATA_BITS-1:0] axis_out_tdata; +logic [N_REGIONS-1:0][AXI_DATA_BITS/8-1:0] axis_out_tkeep; +logic [N_REGIONS-1:0] axis_out_tlast; +logic [N_REGIONS-1:0][3:0] axis_out_tdest; + +// Assign: flatten the interface arrays into the per-region packed vectors +for(genvar i = 0; i < N_REGIONS; i++) begin + assign axis_in_tvalid[i] = axis_in[i].tvalid; + assign axis_in_tdata[i] = axis_in[i].tdata; + assign axis_in_tkeep[i] = axis_in[i].tkeep; + assign axis_in_tlast[i] = axis_in[i].tlast; + assign axis_in_tdest[i] = axis_in[i].tdest; + assign axis_in[i].tready = axis_in_tready[i]; + + assign axis_out[i].tvalid = axis_out_tvalid[i]; + assign axis_out[i].tdata = axis_out_tdata[i]; + assign axis_out[i].tkeep = axis_out_tkeep[i]; + assign axis_out[i].tlast = axis_out_tlast[i]; + assign axis_out[i].tdest = axis_out_tdest[i]; + assign axis_out_tready[i] = axis_out[i].tready; +end + +// Decoupler: only the tvalid/tready handshake is gated; tdata, tlast, tkeep and tdest pass through unchanged +for(genvar i = 0; i < N_REGIONS; i++) begin + assign axis_out_tvalid[i] = decouple[i] ? 1'b0 : axis_in_tvalid[i]; + assign axis_in_tready[i] = decouple[i] ? 
1'b0 : axis_out_tready[i]; + + assign axis_out_tdata[i] = axis_in_tdata[i]; + assign axis_out_tlast[i] = axis_in_tlast[i]; + assign axis_out_tkeep[i] = axis_in_tkeep[i]; + assign axis_out_tdest[i] = axis_in_tdest[i]; +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/axisr_reg.sv b/hw/hdl/regs/axisr_reg.sv new file mode 100644 index 00000000..175c2636 --- /dev/null +++ b/hw/hdl/regs/axisr_reg.sv @@ -0,0 +1,68 @@ +import lynxTypes::*; + +module axisr_reg #( // Single register stage for AXI4SR streams (with tdest); the instantiated register slice module is selected by DATA_BITS + parameter integer DATA_BITS = AXI_DATA_BITS +) ( + input logic aclk, + input logic aresetn, + + AXI4SR.s axis_in, + AXI4SR.m axis_out +); + +if(DATA_BITS == 512) begin // only 512, 1024 and 2048 are handled; any other DATA_BITS value instantiates nothing + axisr_register_slice_512_0 inst_reg_slice ( + .aclk(aclk), + .aresetn(aresetn), + .s_axis_tvalid(axis_in.tvalid), + .s_axis_tready(axis_in.tready), + .s_axis_tdata(axis_in.tdata), + .s_axis_tkeep(axis_in.tkeep), + .s_axis_tlast(axis_in.tlast), + .s_axis_tdest(axis_in.tdest), + .m_axis_tvalid(axis_out.tvalid), + .m_axis_tready(axis_out.tready), + .m_axis_tdata(axis_out.tdata), + .m_axis_tkeep(axis_out.tkeep), + .m_axis_tlast(axis_out.tlast), + .m_axis_tdest(axis_out.tdest) + ); +end +else if(DATA_BITS == 1024) begin + axisr_register_slice_1k_0 inst_reg_slice ( + .aclk(aclk), + .aresetn(aresetn), + .s_axis_tvalid(axis_in.tvalid), + .s_axis_tready(axis_in.tready), + .s_axis_tdata(axis_in.tdata), + .s_axis_tkeep(axis_in.tkeep), + .s_axis_tlast(axis_in.tlast), + .s_axis_tdest(axis_in.tdest), + .m_axis_tvalid(axis_out.tvalid), + .m_axis_tready(axis_out.tready), + .m_axis_tdata(axis_out.tdata), + .m_axis_tkeep(axis_out.tkeep), + .m_axis_tlast(axis_out.tlast), + .m_axis_tdest(axis_out.tdest) + ); +end +else if(DATA_BITS == 2048) begin + axisr_register_slice_2k_0 inst_reg_slice ( + .aclk(aclk), + .aresetn(aresetn), + .s_axis_tvalid(axis_in.tvalid), + .s_axis_tready(axis_in.tready), + .s_axis_tdata(axis_in.tdata), + .s_axis_tkeep(axis_in.tkeep), + .s_axis_tlast(axis_in.tlast), + .s_axis_tdest(axis_in.tdest), + 
.m_axis_tvalid(axis_out.tvalid), + .m_axis_tready(axis_out.tready), + .m_axis_tdata(axis_out.tdata), + .m_axis_tkeep(axis_out.tkeep), + .m_axis_tlast(axis_out.tlast), + .m_axis_tdest(axis_out.tdest) + ); +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/axisr_reg_array.sv b/hw/hdl/regs/axisr_reg_array.sv new file mode 100644 index 00000000..48a82f9f --- /dev/null +++ b/hw/hdl/regs/axisr_reg_array.sv @@ -0,0 +1,28 @@ +import lynxTypes::*; + +`include "axi_macros.svh" + +module axisr_reg_array #( // Chain of N_STAGES axisr_reg stages between axis_in and axis_out + parameter integer N_STAGES = 2, + parameter integer DATA_BITS = AXI_DATA_BITS +) ( + input logic aclk, + input logic aresetn, + + AXI4SR.s axis_in, + AXI4SR.m axis_out +); + +// ----------------------------------------------------------------------------------------------------------------------- +// -- Register slices ---------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------------------------------------------------- +AXI4SR #(.AXI4S_DATA_BITS(DATA_BITS)) axis_s [N_STAGES+1] (); // N_STAGES+1 links: axis_s[0] carries the input, axis_s[N_STAGES] the output + +`AXISR_ASSIGN(axis_in, axis_s[0]) +`AXISR_ASSIGN(axis_s[N_STAGES], axis_out) + +for(genvar i = 0; i < N_STAGES; i++) begin + axisr_reg #(.DATA_BITS(DATA_BITS)) inst_reg (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_s[i]), .axis_out(axis_s[i+1])); +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/data_queue_credits_sink.sv b/hw/hdl/regs/data_queue_credits_sink.sv new file mode 100644 index 00000000..6d0a8df3 --- /dev/null +++ b/hw/hdl/regs/data_queue_credits_sink.sv @@ -0,0 +1,63 @@ +import lynxTypes::*; + +module data_queue_credits_sink #( // Data FIFO from an AXI4SR input to an AXI4S output; axis_in.tdest is not connected to the FIFO + parameter integer DATA_BITS = AXI_DATA_BITS +) ( + input logic aclk, + input logic aresetn, + + AXI4SR.s axis_in, + AXI4S.m axis_out +); + +if(N_DDR_CHAN == 1) begin // the FIFO variant is chosen by N_DDR_CHAN; the DATA_BITS parameter is not referenced in this module + axis_data_fifo_512 inst_data ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(axis_in.tvalid), + 
.s_axis_tready(axis_in.tready), + .s_axis_tdata(axis_in.tdata), + .s_axis_tkeep(axis_in.tkeep), + .s_axis_tlast(axis_in.tlast), + .m_axis_tvalid(axis_out.tvalid), + .m_axis_tready(axis_out.tready), + .m_axis_tdata(axis_out.tdata), + .m_axis_tkeep(axis_out.tkeep), + .m_axis_tlast(axis_out.tlast) + ); +end +else if(N_DDR_CHAN == 2) begin + axis_data_fifo_1k inst_data ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(axis_in.tvalid), + .s_axis_tready(axis_in.tready), + .s_axis_tdata(axis_in.tdata), + .s_axis_tkeep(axis_in.tkeep), + .s_axis_tlast(axis_in.tlast), + .m_axis_tvalid(axis_out.tvalid), + .m_axis_tready(axis_out.tready), + .m_axis_tdata(axis_out.tdata), + .m_axis_tkeep(axis_out.tkeep), + .m_axis_tlast(axis_out.tlast) + ); +end +else begin // every other N_DDR_CHAN value falls back to the 2k FIFO + axis_data_fifo_2k inst_data ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(axis_in.tvalid), + .s_axis_tready(axis_in.tready), + .s_axis_tdata(axis_in.tdata), + .s_axis_tkeep(axis_in.tkeep), + .s_axis_tlast(axis_in.tlast), + .m_axis_tvalid(axis_out.tvalid), + .m_axis_tready(axis_out.tready), + .m_axis_tdata(axis_out.tdata), + .m_axis_tkeep(axis_out.tkeep), + .m_axis_tlast(axis_out.tlast) + ); +end + +endmodule + diff --git a/hw/hdl/regs/data_queue_credits_src.sv b/hw/hdl/regs/data_queue_credits_src.sv new file mode 100644 index 00000000..94b1fc4e --- /dev/null +++ b/hw/hdl/regs/data_queue_credits_src.sv @@ -0,0 +1,66 @@ +import lynxTypes::*; + +module data_queue_credits_src #( // Data FIFO from an AXI4S input to an AXI4SR output whose tdest is taken from rd_dest + parameter integer DATA_BITS = AXI_DATA_BITS +) ( + input logic aclk, + input logic aresetn, + + AXI4S.s axis_in, + AXI4SR.m axis_out, + input logic [3:0] rd_dest +); + +if(N_DDR_CHAN == 1) begin // the FIFO variant is chosen by N_DDR_CHAN; the DATA_BITS parameter is not referenced in this module + axis_data_fifo_512 inst_data ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(axis_in.tvalid), + .s_axis_tready(axis_in.tready), + .s_axis_tdata(axis_in.tdata), + .s_axis_tkeep(axis_in.tkeep), + .s_axis_tlast(axis_in.tlast), + .m_axis_tvalid(axis_out.tvalid), + .m_axis_tready(axis_out.tready), + 
.m_axis_tdata(axis_out.tdata), + .m_axis_tkeep(axis_out.tkeep), + .m_axis_tlast(axis_out.tlast) + ); +end +else if(N_DDR_CHAN == 2) begin + axis_data_fifo_1k inst_data ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(axis_in.tvalid), + .s_axis_tready(axis_in.tready), + .s_axis_tdata(axis_in.tdata), + .s_axis_tkeep(axis_in.tkeep), + .s_axis_tlast(axis_in.tlast), + .m_axis_tvalid(axis_out.tvalid), + .m_axis_tready(axis_out.tready), + .m_axis_tdata(axis_out.tdata), + .m_axis_tkeep(axis_out.tkeep), + .m_axis_tlast(axis_out.tlast) + ); +end +else begin // every other N_DDR_CHAN value falls back to the 2k FIFO + axis_data_fifo_2k inst_data ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(axis_in.tvalid), + .s_axis_tready(axis_in.tready), + .s_axis_tdata(axis_in.tdata), + .s_axis_tkeep(axis_in.tkeep), + .s_axis_tlast(axis_in.tlast), + .m_axis_tvalid(axis_out.tvalid), + .m_axis_tready(axis_out.tready), + .m_axis_tdata(axis_out.tdata), + .m_axis_tkeep(axis_out.tkeep), + .m_axis_tlast(axis_out.tlast) + ); +end + +assign axis_out.tdest = rd_dest; // tdest does not pass through the FIFO; it is driven directly from the rd_dest input + +endmodule + diff --git a/hw/hdl/regs/dma_isr_req_queue.sv b/hw/hdl/regs/dma_isr_req_queue.sv new file mode 100644 index 00000000..51e6caa5 --- /dev/null +++ b/hw/hdl/regs/dma_isr_req_queue.sv @@ -0,0 +1,26 @@ +import lynxTypes::*; + +module dma_isr_req_queue ( // Queues dmaIsrIntf requests through an axis_data_fifo_req_128 instance + input logic aclk, + input logic aresetn, + + dmaIsrIntf.s req_in, + dmaIsrIntf.m req_out +); + +axis_data_fifo_req_128 inst_req ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(req_in.valid), + .s_axis_tready(req_in.ready), + .s_axis_tdata(req_in.req), + .m_axis_tvalid(req_out.valid), + .m_axis_tready(req_out.ready), + .m_axis_tdata(req_out.req) +); + +assign req_in.done = req_out.done; // done and isr_return are wired straight back, not through the FIFO +assign req_in.isr_return = req_out.isr_return; + +endmodule + diff --git a/hw/hdl/regs/dma_req_queue.sv b/hw/hdl/regs/dma_req_queue.sv new file mode 100644 index 00000000..4e805fd0 --- /dev/null +++ b/hw/hdl/regs/dma_req_queue.sv @@ -0,0 +1,25 @@ +import lynxTypes::*; + +module 
dma_req_queue ( + input logic aclk, + input logic aresetn, + + dmaIntf.s req_in, + dmaIntf.m req_out +); + +axis_data_fifo_req_96 inst_req ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(req_in.valid), + .s_axis_tready(req_in.ready), + .s_axis_tdata(req_in.req), + .m_axis_tvalid(req_out.valid), + .m_axis_tready(req_out.ready), + .m_axis_tdata(req_out.req) +); + +assign req_in.done = req_out.done; + +endmodule + diff --git a/hw/hdl/regs/meta_decoupler.sv b/hw/hdl/regs/meta_decoupler.sv new file mode 100644 index 00000000..6eea7136 --- /dev/null +++ b/hw/hdl/regs/meta_decoupler.sv @@ -0,0 +1,22 @@ +import lynxTypes::*; + +module meta_decoupler ( + input logic [N_REGIONS-1:0] decouple, + + metaIntf.s meta_in [N_REGIONS], + metaIntf.m meta_out [N_REGIONS] +); + // ----------------------------------------------------------------------------------------------------------------------- + // -- Decoupling --------------------------------------------------------------------------------------------------------- + // ----------------------------------------------------------------------------------------------------------------------- + genvar i; + generate + for(i = 0; i < N_REGIONS; i++) begin + assign meta_out[i].valid = decouple[i] ? 1'b0 : meta_in[i].valid; + assign meta_in[i].ready = decouple[i] ? 
1'b0 : meta_out[i].ready; + + assign meta_out[i].data = meta_in[i].data; + end + endgenerate + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/meta_reg.sv b/hw/hdl/regs/meta_reg.sv new file mode 100644 index 00000000..5fc12ad7 --- /dev/null +++ b/hw/hdl/regs/meta_reg.sv @@ -0,0 +1,50 @@ +import lynxTypes::*; + +module meta_reg #( + parameter DATA_BITS = 256 +) ( + input logic aclk, + input logic aresetn, + + metaIntf.s meta_in, + metaIntf.m meta_out +); + +if(DATA_BITS == 256) begin + axis_register_slice_meta_256_0 inst_reg_slice ( + .aclk(aclk), + .aresetn(aresetn), + .s_axis_tvalid(meta_in.valid), + .s_axis_tready(meta_in.ready), + .s_axis_tdata(meta_in.data), + .m_axis_tvalid(meta_out.valid), + .m_axis_tready(meta_out.ready), + .m_axis_tdata(meta_out.data) + ); +end +else if(DATA_BITS == 56) begin + axis_register_slice_meta_56_0 inst_reg_slice ( + .aclk(aclk), + .aresetn(aresetn), + .s_axis_tvalid(meta_in.valid), + .s_axis_tready(meta_in.ready), + .s_axis_tdata(meta_in.data), + .m_axis_tvalid(meta_out.valid), + .m_axis_tready(meta_out.ready), + .m_axis_tdata(meta_out.data) + ); +end +else if(DATA_BITS == 32) begin + axis_register_slice_meta_32_0 inst_reg_slice ( + .aclk(aclk), + .aresetn(aresetn), + .s_axis_tvalid(meta_in.valid), + .s_axis_tready(meta_in.ready), + .s_axis_tdata(meta_in.data), + .m_axis_tvalid(meta_out.valid), + .m_axis_tready(meta_out.ready), + .m_axis_tdata(meta_out.data) + ); +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/meta_reg_array.sv b/hw/hdl/regs/meta_reg_array.sv new file mode 100644 index 00000000..9df1ac3a --- /dev/null +++ b/hw/hdl/regs/meta_reg_array.sv @@ -0,0 +1,27 @@ +import lynxTypes::*; + +`include "lynx_macros.svh" + +module meta_reg_array #( + parameter integer N_STAGES = 2 +) ( + input logic aclk, + input logic aresetn, + + metaIntf.s meta_in, + metaIntf.m meta_out +); + +// 
----------------------------------------------------------------------------------------------------------------------- +// -- Register slices ---------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------------------------------------------------- +metaIntf #(.DATA_BITS(FV_REQ_BITS)) meta_s [N_STAGES+1] (); + +`META_ASSIGN(meta_in, meta_s[0]) +`META_ASSIGN(meta_s[N_STAGES], meta_out) + +for(genvar i = 0; i < N_STAGES; i++) begin + meta_reg inst_reg (.aclk(aclk), .aresetn(aresetn), .meta_in(meta_s[i]), .meta_out(meta_s[i+1])); +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/req_decoupler.sv b/hw/hdl/regs/req_decoupler.sv new file mode 100644 index 00000000..ab23a59f --- /dev/null +++ b/hw/hdl/regs/req_decoupler.sv @@ -0,0 +1,22 @@ +import lynxTypes::*; + +module req_decoupler ( + input logic [N_REGIONS-1:0] decouple, + + reqIntf.s req_in [N_REGIONS], + reqIntf.m req_out [N_REGIONS] +); + // ----------------------------------------------------------------------------------------------------------------------- + // -- Decoupling --------------------------------------------------------------------------------------------------------- + // ----------------------------------------------------------------------------------------------------------------------- + genvar i; + generate + for(i = 0; i < N_REGIONS; i++) begin + assign req_out[i].valid = decouple[i] ? 1'b0 : req_in[i].valid; + assign req_in[i].ready = decouple[i] ? 
1'b0 : req_out[i].ready; + + assign req_out[i].req = req_in[i].req; + end + endgenerate + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/req_queue.sv b/hw/hdl/regs/req_queue.sv new file mode 100644 index 00000000..9feef5fe --- /dev/null +++ b/hw/hdl/regs/req_queue.sv @@ -0,0 +1,23 @@ +import lynxTypes::*; + +module req_queue ( + input logic aclk, + input logic aresetn, + + reqIntf.s req_in, + reqIntf.m req_out +); + +axis_data_fifo_req_96 inst_req ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(req_in.valid), + .s_axis_tready(req_in.ready), + .s_axis_tdata(req_in.req), + .m_axis_tvalid(req_out.valid), + .m_axis_tready(req_out.ready), + .m_axis_tdata(req_out.req) +); + +endmodule + diff --git a/hw/hdl/regs/req_reg.sv b/hw/hdl/regs/req_reg.sv new file mode 100644 index 00000000..9f3539fd --- /dev/null +++ b/hw/hdl/regs/req_reg.sv @@ -0,0 +1,22 @@ +import lynxTypes::*; + +module req_reg ( + input logic aclk, + input logic aresetn, + + reqIntf.s req_in, + reqIntf.m req_out +); + +axis_register_slice_req_96_0 inst_reg_slice ( + .aclk(aclk), + .aresetn(aresetn), + .s_axis_tvalid(req_in.valid), + .s_axis_tready(req_in.ready), + .s_axis_tdata(req_in.req), + .m_axis_tvalid(req_out.valid), + .m_axis_tready(req_out.ready), + .m_axis_tdata(req_out.req) +); + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/req_reg_array.sv b/hw/hdl/regs/req_reg_array.sv new file mode 100644 index 00000000..0eeb7bce --- /dev/null +++ b/hw/hdl/regs/req_reg_array.sv @@ -0,0 +1,27 @@ +import lynxTypes::*; + +`include "lynx_macros.svh" + +module req_reg_array #( + parameter integer N_STAGES = 2 +) ( + input logic aclk, + input logic aresetn, + + reqIntf.s req_in, + reqIntf.m req_out +); + +// ----------------------------------------------------------------------------------------------------------------------- +// -- Register slices ---------------------------------------------------------------------------------------------------- +// 
----------------------------------------------------------------------------------------------------------------------- +reqIntf req_s [N_STAGES+1] (); + +`REQ_ASSIGN(req_in, req_s[0]) +`REQ_ASSIGN(req_s[N_STAGES], req_out) + +for(genvar i = 0; i < N_STAGES; i++) begin + req_reg inst_reg (.aclk(aclk), .aresetn(aresetn), .req_in(req_s[i]), .req_out(req_s[i+1])); +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/req_reg_array_rtl.sv b/hw/hdl/regs/req_reg_array_rtl.sv new file mode 100644 index 00000000..d2df4518 --- /dev/null +++ b/hw/hdl/regs/req_reg_array_rtl.sv @@ -0,0 +1,27 @@ +import lynxTypes::*; + +`include "lynx_macros.svh" + +module req_reg_array_rtl #( + parameter integer N_STAGES = 2 +) ( + input logic aclk, + input logic aresetn, + + reqIntf.s req_in, + reqIntf.m req_out +); + +// ----------------------------------------------------------------------------------------------------------------------- +// -- Register slices ---------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------------------------------------------------- +reqIntf req_s [N_STAGES+1] (); + +`REQ_ASSIGN(req_in, req_s[0]) +`REQ_ASSIGN(req_s[N_STAGES], req_out) + +for(genvar i = 0; i < N_STAGES; i++) begin + req_reg_rtl inst_reg (.aclk(aclk), .aresetn(aresetn), .req_in(req_s[i]), .req_out(req_s[i+1])); +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/regs/req_reg_rtl.sv b/hw/hdl/regs/req_reg_rtl.sv new file mode 100644 index 00000000..d9ff5849 --- /dev/null +++ b/hw/hdl/regs/req_reg_rtl.sv @@ -0,0 +1,83 @@ +import lynxTypes::*; + +module req_reg_rtl ( + input logic aclk, + input logic aresetn, + + reqIntf.s req_in, + reqIntf.m req_out +); + // Internal registers + logic in_ready_C, in_ready_N; + + req_t out_req_C, out_req_N; + logic [N_REQUEST_BITS-1:0] out_req_id_C, out_req_id_N; + logic out_valid_C, out_valid_N; + + req_t tmp_req_C, 
tmp_req_N; + logic [N_REQUEST_BITS-1:0] tmp_req_id_C, tmp_req_id_N; + logic tmp_valid_C, tmp_valid_N; + + // Comb: out_* is the output stage, tmp_* holds a beat accepted while the output stage is stalled + assign in_ready_N = req_out.ready || (!tmp_valid_C && (!out_valid_C || !req_in.valid)); + + always_comb begin + out_valid_N = out_valid_C; + out_req_N = out_req_C; + out_req_id_N = out_req_id_C; + + tmp_valid_N = tmp_valid_C; + tmp_req_N = tmp_req_C; + tmp_req_id_N = tmp_req_id_C; + + if(in_ready_C) begin + if(req_out.ready || !out_valid_C) begin + out_valid_N = req_in.valid; + out_req_N = req_in.req; + out_req_id_N = req_in.id; + end + else begin + tmp_valid_N = req_in.valid; + tmp_req_N = req_in.req; + tmp_req_id_N = req_in.id; + end + end + else if(req_out.ready) begin + out_valid_N = tmp_valid_C; + out_req_N = tmp_req_C; + out_req_id_N = tmp_req_id_C; + + tmp_valid_N = 1'b0; + end + end + + // Reg process (asynchronous, active-low reset) + always_ff @(posedge aclk, negedge aresetn) begin + if(~aresetn) begin + out_valid_C <= 1'b0; + out_req_C <= 0; + out_req_id_C <= 0; + tmp_valid_C <= 1'b0; + tmp_req_C <= 0; + tmp_req_id_C <= 0; + in_ready_C <= 1'b0; + end + else begin + out_valid_C <= out_valid_N; + out_req_C <= out_req_N; + out_req_id_C <= out_req_id_N; // load the next-state id so req_out.id follows the request + tmp_valid_C <= tmp_valid_N; + tmp_req_C <= tmp_req_N; + tmp_req_id_C <= tmp_req_id_N; + in_ready_C <= in_ready_N; + end + end + + // Outputs + assign req_in.ready = in_ready_C; + + assign req_out.valid = out_valid_C; + assign req_out.req = out_req_C; + assign req_out.id = out_req_id_C; + +endmodule \ No newline at end of file diff --git a/hw/hdl/slave/cnfg_slave.sv b/hw/hdl/slave/cnfg_slave.sv new file mode 100644 index 00000000..ad052ea9 --- /dev/null +++ b/hw/hdl/slave/cnfg_slave.sv @@ -0,0 +1,691 @@ +/** + * Config Slave + * + * Configuration slave, datapath control and RD/WR request handling + */ + +import lynxTypes::*; + +module cnfg_slave #( + parameter integer ID_REG = 0 +)( + input logic aclk, + input logic aresetn, + + // Control bus (HOST) + AXI4L.s axi_ctrl, + +`ifdef EN_BPSS + // Request in user logic + reqIntf.s 
rd_req_user, + reqIntf.s wr_req_user, +`endif + +`ifdef EN_FV + // Request out rdma + metaIntf.m fv_req, +`endif + + // Request out + reqIntf.m rd_req, + reqIntf.m wr_req, + + // Config intf + cnfgIntf.m rd_cnfg, + cnfgIntf.m wr_cnfg, + + // Control + output logic decouple, + output logic pf_irq +); + +// -- Decl ------------------------------------------------------------------------------- +// --------------------------------------------------------------------------------------- + +// Constants +`ifdef EN_FV +localparam integer N_REGS = 25; +`else +localparam integer N_REGS = 19; +`endif +localparam integer ADDR_LSB = (AXIL_DATA_BITS/32) + 1; +localparam integer ADDR_MSB = $clog2(N_REGS); +localparam integer AXIL_ADDR_BITS = ADDR_LSB + ADDR_MSB; + +localparam integer CTRL_BYTES = 2; +localparam integer VADDR_BYTES = 6; +localparam integer LEN_BYTES = 4; + +// Internal registers +logic [AXIL_ADDR_BITS-1:0] axi_awaddr; +logic axi_awready; +logic [AXIL_ADDR_BITS-1:0] axi_araddr; +logic axi_arready; +logic [1:0] axi_bresp; +logic axi_bvalid; +logic axi_wready; +logic [AXIL_DATA_BITS-1:0] axi_rdata; +logic [1:0] axi_rresp; +logic axi_rvalid; + +// Slave Registers +logic [N_REGS-1:0][AXIL_DATA_BITS-1:0] slv_reg; +logic slv_reg_rden; +logic slv_reg_wren; +logic aw_en; + +// Internal signals +logic irq_pending; +logic rd_sent; +logic wr_sent; + +logic [31:0] rd_queue_used; +logic [31:0] wr_queue_used; + +`ifdef EN_FV +logic [31:0] rdma_queue_used; +logic rdma_post; +`endif + +// -- Def -------------------------------------------------------------------------------- +// --------------------------------------------------------------------------------------- + +// -- Register map ----------------------------------------------------------------------- +// 0 (W1S|W1C) : Control +localparam integer CTRL_REG = 0; + localparam integer CTRL_START_RD = 0; + localparam integer CTRL_START_WR = 1; + localparam integer CTRL_SYNC_RD = 2; + localparam integer CTRL_SYNC_WR = 3; + 
localparam integer CTRL_STREAM_RD = 4; + localparam integer CTRL_STREAM_WR = 5; + localparam integer CTRL_CLR_STAT_RD = 6; + localparam integer CTRL_CLR_STAT_WR = 7; + localparam integer CTRL_CLR_IRQ_PENDING = 8; + localparam integer CTRL_SEND_FV_REQ = 9; + localparam integer CTRL_SEND_QP_CTX = 10; + localparam integer CTRL_SEND_QP_CONN = 11; +// 1 (RW) : Virtual address read +localparam integer VADDR_RD_REG = 1; +// 2 (RW) : Length read +localparam integer LEN_RD_REG = 2; +// 3 (RW) : Virtual address write +localparam integer VADDR_WR_REG = 3; +// 4 (RW) : Length write +localparam integer LEN_WR_REG = 4; +// 5 (RO) : Virtual address miss +localparam integer VADDR_MISS_REG = 5; +// 6 (RO) : Length miss +localparam integer LEN_MISS_REG = 6; +// 7,8 (W1S|W1C|R) : Datapath control set/clear +localparam integer CTRL_DP_REG_SET = 7; +localparam integer CTRL_DP_REG_CLR = 8; + localparam integer CTRL_DP_DECOUPLE = 0; +// 9 (RW) : Timer stop at completion counter +localparam integer TMR_STOP_REG = 9; +// 10, 11 (RO) : Timers +localparam integer TMR_RD_REG = 10; +localparam integer TMR_WR_REG = 11; +// 12 (RO) : Status +localparam integer STAT_CMD_USED_RD_REG = 12; +localparam integer STAT_CMD_USED_WR_REG = 13; +// 13, 14 (RO) : Number of completed transfers +localparam integer STAT_DMA_RD_REG = 14; +localparam integer STAT_DMA_WR_REG = 15; +// 15, 16 (RO) : Number of sent requests +localparam integer STAT_SENT_RD_REG = 16; +localparam integer STAT_SENT_WR_REG = 17; +// 17 (RO) : Number of page faults +localparam integer STAT_PFAULTS_REG = 18; +// FV +// 20, 21, 22 (RW) : FV post +localparam integer FV_POST_REG_0 = 20; +localparam integer FV_POST_REG_1 = 21; +localparam integer FV_POST_REG_2 = 22; +localparam integer FV_POST_REG_3 = 23; +// 23 (RO) : FV cmd check used +localparam integer FV_STAT_CMD_USED_REG = 24; +// + +// ---------------------------------------------------------------------------------------- +// Write process +// 
---------------------------------------------------------------------------------------- +assign slv_reg_wren = axi_wready && axi_ctrl.wvalid && axi_awready && axi_ctrl.awvalid; + +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) begin + slv_reg[CTRL_REG][15:0] <= 0; + slv_reg[CTRL_DP_REG_SET][15:0] <= 0; + slv_reg[TMR_STOP_REG][31:0] <= 1; + + irq_pending <= 1'b0; + +`ifdef EN_FV + rdma_post <= 1'b0; +`endif + end + else begin + slv_reg[CTRL_REG] <= 0; // Control + +`ifdef EN_FV + rdma_post <= 1'b0; +`endif + + // Page fault + if(rd_cnfg.pf.miss || wr_cnfg.pf.miss) begin + irq_pending <= 1'b1; + slv_reg[VADDR_MISS_REG] <= rd_cnfg.pf.miss ? rd_cnfg.pf.vaddr : wr_cnfg.pf.vaddr; // miss virtual address + slv_reg[LEN_MISS_REG] <= rd_cnfg.pf.miss ? rd_cnfg.pf.len : wr_cnfg.pf.len; // miss length + end + if(slv_reg[CTRL_REG][CTRL_CLR_IRQ_PENDING]) + irq_pending <= 1'b0; + + // Status counters + slv_reg[STAT_DMA_RD_REG][31:0] <= slv_reg[CTRL_REG][CTRL_CLR_STAT_RD] ? 0 : slv_reg[STAT_DMA_RD_REG][31:0] + rd_cnfg.done_host + rd_cnfg.done_card + rd_cnfg.done_sync; + slv_reg[STAT_DMA_WR_REG][31:0] <= slv_reg[CTRL_REG][CTRL_CLR_STAT_WR] ? 0 : slv_reg[STAT_DMA_WR_REG][31:0] + wr_cnfg.done_host + wr_cnfg.done_card + wr_cnfg.done_sync; + slv_reg[STAT_SENT_RD_REG][31:0] <= slv_reg[CTRL_REG][CTRL_CLR_STAT_RD] ? 0 : slv_reg[STAT_SENT_RD_REG][31:0] + rd_sent; + slv_reg[STAT_SENT_WR_REG][31:0] <= slv_reg[CTRL_REG][CTRL_CLR_STAT_WR] ? 0 : slv_reg[STAT_SENT_WR_REG][31:0] + wr_sent; + slv_reg[STAT_PFAULTS_REG][31:0] <= (slv_reg[CTRL_REG][CTRL_CLR_STAT_RD] || slv_reg[CTRL_REG][CTRL_CLR_STAT_WR]) ? 0 : slv_reg[STAT_PFAULTS_REG] + (rd_cnfg.pf.miss || wr_cnfg.pf.miss); + + // Timers + slv_reg[TMR_RD_REG] <= slv_reg[CTRL_REG][CTRL_CLR_STAT_RD] ? 0 : (slv_reg[STAT_DMA_RD_REG][31:0] >= slv_reg[TMR_STOP_REG][31:0]) ? slv_reg[TMR_RD_REG] : slv_reg[TMR_RD_REG] + 1; + slv_reg[TMR_WR_REG] <= slv_reg[CTRL_REG][CTRL_CLR_STAT_WR] ? 
0 : (slv_reg[STAT_DMA_WR_REG][31:0] >= slv_reg[TMR_STOP_REG][31:0]) ? slv_reg[TMR_WR_REG] : slv_reg[TMR_WR_REG] + 1; + + if(slv_reg_wren) begin + case (axi_awaddr[ADDR_LSB+:ADDR_MSB]) + CTRL_REG: // Control + for (int i = 0; i < CTRL_BYTES; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[CTRL_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + VADDR_RD_REG: // Virtual address read + for (int i = 0; i < VADDR_BYTES; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[VADDR_RD_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + LEN_RD_REG: // Length read + for (int i = 0; i < LEN_BYTES; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[LEN_RD_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + VADDR_WR_REG: // Virtual address write + for (int i = 0; i < VADDR_BYTES; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[VADDR_WR_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + LEN_WR_REG: // Length write + for (int i = 0; i < LEN_BYTES; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[LEN_WR_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + CTRL_DP_REG_SET: // Datapath control set + for (int i = 0; i < CTRL_BYTES; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[CTRL_DP_REG_SET][(i*8)+:8] <= slv_reg[CTRL_DP_REG_SET][(i*8)+:8] | axi_ctrl.wdata[(i*8)+:8]; + end + end + CTRL_DP_REG_CLR: // Datapath control clear + for (int i = 0; i < CTRL_BYTES; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[CTRL_DP_REG_SET][(i*8)+:8] <= slv_reg[CTRL_DP_REG_SET][(i*8)+:8] & ~axi_ctrl.wdata[(i*8)+:8]; + end + end + TMR_STOP_REG: // Timer stop at + for (int i = 0; i < LEN_BYTES; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[TMR_STOP_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + +`ifdef EN_FV + FV_CTX_REG_0: // Context + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[FV_CTX_REG_0][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + FV_CTX_REG_1: // Context + for (int i = 0; i < 
AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[FV_CTX_REG_1][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + FV_CTX_REG_2: // Context final + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[FV_CTX_REG_2][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + FV_CONN_REG_0: // Connection + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[FV_CONN_REG_0][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + FV_CONN_REG_1: // Connection + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[FV_CONN_REG_1][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + FV_CONN_REG_2: // Connection final + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[FV_CONN_REG_2][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + FV_POST_REG_0: // Post + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[FV_POST_REG_0][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + FV_POST_REG_1: // Post + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[FV_POST_REG_1][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + FV_POST_REG_2: // Post final + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[FV_POST_REG_2][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + FV_POST_REG_3: // Post final + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[FV_POST_REG_3][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + rdma_post <= 1'b1; + end + end +`endif + + default : ; + endcase + end + end +end + +/* ---------------------------------------------------------------------------------------- */ +/* -- Read process ------------------------------------------------------------------------ */ +/* ---------------------------------------------------------------------------------------- */ 
+assign slv_reg_rden = axi_arready & axi_ctrl.arvalid & ~axi_rvalid; + +always_ff @(posedge aclk, negedge aresetn) begin + if( aresetn == 1'b0 ) begin + axi_rdata <= 0; + end + else begin + axi_rdata <= 0; + if(slv_reg_rden) begin + case (axi_araddr[ADDR_LSB+:ADDR_MSB]) + VADDR_RD_REG: // Virtual address read + axi_rdata[VADDR_BITS-1:0] <= slv_reg[VADDR_RD_REG][VADDR_BITS-1:0]; + LEN_RD_REG: // Length read + axi_rdata[LEN_BITS-1:0] <= slv_reg[LEN_RD_REG][LEN_BITS-1:0]; + VADDR_WR_REG: // Virtual address write + axi_rdata[VADDR_BITS-1:0] <= slv_reg[VADDR_WR_REG][VADDR_BITS-1:0]; + LEN_WR_REG: // Length write + axi_rdata[LEN_BITS-1:0] <= slv_reg[LEN_WR_REG][LEN_BITS-1:0]; + VADDR_MISS_REG: // Virtual address miss + axi_rdata[VADDR_BITS-1:0] <= slv_reg[VADDR_MISS_REG][VADDR_BITS-1:0]; + LEN_MISS_REG: // Length miss + axi_rdata[LEN_BITS-1:0] <= slv_reg[LEN_MISS_REG][LEN_BITS-1:0]; + CTRL_DP_REG_SET: // Datapath + axi_rdata[15:0] <= slv_reg[CTRL_DP_REG_SET][15:0]; + CTRL_DP_REG_CLR: // Datapath + axi_rdata[15:0] <= slv_reg[CTRL_DP_REG_SET][15:0]; + TMR_STOP_REG: // Timer stop at + axi_rdata[31:0] <= slv_reg[TMR_STOP_REG][31:0]; + TMR_RD_REG: // Timer read + axi_rdata <= slv_reg[TMR_RD_REG]; + TMR_WR_REG: // Timer write + axi_rdata <= slv_reg[TMR_WR_REG]; + STAT_CMD_USED_RD_REG: // Status queues used read + axi_rdata[31:0] <= rd_queue_used; + STAT_CMD_USED_WR_REG: // Status queues used write + axi_rdata[31:0] <= wr_queue_used; + STAT_DMA_RD_REG: // Status dma read + axi_rdata[31:0] <= slv_reg[STAT_DMA_RD_REG][31:0]; + STAT_DMA_WR_REG: // Status dma write + axi_rdata[31:0] <= slv_reg[STAT_DMA_WR_REG][31:0]; + STAT_SENT_RD_REG: // Status sent read + axi_rdata[31:0] <= slv_reg[STAT_SENT_RD_REG][31:0]; + STAT_SENT_WR_REG: // Status sent write + axi_rdata[31:0] <= slv_reg[STAT_SENT_WR_REG][31:0]; + STAT_PFAULTS_REG: // Status page faults + axi_rdata[31:0] <= slv_reg[STAT_PFAULTS_REG][31:0]; + +`ifdef EN_FV + FV_POST_REG_0: // Post + axi_rdata <= slv_reg[FV_POST_REG_0]; + 
FV_POST_REG_1: // Post + axi_rdata <= slv_reg[FV_POST_REG_1]; + FV_POST_REG_2: // Post final + axi_rdata <= slv_reg[FV_POST_REG_2]; + FV_STAT_CMD_USED_REG: // Status queue used + axi_rdata[31:0] <= rdma_queue_used; +`endif + + default: ; + endcase + end + end +end + +/* ---------------------------------------------------------------------------------------- */ +/* -- Output ------------------------------------------------------------------------------ */ +/* ---------------------------------------------------------------------------------------- */ +assign rd_sent = rd_req.valid & rd_req.ready; +assign wr_sent = wr_req.valid & wr_req.ready; + +always_comb begin + // Page fault handling + rd_cnfg.restart = slv_reg[CTRL_REG][CTRL_CLR_IRQ_PENDING]; + wr_cnfg.restart = slv_reg[CTRL_REG][CTRL_CLR_IRQ_PENDING]; + pf_irq = irq_pending; + + // Decoupling + decouple = slv_reg[CTRL_DP_REG_SET][CTRL_DP_DECOUPLE]; +end + +reqIntf rd_req_cnfg(); +reqIntf wr_req_cnfg(); +reqIntf rd_req_host(); +reqIntf wr_req_host(); + +// Assign +assign rd_req_cnfg.req.vaddr = slv_reg[VADDR_RD_REG][VADDR_BITS-1:0]; +assign rd_req_cnfg.req.len = slv_reg[LEN_RD_REG][LEN_BITS-1:0]; +assign rd_req_cnfg.req.sync = slv_reg[CTRL_REG][CTRL_SYNC_RD]; +assign rd_req_cnfg.req.ctl = 1'b1; +assign rd_req_cnfg.req.rsrvd = 0; +assign rd_req_cnfg.valid = slv_reg[CTRL_REG][CTRL_START_RD]; + +assign wr_req_cnfg.req.vaddr = slv_reg[VADDR_WR_REG][VADDR_BITS-1:0]; +assign wr_req_cnfg.req.len = slv_reg[LEN_WR_REG][LEN_BITS-1:0]; +assign wr_req_cnfg.req.sync = slv_reg[CTRL_REG][CTRL_SYNC_WR]; +assign wr_req_cnfg.req.ctl = 1'b1; +assign wr_req_cnfg.req.rsrvd = 0; +assign wr_req_cnfg.valid = slv_reg[CTRL_REG][CTRL_START_WR]; + +// Command queues +axis_data_fifo_req_96_used inst_cmd_queue_rd ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(rd_req_cnfg.valid), + .s_axis_tready(rd_req_cnfg.ready), + .s_axis_tdata(rd_req_cnfg.req), + .m_axis_tvalid(rd_req_host.valid), + 
.m_axis_tready(rd_req_host.ready), + .m_axis_tdata(rd_req_host.req), + .axis_wr_data_count(rd_queue_used) +); + +axis_data_fifo_req_96_used inst_cmd_queue_wr ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(wr_req_cnfg.valid), + .s_axis_tready(wr_req_cnfg.ready), + .s_axis_tdata(wr_req_cnfg.req), + .m_axis_tvalid(wr_req_host.valid), + .m_axis_tready(wr_req_host.ready), + .m_axis_tdata(wr_req_host.req), + .axis_wr_data_count(wr_queue_used) +); + +`ifdef EN_USER_BYPASS // NOTE(review): rd_req_user/wr_req_user ports are declared under EN_BPSS - confirm which macro is intended + +reqIntf rd_req_ul_q (); +reqIntf wr_req_ul_q (); + +// Command queues (user logic): buffer the user request ports before arbitration with the host queues +axis_data_fifo_req_96_used inst_cmd_queue_rd_ul ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(rd_req_user.valid), + .s_axis_tready(rd_req_user.ready), + .s_axis_tdata(rd_req_user.req), + .m_axis_tvalid(rd_req_ul_q.valid), + .m_axis_tready(rd_req_ul_q.ready), + .m_axis_tdata(rd_req_ul_q.req), + .axis_wr_data_count() +); + +axis_data_fifo_req_96_used inst_cmd_queue_wr_ul ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(wr_req_user.valid), + .s_axis_tready(wr_req_user.ready), + .s_axis_tdata(wr_req_user.req), + .m_axis_tvalid(wr_req_ul_q.valid), + .m_axis_tready(wr_req_ul_q.ready), + .m_axis_tdata(wr_req_ul_q.req), + .axis_wr_data_count() +); + +axis_interconnect_cnfg_req_arbiter inst_rd_interconnect ( + .ACLK(aclk), + .ARESETN(aresetn), + + .S00_AXIS_ACLK(aclk), + .S00_AXIS_ARESETN(aresetn), + .S00_AXIS_TVALID(rd_req_host.valid), + .S00_AXIS_TREADY(rd_req_host.ready), + .S00_AXIS_TDATA(rd_req_host.req), + + .S01_AXIS_ACLK(aclk), + .S01_AXIS_ARESETN(aresetn), + .S01_AXIS_TVALID(rd_req_ul_q.valid), + .S01_AXIS_TREADY(rd_req_ul_q.ready), + .S01_AXIS_TDATA(rd_req_ul_q.req), + + .M00_AXIS_ACLK(aclk), + .M00_AXIS_ARESETN(aresetn), + .M00_AXIS_TVALID(rd_req.valid), + .M00_AXIS_TREADY(rd_req.ready), + .M00_AXIS_TDATA(rd_req.req), + + .S00_ARB_REQ_SUPPRESS(0), + .S01_ARB_REQ_SUPPRESS(0), + .S00_DECODE_ERR(), + .S01_DECODE_ERR() +); + +axis_interconnect_cnfg_req_arbiter 
inst_wr_interconnect ( + .ACLK(aclk), + .ARESETN(aresetn), + + .S00_AXIS_ACLK(aclk), + .S00_AXIS_ARESETN(aresetn), + .S00_AXIS_TVALID(wr_req_host.valid), + .S00_AXIS_TREADY(wr_req_host.ready), + .S00_AXIS_TDATA(wr_req_host.req), + + .S01_AXIS_ACLK(aclk), + .S01_AXIS_ARESETN(aresetn), + .S01_AXIS_TVALID(wr_req_ul_q.valid), + .S01_AXIS_TREADY(wr_req_ul_q.ready), + .S01_AXIS_TDATA(wr_req_ul_q.req), + + .M00_AXIS_ACLK(aclk), + .M00_AXIS_ARESETN(aresetn), + .M00_AXIS_TVALID(wr_req.valid), + .M00_AXIS_TREADY(wr_req.ready), + .M00_AXIS_TDATA(wr_req.req), + + .S00_ARB_REQ_SUPPRESS(0), + .S01_ARB_REQ_SUPPRESS(0), + .S00_DECODE_ERR(), + .S01_DECODE_ERR() +); + +`else + +assign rd_req.req = rd_req_host.req; +assign rd_req.valid = rd_req_host.valid; +assign rd_req_host.ready = rd_req.ready; + +assign wr_req.req = wr_req_host.req; +assign wr_req.valid = wr_req_host.valid; +assign wr_req_host.ready = wr_req.ready; + +`endif + +`ifdef EN_FV + +assign local_qpn = slv_reg[FV_QPN_REG][23:0]; + +// FV requests +metaIntf #(.DATA_BITS(RPC_CMD_BITS)) rdma_req_cnfg(); + +// Assign +assign rdma_req_cnfg.data[4:0] = slv_reg[FV_POST_REG_0][4:0]; // opcode +assign rdma_req_cnfg.data[28:5] = slv_reg[FV_QPN_REG][23:0]; // local qpn +assign rdma_req_cnfg.data[32:29] = ID_REG; // local region +assign rdma_req_cnfg.data[33] = 1'b1; // host +assign rdma_req_cnfg.data[63:34] = 0; // reserved +assign rdma_req_cnfg.data[127:64] = slv_reg[FV_POST_REG_1]; // remote vaddr[15:0], local vaddr +assign rdma_req_cnfg.data[191:128] = slv_reg[FV_POST_REG_2]; // length, remote vaddr[47:16] +assign rdma_req_cnfg.data[255:192] = slv_reg[FV_POST_REG_3]; // params +assign rdma_req_cnfg.valid = rdma_post; + +// Parser +network_req_parser #(.ID_REG(ID_REG), .HOST(1)) inst_parser (.aclk(aclk), .aresetn(aresetn), .req_in(rdma_req_cnfg), .req_out(rdma_req), .used(rdma_queue_used)); + +`endif + +/* ---------------------------------------------------------------------------------------- */ +/* -- AXI 
--------------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------------------------- */ + +// I/O +assign axi_ctrl.awready = axi_awready; +assign axi_ctrl.arready = axi_arready; +assign axi_ctrl.bresp = axi_bresp; +assign axi_ctrl.bvalid = axi_bvalid; +assign axi_ctrl.wready = axi_wready; +assign axi_ctrl.rdata = axi_rdata; +assign axi_ctrl.rresp = axi_rresp; +assign axi_ctrl.rvalid = axi_rvalid; + +// awready and awaddr +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_awready <= 1'b0; + axi_awaddr <= 0; + aw_en <= 1'b1; + end + else + begin + if (~axi_awready && axi_ctrl.awvalid && axi_ctrl.wvalid && aw_en) + begin + axi_awready <= 1'b1; + aw_en <= 1'b0; + axi_awaddr <= axi_ctrl.awaddr; + end + else if (axi_ctrl.bready && axi_bvalid) + begin + aw_en <= 1'b1; + axi_awready <= 1'b0; + end + else + begin + axi_awready <= 1'b0; + end + end +end + +// arready and araddr +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_arready <= 1'b0; + axi_araddr <= 0; + end + else + begin + if (~axi_arready && axi_ctrl.arvalid) + begin + axi_arready <= 1'b1; + axi_araddr <= axi_ctrl.araddr; + end + else + begin + axi_arready <= 1'b0; + end + end +end + +// bvalid and bresp +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_bvalid <= 0; + axi_bresp <= 2'b0; + end + else + begin + if (axi_awready && axi_ctrl.awvalid && ~axi_bvalid && axi_wready && axi_ctrl.wvalid) + begin + axi_bvalid <= 1'b1; + axi_bresp <= 2'b0; + end + else + begin + if (axi_ctrl.bready && axi_bvalid) + begin + axi_bvalid <= 1'b0; + end + end + end +end + +// wready +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_wready <= 1'b0; + end + else + begin + if (~axi_wready && axi_ctrl.wvalid && axi_ctrl.awvalid && aw_en ) + begin + axi_wready <= 1'b1; + end + else + begin + 
axi_wready <= 1'b0; + end + end +end + +// rvalid and rresp (1Del?) +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_rvalid <= 0; + axi_rresp <= 0; + end + else + begin + if (axi_arready && axi_ctrl.arvalid && ~axi_rvalid) + begin + axi_rvalid <= 1'b1; + axi_rresp <= 2'b0; + end + else if (axi_rvalid && axi_ctrl.rready) + begin + axi_rvalid <= 1'b0; + end + end +end + +endmodule diff --git a/hw/hdl/slave/cnfg_slave_avx.sv b/hw/hdl/slave/cnfg_slave_avx.sv new file mode 100644 index 00000000..261f35cf --- /dev/null +++ b/hw/hdl/slave/cnfg_slave_avx.sv @@ -0,0 +1,740 @@ +/** + * Config slave AVX + * + * Configuration slave, datapath control and RD/WR request handling + */ + +import lynxTypes::*; + +module cnfg_slave_avx #( + parameter integer ID_REG = 0 +) ( + input logic aclk, + input logic aresetn, + + // Control bus (HOST) + AXI4.s axim_ctrl, + +`ifdef EN_BPSS + // Request user logic + reqIntf.s rd_req_user, + reqIntf.s wr_req_user, +`endif + +`ifdef EN_FV + // Request out rdma + metaIntf.m rdma_req, +`endif + + // Request out + reqIntf.m rd_req, + reqIntf.m wr_req, + + // Config intf + cnfgIntf.m rd_cnfg, + cnfgIntf.m wr_cnfg, + + // Control + output logic decouple, + output logic pf_irq +); + +// Constants +`ifdef EN_FV + localparam integer N_REGS = 12; +`else + localparam integer N_REGS = 7; +`endif +localparam integer ADDR_LSB = $clog2(AVX_DATA_BITS/8); +localparam integer ADDR_MSB = $clog2(N_REGS); +localparam integer AVX_ADDR_BITS = ADDR_LSB + ADDR_MSB; + +localparam integer CTRL_BYTES = 2; +localparam integer VADDR_BYTES = 6; +localparam integer LEN_BYTES = 4; + +// Internal regs +logic [AVX_ADDR_BITS-1:0] axi_awaddr; +logic axi_awready; +logic axi_wready; +logic [1:0] axi_bresp; +logic axi_bvalid; +logic [AVX_ADDR_BITS-1:0] axi_araddr; +logic axi_arready; +logic [AVX_DATA_BITS-1:0] axi_rdata; +logic [1:0] axi_rresp; +logic axi_rlast; +logic axi_rvalid; + +logic [1:0] axi_arburst; +logic [1:0] axi_awburst; +logic [7:0] 
axi_arlen; +logic [7:0] axi_awlen; +logic [7:0] axi_awlen_cntr; +logic [7:0] axi_arlen_cntr; + +logic aw_wrap_en; +logic ar_wrap_en; +logic [31:0] aw_wrap_size; +logic [31:0] ar_wrap_size; + +logic axi_awv_awr_flag; +logic axi_arv_arr_flag; + +// Slave registers +logic [N_REGS-1:0][AVX_DATA_BITS-1:0] slv_reg; +logic slv_reg_rden; +logic slv_reg_wren; + +// Internal signals +logic irq_pending; +logic rd_sent; +logic wr_sent; + +logic [31:0] rd_queue_used; +logic [31:0] wr_queue_used; + +`ifdef EN_FV + logic [31:0] rdma_queue_used; + logic rdma_post; +`endif + +// -- Def -------------------------------------------------------------------------------- +// --------------------------------------------------------------------------------------- + +// -- Register map ----------------------------------------------------------------------- +// 0 (W1S|W1C) : Control +localparam integer CTRL_REG = 0; + localparam integer CTRL_START_RD = 0; + localparam integer CTRL_START_WR = 1; + localparam integer CTRL_SYNC_RD = 2; + localparam integer CTRL_SYNC_WR = 3; + localparam integer CTRL_STREAM_RD = 4; + localparam integer CTRL_STREAM_WR = 5; + localparam integer CTRL_CLR_STAT_RD = 6; + localparam integer CTRL_CLR_STAT_WR = 7; + localparam integer CTRL_CLR_IRQ_PENDING = 8; + localparam integer CTRL_DEST_RD = 9; + localparam integer CTRL_DEST_WR = 13; + localparam integer CTRL_VADDR_RD_OFFS = 64; + localparam integer CTRL_VADDR_WR_OFFS = 128; + localparam integer CTRL_LEN_RD_OFFS = 192; + localparam integer CTRL_LEN_WR_OFFS = 224; +// 1 (RO) : Page fault +localparam integer PF_REG = 1; + localparam integer VADDR_MISS_OFFS = 0; + localparam integer LEN_MISS_OFFS = 64; +// 2, 3 (W1S|W1C|R) : Datapath control set/clear +localparam integer CTRL_DP_REG_SET = 2; +localparam integer CTRL_DP_REG_CLR = 3; + localparam integer CTRL_DP_DECOUPLE = 0; +// 4 (RW) : Timer stop at completion counter +localparam integer TMR_STOP_REG = 4; +// 5 (RO) : Timers +localparam integer TMR_REG = 5; + 
localparam integer TMR_RD_OFFS = 0; + localparam integer TMR_WR_OFFS = 64; +// 6 (RO) : Status +localparam integer STAT_REG = 6; + localparam integer STAT_CMD_USED_RD_OFFS = 0; + localparam integer STAT_CMD_USED_WR_OFFS = 32; + localparam integer STAT_DMA_RD_OFFS = 64; + localparam integer STAT_DMA_WR_OFFS = 96; + localparam integer STAT_SENT_RD_OFFS = 128; + localparam integer STAT_SENT_WR_OFFS = 160; + localparam integer STAT_PFAULTS_OFFS = 192; +// FV +// 10 (W1S) : Post +localparam integer FV_POST_REG = 10; +// 11 (RO) : Status cmd used +localparam integer FV_STAT_CMD_USED_REG = 11; +// + +// ---------------------------------------------------------------------------------------- +// Write process +// ---------------------------------------------------------------------------------------- +assign slv_reg_wren = axi_wready && axim_ctrl.wvalid; + +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) begin + slv_reg[CTRL_REG][15:0] <= 0; + slv_reg[CTRL_DP_REG_SET][31:0] <= 0; + slv_reg[TMR_STOP_REG][31:0] <= 1; + irq_pending <= 1'b0; // pf_irq mirrors irq_pending, so it has to leave reset deasserted +`ifdef EN_FV + rdma_post <= 1'b0; +`endif + end + else begin + slv_reg[CTRL_REG][31:0] <= 0; +`ifdef EN_FV + rdma_post <= 1'b0; +`endif + + // Page fault: latch the missing vaddr/len (read side wins when both miss) and raise the interrupt + if(rd_cnfg.pf.miss || wr_cnfg.pf.miss) begin + irq_pending <= 1'b1; + slv_reg[PF_REG][VADDR_MISS_OFFS+:VADDR_BITS] <= rd_cnfg.pf.miss ? rd_cnfg.pf.vaddr : wr_cnfg.pf.vaddr; // miss virtual address + slv_reg[PF_REG][LEN_MISS_OFFS+:LEN_BITS] <= rd_cnfg.pf.miss ? rd_cnfg.pf.len : wr_cnfg.pf.len; // miss length + end + if(slv_reg[CTRL_REG][CTRL_CLR_IRQ_PENDING]) + irq_pending <= 1'b0; // assigned last, so a clear overrides a set in the same cycle + + // Status counters (zeroed by the CTRL_CLR_STAT_* control bits) + slv_reg[STAT_REG][STAT_DMA_RD_OFFS+:32] <= slv_reg[CTRL_REG][CTRL_CLR_STAT_RD] ? 0 : slv_reg[STAT_REG][STAT_DMA_RD_OFFS+:32] + rd_cnfg.done_host + rd_cnfg.done_card + rd_cnfg.done_sync; + slv_reg[STAT_REG][STAT_DMA_WR_OFFS+:32] <= slv_reg[CTRL_REG][CTRL_CLR_STAT_WR] ? 
0 : slv_reg[STAT_REG][STAT_DMA_WR_OFFS+:32] + wr_cnfg.done_host + wr_cnfg.done_card + wr_cnfg.done_sync; + slv_reg[STAT_REG][STAT_SENT_RD_OFFS+:32] <= slv_reg[CTRL_REG][CTRL_CLR_STAT_RD] ? 0 : slv_reg[STAT_REG][STAT_SENT_RD_OFFS+:32] + rd_sent; + slv_reg[STAT_REG][STAT_SENT_WR_OFFS+:32] <= slv_reg[CTRL_REG][CTRL_CLR_STAT_WR] ? 0 : slv_reg[STAT_REG][STAT_SENT_WR_OFFS+:32] + wr_sent; + slv_reg[STAT_REG][STAT_PFAULTS_OFFS+:32] <= (slv_reg[CTRL_REG][CTRL_CLR_STAT_RD] || slv_reg[CTRL_REG][CTRL_CLR_STAT_WR]) ? + 0 : slv_reg[STAT_REG][STAT_PFAULTS_OFFS+:32] + (rd_cnfg.pf.miss || wr_cnfg.pf.miss); + + // Timers + slv_reg[TMR_REG][TMR_RD_OFFS+:64] <= slv_reg[CTRL_REG][CTRL_CLR_STAT_RD] ? + 0 : (slv_reg[STAT_REG][STAT_DMA_RD_OFFS+:32] >= slv_reg[TMR_STOP_REG][31:0]) ? slv_reg[TMR_REG][TMR_RD_OFFS+:64] : slv_reg[TMR_REG][TMR_RD_OFFS+:64] + 1; + slv_reg[TMR_REG][TMR_WR_OFFS+:64] <= slv_reg[CTRL_REG][CTRL_CLR_STAT_WR] ? + 0 : (slv_reg[STAT_REG][STAT_DMA_WR_OFFS+:32] >= slv_reg[TMR_STOP_REG][31:0]) ? 
slv_reg[TMR_REG][TMR_WR_OFFS+:64] : slv_reg[TMR_REG][TMR_WR_OFFS+:64] + 1; + + if(slv_reg_wren) begin + case (axi_awaddr[ADDR_LSB+:ADDR_MSB]) + CTRL_REG: // Control + for (int i = 0; i < (AVX_DATA_BITS/8); i++) begin + if(axim_ctrl.wstrb[i]) begin + slv_reg[CTRL_REG][(i*8)+:8] <= axim_ctrl.wdata[(i*8)+:8]; + end + end + CTRL_DP_REG_SET: // Control datapath set + for (int i = 0; i < CTRL_BYTES; i++) begin + if(axim_ctrl.wstrb[i]) begin + slv_reg[CTRL_DP_REG_SET][(i*8)+:8] <= slv_reg[CTRL_DP_REG_SET][(i*8)+:8] | axim_ctrl.wdata[(i*8)+:8]; + end + end + CTRL_DP_REG_CLR: // Control datapath clear + for (int i = 0; i < CTRL_BYTES; i++) begin + if(axim_ctrl.wstrb[i]) begin + slv_reg[CTRL_DP_REG_SET][(i*8)+:8] <= slv_reg[CTRL_DP_REG_SET][(i*8)+:8] & ~axim_ctrl.wdata[(i*8)+:8]; + end + end + TMR_STOP_REG: // Timer stop at + for (int i = 0; i < LEN_BYTES; i++) begin + if(axim_ctrl.wstrb[i]) begin + slv_reg[TMR_STOP_REG][(i*8)+:8] <= axim_ctrl.wdata[(i*8)+:8]; + end + end + +`ifdef EN_FV + FV_POST_REG: // Post + for (int i = 0; i < AVX_DATA_BITS/8; i++) begin + if(axim_ctrl.wstrb[i]) begin + slv_reg[FV_POST_REG][(i*8)+:8] <= axim_ctrl.wdata[(i*8)+:8]; + rdma_post <= 1'b1; + end + end +`endif + + default: ; + endcase + end + end +end + +// ---------------------------------------------------------------------------------------- +// Read process +// ---------------------------------------------------------------------------------------- +assign slv_reg_rden = axi_arv_arr_flag; // & ~axi_rvalid; + +always_ff @(posedge aclk, negedge aresetn) begin + if( aresetn == 1'b0 ) begin + axi_rdata <= 0; + end + else begin + axi_rdata <= 0; + if(slv_reg_rden) begin + case (axi_araddr[ADDR_LSB+:ADDR_MSB]) + PF_REG: // Page fault + axi_rdata[0+:96] <= slv_reg[PF_REG]; + CTRL_DP_REG_SET: // Datapath + axi_rdata[15:0] <= slv_reg[CTRL_DP_REG_SET][15:0]; + CTRL_DP_REG_CLR: // Datapath + axi_rdata[15:0] <= slv_reg[CTRL_DP_REG_SET][15:0]; + TMR_STOP_REG: // Timer stop at + axi_rdata[31:0] <= 
slv_reg[TMR_STOP_REG][31:0]; + TMR_REG: // Timers + axi_rdata[127:0] <= slv_reg[TMR_REG]; + STAT_REG: begin // Status + axi_rdata[63:0] <= {wr_queue_used, rd_queue_used}; + axi_rdata[223:64] <= slv_reg[STAT_REG][223:64]; + end + +`ifdef EN_FV + FV_POST_REG: + axi_rdata <= slv_reg[FV_POST_REG]; + FV_STAT_CMD_USED_REG: + axi_rdata[31:0] <= rdma_queue_used; +`endif + + default: ; + endcase + end + end +end + +// ---------------------------------------------------------------------------------------- +// Output +// ---------------------------------------------------------------------------------------- +assign rd_sent = rd_req.valid & rd_req.ready; +assign wr_sent = wr_req.valid & wr_req.ready; + +always_comb begin + // Page fault handling + rd_cnfg.restart = slv_reg[CTRL_REG][CTRL_CLR_IRQ_PENDING]; + wr_cnfg.restart = slv_reg[CTRL_REG][CTRL_CLR_IRQ_PENDING]; + pf_irq = irq_pending; + + // Decoupling + decouple = slv_reg[CTRL_DP_REG_SET][CTRL_DP_DECOUPLE]; +end + +reqIntf rd_req_cnfg(); +reqIntf wr_req_cnfg(); +reqIntf rd_req_host(); +reqIntf wr_req_host(); + +// Assign +assign rd_req_cnfg.req.vaddr = slv_reg[CTRL_REG][CTRL_VADDR_RD_OFFS+:VADDR_BITS]; +assign rd_req_cnfg.req.len = slv_reg[CTRL_REG][CTRL_LEN_RD_OFFS+:LEN_BITS]; +assign rd_req_cnfg.req.sync = slv_reg[CTRL_REG][CTRL_SYNC_RD]; +assign rd_req_cnfg.req.ctl = 1'b1; +assign rd_req_cnfg.req.stream = slv_reg[CTRL_REG][CTRL_STREAM_RD]; +assign rd_req_cnfg.req.dest = slv_reg[CTRL_REG][CTRL_DEST_RD+:4]; +assign rd_req_cnfg.req.rsrvd = 0; +assign rd_req_cnfg.valid = slv_reg[CTRL_REG][CTRL_START_RD]; + +assign wr_req_cnfg.req.vaddr = slv_reg[CTRL_REG][CTRL_VADDR_WR_OFFS+:VADDR_BITS]; +assign wr_req_cnfg.req.len = slv_reg[CTRL_REG][CTRL_LEN_WR_OFFS+:LEN_BITS]; +assign wr_req_cnfg.req.sync = slv_reg[CTRL_REG][CTRL_SYNC_WR]; +assign wr_req_cnfg.req.ctl = 1'b1; +assign wr_req_cnfg.req.stream = slv_reg[CTRL_REG][CTRL_STREAM_WR]; +assign wr_req_cnfg.req.dest = slv_reg[CTRL_REG][CTRL_DEST_WR+:4]; +assign 
wr_req_cnfg.req.rsrvd = 0; +assign wr_req_cnfg.valid = slv_reg[CTRL_REG][CTRL_START_WR]; + +// Command queues +axis_data_fifo_req_96_used inst_cmd_queue_rd ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(rd_req_cnfg.valid), + .s_axis_tready(rd_req_cnfg.ready), + .s_axis_tdata(rd_req_cnfg.req), + .m_axis_tvalid(rd_req_host.valid), + .m_axis_tready(rd_req_host.ready), + .m_axis_tdata(rd_req_host.req), + .axis_wr_data_count(rd_queue_used) +); + +axis_data_fifo_req_96_used inst_cmd_queue_wr ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(wr_req_cnfg.valid), + .s_axis_tready(wr_req_cnfg.ready), + .s_axis_tdata(wr_req_cnfg.req), + .m_axis_tvalid(wr_req_host.valid), + .m_axis_tready(wr_req_host.ready), + .m_axis_tdata(wr_req_host.req), + .axis_wr_data_count(wr_queue_used) +); + +`ifdef EN_BPSS + +reqIntf rd_req_user_q (); +reqIntf wr_req_user_q (); + +// Command queues (user logic) +axis_data_fifo_req_96_used inst_cmd_queue_rd_user ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(rd_req_user.valid), + .s_axis_tready(rd_req_user.ready), + .s_axis_tdata(rd_req_user.req), + .m_axis_tvalid(rd_req_user_q.valid), + .m_axis_tready(rd_req_user_q.ready), + .m_axis_tdata(rd_req_user_q.req), + .axis_wr_data_count() +); + +axis_data_fifo_req_96_used inst_cmd_queue_wr_user ( + .s_axis_aresetn(aresetn), + .s_axis_aclk(aclk), + .s_axis_tvalid(wr_req_user.valid), + .s_axis_tready(wr_req_user.ready), + .s_axis_tdata(wr_req_user.req), + .m_axis_tvalid(wr_req_user_q.valid), + .m_axis_tready(wr_req_user_q.ready), + .m_axis_tdata(wr_req_user_q.req), + .axis_wr_data_count() +); + +axis_interconnect_cnfg_req_arbiter inst_rd_interconnect_user ( + .ACLK(aclk), + .ARESETN(aresetn), + + .S00_AXIS_ACLK(aclk), + .S00_AXIS_ARESETN(aresetn), + .S00_AXIS_TVALID(rd_req_host.valid), + .S00_AXIS_TREADY(rd_req_host.ready), + .S00_AXIS_TDATA(rd_req_host.req), + + .S01_AXIS_ACLK(aclk), + .S01_AXIS_ARESETN(aresetn), + 
.S01_AXIS_TVALID(rd_req_user_q.valid), + .S01_AXIS_TREADY(rd_req_user_q.ready), + .S01_AXIS_TDATA(rd_req_user_q.req), + + .M00_AXIS_ACLK(aclk), + .M00_AXIS_ARESETN(aresetn), + .M00_AXIS_TVALID(rd_req.valid), + .M00_AXIS_TREADY(rd_req.ready), + .M00_AXIS_TDATA(rd_req.req), + + .S00_ARB_REQ_SUPPRESS(0), + .S01_ARB_REQ_SUPPRESS(0) +); + +axis_interconnect_cnfg_req_arbiter inst_wr_interconnect ( + .ACLK(aclk), + .ARESETN(aresetn), + + .S00_AXIS_ACLK(aclk), + .S00_AXIS_ARESETN(aresetn), + .S00_AXIS_TVALID(wr_req_host.valid), + .S00_AXIS_TREADY(wr_req_host.ready), + .S00_AXIS_TDATA(wr_req_host.req), + + .S01_AXIS_ACLK(aclk), + .S01_AXIS_ARESETN(aresetn), + .S01_AXIS_TVALID(wr_req_user_q.valid), + .S01_AXIS_TREADY(wr_req_user_q.ready), + .S01_AXIS_TDATA(wr_req_user_q.req), + + .M00_AXIS_ACLK(aclk), + .M00_AXIS_ARESETN(aresetn), + .M00_AXIS_TVALID(wr_req.valid), + .M00_AXIS_TREADY(wr_req.ready), + .M00_AXIS_TDATA(wr_req.req), + + .S00_ARB_REQ_SUPPRESS(0), + .S01_ARB_REQ_SUPPRESS(0) +); + +`else + +assign rd_req.req = rd_req_host.req; +assign rd_req.valid = rd_req_host.valid; +assign rd_req_host.ready = rd_req.ready; + +assign wr_req.req = wr_req_host.req; +assign wr_req.valid = wr_req_host.valid; +assign wr_req_host.ready = wr_req.ready; + +`endif + +`ifdef EN_FV + +// RDMA requests +metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_cnfg(); + +// Assign +assign rdma_req_cnfg.data[4:0] = slv_reg[FV_POST_REG][0+:5]; // opcode +assign rdma_req_cnfg.data[28:5] = slv_reg[FV_POST_REG][5+:24]; // local qpn +assign rdma_req_cnfg.data[32:29] = ID_REG; // local region +assign rdma_req_cnfg.data[33] = 1'b1; // host +assign rdma_req_cnfg.data[34] = 1'b0; // mode +assign rdma_req_cnfg.data[63:35] = 0; // reserved +assign rdma_req_cnfg.data[255:64] = slv_reg[FV_POST_REG][64+:192]; // params, length, remote vaddr, local vaddr +assign rdma_req_cnfg.valid = rdma_post; + +// Parser +network_req_parser #(.ID_REG(ID_REG), .HOST(1)) inst_parser (.aclk(aclk), .aresetn(aresetn), 
.req_in(rdma_req_cnfg), .req_out(rdma_req), .used(rdma_queue_used)); + +`endif + +// ---------------------------------------------------------------------------------------- +// AXI +// ---------------------------------------------------------------------------------------- + +// I/O +assign axim_ctrl.awready = axi_awready; +assign axim_ctrl.wready = axi_wready; +assign axim_ctrl.bresp = axi_bresp; +assign axim_ctrl.bvalid = axi_bvalid; +assign axim_ctrl.arready = axi_arready; +assign axim_ctrl.rdata = axi_rdata; +assign axim_ctrl.rresp = axi_rresp; +assign axim_ctrl.rlast = axi_rlast; +assign axim_ctrl.rvalid = axi_rvalid; +assign axim_ctrl.bid = axim_ctrl.awid; +assign axim_ctrl.rid = axim_ctrl.arid; +assign aw_wrap_size = (AVX_DATA_BITS/8 * (axi_awlen)); +assign ar_wrap_size = (AVX_DATA_BITS/8 * (axi_arlen)); +assign aw_wrap_en = ((axi_awaddr & aw_wrap_size) == aw_wrap_size)? 1'b1: 1'b0; +assign ar_wrap_en = ((axi_araddr & ar_wrap_size) == ar_wrap_size)? 1'b1: 1'b0; + +// awready +always @( posedge aclk ) +begin + if ( aresetn == 1'b0 ) + begin + axi_awready <= 1'b0; + axi_awv_awr_flag <= 1'b0; + end + else + begin + if (~axi_awready && axim_ctrl.awvalid && ~axi_awv_awr_flag && ~axi_arv_arr_flag) + begin + // slave is ready to accept an address and + // associated control signals + axi_awready <= 1'b1; + axi_awv_awr_flag <= 1'b1; + // used for generation of bresp() and bvalid + end + else if (axim_ctrl.wlast && axi_wready) + // preparing to accept next address after current write burst tx completion + begin + axi_awv_awr_flag <= 1'b0; + end + else + begin + axi_awready <= 1'b0; + end + end +end + +// awaddr +always @( posedge aclk ) +begin + if ( aresetn == 1'b0 ) + begin + axi_awaddr <= 0; + axi_awlen_cntr <= 0; + axi_awburst <= 0; + axi_awlen <= 0; + end + else + begin + if (~axi_awready && axim_ctrl.awvalid && ~axi_awv_awr_flag) + begin + // address latching + axi_awaddr <= axim_ctrl.awaddr[AVX_ADDR_BITS-1:0]; + axi_awburst <= axim_ctrl.awburst; + axi_awlen 
<= axim_ctrl.awlen; + // start address of transfer + axi_awlen_cntr <= 0; + end + else if((axi_awlen_cntr <= axi_awlen) && axi_wready && axim_ctrl.wvalid) + begin + + axi_awlen_cntr <= axi_awlen_cntr + 1; + + case (axi_awburst) + 2'b00: // fixed burst + // The write address for all the beats in the transaction are fixed + begin + axi_awaddr <= axi_awaddr; + //for awsize = 4 bytes (010) + end + 2'b01: //incremental burst + // The write address for all the beats in the transaction are increments by awsize + begin + axi_awaddr[AVX_ADDR_BITS-1:ADDR_LSB] <= axi_awaddr[AVX_ADDR_BITS-1:ADDR_LSB] + 1; + axi_awaddr[ADDR_LSB-1:0] <= {ADDR_LSB{1'b0}}; + end + 2'b10: //Wrapping burst + // The write address wraps when the address reaches wrap boundary + if (aw_wrap_en) + begin + axi_awaddr <= (axi_awaddr - aw_wrap_size); + end + else + begin + axi_awaddr[AVX_ADDR_BITS-1:ADDR_LSB] <= axi_awaddr[AVX_ADDR_BITS-1:ADDR_LSB] + 1; + axi_awaddr[ADDR_LSB-1:0] <= {ADDR_LSB{1'b0}}; + end + default: //reserved (incremental burst for example) + begin + axi_awaddr <= {axi_awaddr[AVX_ADDR_BITS-1:ADDR_LSB] + 1'b1, {ADDR_LSB{1'b0}}}; // same aligned word increment as the incremental arm (index must not land in the byte-offset bits) + end + endcase + end + end +end + +// wready +always @( posedge aclk ) +begin + if ( aresetn == 1'b0 ) + begin + axi_wready <= 1'b0; + end + else + begin + if ( ~axi_wready && axim_ctrl.wvalid && axi_awv_awr_flag) + begin + // slave can accept the write data + axi_wready <= 1'b1; + end + //else if (~axi_awv_awr_flag) + else if (axim_ctrl.wlast && axi_wready) + begin + axi_wready <= 1'b0; + end + end +end + + +// bvalid & bresp +always @( posedge aclk ) +begin + if ( aresetn == 1'b0 ) + begin + axi_bvalid <= 0; + axi_bresp <= 2'b0; + end + else + begin + if (axi_awv_awr_flag && axi_wready && axim_ctrl.wvalid && ~axi_bvalid && axim_ctrl.wlast ) + begin + axi_bvalid <= 1'b1; + axi_bresp <= 2'b0; + // 'OKAY' response + end + else + begin + if (axim_ctrl.bready && axi_bvalid) + //check if bready is asserted while bvalid is high) + //(there is a possibility that bready is always asserted 
high) + begin + axi_bvalid <= 1'b0; + end + end + end + end + +// arready +always @( posedge aclk ) +begin + if ( aresetn == 1'b0 ) + begin + axi_arready <= 1'b0; + axi_arv_arr_flag <= 1'b0; + end + else + begin + if (~axi_arready && axim_ctrl.arvalid && ~axi_awv_awr_flag && ~axi_arv_arr_flag) + begin + axi_arready <= 1'b1; + axi_arv_arr_flag <= 1'b1; + end + else if (axi_rvalid && axim_ctrl.rready && axi_arlen_cntr == axi_arlen) + // preparing to accept next address after current read completion + begin + axi_arv_arr_flag <= 1'b0; + end + else + begin + axi_arready <= 1'b0; + end + end +end + +// araddr +always @( posedge aclk ) +begin + if ( aresetn == 1'b0 ) + begin + axi_araddr <= 0; + axi_arlen_cntr <= 0; + axi_arburst <= 0; + axi_arlen <= 0; + axi_rlast <= 1'b0; + end + else + begin + if (~axi_arready && axim_ctrl.arvalid && ~axi_arv_arr_flag) + begin + // address latching + axi_araddr <= axim_ctrl.araddr[AVX_ADDR_BITS-1:0]; + axi_arburst <= axim_ctrl.arburst; + axi_arlen <= axim_ctrl.arlen; + // start address of transfer + axi_arlen_cntr <= 0; + axi_rlast <= 1'b0; + end + else if((axi_arlen_cntr <= axi_arlen) && axi_rvalid && axim_ctrl.rready) + begin + + axi_arlen_cntr <= axi_arlen_cntr + 1; + axi_rlast <= 1'b0; + + case (axi_arburst) + 2'b00: // fixed burst + // The read address for all the beats in the transaction are fixed + begin + axi_araddr <= axi_araddr; + end + 2'b01: //incremental burst + // The read address for all the beats in the transaction are increments by awsize + begin + axi_araddr[AVX_ADDR_BITS-1:ADDR_LSB] <= axi_araddr[AVX_ADDR_BITS-1:ADDR_LSB] + 1; + axi_araddr[ADDR_LSB-1:0] <= {ADDR_LSB{1'b0}}; + end + 2'b10: //Wrapping burst + // The read address wraps when the address reaches wrap boundary + if (ar_wrap_en) + begin + axi_araddr <= (axi_araddr - ar_wrap_size); + end + else + begin + axi_araddr[AVX_ADDR_BITS-1:ADDR_LSB] <= axi_araddr[AVX_ADDR_BITS-1:ADDR_LSB] + 1; + axi_araddr[ADDR_LSB-1:0] <= {ADDR_LSB{1'b0}}; + end + default: 
//reserved (incremental burst for example) + begin + axi_araddr <= {axi_araddr[AVX_ADDR_BITS-1:ADDR_LSB] + 1'b1, {ADDR_LSB{1'b0}}}; // same aligned word increment as the incremental arm (index must not land in the byte-offset bits) + end + endcase + end + else if((axi_arlen_cntr == axi_arlen) && ~axi_rlast && axi_arv_arr_flag ) + begin + axi_rlast <= 1'b1; + end + else if (axim_ctrl.rready) + begin + axi_rlast <= 1'b0; + end + end +end + +// rvalid & rresp +always @( posedge aclk ) +begin + if ( aresetn == 1'b0 ) + begin + axi_rvalid <= 0; + axi_rresp <= 0; + end + else + begin + if (axi_arv_arr_flag && ~axi_rvalid) + begin + axi_rvalid <= 1'b1; + axi_rresp <= 2'b0; + // 'OKAY' response + end + else if (axi_rvalid && axim_ctrl.rready) + begin + axi_rvalid <= 1'b0; + end + end +end + +endmodule \ No newline at end of file diff --git a/hw/hdl/slave/static_slave.sv b/hw/hdl/slave/static_slave.sv new file mode 100644 index 00000000..2a29da72 --- /dev/null +++ b/hw/hdl/slave/static_slave.sv @@ -0,0 +1,526 @@ +/** + * Static configuration slave + */ + +import lynxTypes::*; + +module static_slave ( + input logic aclk, + input logic aresetn, + +`ifdef EN_PR + // XDMA descriptors + xdmaIntf.m pr_xdma_req, +`endif + +`ifdef EN_FV + // ARP + metaIntf.m arp_lookup_request, + metaIntf.s arp_lookup_reply, + + // IP + metaIntf.m set_ip_addr, + metaIntf.m set_board_number, + + // QP + metaIntf.m qp_interface, + metaIntf.m conn_interface, +`endif + + // Lowspeed control (only applicable to u250) + output logic [2:0] lowspeed_ctrl, + + // Control bus (HOST) + AXI4L.s axi_ctrl +); + +// -- Decl ---------------------------------------------------------- +// ------------------------------------------------------------------ + +// Constants +`ifdef EN_FV +localparam integer N_REGS = 29; +`else + `ifdef EN_PR +localparam integer N_REGS = 14; + `else +localparam integer N_REGS = 8; + `endif +`endif +localparam integer ADDR_LSB = $clog2(AXIL_DATA_BITS/8); +localparam integer ADDR_MSB = $clog2(N_REGS); +localparam integer AXIL_ADDR_BITS = ADDR_LSB + ADDR_MSB; + +// Internal registers +logic [AXIL_ADDR_BITS-1:0] 
axi_awaddr; +logic axi_awready; +logic [AXIL_ADDR_BITS-1:0] axi_araddr; +logic axi_arready; +logic [1:0] axi_bresp; +logic axi_bvalid; +logic axi_wready; +logic [AXIL_DATA_BITS-1:0] axi_rdata; +logic [1:0] axi_rresp; +logic axi_rvalid; + +// Registers +logic [N_REGS-1:0][AXIL_DATA_BITS-1:0] slv_reg; +logic slv_reg_rden; +logic slv_reg_wren; +logic aw_en; + +// -- Def ----------------------------------------------------------- +// ------------------------------------------------------------------ + +// -- Register map ----------------------------------------------------------------------- +// CONFIG +// 0 (RW) : Probe +localparam integer PROBE_REG = 0; +// 1 (RO) : Number of channels +localparam integer N_CHAN_REG = 1; +// 2 (RO) : Number of regions +localparam integer N_REGIONS_REG = 2; +// 3 (RO) : Control config +localparam integer CTRL_CNFG_REG = 3; +// 4 (RO) : Memory config +localparam integer MEM_CNFG_REG = 4; +// 5 (RO) : Partial reconfiguration config +localparam integer PR_CNFG_REG = 5; +// 6 (RO) : FV config +localparam integer FV_CNFG_REG = 6; +// 7 (RW) : Control (only for u250) +localparam integer LOWSPEED_REG = 7; +// PR +// 10 (W1S) : PR control +localparam integer PR_CTRL_REG = 10; + localparam integer PR_START = 0; + localparam integer PR_CTL = 1; + localparam integer PR_CLR = 2; +// 11 (RO) : Status +localparam integer PR_STAT_REG = 11; + localparam integer PR_DONE = 0; + localparam integer PR_READY = 1; +// 12 (RW) : Physical address +localparam integer PR_ADDR_REG = 12; +// 13 (RW) : Length read +localparam integer PR_LEN_REG = 13; +// FV +// 20 (RW) : IP address +localparam integer FV_IPADDR_REG = 20; +// 21 (RW) : Board number +localparam integer FV_BOARDNUM_REG = 21; +// 22 (W1S) : ARP lookup +localparam integer FV_ARP_REG = 22; +// 23 - 25 (RW) : Write QP context +localparam integer FV_CTX_REG_0 = 23; +localparam integer FV_CTX_REG_1 = 24; +localparam integer FV_CTX_REG_2 = 25; +// 26 - 28 (RW) : Write QP connection +localparam integer 
FV_CONN_REG_0 = 26; +localparam integer FV_CONN_REG_1 = 27; +localparam integer FV_CONN_REG_2 = 28; +// + +// ---------------------------------------------------------------------------------------- +// Write process +// ---------------------------------------------------------------------------------------- +assign slv_reg_wren = axi_wready && axi_ctrl.wvalid && axi_awready && axi_ctrl.awvalid; + +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) begin + slv_reg[LOWSPEED_REG][2:0] <= ~0; + +`ifdef EN_PR + slv_reg[PR_CTRL_REG][15:0] <= 0; + slv_reg[PR_STAT_REG][15:0] <= 0; +`endif + +`ifdef EN_FV + set_ip_addr.valid <= 1'b0; + set_board_number.valid <= 1'b0; + arp_lookup_request.valid <= 1'b0; + arp_lookup_reply.ready <= 1'b1; + + qp_interface.valid <= 1'b0; + conn_interface.valid <= 1'b0; +`endif + end + else begin +`ifdef EN_PR + slv_reg[PR_CTRL_REG] <= 0; + slv_reg[PR_STAT_REG][PR_DONE] <= slv_reg[PR_CTRL_REG][PR_CLR] ? 1'b0 : pr_req.done ? 1'b1 : slv_reg[PR_STAT_REG][PR_DONE]; // sticky done flag: set by pr_req.done, cleared by the self-clearing PR_CLR control bit +`endif + +`ifdef EN_FV + arp_lookup_request.valid <= arp_lookup_request.ready ? 1'b0 : arp_lookup_request.valid; + arp_lookup_reply.ready <= 1'b1; + + qp_interface.valid <= qp_interface.ready ? 1'b0 : qp_interface.valid; + conn_interface.valid <= conn_interface.ready ? 
1'b0 : conn_interface.valid; +`endif + + if(slv_reg_wren) begin + case (axi_awaddr[ADDR_LSB+:ADDR_MSB]) + PROBE_REG: // Probe + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[PROBE_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + + LOWSPEED_REG: // Lowspeed control + for (int i = 0; i < 1; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[LOWSPEED_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + +`ifdef EN_PR + PR_CTRL_REG: // PR control + for (int i = 0; i < 2; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[PR_CTRL_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + PR_ADDR_REG: // PR address + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[PR_ADDR_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + PR_LEN_REG: // PR length + for (int i = 0; i < 4; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[PR_LEN_REG][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end +`endif + +`ifdef EN_FV + FV_IPADDR_REG: // IP address + for (int i = 0; i < 4; i++) begin + if(axi_ctrl.wstrb[i]) begin + set_ip_addr.data[(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + set_ip_addr.valid <= 1'b1; + end + end + FV_BOARDNUM_REG: // Board number + for (int i = 0; i < 1; i++) begin + if(axi_ctrl.wstrb[i]) begin + set_board_number.data[3:0] <= axi_ctrl.wdata[3:0]; + set_board_number.valid <= 1'b1; + end + end + FV_ARP_REG: // ARP lookup + for (int i = 0; i < 4; i++) begin + if(axi_ctrl.wstrb[i]) begin + arp_lookup_request.data[(i*8)+:8] <= axi_ctrl.wdata[(24-i*8)+:8]; + arp_lookup_request.valid <= 1'b1; + end + end + FV_CTX_REG_0: // Context + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[FV_CTX_REG_0][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + FV_CTX_REG_1: // Context + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[FV_CTX_REG_1][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + 
FV_CTX_REG_2: // Context final + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[FV_CTX_REG_2][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + qp_interface.valid <= 1'b1; + end + end + FV_CONN_REG_0: // Connection + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[FV_CONN_REG_0][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + FV_CONN_REG_1: // Connection + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[FV_CONN_REG_1][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + end + end + FV_CONN_REG_2: // Connection final + for (int i = 0; i < AXIL_DATA_BITS/8; i++) begin + if(axi_ctrl.wstrb[i]) begin + slv_reg[FV_CONN_REG_2][(i*8)+:8] <= axi_ctrl.wdata[(i*8)+:8]; + conn_interface.valid <= 1'b1; + end + end +`endif + + default : ; + endcase + end + end +end + +// ---------------------------------------------------------------------------------------- +// Read process +// ---------------------------------------------------------------------------------------- +assign slv_reg_rden = axi_arready & axi_ctrl.arvalid & ~axi_rvalid; + +always_ff @(posedge aclk, negedge aresetn) begin + if( aresetn == 1'b0 ) begin + axi_rdata <= 0; + end + else begin // read data is held between reads so it stays stable while rvalid waits for rready + if(slv_reg_rden) begin + axi_rdata <= 0; // default for every bit the selected register does not drive + case (axi_araddr[ADDR_LSB+:ADDR_MSB]) + PROBE_REG: + axi_rdata <= slv_reg[PROBE_REG]; + N_CHAN_REG: // Number of channels + axi_rdata <= N_CHAN; + N_REGIONS_REG: // Number of regions + axi_rdata <= N_REGIONS; + CTRL_CNFG_REG: begin // Control config + axi_rdata[0] <= AVX_FLOW; + axi_rdata[1] <= BPSS_FLOW; + end + MEM_CNFG_REG: begin // Memory config + axi_rdata[0] <= DDR_FLOW; + axi_rdata[5:1] <= N_DDR_CHAN; + end + PR_CNFG_REG: // PR config + axi_rdata <= PR_FLOW; + FV_CNFG_REG: begin // FV config + axi_rdata[0] <= FV_FLOW; + axi_rdata[1] <= FV_VERBS; + end + LOWSPEED_REG: + axi_rdata[2:0] <= slv_reg[LOWSPEED_REG][2:0]; + +`ifdef EN_PR + PR_STAT_REG: + axi_rdata[1:0] <= 
{pr_req.ready, slv_reg[PR_STAT_REG][PR_DONE]}; + PR_ADDR_REG: + axi_rdata <= slv_reg[PR_ADDR_REG]; + PR_LEN_REG: + axi_rdata[31:0] <= slv_reg[PR_LEN_REG][31:0]; +`endif + +`ifdef EN_FV + FV_CTX_REG_0: // Context + axi_rdata <= slv_reg[FV_CTX_REG_0]; + FV_CTX_REG_1: // Context + axi_rdata <= slv_reg[FV_CTX_REG_1]; + FV_CTX_REG_2: // Context final + axi_rdata <= slv_reg[FV_CTX_REG_2]; + FV_CONN_REG_0: // Connection + axi_rdata <= slv_reg[FV_CONN_REG_0]; + FV_CONN_REG_1: // Connection + axi_rdata <= slv_reg[FV_CONN_REG_1]; + FV_CONN_REG_2: // Connection final + axi_rdata <= slv_reg[FV_CONN_REG_2]; +`endif + + default: ; + endcase + end + end +end + +// ---------------------------------------------------------------------------------------- +// Output +// ---------------------------------------------------------------------------------------- +assign lowspeed_ctrl = slv_reg[LOWSPEED_REG][2:0]; + +`ifdef EN_PR + +dmaIntf pr_req (); +dmaIntf xdma_req (); + +always_comb begin + // PR request + pr_req.valid = slv_reg[PR_CTRL_REG][PR_START]; + pr_req.req.ctl = slv_reg[PR_CTRL_REG][PR_CTL]; + pr_req.req.paddr = slv_reg[PR_ADDR_REG]; + pr_req.req.len = slv_reg[PR_LEN_REG]; + // Done signal + pr_req.done = xdma_req.done; +end + +queue_stream #( + .QTYPE(dma_req_t), + .QDEPTH(4) +) inst_que ( + .aclk(aclk), + .aresetn(aresetn), + .val_snk(pr_req.valid), + .rdy_snk(pr_req.ready), + .data_snk(pr_req.req), + .val_src(xdma_req.valid), + .rdy_src(xdma_req.ready), + .data_src(xdma_req.req) +); + +// XDMA +assign pr_xdma_req.h2c_ctl = {{11{1'b0}}, xdma_req.req.ctl, {2{1'b0}}, {2{xdma_req.req.ctl}}}; +assign pr_xdma_req.h2c_addr = xdma_req.req.paddr; +assign pr_xdma_req.h2c_len = xdma_req.req.len; +assign pr_xdma_req.h2c_valid = xdma_req.valid; + +assign pr_xdma_req.c2h_ctl = 0; +assign pr_xdma_req.c2h_addr = 0; +assign pr_xdma_req.c2h_len = 0; +assign pr_xdma_req.c2h_valid = 0; + +assign xdma_req.ready = pr_xdma_req.h2c_ready;; +assign xdma_req.done = pr_xdma_req.h2c_status[1]; + 
+`endif + +`ifdef EN_FV + +// FV qp interface +assign qp_interface.data[54:0] = slv_reg[FV_CTX_REG_0][54:0]; // remote psn, local qpn, local region, qp state +assign qp_interface.data[94:55] = slv_reg[FV_CTX_REG_1][39:0]; // remote key, local psn +assign qp_interface.data[142:95] = slv_reg[FV_CTX_REG_2][47:0]; // vaddr +assign qp_interface.data[143:143] = 0; + +// FV qp connection interface +assign conn_interface.data[39:0] = slv_reg[FV_CONN_REG_0][39:0]; // remote qpn, local qpn (24?) +assign conn_interface.data[103:40] = slv_reg[FV_CONN_REG_1][63:0]; // gid +assign conn_interface.data[167:104] = slv_reg[FV_CONN_REG_2][63:0]; // gid +assign conn_interface.data[183:168] = slv_reg[FV_CONN_REG_0][55:40]; // port + +`endif + +// ---------------------------------------------------------------------------------------- +// AXI +// ---------------------------------------------------------------------------------------- + +// I/O +assign axi_ctrl.awready = axi_awready; +assign axi_ctrl.arready = axi_arready; +assign axi_ctrl.bresp = axi_bresp; +assign axi_ctrl.bvalid = axi_bvalid; +assign axi_ctrl.wready = axi_wready; +assign axi_ctrl.rdata = axi_rdata; +assign axi_ctrl.rresp = axi_rresp; +assign axi_ctrl.rvalid = axi_rvalid; + +// awready and awaddr +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_awready <= 1'b0; + axi_awaddr <= 0; + aw_en <= 1'b1; + end + else + begin + if (~axi_awready && axi_ctrl.awvalid && axi_ctrl.wvalid && aw_en) + begin + axi_awready <= 1'b1; + aw_en <= 1'b0; + axi_awaddr <= axi_ctrl.awaddr; + end + else if (axi_ctrl.bready && axi_bvalid) + begin + aw_en <= 1'b1; + axi_awready <= 1'b0; + end + else + begin + axi_awready <= 1'b0; + end + end +end + +// arready and araddr +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_arready <= 1'b0; + axi_araddr <= 0; + end + else + begin + if (~axi_arready && axi_ctrl.arvalid) + begin + axi_arready <= 1'b1; + axi_araddr <= 
axi_ctrl.araddr; + end + else + begin + axi_arready <= 1'b0; + end + end +end + +// bvalid and bresp +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_bvalid <= 0; + axi_bresp <= 2'b0; + end + else + begin + if (axi_awready && axi_ctrl.awvalid && ~axi_bvalid && axi_wready && axi_ctrl.wvalid) + begin + axi_bvalid <= 1'b1; + axi_bresp <= 2'b0; + end + else + begin + if (axi_ctrl.bready && axi_bvalid) + begin + axi_bvalid <= 1'b0; + end + end + end +end + +// wready +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_wready <= 1'b0; + end + else + begin + if (~axi_wready && axi_ctrl.wvalid && axi_ctrl.awvalid && aw_en ) + begin + axi_wready <= 1'b1; + end + else + begin + axi_wready <= 1'b0; + end + end +end + +// rvalid and rresp (1Del?) +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_rvalid <= 0; + axi_rresp <= 0; + end + else + begin + if (axi_arready && axi_ctrl.arvalid && ~axi_rvalid) + begin + axi_rvalid <= 1'b1; + axi_rresp <= 2'b0; + end + else if (axi_rvalid && axi_ctrl.rready) + begin + axi_rvalid <= 1'b0; + end + end +end + +endmodule // cnfg_slave \ No newline at end of file diff --git a/hw/hdl/slave/tlb_slave.sv b/hw/hdl/slave/tlb_slave.sv new file mode 100644 index 00000000..af46f6b4 --- /dev/null +++ b/hw/hdl/slave/tlb_slave.sv @@ -0,0 +1,239 @@ +/** + * TLB bram + * + * Implementation of the TLB in the on-chip memory (N_ASSOC RAMs, written and read back over AXI-Lite, looked up through the TLB interface).
+ * @param: + * - TLB_ORDER : TLB size (power of 2) + * - PG_BITS : Initial addressing bit + * - N_ASSOC : Set associativity + */ + +import lynxTypes::*; + +module tlb_slave #( + parameter integer TLB_ORDER = 10, + parameter integer PG_BITS = 12, + parameter integer N_ASSOC = 4 +) ( + input logic aclk, + input logic aresetn, + + AXI4L.s axi_ctrl, + + tlbIntf.s TLB +); + +// -- Decl ---------------------------------------------------------- +// ------------------------------------------------------------------ + +// Constants: the AXI address is split into {way select, entry index, byte offset} +localparam integer N_BRAM_BITS = $clog2(N_ASSOC); +localparam integer ADDR_LSB = (AXIL_DATA_BITS/32) + 1; +localparam integer ADDR_MSB = TLB_ORDER; +localparam integer AXIL_ADDR_BITS = N_BRAM_BITS + ADDR_MSB + ADDR_LSB; + +// Internal registers +logic [AXIL_ADDR_BITS-1:0] axi_awaddr; +logic axi_awready; +logic [AXIL_ADDR_BITS-1:0] axi_araddr; +logic axi_arready; +logic [1:0] axi_bresp; +logic axi_bvalid; +logic axi_wready; +logic [AXIL_DATA_BITS-1:0] axi_rdata; +logic [1:0] axi_rresp; +logic axi_rvalid; + +// Internal signals +logic slv_reg_rden; +logic slv_reg_wren; +logic aw_en; + +// -- Def ----------------------------------------------------------- +// ------------------------------------------------------------------ + +// Write active: write address and write data are both handshaking in this cycle +assign slv_reg_wren = axi_wready && axi_ctrl.wvalid && axi_awready && axi_ctrl.awvalid; + +// Read active: read address handshake with no read response pending (not referenced elsewhere in this module) +assign slv_reg_rden = axi_arready & axi_ctrl.arvalid & ~axi_rvalid; + +// Page table: AXI-side RAM address, per-way byte write enables and per-way read data +logic [ADDR_MSB-1:0] ram_addr; +logic [N_ASSOC-1:0] [(AXIL_DATA_BITS/8)-1:0] ram_wr_en; +logic [N_ASSOC-1:0] [AXIL_DATA_BITS-1:0] ram_rd_data; + +always_comb begin + ram_wr_en = 0; + axi_rdata = ram_rd_data[0]; + + + if(slv_reg_wren) begin + ram_addr = axi_awaddr[ADDR_LSB+ADDR_MSB-1:ADDR_LSB]; + if(N_ASSOC > 1) begin + for (int i = 0; i < N_ASSOC; i++) begin + if(i == axi_awaddr[AXIL_ADDR_BITS-1:ADDR_MSB+ADDR_LSB]) begin + ram_wr_en[i] = axi_ctrl.wstrb; + end + end + end + else begin +
ram_wr_en[0][(AXIL_DATA_BITS/8)-1:0] = axi_ctrl.wstrb; + end + end + else begin + ram_addr = axi_araddr[ADDR_LSB+ADDR_MSB-1:ADDR_LSB]; + if(N_ASSOC > 1) begin + for (int i = 0; i < N_ASSOC; i++) begin + if(i == axi_araddr[AXIL_ADDR_BITS-1:ADDR_MSB+ADDR_LSB]) begin + axi_rdata = ram_rd_data[i]; + end + end + end + else begin + axi_rdata = ram_rd_data[0][AXIL_DATA_BITS-1:0]; + end + end +end + +// TLB: one ram_tp_nc per way; port A carries the AXI-Lite accesses, port B is indexed by TLB.addr +for (genvar i = 0; i < N_ASSOC; i++) begin + // BRAM instantiation (way i) + ram_tp_nc #( + .ADDR_BITS(TLB_ORDER), + .DATA_BITS(TLB_DATA_BITS) + ) inst_pt ( + .clk (aclk), + .a_we (ram_wr_en[i]), + .a_addr (ram_addr), + .b_addr (TLB.addr[PG_BITS+TLB_ORDER-1:PG_BITS]), + .a_data_in (axi_ctrl.wdata), + .a_data_out(ram_rd_data[i]), + .b_data_out(TLB.data[i]) + ); +end + +// I/O: drive the AXI-Lite outputs from the internal signals +assign axi_ctrl.awready = axi_awready; +assign axi_ctrl.arready = axi_arready; +assign axi_ctrl.bresp = axi_bresp; +assign axi_ctrl.bvalid = axi_bvalid; +assign axi_ctrl.wready = axi_wready; +assign axi_ctrl.rdata = axi_rdata; +assign axi_ctrl.rresp = axi_rresp; +assign axi_ctrl.rvalid = axi_rvalid; + +// awready and awaddr: the address is accepted only when awvalid and wvalid are both high; aw_en blocks a new acceptance until the previous response is taken (bready && bvalid) +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_awready <= 1'b0; + axi_awaddr <= 0; + aw_en <= 1'b1; + end + else + begin + if (~axi_awready && axi_ctrl.awvalid && axi_ctrl.wvalid && aw_en) + begin + axi_awready <= 1'b1; + aw_en <= 1'b0; + axi_awaddr <= axi_ctrl.awaddr; + end + else if (axi_ctrl.bready && axi_bvalid) + begin + aw_en <= 1'b1; + axi_awready <= 1'b0; + end + else + begin + axi_awready <= 1'b0; + end + end +end + +// arready and araddr: one-cycle arready pulse when arvalid is seen, araddr is latched at the same time +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_arready <= 1'b0; + axi_araddr <= 0; + end + else + begin + if (~axi_arready && axi_ctrl.arvalid) + begin + axi_arready <= 1'b1; + axi_araddr <= axi_ctrl.araddr; + end + else + begin + axi_arready <= 1'b0; + end + end +end + +// bvalid and bresp: raised once both write handshakes complete, cleared when bready is seen; bresp is always 2'b0 +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn ==
1'b0 ) + begin + axi_bvalid <= 0; + axi_bresp <= 2'b0; + end + else + begin + if (axi_awready && axi_ctrl.awvalid && ~axi_bvalid && axi_wready && axi_ctrl.wvalid) + begin + axi_bvalid <= 1'b1; + axi_bresp <= 2'b0; + end + else + begin + if (axi_ctrl.bready && axi_bvalid) + begin + axi_bvalid <= 1'b0; + end + end + end +end + +// wready: one-cycle pulse when awvalid and wvalid are both high and aw_en allows a new write +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_wready <= 1'b0; + end + else + begin + if (~axi_wready && axi_ctrl.wvalid && axi_ctrl.awvalid && aw_en ) + begin + axi_wready <= 1'b1; + end + else + begin + axi_wready <= 1'b0; + end + end +end + +// rvalid and rresp: rvalid is registered, so it rises the cycle after the arready/arvalid handshake and is held until rready; rresp is always 2'b0 +always_ff @(posedge aclk, negedge aresetn) begin + if ( aresetn == 1'b0 ) + begin + axi_rvalid <= 0; + axi_rresp <= 0; + end + else + begin + if (axi_arready && axi_ctrl.arvalid && ~axi_rvalid) + begin + axi_rvalid <= 1'b1; + axi_rresp <= 2'b0; + end + else if (axi_rvalid && axi_ctrl.rready) + begin + axi_rvalid <= 1'b0; + end + end +end + +endmodule // tlb_slave \ No newline at end of file diff --git a/hw/hdl/util/fifo.sv b/hw/hdl/util/fifo.sv new file mode 100644 index 00000000..96e0458a --- /dev/null +++ b/hw/hdl/util/fifo.sv @@ -0,0 +1,83 @@ +import lynxTypes::*; + +module fifo #( + parameter integer DATA_BITS = 64, + parameter integer FIFO_SIZE = 8 +) ( + input logic aclk, + input logic aresetn, + + input logic rd, + input logic wr, + + output logic ready_rd, + output logic ready_wr, + + input logic [DATA_BITS-1:0] data_in, + output logic [DATA_BITS-1:0] data_out +); + +// Constants: pointer width derived from the FIFO depth +localparam integer PNTR_BITS = $clog2(FIFO_SIZE); + +// Internal registers: n_entries has one more bit than the pointers (it is compared against FIFO_SIZE) +logic [PNTR_BITS-1:0] wr_pntr; +logic [PNTR_BITS-1:0] rd_pntr; +logic [PNTR_BITS:0] n_entries; + +logic isFull; +logic isEmpty; + +logic [FIFO_SIZE-1:0][DATA_BITS-1:0] data; + +// FIFO flags derived from the entry count +assign isFull = (n_entries == FIFO_SIZE); +assign isEmpty = (n_entries == 0); + +genvar i; // not used anywhere in this module + +always_ff @(posedge aclk or negedge aresetn) begin + if(aresetn == 1'b0) begin +
n_entries <= 0; + data <= 0; + end else begin + // Number of entries: decrement on an accepted read without an accepted write, increment on the reverse, unchanged when both or neither are accepted + if (rd && !isEmpty && (!wr || isFull)) + n_entries <= n_entries - 1; + else if (wr && !isFull && (!rd || isEmpty)) + n_entries <= n_entries + 1; + // Data: store the incoming word at the write pointer when the write is accepted + if(wr && !isFull) + data[wr_pntr] <= data_in; + end +end + +always_ff @(posedge aclk or negedge aresetn) begin + if(aresetn == 1'b0) begin + rd_pntr <= 0; + wr_pntr <= 0; + end else begin + // Write pointer: advance on an accepted write, wrapping after FIFO_SIZE-1 + if(wr && !isFull) begin + if(wr_pntr == (FIFO_SIZE-1)) + wr_pntr <= 0; + else + wr_pntr <= wr_pntr + 1; + end + // Read pointer: advance on an accepted read, wrapping after FIFO_SIZE-1 + if(rd && !isEmpty) begin + if(rd_pntr == (FIFO_SIZE-1)) + rd_pntr <= 0; + else + rd_pntr <= rd_pntr + 1; + end + end +end + +// Output: ready flags plus the entry at the read pointer (data_out is not registered) +assign ready_rd = ~isEmpty; +assign ready_wr = ~isFull; + +assign data_out = data[rd_pntr]; + +endmodule // fifo \ No newline at end of file diff --git a/hw/hdl/util/queue.sv b/hw/hdl/util/queue.sv new file mode 100644 index 00000000..62884a26 --- /dev/null +++ b/hw/hdl/util/queue.sv @@ -0,0 +1,33 @@ +import lynxTypes::*; + +module queue #( + parameter type QTYPE = logic[63:0], + parameter QDEPTH = 8 +) ( + input logic aclk, + input logic aresetn, + + input logic val_snk, + output logic rdy_snk, + input QTYPE data_snk, + + input logic val_src, + output logic rdy_src, + output QTYPE data_src +); + +fifo #( + .DATA_BITS($bits(QTYPE)), + .FIFO_SIZE(QDEPTH) +) inst_fifo ( + .aclk (aclk), + .aresetn (aresetn), + .rd (val_src), // val_src is an input of this module and acts as the pop request + .wr (val_snk), + .ready_rd (rdy_src), // rdy_src is an output: high while the FIFO holds data + .ready_wr (rdy_snk), + .data_in (data_snk), + .data_out (data_src) +); + +endmodule \ No newline at end of file diff --git a/hw/hdl/util/queue_meta.sv b/hw/hdl/util/queue_meta.sv new file mode 100644 index 00000000..1595a065 --- /dev/null +++ b/hw/hdl/util/queue_meta.sv @@ -0,0 +1,33 @@ +import lynxTypes::*; + +module queue_meta #( + parameter QDEPTH = 8 +) ( + input logic aclk, + input logic aresetn, + + metaIntf.s sink, + metaIntf.m src +); + +logic val_rd; +logic rdy_rd; + +fifo #( + .DATA_BITS($bits(sink.data)),
+ .FIFO_SIZE(QDEPTH) +) inst_fifo ( + .aclk (aclk), + .aresetn (aresetn), + .rd (val_rd), + .wr (sink.valid), + .ready_rd (rdy_rd), + .ready_wr (sink.ready), + .data_in (sink.data), + .data_out (src.data) +); + +assign src.valid = rdy_rd; +assign val_rd = src.valid & src.ready; + +endmodule \ No newline at end of file diff --git a/hw/hdl/util/queue_stream.sv b/hw/hdl/util/queue_stream.sv new file mode 100644 index 00000000..2780f2f0 --- /dev/null +++ b/hw/hdl/util/queue_stream.sv @@ -0,0 +1,39 @@ +import lynxTypes::*; + +module queue_stream #( + parameter type QTYPE = logic[63:0], + parameter QDEPTH = 8 +) ( + input logic aclk, + input logic aresetn, + + input logic val_snk, + output logic rdy_snk, + input QTYPE data_snk, + + output logic val_src, + input logic rdy_src, + output QTYPE data_src +); + +logic val_rd; +logic rdy_rd; + +fifo #( + .DATA_BITS($bits(QTYPE)), + .FIFO_SIZE(QDEPTH) +) inst_fifo ( + .aclk (aclk), + .aresetn (aresetn), + .rd (val_rd), + .wr (val_snk), + .ready_rd (rdy_rd), + .ready_wr (rdy_snk), + .data_in (data_snk), + .data_out (data_src) +); + +assign val_src = rdy_rd; +assign val_rd = val_src & rdy_src; + +endmodule \ No newline at end of file diff --git a/hw/hdl/util/tdp_ram_nc.sv b/hw/hdl/util/tdp_ram_nc.sv new file mode 100644 index 00000000..9db2d4a6 --- /dev/null +++ b/hw/hdl/util/tdp_ram_nc.sv @@ -0,0 +1,35 @@ +module ram_tp_nc + #( + parameter ADDR_BITS = 10, + parameter DATA_BITS = 64 + ) + ( + input clk, + input [(DATA_BITS/8)-1:0] a_we, + input [ADDR_BITS-1:0] a_addr, + input [ADDR_BITS-1:0] b_addr, + input [DATA_BITS-1:0] a_data_in, + output [DATA_BITS-1:0] a_data_out, + output [DATA_BITS-1:0] b_data_out + ); + + localparam DEPTH = 2**ADDR_BITS; + + (* ram_style = "block" *) reg [DATA_BITS-1:0] ram[DEPTH]; + reg [DATA_BITS-1:0] a_data_reg; + reg [DATA_BITS-1:0] b_data_reg; + + always_ff @(posedge clk) begin + for (int i = 0; i < (DATA_BITS/8); i++) begin + if(a_we[i]) begin + ram[a_addr][(i*8)+:8] <= a_data_in[(i*8)+:8]; + end + 
end + a_data_reg <= ram[a_addr]; + b_data_reg <= ram[b_addr]; + end + + assign a_data_out = a_data_reg; + assign b_data_out = b_data_reg; + +endmodule // ram_tp_nc \ No newline at end of file diff --git a/hw/scripts/bd/cr_static_u250.tcl b/hw/scripts/bd/cr_static_u250.tcl new file mode 100644 index 00000000..f8daf1e9 --- /dev/null +++ b/hw/scripts/bd/cr_static_u250.tcl @@ -0,0 +1,768 @@ +# Proc to create BD design_static +proc cr_bd_design_static_vcu118 { parentCell } { + upvar #0 cfg cnfg + + # CHANGE DESIGN NAME HERE + set design_name design_static + + common::send_msg_id "BD_TCL-003" "INFO" "Currently there is no design <$design_name> in project, so creating one..." + + create_bd_design $design_name + + set bCheckIPsPassed 1 + ######################################################################################################## + # CHECK IPs + ######################################################################################################## + set bCheckIPs 1 + if { $bCheckIPs == 1 } { + set list_check_ips "\ + xilinx.com:ip:clk_wiz:6.0\ + xilinx.com:ip:ddr4:2.2\ + xilinx.com:ip:proc_sys_reset:5.0\ + xilinx.com:ip:util_ds_buf:2.1\ + xilinx.com:ip:xdma:4.1\ + " + + set list_ips_missing "" + common::send_msg_id "BD_TCL-006" "INFO" "Checking if the following IPs exist in the project's IP catalog: $list_check_ips ." + + foreach ip_vlnv $list_check_ips { + set ip_obj [get_ipdefs -all $ip_vlnv] + if { $ip_obj eq "" } { + lappend list_ips_missing $ip_vlnv + } + } + + if { $list_ips_missing ne "" } { + catch {common::send_msg_id "BD_TCL-115" "ERROR" "The following IPs are not found in the IP Catalog:\n $list_ips_missing\n\nResolution: Please add the repository containing the IP(s) to the project." } + set bCheckIPsPassed 0 + } + + } + + if { $bCheckIPsPassed != 1 } { + common::send_msg_id "BD_TCL-1003" "WARNING" "Will not continue with creation of design due to the error(s) above." 
+ return 3 + } + + variable script_folder + + if { $parentCell eq "" } { + set parentCell [get_bd_cells /] + } + + # Get object for parentCell + set parentObj [get_bd_cells $parentCell] + if { $parentObj == "" } { + catch {common::send_msg_id "BD_TCL-100" "ERROR" "Unable to find parent cell <$parentCell>!"} + return + } + + # Make sure parentObj is hier blk + set parentType [get_property TYPE $parentObj] + if { $parentType ne "hier" } { + catch {common::send_msg_id "BD_TCL-101" "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} + return + } + + # Save current instance; Restore later + set oldCurInst [current_bd_instance .] + + # Set parent object as current + current_bd_instance $parentObj + +######################################################################################################## +# Create interface ports +######################################################################################################## + # Static config + set axi_cnfg [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:aximm_rtl:1.0 axi_cnfg ] + set_property -dict [ list \ + CONFIG.ADDR_WIDTH {64} \ + CONFIG.DATA_WIDTH {64} \ + CONFIG.PROTOCOL {AXI4LITE} \ + ] $axi_cnfg + + # XDMA status + set dsc_status [ create_bd_intf_port -mode Master -vlnv xilinx.com:display_xdma:xdma_status_ports_rtl:1.0 dsc_status ] + + # PCIe + set pcie_clk [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:diff_clock_rtl:1.0 pcie_clk ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {100000000} \ + ] $pcie_clk + set pcie_x16 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:pcie_7x_mgt_rtl:1.0 pcie_x16 ] + + # DDRs + if {$cnfg(en_ddr) eq 1} { + if {$cnfg(ddr_0) eq 1} { + set c0_ddr4 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:ddr4_rtl:1.0 c0_ddr4 ] + set c0_sys_clk_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:diff_clock_rtl:1.0 c0_sys_clk_0 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {300000000} \ + ] 
$c0_sys_clk_0 + + set axi_ctrl_ddr_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:aximm_rtl:1.0 axi_ctrl_ddr_0 ] + set_property -dict [ list \ + CONFIG.PROTOCOL {AXI4LITE} \ + CONFIG.FREQ_HZ {300000000} \ + CONFIG.MAX_BURST_LENGTH {1} \ + CONFIG.SUPPORTS_NARROW_BURST {0} \ + ] $axi_ctrl_ddr_0 + } + + if {$cnfg(ddr_1) eq 1} { + set c1_ddr4 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:ddr4_rtl:1.0 c1_ddr4 ] + set c1_sys_clk_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:diff_clock_rtl:1.0 c1_sys_clk_0 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {300000000} \ + ] $c1_sys_clk_0 + + set axi_ctrl_ddr_1 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:aximm_rtl:1.0 axi_ctrl_ddr_1 ] + set_property -dict [ list \ + CONFIG.PROTOCOL {AXI4LITE} \ + CONFIG.FREQ_HZ {300000000} \ + CONFIG.MAX_BURST_LENGTH {1} \ + CONFIG.SUPPORTS_NARROW_BURST {0} \ + ] $axi_ctrl_ddr_1 + } + + if {$cnfg(ddr_2) eq 1} { + set c2_ddr4 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:ddr4_rtl:1.0 c2_ddr4 ] + set c2_sys_clk_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:diff_clock_rtl:1.0 c2_sys_clk_0 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {300000000} \ + ] $c2_sys_clk_0 + + set axi_ctrl_ddr_2 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:aximm_rtl:1.0 axi_ctrl_ddr_2 ] + set_property -dict [ list \ + CONFIG.PROTOCOL {AXI4LITE} \ + CONFIG.FREQ_HZ {300000000} \ + CONFIG.MAX_BURST_LENGTH {1} \ + CONFIG.SUPPORTS_NARROW_BURST {0} \ + ] $axi_ctrl_ddr_2 + } + + if {$cnfg(ddr_3) eq 1} { + set c3_ddr4 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:ddr4_rtl:1.0 c3_ddr4 ] + set c3_sys_clk_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:diff_clock_rtl:1.0 c3_sys_clk_0 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {300000000} \ + ] $c3_sys_clk_0 + + set axi_ctrl_ddr_3 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:aximm_rtl:1.0 axi_ctrl_ddr_3 ] + 
set_property -dict [ list \ + CONFIG.PROTOCOL {AXI4LITE} \ + CONFIG.FREQ_HZ {300000000} \ + CONFIG.MAX_BURST_LENGTH {1} \ + CONFIG.SUPPORTS_NARROW_BURST {0} \ + ] $axi_ctrl_ddr_3 + } + } + + set nn 0 + if {$cnfg(en_pr) eq 1} { + incr nn + } + + # Streams and XDMA control + for {set i 0} {$i < $cnfg(n_chan)} {incr i} { + # Host source + set cmd "set axis_dyn_in_$i \[ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 axis_dyn_in_$i ] + set_property -dict \[ list \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] \$axis_dyn_in_$i" + eval $cmd + + # Host sink + set cmd "set axis_dyn_out_$i \[ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 axis_dyn_out_$i ]" + eval $cmd + + # Host source control + set cmd "set dsc_bypass_c2h_$i \[ create_bd_intf_port -mode Slave -vlnv xilinx.com:display_xdma:dsc_bypass_rtl:1.0 dsc_bypass_c2h_$i ]" + eval $cmd + + # Host sink control + set cmd "set dsc_bypass_h2c_$i \[ create_bd_intf_port -mode Slave -vlnv xilinx.com:display_xdma:dsc_bypass_rtl:1.0 dsc_bypass_h2c_$i ]" + eval $cmd + } + + # DDR channels + if {$cnfg(en_ddr) eq 1} { + for {set i 0} {$i < $cnfg(n_ddr_chan) * 2} {incr i} { + set cmd "set axi_ddr_in_$i \[ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:aximm_rtl:1.0 axi_ddr_in_$i ] + set_property -dict \[ list \ + CONFIG.ADDR_WIDTH {64} \ + CONFIG.ARUSER_WIDTH {0} \ + CONFIG.AWUSER_WIDTH {0} \ + CONFIG.BUSER_WIDTH {0} \ + CONFIG.DATA_WIDTH {512} \ + CONFIG.HAS_BRESP {1} \ + CONFIG.HAS_BURST {1} \ + CONFIG.HAS_CACHE {1} \ + CONFIG.HAS_LOCK {1} \ + CONFIG.HAS_PROT {1} \ + CONFIG.HAS_QOS {0} \ + CONFIG.HAS_REGION {0} \ + CONFIG.HAS_RRESP {1} \ + CONFIG.HAS_WSTRB {1} \ + CONFIG.ID_WIDTH {1} \ + CONFIG.MAX_BURST_LENGTH {64} \ + CONFIG.NUM_READ_OUTSTANDING {8} \ + 
CONFIG.NUM_READ_THREADS {8} \ + CONFIG.NUM_WRITE_OUTSTANDING {8} \ + CONFIG.NUM_WRITE_THREADS {8} \ + CONFIG.PROTOCOL {AXI4} \ + CONFIG.READ_WRITE_MODE {READ_WRITE} \ + CONFIG.RUSER_BITS_PER_BYTE {0} \ + CONFIG.RUSER_WIDTH {0} \ + CONFIG.SUPPORTS_NARROW_BURST {0} \ + CONFIG.WUSER_BITS_PER_BYTE {0} \ + CONFIG.WUSER_WIDTH {0} \ + ] \$axi_ddr_in_$i" + eval $cmd + } + } + + # Dynamic control + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + set cmd "set axi_ctrl_$i \[ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:aximm_rtl:1.0 axi_ctrl_$i ] + set_property -dict \[ list \ + CONFIG.ADDR_WIDTH {64} \ + CONFIG.DATA_WIDTH {64} \ + CONFIG.PROTOCOL {AXI4LITE} \ + ] \$axi_ctrl_$i" + eval $cmd + } + + # AVX control + if {$cnfg(en_avx) eq 1} { + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + set cmd "set axim_ctrl_$i \[ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:aximm_rtl:1.0 axim_ctrl_$i ] + set_property -dict \[ list \ + CONFIG.ADDR_WIDTH {64} \ + CONFIG.DATA_WIDTH {256} \ + CONFIG.HAS_BRESP {1} \ + CONFIG.HAS_BURST {1} \ + CONFIG.HAS_CACHE {1} \ + CONFIG.HAS_LOCK {1} \ + CONFIG.HAS_PROT {1} \ + CONFIG.HAS_QOS {0} \ + CONFIG.HAS_REGION {0} \ + CONFIG.HAS_RRESP {1} \ + CONFIG.HAS_WSTRB {1} \ + CONFIG.NUM_READ_OUTSTANDING {8} \ + CONFIG.NUM_WRITE_OUTSTANDING {8} \ + CONFIG.PROTOCOL {AXI4} \ + CONFIG.READ_WRITE_MODE {READ_WRITE} \ + ] \$axim_ctrl_$i" + eval $cmd + } + } + +######################################################################################################## +# Create ports +######################################################################################################## + # Main reset + set aresetn [ create_bd_port -dir O -type rst aresetn ] + + set nn 0 + if {$cnfg(en_pr) eq 1} { + incr nn + } + + # Main clock + set cmd "set aclk \[ create_bd_port -dir O -type clk aclk ] + set_property -dict \[ list \ + CONFIG.ASSOCIATED_BUSIF {axi_cnfg" + for {set i 0} {$i < $cnfg(n_chan)} {incr i} { + append cmd 
":axis_dyn_out_$i:axis_dyn_in_$i" + } + if {$cnfg(en_ddr) eq 1} { + for {set i 0} {$i < $cnfg(n_ddr_chan) * 2} {incr i} { + append cmd ":axi_ddr_in_$i" + } + } + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + append cmd ":axi_ctrl_$i" + } + if {$cnfg(en_avx) eq 1} { + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + append cmd ":axim_ctrl_$i" + } + } + append cmd "} \ + CONFIG.ASSOCIATED_RESET {aresetn} \ + ] \$aclk" + eval $cmd + + # PCIe reset + set perst_n [ create_bd_port -dir I -type rst perst_n ] + set_property -dict [ list \ + CONFIG.POLARITY {ACTIVE_LOW} \ + ] $perst_n + + # External reset + set reset_0 [ create_bd_port -dir I -type rst reset_0 ] + set_property -dict [ list \ + CONFIG.POLARITY {ACTIVE_HIGH} \ + ] $reset_0 + + # User interrupts + set cmd "set usr_irq \[ create_bd_port -dir I -from 1 -to 0 -type intr usr_irq ] + set_property -dict \[ list \ + CONFIG.PortWidth {$cnfg(n_reg)} \ + ] \$usr_irq" + eval $cmd + + # PR clock and reset + if {$cnfg(en_pr) eq 1} { + set pclk [ create_bd_port -dir O -type clk pclk ] + set presetn [ create_bd_port -dir O -from 0 -to 0 -type rst presetn ] + } + +######################################################################################################## +# Create interconnect and components +######################################################################################################## + # Create instance: axi_interconnect_0, and set properties + if {$cnfg(en_avx) eq 1} { + set ic0_mi [expr {2*$cnfg(n_reg) + 1}] + } else { + set ic0_mi [expr {$cnfg(n_reg) + 1}] + } + + set cmd "set axi_interconnect_0 \[ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_interconnect_0 ] + set_property -dict \[ list \ + CONFIG.NUM_MI {$ic0_mi} \ + CONFIG.S00_HAS_DATA_FIFO {2} \ + CONFIG.STRATEGY {2} \ " + if {$cnfg(en_avx) eq 1} { + for {set i 0} {$i <= 2 * $cnfg(n_reg)} {incr i} { + append cmd [format " CONFIG.M%02d_HAS_REGSLICE {4}" $i] + } + } else { + for {set i 0} {$i <= $cnfg(n_reg)} {incr i} { + 
append cmd [format " CONFIG.M%02d_HAS_REGSLICE {4}" $i] + } + } + append cmd "] \$axi_interconnect_0" + eval $cmd + + # Create instance: axi_interconnect_1(2), and set properties + if {$cnfg(en_ddr) eq 1} { + set ic1_si 2 + set ic1_mi 1 + for {set i 1} {$i <= $cnfg(n_ddr_chan)} {incr i} { + set cmd "set axi_interconnect_$i \[ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_interconnect_$i ] + set_property -dict \[ list \ + CONFIG.NUM_MI {$ic1_mi} \ + CONFIG.NUM_SI {$ic1_si} \ + CONFIG.S00_HAS_REGSLICE {4} \ + CONFIG.STRATEGY {2} \ " + append cmd "[format " CONFIG.M%02d_HAS_DATA_FIFO {0} CONFIG.M%02d_HAS_REGSLICE {4}" 0 0]" + for {set j 0} {$j < 2} {incr j} { + append cmd "[format " CONFIG.S%02d_HAS_REGSLICE {4}" $j]" + } + append cmd "] \$axi_interconnect_$i" + eval $cmd + } + + if {$cnfg(ddr_0) eq 1} { + # Create instance: ddr4_0, and set properties + set ddr4_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:ddr4:2.2 ddr4_0 ] + set_property -dict [ list \ + CONFIG.C0.BANK_GROUP_WIDTH {2} \ + CONFIG.C0.CKE_WIDTH {1} \ + CONFIG.C0.CS_WIDTH {1} \ + CONFIG.C0.ODT_WIDTH {1} \ + CONFIG.C0.ControllerType {DDR4_SDRAM} \ + CONFIG.C0.DDR4_AxiAddressWidth {34} \ + CONFIG.C0.DDR4_AxiDataWidth {512} \ + CONFIG.C0.DDR4_CLKOUT0_DIVIDE {5} \ + CONFIG.C0.DDR4_CasLatency {17} \ + CONFIG.C0.DDR4_CasWriteLatency {12} \ + CONFIG.C0.DDR4_DataMask {NONE} \ + CONFIG.C0.DDR4_DataWidth {72} \ + CONFIG.C0.DDR4_Ecc {true} \ + CONFIG.C0.DDR4_InputClockPeriod {3332} \ + CONFIG.C0.DDR4_MemoryPart {MTA18ASF2G72PZ-2G3} \ + CONFIG.C0.DDR4_MemoryType {RDIMMs} \ + CONFIG.C0.DDR4_TimePeriod {833} \ + CONFIG.C0.DDR4_AUTO_AP_COL_A3 {true} \ + CONFIG.C0.DDR4_Mem_Add_Map {ROW_COLUMN_BANK_INTLV} \ + ] $ddr4_0 + + + # Create instance: rst_ddr4_0_300M, and set properties + set rst_ddr4_0_300M [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 rst_ddr4_0_300M ] + } + + if {$cnfg(ddr_1) eq 1} { + # Create instance: ddr4_1, and set properties + set ddr4_1 [ create_bd_cell 
-type ip -vlnv xilinx.com:ip:ddr4:2.2 ddr4_1 ] + set_property -dict [ list \ + CONFIG.C0.BANK_GROUP_WIDTH {2} \ + CONFIG.C0.CKE_WIDTH {1} \ + CONFIG.C0.CS_WIDTH {1} \ + CONFIG.C0.ODT_WIDTH {1} \ + CONFIG.C0.ControllerType {DDR4_SDRAM} \ + CONFIG.C0.DDR4_AxiAddressWidth {34} \ + CONFIG.C0.DDR4_AxiDataWidth {512} \ + CONFIG.C0.DDR4_CLKOUT0_DIVIDE {5} \ + CONFIG.C0.DDR4_CasLatency {17} \ + CONFIG.C0.DDR4_CasWriteLatency {12} \ + CONFIG.C0.DDR4_DataMask {NONE} \ + CONFIG.C0.DDR4_DataWidth {72} \ + CONFIG.C0.DDR4_Ecc {true} \ + CONFIG.C0.DDR4_InputClockPeriod {3332} \ + CONFIG.C0.DDR4_MemoryPart {MTA18ASF2G72PZ-2G3} \ + CONFIG.C0.DDR4_MemoryType {RDIMMs} \ + CONFIG.C0.DDR4_TimePeriod {833} \ + CONFIG.C0.DDR4_AUTO_AP_COL_A3 {true} \ + CONFIG.C0.DDR4_Mem_Add_Map {ROW_COLUMN_BANK_INTLV} \ + ] $ddr4_1 + + # Create instance: rst_ddr4_1_300M, and set properties + set rst_ddr4_1_300M [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 rst_ddr4_1_300M ] + } + + if {$cnfg(ddr_2) eq 1} { + # Create instance: ddr4_2, and set properties (the cell name must be unique and match the ddr4_<n> pins used by the DDR connection loop) + set ddr4_2 [ create_bd_cell -type ip -vlnv xilinx.com:ip:ddr4:2.2 ddr4_2 ] + set_property -dict [ list \ + CONFIG.C0.BANK_GROUP_WIDTH {2} \ + CONFIG.C0.CKE_WIDTH {1} \ + CONFIG.C0.CS_WIDTH {1} \ + CONFIG.C0.ODT_WIDTH {1} \ + CONFIG.C0.ControllerType {DDR4_SDRAM} \ + CONFIG.C0.DDR4_AxiAddressWidth {34} \ + CONFIG.C0.DDR4_AxiDataWidth {512} \ + CONFIG.C0.DDR4_CLKOUT0_DIVIDE {5} \ + CONFIG.C0.DDR4_CasLatency {17} \ + CONFIG.C0.DDR4_CasWriteLatency {12} \ + CONFIG.C0.DDR4_DataMask {NONE} \ + CONFIG.C0.DDR4_DataWidth {72} \ + CONFIG.C0.DDR4_Ecc {true} \ + CONFIG.C0.DDR4_InputClockPeriod {3332} \ + CONFIG.C0.DDR4_MemoryPart {MTA18ASF2G72PZ-2G3} \ + CONFIG.C0.DDR4_MemoryType {RDIMMs} \ + CONFIG.C0.DDR4_TimePeriod {833} \ + CONFIG.C0.DDR4_AUTO_AP_COL_A3 {true} \ + CONFIG.C0.DDR4_Mem_Add_Map {ROW_COLUMN_BANK_INTLV} \ + ] $ddr4_2 + + # Create instance: rst_ddr4_2_300M, and set properties + set rst_ddr4_2_300M [ create_bd_cell -type ip -vlnv
xilinx.com:ip:proc_sys_reset:5.0 rst_ddr4_2_300M ] + } + + if {$cnfg(ddr_3) eq 1} { + # Create instance: ddr4_3, and set properties (the cell name must be unique and match the ddr4_<n> pins used by the DDR connection loop) + set ddr4_3 [ create_bd_cell -type ip -vlnv xilinx.com:ip:ddr4:2.2 ddr4_3 ] + set_property -dict [ list \ + CONFIG.C0.BANK_GROUP_WIDTH {2} \ + CONFIG.C0.CKE_WIDTH {1} \ + CONFIG.C0.CS_WIDTH {1} \ + CONFIG.C0.ODT_WIDTH {1} \ + CONFIG.C0.ControllerType {DDR4_SDRAM} \ + CONFIG.C0.DDR4_AxiAddressWidth {34} \ + CONFIG.C0.DDR4_AxiDataWidth {512} \ + CONFIG.C0.DDR4_CLKOUT0_DIVIDE {5} \ + CONFIG.C0.DDR4_CasLatency {17} \ + CONFIG.C0.DDR4_CasWriteLatency {12} \ + CONFIG.C0.DDR4_DataMask {NONE} \ + CONFIG.C0.DDR4_DataWidth {72} \ + CONFIG.C0.DDR4_Ecc {true} \ + CONFIG.C0.DDR4_InputClockPeriod {3332} \ + CONFIG.C0.DDR4_MemoryPart {MTA18ASF2G72PZ-2G3} \ + CONFIG.C0.DDR4_MemoryType {RDIMMs} \ + CONFIG.C0.DDR4_TimePeriod {833} \ + CONFIG.C0.DDR4_AUTO_AP_COL_A3 {true} \ + CONFIG.C0.DDR4_Mem_Add_Map {ROW_COLUMN_BANK_INTLV} \ + ] $ddr4_3 + + # Create instance: rst_ddr4_3_300M, and set properties + set rst_ddr4_3_300M [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 rst_ddr4_3_300M ] + } + } + + if {$cnfg(en_pr) eq 1} { + # Create instance: clk_wiz_0, and set properties + set clk_wiz_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:clk_wiz:6.0 clk_wiz_0 ] + set_property -dict [ list \ + CONFIG.CLKIN1_JITTER_PS {40.0} \ + CONFIG.CLKOUT1_JITTER {119.392} \ + CONFIG.CLKOUT1_PHASE_ERROR {154.678} \ + CONFIG.CLKOUT1_REQUESTED_OUT_FREQ {200.000} \ + CONFIG.MMCM_CLKFBOUT_MULT_F {24.000} \ + CONFIG.MMCM_CLKIN1_PERIOD {4.000} \ + CONFIG.MMCM_CLKIN2_PERIOD {10.0} \ + CONFIG.MMCM_CLKOUT0_DIVIDE_F {6.000} \ + CONFIG.MMCM_DIVCLK_DIVIDE {5} \ + CONFIG.PRIM_IN_FREQ {250.000} \ + ] $clk_wiz_0 + + # Create instance: proc_sys_reset_0, and set properties + set proc_sys_reset_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 proc_sys_reset_0 ] + } + + # Create instance: util_ds_buf, and set properties + set util_ds_buf [ create_bd_cell
-type ip -vlnv xilinx.com:ip:util_ds_buf:2.1 util_ds_buf ] + set_property -dict [ list \ + CONFIG.C_BUF_TYPE {IBUFDSGTE} \ + ] $util_ds_buf + + # Create instance: proc_sys_reset_1, and set properties + set proc_sys_reset_1 [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 proc_sys_reset_1 ] + + proc dec2bin i { + #returns a string, e.g. dec2bin 10 => 1010 + set res {} + while {$i>0} { + set res [expr {$i%2}]$res + set i [expr {$i/2}] + } + if {$res == {}} {set res 0} + return $res + } + + set nn 0 + if {$cnfg(en_pr) eq 1} { + incr nn + } + + set bypass [expr {(1 << ($cnfg(n_chan))) - 1}] + set bypass [dec2bin $bypass] + + # Create instance: xdma_0, and set properties + set cmd "set xdma_0 \[ create_bd_cell -type ip -vlnv xilinx.com:ip:xdma:4.1 xdma_0 ] + set_property -dict \[ list \ + CONFIG.axi_bypass_64bit_en {true} \ + CONFIG.axi_bypass_prefetchable {true} \ + CONFIG.axi_data_width {512_bit} \ + CONFIG.axi_id_width {4} \ + CONFIG.axist_bypass_en {true} \ + CONFIG.axist_bypass_scale {Gigabytes} \ + CONFIG.axist_bypass_size {1} \ + CONFIG.axisten_freq {250} \ + CONFIG.cfg_mgmt_if {false} \ + CONFIG.dsc_bypass_rd {[format "%04d" $bypass]} \ + CONFIG.dsc_bypass_wr {[format "%04d" $bypass]} \ + CONFIG.pciebar2axibar_axil_master {0x00000000} \ + CONFIG.pf0_msi_cap_multimsgcap {32_vectors} \ + CONFIG.pf0_msix_cap_pba_offset {00008FE0} \ + CONFIG.pf0_msix_cap_table_offset {00008000} \ + CONFIG.pf0_msix_cap_table_size {01F} \ + CONFIG.pf0_msix_enabled {true} \ + CONFIG.pl_link_cap_max_link_speed {8.0_GT/s} \ + CONFIG.pl_link_cap_max_link_width {X16} \ + CONFIG.xdma_axi_intf_mm {AXI_Stream} \ + CONFIG.xdma_num_usr_irq {$cnfg(n_reg)} \ + CONFIG.xdma_rnum_chnl {[expr {$cnfg(n_chan)}]} \ + CONFIG.xdma_sts_ports {true} \ + CONFIG.xdma_wnum_chnl {[expr {$cnfg(n_chan)}]} \ + CONFIG.xdma_wnum_rids {16} \ + CONFIG.xdma_rnum_rids {16} \ + ] \$xdma_0" + eval $cmd + 
+######################################################################################################## +# Create interface connections +######################################################################################################## + # XDMA + connect_bd_intf_net -intf_net diff_clock_rtl_0_1 [get_bd_intf_ports pcie_clk] [get_bd_intf_pins util_ds_buf/CLK_IN_D] + connect_bd_intf_net -intf_net xdma_0_pcie_mgt [get_bd_intf_ports pcie_x16] [get_bd_intf_pins xdma_0/pcie_mgt] + connect_bd_intf_net -intf_net axi_interconnect_0_M00_AXI [get_bd_intf_ports axi_cnfg] [get_bd_intf_pins axi_interconnect_0/M00_AXI] + connect_bd_intf_net -intf_net xdma_0_M_AXI_BYPASS [get_bd_intf_pins axi_interconnect_0/S00_AXI] [get_bd_intf_pins xdma_0/M_AXI_BYPASS] + connect_bd_intf_net -intf_net xdma_0_dma_status_ports [get_bd_intf_ports dsc_status] [get_bd_intf_pins xdma_0/dma_status_ports] + + # Dynamic control + if {$cnfg(en_avx) eq 1} { + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + set j [expr {$i*2 + 1}] + set cmd [format "connect_bd_intf_net -intf_net axi_interconnect_0_M%02d_AXI \[get_bd_intf_ports axi_ctrl_%d] \[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]" $j $i $j] + eval $cmd + set j [expr {$i*2 + 2}] + set cmd [format "connect_bd_intf_net -intf_net axi_interconnect_0_M%02d_AXI \[get_bd_intf_ports axim_ctrl_%d] \[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]" $j $i $j] + eval $cmd + } + } else { + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + set j [expr {$i + 1}] + set cmd [format "connect_bd_intf_net -intf_net axi_interconnect_0_M%02d_AXI \[get_bd_intf_ports axi_ctrl_%d] \[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]" $j $i $j] + eval $cmd + } + } + + # DDRs + if {$cnfg(en_ddr) eq 1} { + for {set i 1} {$i <= $cnfg(n_ddr_chan)} {incr i} { + set nn [expr {$i - 1}] + set cmd [format "connect_bd_intf_net -intf_net axi_interconnect_%d_M00_AXI \[get_bd_intf_pins axi_interconnect_%d/M00_AXI] \[get_bd_intf_pins ddr4_%d/C0_DDR4_S_AXI]" $i $i $nn] + eval $cmd + set cmd 
[format "connect_bd_intf_net -intf_net ddr4_%d_C0_DDR4 [get_bd_intf_ports c$nn\_ddr4] [get_bd_intf_pins ddr4_$nn/C0_DDR4]" $nn] + eval $cmd + set cmd [format "connect_bd_intf_net -intf_net diff_clock_rtl_%d_2 [get_bd_intf_ports c$nn\_sys_clk_0] [get_bd_intf_pins ddr4_$nn/C0_SYS_CLK]" $nn $nn $nn] + eval $cmd + set cmd "connect_bd_intf_net [get_bd_intf_ports axi_ctrl_ddr_$nn] [get_bd_intf_pins ddr4_$nn/C0_DDR4_S_AXI_CTRL]" + eval $cmd + + for {set j 0} {$j < 2} {incr j} { + set nn [expr {$i - 1 + $j*$cnfg(n_ddr_chan)}] + set cmd [format "connect_bd_intf_net -intf_net axi_ddr_in_$nn\_1 \[get_bd_intf_ports axi_ddr_in_$nn] \[get_bd_intf_pins axi_interconnect_%d/S%02d_AXI]" $i $j] + eval $cmd + } + } + } + + set nn 0 + if {$cnfg(en_pr) eq 1} { + incr nn + } + + # Data lines + for {set i 0} {$i < $cnfg(n_chan)} {incr i} { + set cmd "connect_bd_intf_net -intf_net axis_dyn_in_$i\_1 \[get_bd_intf_ports axis_dyn_in_$i] \[get_bd_intf_pins xdma_0/S_AXIS_C2H_$i]" + eval $cmd + + set cmd "connect_bd_intf_net -intf_net xdma_0_M_AXIS_H2C_$i \[get_bd_intf_ports axis_dyn_out_$i] \[get_bd_intf_pins xdma_0/M_AXIS_H2C_$i]" + eval $cmd + + set cmd "connect_bd_intf_net -intf_net dsc_bypass_c2h_$i\_1 \[get_bd_intf_ports dsc_bypass_c2h_$i] \[get_bd_intf_pins xdma_0/dsc_bypass_c2h_$i]" + eval $cmd + + set cmd "connect_bd_intf_net -intf_net dsc_bypass_h2c_$i\_1 \[get_bd_intf_ports dsc_bypass_h2c_$i] \[get_bd_intf_pins xdma_0/dsc_bypass_h2c_$i]" + eval $cmd + } + +######################################################################################################## +# Create port connections +######################################################################################################## + # PR + if {$cnfg(en_pr) eq 1} { + connect_bd_net -net pr_aresetn_1 [get_bd_ports presetn] [get_bd_pins proc_sys_reset_0/peripheral_aresetn] + connect_bd_net -net clk_wiz_0_clk_out1 [get_bd_ports pclk] [get_bd_pins clk_wiz_0/clk_out1] [get_bd_pins proc_sys_reset_0/slowest_sync_clk] + } + + # XDMA + 
connect_bd_net -net reset_rtl_0_1 [get_bd_ports perst_n] [get_bd_pins xdma_0/sys_rst_n] + connect_bd_net -net usr_irq_1 [get_bd_ports usr_irq] [get_bd_pins xdma_0/usr_irq_req] + connect_bd_net -net util_ds_buf_IBUF_DS_ODIV2 [get_bd_pins util_ds_buf/IBUF_DS_ODIV2] [get_bd_pins xdma_0/sys_clk] + connect_bd_net -net util_ds_buf_IBUF_OUT [get_bd_pins util_ds_buf/IBUF_OUT] [get_bd_pins xdma_0/sys_clk_gt] + + # External reset + set cmd "connect_bd_net -net reset_rtl_0_0_1 \[get_bd_ports reset_0] \[get_bd_pins clk_wiz_0/reset] \[get_bd_pins proc_sys_reset_0/ext_reset_in]" + if {$cnfg(en_ddr) eq 1} { + if {$cnfg(ddr_0) eq 1} { + append cmd " \[get_bd_pins ddr4_0/sys_rst]" + } + if {$cnfg(ddr_1) eq 1} { + append cmd " \[get_bd_pins ddr4_1/sys_rst]" + } + if {$cnfg(ddr_2) eq 1} { + append cmd " \[get_bd_pins ddr4_2/sys_rst]" + } + if {$cnfg(ddr_3) eq 1} { + append cmd " \[get_bd_pins ddr4_3/sys_rst]" + } + } + eval $cmd + + # DDRs + if {$cnfg(en_ddr) eq 1} { + for {set i 1} {$i <= $cnfg(n_ddr_chan)} {incr i} { + set nn [expr {$i - 1}] + set cmd [format "connect_bd_net -net rst_ddr4_$nn\_300M_peripheral_aresetn \[get_bd_pins axi_interconnect_$i/M00_ARESETN] \[get_bd_pins ddr4_$nn/c0_ddr4_aresetn] \[get_bd_pins rst_ddr4_$nn\_300M/peripheral_aresetn]"] + eval $cmd + set cmd [format "connect_bd_net -net ddr4_$nn\_c0_ddr4_ui_clk \[get_bd_pins axi_interconnect_$i/M00_ACLK] \[get_bd_pins ddr4_$nn/c0_ddr4_ui_clk] \[get_bd_pins rst_ddr4_$nn\_300M/slowest_sync_clk]"] + eval $cmd + set cmd [format "connect_bd_net -net ddr4_$nn\_c0_ddr4_ui_clk_sync_rst \[get_bd_pins ddr4_$nn/c0_ddr4_ui_clk_sync_rst] \[get_bd_pins rst_ddr4_$nn\_300M/ext_reset_in]" ] + eval $cmd + } + } + + # XDMA + connect_bd_net -net xdma_0_axi_aresetn_ns [get_bd_pins xdma_0/axi_aresetn] [get_bd_pins proc_sys_reset_1/ext_reset_in] + connect_bd_net -net proc_sys_reset_1_peripheral_aresetn [get_bd_ports aresetn] [get_bd_pins proc_sys_reset_1/peripheral_aresetn] + + set cmd_clk "connect_bd_net -net xdma_0_axi_aclk 
\[get_bd_ports aclk] \[get_bd_pins proc_sys_reset_1/slowest_sync_clk] \[get_bd_pins axi_interconnect_0/ACLK] \[get_bd_pins axi_interconnect_0/M00_ACLK] \[get_bd_pins xdma_0/axi_aclk] \[get_bd_pins axi_interconnect_0/S00_ACLK]" + set cmd_rst "connect_bd_net -net xdma_0_axi_aresetn_s \[get_bd_pins axi_interconnect_0/ARESETN] \[get_bd_pins proc_sys_reset_1/interconnect_aresetn] \[get_bd_pins axi_interconnect_0/M00_ARESETN] \[get_bd_pins axi_interconnect_0/S00_ARESETN]" + + set nn 1 + if {$cnfg(en_pr) eq 1} { + append cmd_clk " \[get_bd_pins clk_wiz_0/clk_in1]" + } + + if {$cnfg(en_avx) eq 1} { + for {set i 1} {$i <= 2 * $cnfg(n_reg)} {incr i} { + append cmd_clk [format " \[get_bd_pins axi_interconnect_0/M%02d_ACLK]" $i] + append cmd_rst [format " \[get_bd_pins axi_interconnect_0/M%02d_ARESETN]" $i] + } + } else { + for {set i 1} {$i <= $cnfg(n_reg)} {incr i} { + append cmd_clk [format " \[get_bd_pins axi_interconnect_0/M%02d_ACLK]" $i] + append cmd_rst [format " \[get_bd_pins axi_interconnect_0/M%02d_ARESETN]" $i] + } + } + + if {$cnfg(en_ddr) eq 1} { + for {set i 1} {$i <= $cnfg(n_ddr_chan)} {incr i} { + append cmd_clk " \[get_bd_pins axi_interconnect_$i/ACLK]" + append cmd_rst " \[get_bd_pins axi_interconnect_$i/ARESETN]" + for {set j 0} {$j < 2} {incr j} { + append cmd_clk [format " \[get_bd_pins axi_interconnect_$i/S%02d_ACLK]" $j] + append cmd_rst [format " \[get_bd_pins axi_interconnect_$i/S%02d_ARESETN]" $j] + } + } + } + + eval $cmd_clk + eval $cmd_rst + +######################################################################################################## +# Create address segments +######################################################################################################## + # Static config + create_bd_addr_seg -range 0x00008000 -offset 0x00000000 [get_bd_addr_spaces xdma_0/M_AXI_BYPASS] [get_bd_addr_segs axi_cnfg/Reg] SEG_axi_cnfg_Reg + + if {$cnfg(en_avx) eq 1} { + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + set cmd [format 
"create_bd_addr_seg -range 0x00040000 -offset 0x000%02x0000 \[get_bd_addr_spaces xdma_0/M_AXI_BYPASS] \[get_bd_addr_segs axi_ctrl_$i/Reg] SEG_axi_ctrl_$i\_Reg" [expr {0x10 + $i *4}]] + eval $cmd + set cmd [format "create_bd_addr_seg -range 0x00040000 -offset 0x00%03x0000 \[get_bd_addr_spaces xdma_0/M_AXI_BYPASS] \[get_bd_addr_segs axim_ctrl_$i/Reg] SEG_axim_ctrl_$i\_Reg" [expr {0x100 + $i *4}]] + eval $cmd + } + } else { + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + set cmd [format "create_bd_addr_seg -range 0x00040000 -offset 0x000%02x0000 \[get_bd_addr_spaces xdma_0/M_AXI_BYPASS] \[get_bd_addr_segs axi_ctrl_$i/Reg] SEG_axi_ctrl_$i\_Reg" [expr {0x10 + $i *4}]] + eval $cmd + } + } + + # DDRs + if {$cnfg(en_ddr) eq 1} { + for {set i 0} {$i < $cnfg(n_ddr_chan)} {incr i} { + for {set j 0} {$j < 2} {incr j} { + set nn [expr {$i + $j * $cnfg(n_ddr_chan)}] + set cmd "create_bd_addr_seg -range 0x80000000 -offset 0x00000000 \[get_bd_addr_spaces axi_ddr_in_$nn] \[get_bd_addr_segs ddr4_$i/C0_DDR4_MEMORY_MAP/C0_DDR4_ADDRESS_BLOCK] SEG_ddr4_$i\_$j\_C0_DDR4_ADDRESS_BLOCK" + eval $cmd + } + + set cmd "create_bd_addr_seg -range 0x00008000 -offset 0x00000000 \[get_bd_addr_spaces axi_ctrl_ddr_$i] \[get_bd_addr_segs ddr4_$i/C0_DDR4_MEMORY_MAP_CTRL/C0_REG] SEG_ddr4_ctrl_$i\_C0_DDR4_ADDRESS_BLOCK" + eval $cmd + } + } + + # Restore current instance + current_bd_instance $oldCurInst + + save_bd_design + close_bd_design $design_name +} +# End of cr_bd_design_static()
diff --git a/hw/scripts/bd/cr_static_u280.tcl b/hw/scripts/bd/cr_static_u280.tcl
new file mode 100644
index 00000000..ef990144
--- /dev/null
+++ b/hw/scripts/bd/cr_static_u280.tcl
@@ -0,0 +1,589 @@
+# Proc to create BD design_static
+#
+# Builds the static-layer block design "design_static": the XDMA PCIe core,
+# the AXI interconnects, the optional DDR4 controllers and the optional PR
+# clock/reset generator, then saves and closes the design.
+#
+# Arguments:
+#   parentCell - BD cell to build under; an empty string selects the root (/).
+#
+# Configuration comes from the global array `cfg`; the keys read here are
+# n_chan, n_reg, n_ddr_chan, en_ddr, ddr_0, ddr_1, en_pr and en_avx.
+#
+# Returns 3 when a required IP is missing from the catalog; returns an empty
+# result early when the parent cell is missing or is not hierarchical.
+#
+# NOTE(review): this file is cr_static_u280.tcl but the proc carries the
+# "_vcu118" suffix. The name is kept because callers are not visible here.
+proc cr_bd_design_static_vcu118 { parentCell } {
+    upvar #0 cfg cnfg
+
+    # CHANGE DESIGN NAME HERE
+    set design_name design_static
+
+    common::send_msg_id "BD_TCL-003" "INFO" "Currently there is no design <$design_name> in project, so creating one..."
+
+    create_bd_design $design_name
+
+    set bCheckIPsPassed 1
+    ########################################################################################################
+    # CHECK IPs
+    ########################################################################################################
+    set bCheckIPs 1
+    if { $bCheckIPs == 1 } {
+        # List every IP instantiated below so a missing one is reported up front.
+        set list_check_ips "\
+xilinx.com:ip:axi_interconnect:2.1\
+xilinx.com:ip:clk_wiz:6.0\
+xilinx.com:ip:ddr4:2.2\
+xilinx.com:ip:proc_sys_reset:5.0\
+xilinx.com:ip:util_ds_buf:2.1\
+xilinx.com:ip:xdma:4.1\
+"
+
+        set list_ips_missing ""
+        common::send_msg_id "BD_TCL-006" "INFO" "Checking if the following IPs exist in the project's IP catalog: $list_check_ips ."
+
+        foreach ip_vlnv $list_check_ips {
+            set ip_obj [get_ipdefs -all $ip_vlnv]
+            if { $ip_obj eq "" } {
+                lappend list_ips_missing $ip_vlnv
+            }
+        }
+
+        if { $list_ips_missing ne "" } {
+            catch {common::send_msg_id "BD_TCL-115" "ERROR" "The following IPs are not found in the IP Catalog:\n $list_ips_missing\n\nResolution: Please add the repository containing the IP(s) to the project." }
+            set bCheckIPsPassed 0
+        }
+
+    }
+
+    if { $bCheckIPsPassed != 1 } {
+        common::send_msg_id "BD_TCL-1003" "WARNING" "Will not continue with creation of design due to the error(s) above."
+        return 3
+    }
+
+    variable script_folder
+
+    if { $parentCell eq "" } {
+        set parentCell [get_bd_cells /]
+    }
+
+    # Get object for parentCell
+    set parentObj [get_bd_cells $parentCell]
+    if { $parentObj == "" } {
+        catch {common::send_msg_id "BD_TCL-100" "ERROR" "Unable to find parent cell <$parentCell>!"}
+        return
+    }
+
+    # Make sure parentObj is hier blk
+    set parentType [get_property TYPE $parentObj]
+    if { $parentType ne "hier" } {
+        catch {common::send_msg_id "BD_TCL-101" "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be <hier>."}
+        return
+    }
+
+    # Save current instance; Restore later
+    set oldCurInst [current_bd_instance .]
+
+    # Set parent object as current
+    current_bd_instance $parentObj
+
+########################################################################################################
+# Create interface ports
+########################################################################################################
+    # Static config
+    set axi_cnfg [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:aximm_rtl:1.0 axi_cnfg ]
+    set_property -dict [ list \
+        CONFIG.ADDR_WIDTH {64} \
+        CONFIG.DATA_WIDTH {64} \
+        CONFIG.PROTOCOL {AXI4LITE} \
+    ] $axi_cnfg
+
+    # XDMA status
+    set dsc_status [ create_bd_intf_port -mode Master -vlnv xilinx.com:display_xdma:xdma_status_ports_rtl:1.0 dsc_status ]
+
+    # PCIe
+    set pcie_clk [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:diff_clock_rtl:1.0 pcie_clk ]
+    set_property -dict [ list \
+        CONFIG.FREQ_HZ {100000000} \
+    ] $pcie_clk
+    set pcie_x16 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:pcie_7x_mgt_rtl:1.0 pcie_x16 ]
+
+    # DDRs: memory port, reference clock and AXI4-Lite control port per enabled channel
+    if {$cnfg(en_ddr) eq 1} {
+        foreach k {0 1} {
+            if {$cnfg(ddr_$k) eq 1} {
+                create_bd_intf_port -mode Master -vlnv xilinx.com:interface:ddr4_rtl:1.0 c${k}_ddr4
+                set ddr_clk [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:diff_clock_rtl:1.0 c${k}_sys_clk_0 ]
+                set_property -dict [ list \
+                    CONFIG.FREQ_HZ {100000000} \
+                ] $ddr_clk
+
+                set ddr_ctrl [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:aximm_rtl:1.0 axi_ctrl_ddr_$k ]
+                set_property -dict [ list \
+                    CONFIG.PROTOCOL {AXI4LITE} \
+                    CONFIG.FREQ_HZ {100000000} \
+                    CONFIG.MAX_BURST_LENGTH {1} \
+                    CONFIG.SUPPORTS_NARROW_BURST {0} \
+                ] $ddr_ctrl
+            }
+        }
+    }
+
+    # Streams and XDMA control
+    for {set i 0} {$i < $cnfg(n_chan)} {incr i} {
+        # Host source
+        set axis_in [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 axis_dyn_in_$i ]
+        set_property -dict [ list \
+            CONFIG.HAS_TKEEP {1} \
+            CONFIG.HAS_TLAST {1} \
+            CONFIG.HAS_TREADY {1} \
+            CONFIG.HAS_TSTRB {0} \
+            CONFIG.LAYERED_METADATA {undef} \
+            CONFIG.TDATA_NUM_BYTES {64} \
+            CONFIG.TDEST_WIDTH {0} \
+            CONFIG.TID_WIDTH {0} \
+            CONFIG.TUSER_WIDTH {0} \
+        ] $axis_in
+
+        # Host sink
+        create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 axis_dyn_out_$i
+
+        # Host source control
+        create_bd_intf_port -mode Slave -vlnv xilinx.com:display_xdma:dsc_bypass_rtl:1.0 dsc_bypass_c2h_$i
+
+        # Host sink control
+        create_bd_intf_port -mode Slave -vlnv xilinx.com:display_xdma:dsc_bypass_rtl:1.0 dsc_bypass_h2c_$i
+    }
+
+    # DDR channels
+    if {$cnfg(en_ddr) eq 1} {
+        for {set i 0} {$i < $cnfg(n_ddr_chan) * 2} {incr i} {
+            set ddr_in [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:aximm_rtl:1.0 axi_ddr_in_$i ]
+            set_property -dict [ list \
+                CONFIG.ADDR_WIDTH {64} \
+                CONFIG.ARUSER_WIDTH {0} \
+                CONFIG.AWUSER_WIDTH {0} \
+                CONFIG.BUSER_WIDTH {0} \
+                CONFIG.DATA_WIDTH {512} \
+                CONFIG.HAS_BRESP {1} \
+                CONFIG.HAS_BURST {1} \
+                CONFIG.HAS_CACHE {1} \
+                CONFIG.HAS_LOCK {1} \
+                CONFIG.HAS_PROT {1} \
+                CONFIG.HAS_QOS {0} \
+                CONFIG.HAS_REGION {0} \
+                CONFIG.HAS_RRESP {1} \
+                CONFIG.HAS_WSTRB {1} \
+                CONFIG.ID_WIDTH {1} \
+                CONFIG.MAX_BURST_LENGTH {64} \
+                CONFIG.NUM_READ_OUTSTANDING {8} \
+                CONFIG.NUM_READ_THREADS {8} \
+                CONFIG.NUM_WRITE_OUTSTANDING {8} \
+                CONFIG.NUM_WRITE_THREADS {8} \
+                CONFIG.PROTOCOL {AXI4} \
+                CONFIG.READ_WRITE_MODE {READ_WRITE} \
+                CONFIG.RUSER_BITS_PER_BYTE {0} \
+                CONFIG.RUSER_WIDTH {0} \
+                CONFIG.SUPPORTS_NARROW_BURST {0} \
+                CONFIG.WUSER_BITS_PER_BYTE {0} \
+                CONFIG.WUSER_WIDTH {0} \
+            ] $ddr_in
+        }
+    }
+
+    # Dynamic control
+    for {set i 0} {$i < $cnfg(n_reg)} {incr i} {
+        set ctrl [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:aximm_rtl:1.0 axi_ctrl_$i ]
+        set_property -dict [ list \
+            CONFIG.ADDR_WIDTH {64} \
+            CONFIG.DATA_WIDTH {64} \
+            CONFIG.PROTOCOL {AXI4LITE} \
+        ] $ctrl
+    }
+
+    # AVX control
+    if {$cnfg(en_avx) eq 1} {
+        for {set i 0} {$i < $cnfg(n_reg)} {incr i} {
+            set avx_ctrl [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:aximm_rtl:1.0 axim_ctrl_$i ]
+            set_property -dict [ list \
+                CONFIG.ADDR_WIDTH {64} \
+                CONFIG.DATA_WIDTH {256} \
+                CONFIG.HAS_BRESP {1} \
+                CONFIG.HAS_BURST {1} \
+                CONFIG.HAS_CACHE {1} \
+                CONFIG.HAS_LOCK {1} \
+                CONFIG.HAS_PROT {1} \
+                CONFIG.HAS_QOS {0} \
+                CONFIG.HAS_REGION {0} \
+                CONFIG.HAS_RRESP {1} \
+                CONFIG.HAS_WSTRB {1} \
+                CONFIG.NUM_READ_OUTSTANDING {8} \
+                CONFIG.NUM_WRITE_OUTSTANDING {8} \
+                CONFIG.PROTOCOL {AXI4} \
+                CONFIG.READ_WRITE_MODE {READ_WRITE} \
+            ] $avx_ctrl
+        }
+    }
+
+########################################################################################################
+# Create ports
+########################################################################################################
+    # Main reset
+    set aresetn [ create_bd_port -dir O -type rst aresetn ]
+
+    # Main clock; ASSOCIATED_BUSIF is the colon-separated list of interface ports tied to aclk
+    set busif "axi_cnfg"
+    for {set i 0} {$i < $cnfg(n_chan)} {incr i} {
+        append busif ":axis_dyn_out_$i:axis_dyn_in_$i"
+    }
+    if {$cnfg(en_ddr) eq 1} {
+        for {set i 0} {$i < $cnfg(n_ddr_chan) * 2} {incr i} {
+            append busif ":axi_ddr_in_$i"
+        }
+    }
+    for {set i 0} {$i < $cnfg(n_reg)} {incr i} {
+        append busif ":axi_ctrl_$i"
+    }
+    if {$cnfg(en_avx) eq 1} {
+        for {set i 0} {$i < $cnfg(n_reg)} {incr i} {
+            append busif ":axim_ctrl_$i"
+        }
+    }
+    set aclk [ create_bd_port -dir O -type clk aclk ]
+    set_property -dict [ list \
+        CONFIG.ASSOCIATED_BUSIF $busif \
+        CONFIG.ASSOCIATED_RESET {aresetn} \
+    ] $aclk
+
+    # PCIe reset
+    set perst_n [ create_bd_port -dir I -type rst perst_n ]
+    set_property -dict [ list \
+        CONFIG.POLARITY {ACTIVE_LOW} \
+    ] $perst_n
+
+    # External reset
+    set reset_0 [ create_bd_port -dir I -type rst reset_0 ]
+    set_property -dict [ list \
+        CONFIG.POLARITY {ACTIVE_HIGH} \
+    ] $reset_0
+
+    # User interrupts
+    set usr_irq [ create_bd_port -dir I -from 1 -to 0 -type intr usr_irq ]
+    set_property -dict [ list \
+        CONFIG.PortWidth $cnfg(n_reg) \
+    ] $usr_irq
+
+    # PR clock and reset
+    if {$cnfg(en_pr) eq 1} {
+        set pclk [ create_bd_port -dir O -type clk pclk ]
+        set presetn [ create_bd_port -dir O -from 0 -to 0 -type rst presetn ]
+    }
+
+########################################################################################################
+# Create interconnect and components
+########################################################################################################
+    # Create instance: axi_interconnect_0, and set properties.
+    # M00 feeds axi_cnfg; each of the n_reg control ports adds one master (two with AVX).
+    if {$cnfg(en_avx) eq 1} {
+        set ic0_mi [expr {2*$cnfg(n_reg) + 1}]
+    } else {
+        set ic0_mi [expr {$cnfg(n_reg) + 1}]
+    }
+
+    set ic0_props [ list \
+        CONFIG.NUM_MI $ic0_mi \
+        CONFIG.S00_HAS_DATA_FIFO {2} \
+        CONFIG.STRATEGY {2} \
+    ]
+    for {set i 0} {$i < $ic0_mi} {incr i} {
+        lappend ic0_props [format "CONFIG.M%02d_HAS_REGSLICE" $i] {4}
+    }
+    set axi_interconnect_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_interconnect_0 ]
+    set_property -dict $ic0_props $axi_interconnect_0
+
+    # Create instance: axi_interconnect_1(2), and set properties
+    if {$cnfg(en_ddr) eq 1} {
+        set ic1_si 2
+        set ic1_mi 1
+        for {set i 1} {$i <= $cnfg(n_ddr_chan)} {incr i} {
+            set ddr_ic [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_interconnect_$i ]
+            set_property -dict [ list \
+                CONFIG.NUM_MI $ic1_mi \
+                CONFIG.NUM_SI $ic1_si \
+                CONFIG.S00_HAS_REGSLICE {4} \
+                CONFIG.STRATEGY {2} \
+                CONFIG.M00_HAS_DATA_FIFO {0} \
+                CONFIG.M00_HAS_REGSLICE {4} \
+                CONFIG.S01_HAS_REGSLICE {4} \
+            ] $ddr_ic
+        }
+
+        foreach k {0 1} {
+            if {$cnfg(ddr_$k) eq 1} {
+                # Create instance: ddr4_<k>, and set properties
+                set ddr4 [ create_bd_cell -type ip -vlnv xilinx.com:ip:ddr4:2.2 ddr4_$k ]
+                set_property -dict [ list \
+                    CONFIG.C0.BANK_GROUP_WIDTH {2} \
+                    CONFIG.C0.CKE_WIDTH {1} \
+                    CONFIG.C0.CS_WIDTH {1} \
+                    CONFIG.C0.ODT_WIDTH {1} \
+                    CONFIG.C0.ControllerType {DDR4_SDRAM} \
+                    CONFIG.C0.DDR4_AxiAddressWidth {34} \
+                    CONFIG.C0.DDR4_AxiDataWidth {512} \
+                    CONFIG.C0.DDR4_CLKOUT0_DIVIDE {5} \
+                    CONFIG.C0.DDR4_CasLatency {17} \
+                    CONFIG.C0.DDR4_CasWriteLatency {12} \
+                    CONFIG.C0.DDR4_DataMask {NONE} \
+                    CONFIG.C0.DDR4_DataWidth {72} \
+                    CONFIG.C0.DDR4_Ecc {true} \
+                    CONFIG.C0.DDR4_InputClockPeriod {9996} \
+                    CONFIG.C0.DDR4_MemoryPart {MTA18ASF2G72PZ-2G3} \
+                    CONFIG.C0.DDR4_MemoryType {RDIMMs} \
+                    CONFIG.C0.DDR4_TimePeriod {833} \
+                    CONFIG.C0.DDR4_AUTO_AP_COL_A3 {true} \
+                    CONFIG.C0.DDR4_Mem_Add_Map {ROW_COLUMN_BANK_INTLV} \
+                ] $ddr4
+
+                # Create instance: rst_ddr4_<k>_300M
+                create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 rst_ddr4_${k}_300M
+            }
+        }
+    }
+
+    if {$cnfg(en_pr) eq 1} {
+        # Create instance: clk_wiz_0, and set properties
+        set clk_wiz_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:clk_wiz:6.0 clk_wiz_0 ]
+        set_property -dict [ list \
+            CONFIG.CLKIN1_JITTER_PS {40.0} \
+            CONFIG.CLKOUT1_JITTER {119.392} \
+            CONFIG.CLKOUT1_PHASE_ERROR {154.678} \
+            CONFIG.CLKOUT1_REQUESTED_OUT_FREQ {200.000} \
+            CONFIG.MMCM_CLKFBOUT_MULT_F {24.000} \
+            CONFIG.MMCM_CLKIN1_PERIOD {4.000} \
+            CONFIG.MMCM_CLKIN2_PERIOD {10.0} \
+            CONFIG.MMCM_CLKOUT0_DIVIDE_F {6.000} \
+            CONFIG.MMCM_DIVCLK_DIVIDE {5} \
+            CONFIG.PRIM_IN_FREQ {250.000} \
+        ] $clk_wiz_0
+
+        # Create instance: proc_sys_reset_0, and set properties
+        set proc_sys_reset_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 proc_sys_reset_0 ]
+    }
+
+    # Create instance: util_ds_buf, and set properties
+    set util_ds_buf [ create_bd_cell -type ip -vlnv xilinx.com:ip:util_ds_buf:2.1 util_ds_buf ]
+    set_property -dict [ list \
+        CONFIG.C_BUF_TYPE {IBUFDSGTE} \
+    ] $util_ds_buf
+
+    # Create instance: proc_sys_reset_1, and set properties
+    set proc_sys_reset_1 [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 proc_sys_reset_1 ]
+
+    proc dec2bin i {
+        #returns a string, e.g. dec2bin 10 => 1010
+        set res {}
+        while {$i>0} {
+            set res [expr {$i%2}]$res
+            set i [expr {$i/2}]
+        }
+        if {$res == {}} {set res 0}
+        return $res
+    }
+
+    # Bit string with n_chan ones, zero-padded to four digits, for dsc_bypass_rd/wr
+    set bypass [expr {(1 << ($cnfg(n_chan))) - 1}]
+    set bypass [format "%04d" [dec2bin $bypass]]
+
+    # Create instance: xdma_0, and set properties
+    set xdma_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:xdma:4.1 xdma_0 ]
+    set_property -dict [ list \
+        CONFIG.axi_bypass_64bit_en {true} \
+        CONFIG.axi_bypass_prefetchable {true} \
+        CONFIG.axi_data_width {512_bit} \
+        CONFIG.axi_id_width {4} \
+        CONFIG.axist_bypass_en {true} \
+        CONFIG.axist_bypass_scale {Gigabytes} \
+        CONFIG.axist_bypass_size {1} \
+        CONFIG.axisten_freq {250} \
+        CONFIG.cfg_mgmt_if {false} \
+        CONFIG.dsc_bypass_rd $bypass \
+        CONFIG.dsc_bypass_wr $bypass \
+        CONFIG.pciebar2axibar_axil_master {0x00000000} \
+        CONFIG.pf0_msi_cap_multimsgcap {32_vectors} \
+        CONFIG.pf0_msix_cap_pba_offset {00008FE0} \
+        CONFIG.pf0_msix_cap_table_offset {00008000} \
+        CONFIG.pf0_msix_cap_table_size {01F} \
+        CONFIG.pf0_msix_enabled {true} \
+        CONFIG.pl_link_cap_max_link_speed {8.0_GT/s} \
+        CONFIG.pl_link_cap_max_link_width {X16} \
+        CONFIG.xdma_axi_intf_mm {AXI_Stream} \
+        CONFIG.xdma_num_usr_irq $cnfg(n_reg) \
+        CONFIG.xdma_rnum_chnl [expr {$cnfg(n_chan)}] \
+        CONFIG.xdma_sts_ports {true} \
+        CONFIG.xdma_wnum_chnl [expr {$cnfg(n_chan)}] \
+        CONFIG.xdma_wnum_rids {16} \
+        CONFIG.xdma_rnum_rids {16} \
+    ] $xdma_0
+
+########################################################################################################
+# Create interface connections
+########################################################################################################
+    # XDMA
+    connect_bd_intf_net -intf_net diff_clock_rtl_0_1 [get_bd_intf_ports pcie_clk] [get_bd_intf_pins util_ds_buf/CLK_IN_D]
+    connect_bd_intf_net -intf_net xdma_0_pcie_mgt [get_bd_intf_ports pcie_x16] [get_bd_intf_pins xdma_0/pcie_mgt]
+    connect_bd_intf_net -intf_net axi_interconnect_0_M00_AXI [get_bd_intf_ports axi_cnfg] [get_bd_intf_pins axi_interconnect_0/M00_AXI]
+    connect_bd_intf_net -intf_net xdma_0_M_AXI_BYPASS [get_bd_intf_pins axi_interconnect_0/S00_AXI] [get_bd_intf_pins xdma_0/M_AXI_BYPASS]
+    connect_bd_intf_net -intf_net xdma_0_dma_status_ports [get_bd_intf_ports dsc_status] [get_bd_intf_pins xdma_0/dma_status_ports]
+
+    # Dynamic control: with AVX, index i uses masters 2i+1 (axi_ctrl) and 2i+2
+    # (axim_ctrl); without AVX it uses master i+1 only.
+    for {set i 0} {$i < $cnfg(n_reg)} {incr i} {
+        if {$cnfg(en_avx) eq 1} {
+            set mi [format "M%02d" [expr {$i*2 + 1}]]
+        } else {
+            set mi [format "M%02d" [expr {$i + 1}]]
+        }
+        connect_bd_intf_net -intf_net axi_interconnect_0_${mi}_AXI [get_bd_intf_ports axi_ctrl_$i] [get_bd_intf_pins axi_interconnect_0/${mi}_AXI]
+        if {$cnfg(en_avx) eq 1} {
+            set mi [format "M%02d" [expr {$i*2 + 2}]]
+            connect_bd_intf_net -intf_net axi_interconnect_0_${mi}_AXI [get_bd_intf_ports axim_ctrl_$i] [get_bd_intf_pins axi_interconnect_0/${mi}_AXI]
+        }
+    }
+
+    # DDRs
+    if {$cnfg(en_ddr) eq 1} {
+        for {set i 1} {$i <= $cnfg(n_ddr_chan)} {incr i} {
+            set nn [expr {$i - 1}]
+            connect_bd_intf_net -intf_net axi_interconnect_${i}_M00_AXI [get_bd_intf_pins axi_interconnect_$i/M00_AXI] [get_bd_intf_pins ddr4_$nn/C0_DDR4_S_AXI]
+            connect_bd_intf_net -intf_net ddr4_${nn}_C0_DDR4 [get_bd_intf_ports c${nn}_ddr4] [get_bd_intf_pins ddr4_$nn/C0_DDR4]
+            connect_bd_intf_net -intf_net diff_clock_rtl_${nn}_2 [get_bd_intf_ports c${nn}_sys_clk_0] [get_bd_intf_pins ddr4_$nn/C0_SYS_CLK]
+            connect_bd_intf_net [get_bd_intf_ports axi_ctrl_ddr_$nn] [get_bd_intf_pins ddr4_$nn/C0_DDR4_S_AXI_CTRL]
+
+            # Slave j of interconnect i is fed by port axi_ddr_in_<(i-1) + j*n_ddr_chan>
+            for {set j 0} {$j < 2} {incr j} {
+                set src [expr {$i - 1 + $j*$cnfg(n_ddr_chan)}]
+                connect_bd_intf_net -intf_net axi_ddr_in_${src}_1 [get_bd_intf_ports axi_ddr_in_$src] [get_bd_intf_pins [format "axi_interconnect_%d/S%02d_AXI" $i $j]]
+            }
+        }
+    }
+
+    # Data lines
+    for {set i 0} {$i < $cnfg(n_chan)} {incr i} {
+        connect_bd_intf_net -intf_net axis_dyn_in_${i}_1 [get_bd_intf_ports axis_dyn_in_$i] [get_bd_intf_pins xdma_0/S_AXIS_C2H_$i]
+        connect_bd_intf_net -intf_net xdma_0_M_AXIS_H2C_$i [get_bd_intf_ports axis_dyn_out_$i] [get_bd_intf_pins xdma_0/M_AXIS_H2C_$i]
+        connect_bd_intf_net -intf_net dsc_bypass_c2h_${i}_1 [get_bd_intf_ports dsc_bypass_c2h_$i] [get_bd_intf_pins xdma_0/dsc_bypass_c2h_$i]
+        connect_bd_intf_net -intf_net dsc_bypass_h2c_${i}_1 [get_bd_intf_ports dsc_bypass_h2c_$i] [get_bd_intf_pins xdma_0/dsc_bypass_h2c_$i]
+    }
+
+########################################################################################################
+# Create port connections
+########################################################################################################
+    # PR
+    if {$cnfg(en_pr) eq 1} {
+        connect_bd_net -net pr_aresetn_1 [get_bd_ports presetn] [get_bd_pins proc_sys_reset_0/peripheral_aresetn]
+        connect_bd_net -net clk_wiz_0_clk_out1 [get_bd_ports pclk] [get_bd_pins clk_wiz_0/clk_out1] [get_bd_pins proc_sys_reset_0/slowest_sync_clk]
+    }
+
+    # XDMA
+    connect_bd_net -net reset_rtl_0_1 [get_bd_ports perst_n] [get_bd_pins xdma_0/sys_rst_n]
+    connect_bd_net -net usr_irq_1 [get_bd_ports usr_irq] [get_bd_pins xdma_0/usr_irq_req]
+    connect_bd_net -net util_ds_buf_IBUF_DS_ODIV2 [get_bd_pins util_ds_buf/IBUF_DS_ODIV2] [get_bd_pins xdma_0/sys_clk]
+    connect_bd_net -net util_ds_buf_IBUF_OUT [get_bd_pins util_ds_buf/IBUF_OUT] [get_bd_pins xdma_0/sys_clk_gt]
+
+    # External reset: only reference cells that were actually created above
+    # (clk_wiz_0 and proc_sys_reset_0 exist only when en_pr is set).
+    set rst_sinks ""
+    if {$cnfg(en_pr) eq 1} {
+        append rst_sinks " \[get_bd_pins clk_wiz_0/reset] \[get_bd_pins proc_sys_reset_0/ext_reset_in]"
+    }
+    if {$cnfg(en_ddr) eq 1} {
+        foreach k {0 1} {
+            if {$cnfg(ddr_$k) eq 1} {
+                append rst_sinks " \[get_bd_pins ddr4_$k/sys_rst]"
+            }
+        }
+    }
+    if {$rst_sinks ne ""} {
+        eval "connect_bd_net -net reset_rtl_0_0_1 \[get_bd_ports reset_0]$rst_sinks"
+    }
+
+    # DDRs
+    if {$cnfg(en_ddr) eq 1} {
+        for {set i 1} {$i <= $cnfg(n_ddr_chan)} {incr i} {
+            set nn [expr {$i - 1}]
+            connect_bd_net -net rst_ddr4_${nn}_300M_peripheral_aresetn [get_bd_pins axi_interconnect_$i/M00_ARESETN] [get_bd_pins ddr4_$nn/c0_ddr4_aresetn] [get_bd_pins rst_ddr4_${nn}_300M/peripheral_aresetn]
+            connect_bd_net -net ddr4_${nn}_c0_ddr4_ui_clk [get_bd_pins axi_interconnect_$i/M00_ACLK] [get_bd_pins ddr4_$nn/c0_ddr4_ui_clk] [get_bd_pins rst_ddr4_${nn}_300M/slowest_sync_clk]
+            connect_bd_net -net ddr4_${nn}_c0_ddr4_ui_clk_sync_rst [get_bd_pins ddr4_$nn/c0_ddr4_ui_clk_sync_rst] [get_bd_pins rst_ddr4_${nn}_300M/ext_reset_in]
+        }
+    }
+
+    # XDMA
+    connect_bd_net -net xdma_0_axi_aresetn_ns [get_bd_pins xdma_0/axi_aresetn] [get_bd_pins proc_sys_reset_1/ext_reset_in]
+    connect_bd_net -net proc_sys_reset_1_peripheral_aresetn [get_bd_ports aresetn] [get_bd_pins proc_sys_reset_1/peripheral_aresetn]
+
+    # The aclk/aresetn nets have a configuration-dependent number of sinks, so
+    # the commands are assembled as strings and evaluated once complete.
+    set cmd_clk "connect_bd_net -net xdma_0_axi_aclk \[get_bd_ports aclk] \[get_bd_pins proc_sys_reset_1/slowest_sync_clk] \[get_bd_pins axi_interconnect_0/ACLK] \[get_bd_pins axi_interconnect_0/M00_ACLK] \[get_bd_pins xdma_0/axi_aclk] \[get_bd_pins axi_interconnect_0/S00_ACLK]"
+    set cmd_rst "connect_bd_net -net xdma_0_axi_aresetn_s \[get_bd_pins axi_interconnect_0/ARESETN] \[get_bd_pins proc_sys_reset_1/interconnect_aresetn] \[get_bd_pins axi_interconnect_0/M00_ARESETN] \[get_bd_pins axi_interconnect_0/S00_ARESETN]"
+
+    if {$cnfg(en_pr) eq 1} {
+        append cmd_clk " \[get_bd_pins clk_wiz_0/clk_in1]"
+    }
+
+    # Masters M01..M<ic0_mi-1> (M00 is already part of both commands)
+    for {set i 1} {$i < $ic0_mi} {incr i} {
+        append cmd_clk [format " \[get_bd_pins axi_interconnect_0/M%02d_ACLK]" $i]
+        append cmd_rst [format " \[get_bd_pins axi_interconnect_0/M%02d_ARESETN]" $i]
+    }
+
+    if {$cnfg(en_ddr) eq 1} {
+        for {set i 1} {$i <= $cnfg(n_ddr_chan)} {incr i} {
+            append cmd_clk " \[get_bd_pins axi_interconnect_$i/ACLK]"
+            append cmd_rst " \[get_bd_pins axi_interconnect_$i/ARESETN]"
+            for {set j 0} {$j < 2} {incr j} {
+                append cmd_clk [format " \[get_bd_pins axi_interconnect_$i/S%02d_ACLK]" $j]
+                append cmd_rst [format " \[get_bd_pins axi_interconnect_$i/S%02d_ARESETN]" $j]
+            }
+        }
+    }
+
+    eval $cmd_clk
+    eval $cmd_rst
+
+########################################################################################################
+# Create address segments
+########################################################################################################
+    # Static config
+    create_bd_addr_seg -range 0x00008000 -offset 0x00000000 [get_bd_addr_spaces xdma_0/M_AXI_BYPASS] [get_bd_addr_segs axi_cnfg/Reg] SEG_axi_cnfg_Reg
+
+    # Control windows: axi_ctrl_<i> uses hex(0x10 + 4*i), axim_ctrl_<i> (AVX only) uses hex(0x100 + 4*i)
+    for {set i 0} {$i < $cnfg(n_reg)} {incr i} {
+        set offset [format "0x000%02x0000" [expr {0x10 + $i * 4}]]
+        create_bd_addr_seg -range 0x00040000 -offset $offset [get_bd_addr_spaces xdma_0/M_AXI_BYPASS] [get_bd_addr_segs axi_ctrl_$i/Reg] SEG_axi_ctrl_${i}_Reg
+        if {$cnfg(en_avx) eq 1} {
+            set offset [format "0x00%03x0000" [expr {0x100 + $i * 4}]]
+            create_bd_addr_seg -range 0x00040000 -offset $offset [get_bd_addr_spaces xdma_0/M_AXI_BYPASS] [get_bd_addr_segs axim_ctrl_$i/Reg] SEG_axim_ctrl_${i}_Reg
+        }
+    }
+
+    # DDRs
+    if {$cnfg(en_ddr) eq 1} {
+        for {set i 0} {$i < $cnfg(n_ddr_chan)} {incr i} {
+            for {set j 0} {$j < 2} {incr j} {
+                set nn [expr {$i + $j * $cnfg(n_ddr_chan)}]
+                create_bd_addr_seg -range 0x80000000 -offset 0x00000000 [get_bd_addr_spaces axi_ddr_in_$nn] [get_bd_addr_segs ddr4_$i/C0_DDR4_MEMORY_MAP/C0_DDR4_ADDRESS_BLOCK] SEG_ddr4_${i}_${j}_C0_DDR4_ADDRESS_BLOCK
+            }
+
+            create_bd_addr_seg -range 0x00008000 -offset 0x00000000 [get_bd_addr_spaces axi_ctrl_ddr_$i] [get_bd_addr_segs ddr4_$i/C0_DDR4_MEMORY_MAP_CTRL/C0_REG] SEG_ddr4_ctrl_${i}_C0_DDR4_ADDRESS_BLOCK
+        }
+    }
+
+    # Restore current instance
+    current_bd_instance $oldCurInst
+
+    save_bd_design
+    close_bd_design $design_name
+}
+# End of cr_bd_design_static()
diff --git a/hw/scripts/bd/cr_static_vcu118.tcl b/hw/scripts/bd/cr_static_vcu118.tcl new file mode 100644 index 00000000..3476f762 --- /dev/null +++ b/hw/scripts/bd/cr_static_vcu118.tcl @@ -0,0 +1,645 @@ +# Proc to create BD design_static +proc cr_bd_design_static_vcu118 { parentCell } { + upvar #0 cfg cnfg + + # CHANGE DESIGN NAME HERE + set design_name design_static + + common::send_msg_id "BD_TCL-003" "INFO" "Currently there is no design <$design_name> in project, so creating one..." 
+ + create_bd_design $design_name + + set bCheckIPsPassed 1 + ######################################################################################################## + # CHECK IPs + ######################################################################################################## + set bCheckIPs 1 + if { $bCheckIPs == 1 } { + set list_check_ips "\ + xilinx.com:ip:clk_wiz:6.0\ + xilinx.com:ip:ddr4:2.2\ + xilinx.com:ip:proc_sys_reset:5.0\ + xilinx.com:ip:util_ds_buf:2.1\ + xilinx.com:ip:xdma:4.1\ + " + + set list_ips_missing "" + common::send_msg_id "BD_TCL-006" "INFO" "Checking if the following IPs exist in the project's IP catalog: $list_check_ips ." + + foreach ip_vlnv $list_check_ips { + set ip_obj [get_ipdefs -all $ip_vlnv] + if { $ip_obj eq "" } { + lappend list_ips_missing $ip_vlnv + } + } + + if { $list_ips_missing ne "" } { + catch {common::send_msg_id "BD_TCL-115" "ERROR" "The following IPs are not found in the IP Catalog:\n $list_ips_missing\n\nResolution: Please add the repository containing the IP(s) to the project." } + set bCheckIPsPassed 0 + } + + } + + if { $bCheckIPsPassed != 1 } { + common::send_msg_id "BD_TCL-1003" "WARNING" "Will not continue with creation of design due to the error(s) above." + return 3 + } + + variable script_folder + + if { $parentCell eq "" } { + set parentCell [get_bd_cells /] + } + + # Get object for parentCell + set parentObj [get_bd_cells $parentCell] + if { $parentObj == "" } { + catch {common::send_msg_id "BD_TCL-100" "ERROR" "Unable to find parent cell <$parentCell>!"} + return + } + + # Make sure parentObj is hier blk + set parentType [get_property TYPE $parentObj] + if { $parentType ne "hier" } { + catch {common::send_msg_id "BD_TCL-101" "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} + return + } + + # Save current instance; Restore later + set oldCurInst [current_bd_instance .] 
+ + # Set parent object as current + current_bd_instance $parentObj + +######################################################################################################## +# Create interface ports +######################################################################################################## + # Static config + set axi_cnfg [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:aximm_rtl:1.0 axi_cnfg ] + set_property -dict [ list \ + CONFIG.ADDR_WIDTH {64} \ + CONFIG.DATA_WIDTH {64} \ + CONFIG.PROTOCOL {AXI4LITE} \ + ] $axi_cnfg + + # XDMA status + set dsc_status [ create_bd_intf_port -mode Master -vlnv xilinx.com:display_xdma:xdma_status_ports_rtl:1.0 dsc_status ] + + # PCIe + set pcie_clk [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:diff_clock_rtl:1.0 pcie_clk ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {100000000} \ + ] $pcie_clk + set pcie_x16 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:pcie_7x_mgt_rtl:1.0 pcie_x16 ] + + # DDRs + if {$cnfg(en_ddr) eq 1} { + if {$cnfg(ddr_0) eq 1} { + set c0_ddr4 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:ddr4_rtl:1.0 c0_ddr4 ] + set c0_sys_clk_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:diff_clock_rtl:1.0 c0_sys_clk_0 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $c0_sys_clk_0 + } + + if {$cnfg(ddr_1) eq 1} { + set c1_ddr4 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:ddr4_rtl:1.0 c1_ddr4 ] + set c1_sys_clk_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:diff_clock_rtl:1.0 c1_sys_clk_0 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $c1_sys_clk_0 + } + } + + set nn 0 + if {$cnfg(en_pr) eq 1} { + incr nn + } + + # Streams and XDMA control + for {set i 0} {$i < $cnfg(n_chan)} {incr i} { + # Host source + set cmd "set axis_dyn_in_$i \[ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 axis_dyn_in_$i ] + set_property -dict \[ list \ + 
CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] \$axis_dyn_in_$i" + eval $cmd + + # Host sink + set cmd "set axis_dyn_out_$i \[ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 axis_dyn_out_$i ]" + eval $cmd + + # Host source control + set cmd "set dsc_bypass_c2h_$i \[ create_bd_intf_port -mode Slave -vlnv xilinx.com:display_xdma:dsc_bypass_rtl:1.0 dsc_bypass_c2h_$i ]" + eval $cmd + + # Host sink control + set cmd "set dsc_bypass_h2c_$i \[ create_bd_intf_port -mode Slave -vlnv xilinx.com:display_xdma:dsc_bypass_rtl:1.0 dsc_bypass_h2c_$i ]" + eval $cmd + } + + # DDR channels + if {$cnfg(en_ddr) eq 1} { + for {set i 0} {$i < $cnfg(n_ddr_chan) * 2} {incr i} { + set cmd "set axi_ddr_in_$i \[ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:aximm_rtl:1.0 axi_ddr_in_$i ] + set_property -dict \[ list \ + CONFIG.ADDR_WIDTH {64} \ + CONFIG.ARUSER_WIDTH {0} \ + CONFIG.AWUSER_WIDTH {0} \ + CONFIG.BUSER_WIDTH {0} \ + CONFIG.DATA_WIDTH {512} \ + CONFIG.HAS_BRESP {1} \ + CONFIG.HAS_BURST {1} \ + CONFIG.HAS_CACHE {1} \ + CONFIG.HAS_LOCK {1} \ + CONFIG.HAS_PROT {1} \ + CONFIG.HAS_QOS {0} \ + CONFIG.HAS_REGION {0} \ + CONFIG.HAS_RRESP {1} \ + CONFIG.HAS_WSTRB {1} \ + CONFIG.ID_WIDTH {1} \ + CONFIG.MAX_BURST_LENGTH {64} \ + CONFIG.NUM_READ_OUTSTANDING {8} \ + CONFIG.NUM_READ_THREADS {8} \ + CONFIG.NUM_WRITE_OUTSTANDING {8} \ + CONFIG.NUM_WRITE_THREADS {8} \ + CONFIG.PROTOCOL {AXI4} \ + CONFIG.READ_WRITE_MODE {READ_WRITE} \ + CONFIG.RUSER_BITS_PER_BYTE {0} \ + CONFIG.RUSER_WIDTH {0} \ + CONFIG.SUPPORTS_NARROW_BURST {0} \ + CONFIG.WUSER_BITS_PER_BYTE {0} \ + CONFIG.WUSER_WIDTH {0} \ + ] \$axi_ddr_in_$i" + eval $cmd + } + } + + # Dynamic control + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + set cmd "set axi_ctrl_$i \[ create_bd_intf_port -mode Master 
-vlnv xilinx.com:interface:aximm_rtl:1.0 axi_ctrl_$i ] + set_property -dict \[ list \ + CONFIG.ADDR_WIDTH {64} \ + CONFIG.DATA_WIDTH {64} \ + CONFIG.PROTOCOL {AXI4LITE} \ + ] \$axi_ctrl_$i" + eval $cmd + } + + # AVX control + if {$cnfg(en_avx) eq 1} { + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + set cmd "set axim_ctrl_$i \[ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:aximm_rtl:1.0 axim_ctrl_$i ] + set_property -dict \[ list \ + CONFIG.ADDR_WIDTH {64} \ + CONFIG.DATA_WIDTH {256} \ + CONFIG.HAS_BRESP {1} \ + CONFIG.HAS_BURST {1} \ + CONFIG.HAS_CACHE {1} \ + CONFIG.HAS_LOCK {1} \ + CONFIG.HAS_PROT {1} \ + CONFIG.HAS_QOS {0} \ + CONFIG.HAS_REGION {0} \ + CONFIG.HAS_RRESP {1} \ + CONFIG.HAS_WSTRB {1} \ + CONFIG.NUM_READ_OUTSTANDING {8} \ + CONFIG.NUM_WRITE_OUTSTANDING {8} \ + CONFIG.PROTOCOL {AXI4} \ + CONFIG.READ_WRITE_MODE {READ_WRITE} \ + ] \$axim_ctrl_$i" + eval $cmd + } + } + +######################################################################################################## +# Create ports +######################################################################################################## + # Main reset + set aresetn [ create_bd_port -dir O -type rst aresetn ] + + set nn 0 + if {$cnfg(en_pr) eq 1} { + incr nn + } + + # Main clock + set cmd "set aclk \[ create_bd_port -dir O -type clk aclk ] + set_property -dict \[ list \ + CONFIG.ASSOCIATED_BUSIF {axi_cnfg" + for {set i 0} {$i < $cnfg(n_chan)} {incr i} { + append cmd ":axis_dyn_out_$i:axis_dyn_in_$i" + } + if {$cnfg(en_ddr) eq 1} { + for {set i 0} {$i < $cnfg(n_ddr_chan) * 2} {incr i} { + append cmd ":axi_ddr_in_$i" + } + } + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + append cmd ":axi_ctrl_$i" + } + if {$cnfg(en_avx) eq 1} { + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + append cmd ":axim_ctrl_$i" + } + } + append cmd "} \ + CONFIG.ASSOCIATED_RESET {aresetn} \ + ] \$aclk" + eval $cmd + + # PCIe reset + set perst_n [ create_bd_port -dir I -type rst perst_n ] + set_property -dict 
[ list \ + CONFIG.POLARITY {ACTIVE_LOW} \ + ] $perst_n + + # External reset + set reset_0 [ create_bd_port -dir I -type rst reset_0 ] + set_property -dict [ list \ + CONFIG.POLARITY {ACTIVE_HIGH} \ + ] $reset_0 + + # User interrupts + set cmd "set usr_irq \[ create_bd_port -dir I -from 1 -to 0 -type intr usr_irq ] + set_property -dict \[ list \ + CONFIG.PortWidth {$cnfg(n_reg)} \ + ] \$usr_irq" + eval $cmd + + # PR clock and reset + if {$cnfg(en_pr) eq 1} { + set pclk [ create_bd_port -dir O -type clk pclk ] + set presetn [ create_bd_port -dir O -from 0 -to 0 -type rst presetn ] + } + +######################################################################################################## +# Create interconnect and components +######################################################################################################## + # Create instance: axi_interconnect_0, and set properties + if {$cnfg(en_avx) eq 1} { + set ic0_mi [expr {2*$cnfg(n_reg) + 1}] + } else { + set ic0_mi [expr {$cnfg(n_reg) + 1}] + } + + set cmd "set axi_interconnect_0 \[ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_interconnect_0 ] + set_property -dict \[ list \ + CONFIG.NUM_MI {$ic0_mi} \ + CONFIG.S00_HAS_DATA_FIFO {2} \ + CONFIG.STRATEGY {2} \ " + if {$cnfg(en_avx) eq 1} { + for {set i 0} {$i <= 2 * $cnfg(n_reg)} {incr i} { + append cmd [format " CONFIG.M%02d_HAS_REGSLICE {4}" $i] + } + } else { + for {set i 0} {$i <= $cnfg(n_reg)} {incr i} { + append cmd [format " CONFIG.M%02d_HAS_REGSLICE {4}" $i] + } + } + append cmd "] \$axi_interconnect_0" + eval $cmd + + # Create instance: axi_interconnect_1(2), and set properties + if {$cnfg(en_ddr) eq 1} { + set ic1_si 2 + set ic1_mi 1 + for {set i 1} {$i <= $cnfg(n_ddr_chan)} {incr i} { + set cmd "set axi_interconnect_$i \[ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_interconnect_$i ] + set_property -dict \[ list \ + CONFIG.NUM_MI {$ic1_mi} \ + CONFIG.NUM_SI {$ic1_si} \ + CONFIG.S00_HAS_REGSLICE 
{4} \
+                CONFIG.STRATEGY {2} \ "
+            # Master port M00: no data FIFO, register slice enabled.
+            append cmd [format " CONFIG.M%02d_HAS_DATA_FIFO {0} CONFIG.M%02d_HAS_REGSLICE {4}" 0 0]
+            # Register slice on both slave ports (S00, S01).
+            # Fix: the format call was missing its opening bracket, which made
+            # this statement a Tcl syntax error whenever en_ddr was set.
+            for {set j 0} {$j < 2} {incr j} {
+                append cmd [format " CONFIG.S%02d_HAS_REGSLICE {4}" $j]
+            }
+            append cmd "] \$axi_interconnect_$i"
+            eval $cmd
+        }
+
+        if {$cnfg(ddr_0) eq 1} {
+            # Create instance: ddr4_0, and set properties
+            set ddr4_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:ddr4:2.2 ddr4_0 ]
+            set_property -dict [ list \
+                CONFIG.C0.BANK_GROUP_WIDTH {1} \
+                CONFIG.C0.DDR4_AxiAddressWidth {31} \
+                CONFIG.C0.DDR4_AxiDataWidth {512} \
+                CONFIG.C0.DDR4_CLKOUT0_DIVIDE {5} \
+                CONFIG.C0.DDR4_CasWriteLatency {12} \
+                CONFIG.C0.DDR4_DataMask {DM_NO_DBI} \
+                CONFIG.C0.DDR4_DataWidth {64} \
+                CONFIG.C0.DDR4_Ecc {false} \
+                CONFIG.C0.DDR4_InputClockPeriod {4000} \
+                CONFIG.C0.DDR4_MemoryPart {MT40A256M16GE-083E} \
+                CONFIG.C0.DDR4_TimePeriod {833} \
+            ] $ddr4_0
+
+            # Create instance: rst_ddr4_0_300M (reset block for ddr4_0)
+            set rst_ddr4_0_300M [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 rst_ddr4_0_300M ]
+        }
+
+        if {$cnfg(ddr_1) eq 1} {
+            # Create instance: ddr4_1, and set properties (same settings as ddr4_0)
+            set ddr4_1 [ create_bd_cell -type ip -vlnv xilinx.com:ip:ddr4:2.2 ddr4_1 ]
+            set_property -dict [ list \
+                CONFIG.C0.BANK_GROUP_WIDTH {1} \
+                CONFIG.C0.DDR4_AxiAddressWidth {31} \
+                CONFIG.C0.DDR4_AxiDataWidth {512} \
+                CONFIG.C0.DDR4_CLKOUT0_DIVIDE {5} \
+                CONFIG.C0.DDR4_CasWriteLatency {12} \
+                CONFIG.C0.DDR4_DataMask {DM_NO_DBI} \
+                CONFIG.C0.DDR4_DataWidth {64} \
+                CONFIG.C0.DDR4_Ecc {false} \
+                CONFIG.C0.DDR4_InputClockPeriod {4000} \
+                CONFIG.C0.DDR4_MemoryPart {MT40A256M16GE-083E} \
+                CONFIG.C0.DDR4_TimePeriod {833} \
+            ] $ddr4_1
+
+            # Create instance: rst_ddr4_1_300M (reset block for ddr4_1)
+            set rst_ddr4_1_300M [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 rst_ddr4_1_300M ]
+        }
+    }
+
+    if {$cnfg(en_pr) eq 1} {
+        # Create instance: clk_wiz_0, and set properties
+        set clk_wiz_0 [ create_bd_cell -type ip -vlnv
xilinx.com:ip:clk_wiz:6.0 clk_wiz_0 ] + set_property -dict [ list \ + CONFIG.CLKIN1_JITTER_PS {40.0} \ + CONFIG.CLKOUT1_JITTER {119.392} \ + CONFIG.CLKOUT1_PHASE_ERROR {154.678} \ + CONFIG.CLKOUT1_REQUESTED_OUT_FREQ {200.000} \ + CONFIG.MMCM_CLKFBOUT_MULT_F {24.000} \ + CONFIG.MMCM_CLKIN1_PERIOD {4.000} \ + CONFIG.MMCM_CLKIN2_PERIOD {10.0} \ + CONFIG.MMCM_CLKOUT0_DIVIDE_F {6.000} \ + CONFIG.MMCM_DIVCLK_DIVIDE {5} \ + CONFIG.PRIM_IN_FREQ {250.000} \ + ] $clk_wiz_0 + + # Create instance: proc_sys_reset_0, and set properties + set proc_sys_reset_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 proc_sys_reset_0 ] + } + + # Create instance: util_ds_buf, and set properties + set util_ds_buf [ create_bd_cell -type ip -vlnv xilinx.com:ip:util_ds_buf:2.1 util_ds_buf ] + set_property -dict [ list \ + CONFIG.C_BUF_TYPE {IBUFDSGTE} \ + ] $util_ds_buf + + # Create instance: proc_sys_reset_1, and set properties + set proc_sys_reset_1 [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 proc_sys_reset_1 ] + + proc dec2bin i { + #returns a string, e.g. 
dec2bin 10 => 1010 + set res {} + while {$i>0} { + set res [expr {$i%2}]$res + set i [expr {$i/2}] + } + if {$res == {}} {set res 0} + return $res + } + + set nn 0 + if {$cnfg(en_pr) eq 1} { + incr nn + } + + set bypass [expr {(1 << ($cnfg(n_chan))) - 1}] + set bypass [dec2bin $bypass] + + # Create instance: xdma_0, and set properties + set cmd "set xdma_0 \[ create_bd_cell -type ip -vlnv xilinx.com:ip:xdma:4.1 xdma_0 ] + set_property -dict \[ list \ + CONFIG.PF0_DEVICE_ID_mqdma {903F} \ + CONFIG.PF2_DEVICE_ID_mqdma {903F} \ + CONFIG.PF3_DEVICE_ID_mqdma {903F} \ + CONFIG.axi_bypass_64bit_en {true} \ + CONFIG.axi_bypass_prefetchable {false} \ + CONFIG.axi_data_width {512_bit} \ + CONFIG.axi_id_width {4} \ + CONFIG.axist_bypass_en {true} \ + CONFIG.axist_bypass_scale {Gigabytes} \ + CONFIG.axist_bypass_size {1} \ + CONFIG.axisten_freq {250} \ + CONFIG.cfg_mgmt_if {false} \ + CONFIG.dsc_bypass_rd {[format "%04d" $bypass]} \ + CONFIG.dsc_bypass_wr {[format "%04d" $bypass]} \ + CONFIG.pcie_blk_locn {X1Y2} \ + CONFIG.pciebar2axibar_axil_master {0x00000000} \ + CONFIG.pf0_base_class_menu {Memory_controller} \ + CONFIG.pf0_class_code {058000} \ + CONFIG.pf0_class_code_base {05} \ + CONFIG.pf0_class_code_interface {00} \ + CONFIG.pf0_class_code_sub {80} \ + CONFIG.pf0_device_id {903F} \ + CONFIG.pf0_msi_cap_multimsgcap {32_vectors} \ + CONFIG.pf0_msix_cap_pba_offset {00008FE0} \ + CONFIG.pf0_msix_cap_table_offset {00008000} \ + CONFIG.pf0_msix_cap_table_size {01F} \ + CONFIG.pf0_msix_enabled {true} \ + CONFIG.pf0_sub_class_interface_menu {Other_memory_controller} \ + CONFIG.pl_link_cap_max_link_speed {8.0_GT/s} \ + CONFIG.pl_link_cap_max_link_width {X16} \ + CONFIG.select_quad {GTY_Quad_227} \ + CONFIG.xdma_axi_intf_mm {AXI_Stream} \ + CONFIG.xdma_num_usr_irq {$cnfg(n_reg)} \ + CONFIG.xdma_rnum_chnl {[expr {$cnfg(n_chan)}]} \ + CONFIG.xdma_sts_ports {true} \ + CONFIG.xdma_wnum_chnl {[expr {$cnfg(n_chan)}]} \ + CONFIG.xdma_rnum_rids {16} \ + CONFIG.xdma_wnum_rids {16} \ + ] 
\$xdma_0" + eval $cmd + +######################################################################################################## +# Create interface connections +######################################################################################################## + # XDMA + connect_bd_intf_net -intf_net diff_clock_rtl_0_1 [get_bd_intf_ports pcie_clk] [get_bd_intf_pins util_ds_buf/CLK_IN_D] + connect_bd_intf_net -intf_net xdma_0_pcie_mgt [get_bd_intf_ports pcie_x16] [get_bd_intf_pins xdma_0/pcie_mgt] + connect_bd_intf_net -intf_net axi_interconnect_0_M00_AXI [get_bd_intf_ports axi_cnfg] [get_bd_intf_pins axi_interconnect_0/M00_AXI] + connect_bd_intf_net -intf_net xdma_0_M_AXI_BYPASS [get_bd_intf_pins axi_interconnect_0/S00_AXI] [get_bd_intf_pins xdma_0/M_AXI_BYPASS] + connect_bd_intf_net -intf_net xdma_0_dma_status_ports [get_bd_intf_ports dsc_status] [get_bd_intf_pins xdma_0/dma_status_ports] + + # Dynamic control + if {$cnfg(en_avx) eq 1} { + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + set j [expr {$i*2 + 1}] + set cmd [format "connect_bd_intf_net -intf_net axi_interconnect_0_M%02d_AXI \[get_bd_intf_ports axi_ctrl_%d] \[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]" $j $i $j] + eval $cmd + set j [expr {$i*2 + 2}] + set cmd [format "connect_bd_intf_net -intf_net axi_interconnect_0_M%02d_AXI \[get_bd_intf_ports axim_ctrl_%d] \[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]" $j $i $j] + eval $cmd + } + } else { + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + set j [expr {$i + 1}] + set cmd [format "connect_bd_intf_net -intf_net axi_interconnect_0_M%02d_AXI \[get_bd_intf_ports axi_ctrl_%d] \[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]" $j $i $j] + eval $cmd + } + } + + # DDRs + if {$cnfg(en_ddr) eq 1} { + for {set i 1} {$i <= $cnfg(n_ddr_chan)} {incr i} { + set nn [expr {$i - 1}] + set cmd [format "connect_bd_intf_net -intf_net axi_interconnect_%d_M00_AXI \[get_bd_intf_pins axi_interconnect_%d/M00_AXI] \[get_bd_intf_pins ddr4_%d/C0_DDR4_S_AXI]" $i $i $nn] 
+            eval $cmd
+            # DDR4 memory interface and reference clock of controller nn.
+            # The lookups are escaped so they run when the command is evaluated,
+            # matching the other generated connect commands; format receives
+            # exactly one argument per conversion specifier.
+            set cmd [format "connect_bd_intf_net -intf_net ddr4_%d_C0_DDR4 \[get_bd_intf_ports c$nn\_ddr4] \[get_bd_intf_pins ddr4_$nn/C0_DDR4]" $nn]
+            eval $cmd
+            set cmd [format "connect_bd_intf_net -intf_net diff_clock_rtl_%d_2 \[get_bd_intf_ports c$nn\_sys_clk_0] \[get_bd_intf_pins ddr4_$nn/C0_SYS_CLK]" $nn]
+            eval $cmd
+            # Two external AXI ports feed slave ports S00/S01 of interconnect i;
+            # the external port index is (i - 1) + j * n_ddr_chan.
+            for {set j 0} {$j < 2} {incr j} {
+                set nn [expr {$i - 1 + $j*$cnfg(n_ddr_chan)}]
+                set cmd [format "connect_bd_intf_net -intf_net axi_ddr_in_$nn\_1 \[get_bd_intf_ports axi_ddr_in_$nn] \[get_bd_intf_pins axi_interconnect_%d/S%02d_AXI]" $i $j]
+                eval $cmd
+            }
+        }
+    }
+
+    set nn 0
+    if {$cnfg(en_pr) eq 1} {
+        incr nn
+    }
+
+    # Data lines: per channel, connect the stream and descriptor-bypass ports to the XDMA
+    for {set i 0} {$i < $cnfg(n_chan)} {incr i} {
+        set cmd "connect_bd_intf_net -intf_net axis_dyn_in_$i\_1 \[get_bd_intf_ports axis_dyn_in_$i] \[get_bd_intf_pins xdma_0/S_AXIS_C2H_$i]"
+        eval $cmd
+
+        set cmd "connect_bd_intf_net -intf_net xdma_0_M_AXIS_H2C_$i \[get_bd_intf_ports axis_dyn_out_$i] \[get_bd_intf_pins xdma_0/M_AXIS_H2C_$i]"
+        eval $cmd
+
+        set cmd "connect_bd_intf_net -intf_net dsc_bypass_c2h_$i\_1 \[get_bd_intf_ports dsc_bypass_c2h_$i] \[get_bd_intf_pins xdma_0/dsc_bypass_c2h_$i]"
+        eval $cmd
+
+        set cmd "connect_bd_intf_net -intf_net dsc_bypass_h2c_$i\_1 \[get_bd_intf_ports dsc_bypass_h2c_$i] \[get_bd_intf_pins xdma_0/dsc_bypass_h2c_$i]"
+        eval $cmd
+    }
+
+########################################################################################################
+# Create port connections
+########################################################################################################
+    # PR
+    if {$cnfg(en_pr) eq 1} {
+        connect_bd_net -net pr_aresetn_1 [get_bd_ports presetn] [get_bd_pins proc_sys_reset_0/peripheral_aresetn]
+        connect_bd_net -net clk_wiz_0_clk_out1 [get_bd_ports pclk] [get_bd_pins clk_wiz_0/clk_out1] [get_bd_pins proc_sys_reset_0/slowest_sync_clk]
+    }
+
+    # XDMA
+    connect_bd_net -net reset_rtl_0_1 [get_bd_ports perst_n] [get_bd_pins xdma_0/sys_rst_n]
+    connect_bd_net
-net usr_irq_1 [get_bd_ports usr_irq] [get_bd_pins xdma_0/usr_irq_req]
+    connect_bd_net -net util_ds_buf_IBUF_DS_ODIV2 [get_bd_pins util_ds_buf/IBUF_DS_ODIV2] [get_bd_pins xdma_0/sys_clk]
+    connect_bd_net -net util_ds_buf_IBUF_OUT [get_bd_pins util_ds_buf/IBUF_OUT] [get_bd_pins xdma_0/sys_clk_gt]
+
+    # External reset
+    # The net always starts at the reset_0 port. clk_wiz_0 and proc_sys_reset_0
+    # are only instantiated when en_pr is set, so their pins are added only in
+    # that case instead of looking up pins of cells that do not exist.
+    set cmd "connect_bd_net -net reset_rtl_0_0_1 \[get_bd_ports reset_0]"
+    if {$cnfg(en_pr) eq 1} {
+        append cmd " \[get_bd_pins clk_wiz_0/reset] \[get_bd_pins proc_sys_reset_0/ext_reset_in]"
+    }
+    if {$cnfg(en_ddr) eq 1} {
+        if {$cnfg(ddr_0) eq 1} {
+            append cmd " \[get_bd_pins ddr4_0/sys_rst]"
+        }
+        if {$cnfg(ddr_1) eq 1} {
+            append cmd " \[get_bd_pins ddr4_1/sys_rst]"
+        }
+    }
+    eval $cmd
+
+    # DDRs
+    # For DDR channel i (1-based) the controller index is nn = i - 1. Each
+    # controller's UI clock and reset block drive M00 of interconnect i.
+    # Plain strings replace the former format calls, which had no conversion
+    # specifiers and therefore returned their input unchanged.
+    if {$cnfg(en_ddr) eq 1} {
+        for {set i 1} {$i <= $cnfg(n_ddr_chan)} {incr i} {
+            set nn [expr {$i - 1}]
+            set cmd "connect_bd_net -net rst_ddr4_$nn\_300M_peripheral_aresetn \[get_bd_pins axi_interconnect_$i/M00_ARESETN] \[get_bd_pins ddr4_$nn/c0_ddr4_aresetn] \[get_bd_pins rst_ddr4_$nn\_300M/peripheral_aresetn]"
+            eval $cmd
+            set cmd "connect_bd_net -net ddr4_$nn\_c0_ddr4_ui_clk \[get_bd_pins axi_interconnect_$i/M00_ACLK] \[get_bd_pins ddr4_$nn/c0_ddr4_ui_clk] \[get_bd_pins rst_ddr4_$nn\_300M/slowest_sync_clk]"
+            eval $cmd
+            set cmd "connect_bd_net -net ddr4_$nn\_c0_ddr4_ui_clk_sync_rst \[get_bd_pins ddr4_$nn/c0_ddr4_ui_clk_sync_rst] \[get_bd_pins rst_ddr4_$nn\_300M/ext_reset_in]"
+            eval $cmd
+        }
+    }
+
+    # XDMA
+    connect_bd_net -net xdma_0_axi_aresetn_ns [get_bd_pins xdma_0/axi_aresetn] [get_bd_pins proc_sys_reset_1/ext_reset_in]
+    connect_bd_net -net proc_sys_reset_1_peripheral_aresetn [get_bd_ports aresetn] [get_bd_pins proc_sys_reset_1/peripheral_aresetn]
+
+    # Main clock and interconnect reset nets; more pins are appended to both commands before they are evaluated
+    set cmd_clk "connect_bd_net -net xdma_0_axi_aclk \[get_bd_ports aclk] \[get_bd_pins proc_sys_reset_1/slowest_sync_clk] \[get_bd_pins axi_interconnect_0/ACLK] \[get_bd_pins axi_interconnect_0/M00_ACLK] \[get_bd_pins xdma_0/axi_aclk] \[get_bd_pins axi_interconnect_0/S00_ACLK]"
+    set cmd_rst "connect_bd_net -net
xdma_0_axi_aresetn_s \[get_bd_pins axi_interconnect_0/ARESETN] \[get_bd_pins proc_sys_reset_1/interconnect_aresetn] \[get_bd_pins axi_interconnect_0/M00_ARESETN] \[get_bd_pins axi_interconnect_0/S00_ARESETN]" + + set nn 1 + if {$cnfg(en_pr) eq 1} { + append cmd_clk " \[get_bd_pins clk_wiz_0/clk_in1]" + } + + if {$cnfg(en_avx) eq 1} { + for {set i 1} {$i <= 2 * $cnfg(n_reg)} {incr i} { + append cmd_clk [format " \[get_bd_pins axi_interconnect_0/M%02d_ACLK]" $i] + append cmd_rst [format " \[get_bd_pins axi_interconnect_0/M%02d_ARESETN]" $i] + } + } else { + for {set i 1} {$i <= $cnfg(n_reg)} {incr i} { + append cmd_clk [format " \[get_bd_pins axi_interconnect_0/M%02d_ACLK]" $i] + append cmd_rst [format " \[get_bd_pins axi_interconnect_0/M%02d_ARESETN]" $i] + } + } + + if {$cnfg(en_ddr) eq 1} { + for {set i 1} {$i <= $cnfg(n_ddr_chan)} {incr i} { + append cmd_clk " \[get_bd_pins axi_interconnect_$i/ACLK]" + append cmd_rst " \[get_bd_pins axi_interconnect_$i/ARESETN]" + for {set j 0} {$j < 2} {incr j} { + append cmd_clk [format " \[get_bd_pins axi_interconnect_$i/S%02d_ACLK]" $j] + append cmd_rst [format " \[get_bd_pins axi_interconnect_$i/S%02d_ARESETN]" $j] + } + } + } + + eval $cmd_clk + eval $cmd_rst + +######################################################################################################## +# Create address segments +######################################################################################################## + # Static config + create_bd_addr_seg -range 0x00008000 -offset 0x00000000 [get_bd_addr_spaces xdma_0/M_AXI_BYPASS] [get_bd_addr_segs axi_cnfg/Reg] SEG_axi_cnfg_Reg + + if {$cnfg(en_avx) eq 1} { + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + set cmd [format "create_bd_addr_seg -range 0x00040000 -offset 0x000%02x0000 \[get_bd_addr_spaces xdma_0/M_AXI_BYPASS] \[get_bd_addr_segs axi_ctrl_$i/Reg] SEG_axi_ctrl_$i\_Reg" [expr {0x10 + $i *4}]] + eval $cmd + set cmd [format "create_bd_addr_seg -range 0x00040000 -offset 0x00%03x0000 
\[get_bd_addr_spaces xdma_0/M_AXI_BYPASS] \[get_bd_addr_segs axim_ctrl_$i/Reg] SEG_axim_ctrl_$i\_Reg" [expr {0x100 + $i *4}]] + eval $cmd + } + } else { + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + set cmd [format "create_bd_addr_seg -range 0x00040000 -offset 0x000%02x0000 \[get_bd_addr_spaces xdma_0/M_AXI_BYPASS] \[get_bd_addr_segs axi_ctrl_$i/Reg] SEG_axi_ctrl_$i\_Reg" [expr {0x10 + $i *4}]] + eval $cmd + } + } + + # DDRs + if {$cnfg(en_ddr) eq 1} { + for {set i 0} {$i < $cnfg(n_ddr_chan)} {incr i} { + for {set j 0} {$j < 2} {incr j} { + set nn [expr {$i + $j * $cnfg(n_ddr_chan)}] + set cmd "create_bd_addr_seg -range 0x80000000 -offset 0x00000000 \[get_bd_addr_spaces axi_ddr_in_$nn] \[get_bd_addr_segs ddr4_$i/C0_DDR4_MEMORY_MAP/C0_DDR4_ADDRESS_BLOCK] SEG_ddr4_$i\_C0_DDR4_ADDRESS_BLOCK" + eval $cmd + } + } + } + + # Restore current instance + current_bd_instance $oldCurInst + + save_bd_design + close_bd_design $design_name +} +# End of cr_bd_design_static() diff --git a/hw/scripts/compile.tcl.in b/hw/scripts/compile.tcl.in new file mode 100644 index 00000000..8d8449fd --- /dev/null +++ b/hw/scripts/compile.tcl.in @@ -0,0 +1,125 @@ +######################################################################################################## +## Lynx - top +######################################################################################################## +set project "${CMAKE_PROJECT_NAME}" +set part "${FPGA_PART}" +#set board_part "${FPGA_BOARD}" + +######################################################################################################## +# Source files +######################################################################################################## +set root_dir "${CMAKE_SOURCE_DIR}" +set build_dir "${CMAKE_BINARY_DIR}" +set scripts_dir "$root_dir/scripts" +set hw_dir "$root_dir" +set ip_dir "$root_dir/ip" +set iprepo_dir "$build_dir/iprepo" + +set device_ip_dir "$ip_dir/dev" +file mkdir $device_ip_dir + 
+######################################################################################################## +# Config +######################################################################################################## +set cfg(fdev) ${FDEV_NAME} + +set cfg(n_reg) ${N_REGIONS} + +set cfg(en_strm) ${EN_STRM} +set cfg(en_ddr) ${EN_DDR} +set cfg(en_pr) ${EN_PR} + +set cfg(n_chan) [expr {$cfg(en_strm) + $cfg(en_ddr) + $cfg(en_pr)}] +set cfg(n_ddr_chan) ${N_DDR_CHAN} + +set cfg(en_bpss) ${EN_BPSS} +set cfg(en_avx) ${EN_AVX} + +set cfg(qsfp) ${QSFP_PORT} + +set cfg(ddr_0) 0 +set cfg(ddr_1) 0 +set cfg(ddr_2) 0 +set cfg(ddr_3) 0 + +if {$cfg(n_ddr_chan) >= 1} { + set cfg(ddr_0) 1 +} +if {$cfg(n_ddr_chan) >= 2} { + set cfg(ddr_1) 1 +} +if {$cfg(n_ddr_chan) >= 4} { + set cfg(ddr_2) 1 + set cfg(ddr_3) 1 +} + +set cfg(en_fv) ${EN_FV} +set cfg(en_fvv) ${EN_FVV} +set cfg(en_tcp) ${EN_TCP} + +set nn 0 +if {$cfg(en_strm) eq 1} { + set cfg(strm_chan) $nn + incr nn +} +if {$cfg(en_ddr) eq 1} { + set cfg(ddr_chan) $nn + incr nn +} +if {$cfg(en_pr) eq 1} { + set cfg(pr_chan) $nn + incr nn +} + +######################################################################################################## +# Project +######################################################################################################## +set proj_dir "$build_dir/$project" + +puts $proj_dir +open_project "$proj_dir/lynx.xpr" + +######################################################################################################## +# Run compilation +######################################################################################################## +set i 1 +while {[file isdirectory "$proj_dir/hdl/config_$i"]} { + incr i +} + +# Post script +set_property STEPS.WRITE_BITSTREAM.TCL.POST "$build_dir/post.tcl" [get_runs "impl_$i"] + +# Reset +set cmd "reset_run impl_1 -prev_step " +eval $cmd + +# Reset previous +if {$cfg(en_pr) eq 1} { + set cmd "reset_run " + for {set j 1} {$j <= $i} {incr j} { + append 
cmd "impl_$j " + } +} else { + set cmd "reset_run " + append cmd "impl_1 " +} +eval $cmd + +# Launch +if {$cfg(en_pr) eq 1} { + set cmd "launch_runs " + for {set j 1} {$j <= $i} {incr j} { + append cmd "impl_$j " + } +} else { + set cmd "launch_runs " + append cmd "impl_1 " +} + +# !!! SET THE NUMBER OF CORES +append cmd "-to_step write_bitstream -jobs 4" +eval $cmd + +close_project +#start_gui \ No newline at end of file diff --git a/hw/scripts/dynamic.tcl.in b/hw/scripts/dynamic.tcl.in new file mode 100644 index 00000000..da8882c1 --- /dev/null +++ b/hw/scripts/dynamic.tcl.in @@ -0,0 +1,133 @@ +######################################################################################################## +## Lynx - top +######################################################################################################## +set project "${CMAKE_PROJECT_NAME}" +set part "${FPGA_PART}" +#set board_part "${FPGA_BOARD}" + +######################################################################################################## +# Source files +######################################################################################################## +set root_dir "${CMAKE_SOURCE_DIR}" +set build_dir "${CMAKE_BINARY_DIR}" +set scripts_dir "$root_dir/scripts" +set hw_dir "$root_dir" +set ip_dir "$root_dir/ip" +set iprepo_dir "$build_dir/iprepo" + +set device_ip_dir "$ip_dir/dev" +file mkdir $device_ip_dir + +######################################################################################################## +# Config +######################################################################################################## +set cfg(fdev) ${FDEV_NAME} + +set cfg(n_reg) ${N_REGIONS} + +set cfg(en_strm) ${EN_STRM} +set cfg(en_ddr) ${EN_DDR} +set cfg(en_pr) ${EN_PR} + +set cfg(n_chan) [expr {$cfg(en_strm) + $cfg(en_ddr) + $cfg(en_pr)}] +set cfg(n_ddr_chan) ${N_DDR_CHAN} + +set cfg(en_bpss) ${EN_BPSS} +set cfg(en_avx) ${EN_AVX} + +set cfg(qsfp) ${QSFP_PORT} + +set cfg(ddr_0) 0 +set 
cfg(ddr_1) 0 +set cfg(ddr_2) 0 +set cfg(ddr_3) 0 + +if {$cfg(n_ddr_chan) >= 1} { + set cfg(ddr_0) 1 +} +if {$cfg(n_ddr_chan) >= 2} { + set cfg(ddr_1) 1 +} +if {$cfg(n_ddr_chan) >= 4} { + set cfg(ddr_2) 1 + set cfg(ddr_3) 1 +} + +set cfg(en_fv) ${EN_FV} +set cfg(en_fvv) ${EN_FVV} +set cfg(en_tcp) ${EN_TCP} + +set nn 0 +if {$cfg(en_strm) eq 1} { + set cfg(strm_chan) $nn + incr nn +} +if {$cfg(en_ddr) eq 1} { + set cfg(ddr_chan) $nn + incr nn +} +if {$cfg(en_pr) eq 1} { + set cfg(pr_chan) $nn + incr nn +} + +######################################################################################################## +# Project +######################################################################################################## +set proj_dir "$build_dir/$project" + +puts $proj_dir +open_project "$proj_dir/lynx.xpr" + +######################################################################################################## +# DYNAMIC LAYER +######################################################################################################## +# Create additional configs +set i 1 +while {[file isdirectory "$proj_dir/hdl/config_$i"]} { + incr i +} + +file mkdir "$proj_dir/hdl/config_$i" +file mkdir "$proj_dir/hdl/wrappers/config_$i" +source "$scripts_dir/wr_hdl/wr_hdl_user.tcl" -notrace +for {set j 0} {$j < $cfg(n_reg)} {incr j} { + wr_hdl_user_wrapper "$proj_dir/hdl/wrappers/config_$i/design_user_wrapper_c$i\_$j.sv" $j + wr_hdl_user "$proj_dir/hdl/config_$i/design_user_logic_c$i\_$j.sv" $j +} + +# Create dynamic regions +for {set j 0} {$j < $cfg(n_reg)} {incr j} { + # Sources + set cmd "set files \[list \ " + append cmd "\[file normalize \"\$proj_dir/hdl/wrappers/config_$i/design_user_wrapper_c$i\_$j.sv\"] \ " + append cmd "\[file normalize \"\$proj_dir/hdl/config_$i/design_user_logic_c$i\_$j.sv\"] \ " + append cmd "\[file normalize \"\$hw_dir/hdl/pkg/axi_intf.sv\"] \ " + append cmd "\[file normalize \"\$hw_dir/hdl/pkg/lynx_intf.sv\"] \ " + append cmd "\[file normalize 
\"\$hw_dir/hdl/pkg/axi_macros.svh\"] \ " + append cmd "\[file normalize \"\$hw_dir/hdl/pkg/lynx_macros.svh\"] \ " + append cmd "\[file normalize \"\$proj_dir/hdl/lynx_pkg.sv\"] \ " + append cmd "]" + eval $cmd + # Create pr region + create_reconfig_module -name "design_user_wrapper_c$i\_$j" -partition_def [get_partition_defs "dynamic_$j" ] -top "design_user_wrapper_$j" + add_files -norecurse -scan_for_includes $files -of_objects [get_reconfig_modules "design_user_wrapper_c$i\_$j"] +} + +set cmd "create_pr_configuration -name config_$i -partitions \[list \ " + for {set j 0} {$j < $cfg(n_reg)} {incr j} { + append cmd "inst_dynamic/inst_user_wrapper_$j:design_user_wrapper_c$i\_$j " + } + append cmd "]" +eval $cmd + + +set n [expr {$i + 1}] +set cmd "create_run impl_$n -parent_run impl_1 -flow {Vivado Implementation 2018} -pr_config config_$i" +eval $cmd + +set cmd "set_property STEPS.WRITE_BITSTREAM.ARGS.BIN_FILE true \[get_runs impl_$n]" +eval $cmd + +close_project +#start_gui \ No newline at end of file diff --git a/hw/scripts/ip_inst/base_infrastructure.tcl b/hw/scripts/ip_inst/base_infrastructure.tcl new file mode 100644 index 00000000..3b09ab06 --- /dev/null +++ b/hw/scripts/ip_inst/base_infrastructure.tcl @@ -0,0 +1,104 @@ +# Control xbars +if {$cfg(en_avx) eq 1} { + for {set i 0} {$i < $cfg(n_reg)} {incr i} { + set cmd "create_ip -name axi_crossbar -vendor xilinx.com -library ip -version 2.1 -module_name dyn_crossbar_$i" + eval $cmd + set offs [expr {0x10 + $i * 4}] + set cmd [format "set_property -dict \[list \ + CONFIG.NUM_MI {3} CONFIG.ADDR_WIDTH {64} CONFIG.PROTOCOL {AXI4LITE} CONFIG.DATA_WIDTH {64} CONFIG.CONNECTIVITY_MODE {SASD} CONFIG.R_REGISTER {1} \ + CONFIG.S00_WRITE_ACCEPTANCE {1} CONFIG.S01_WRITE_ACCEPTANCE {1} CONFIG.S02_WRITE_ACCEPTANCE {1} CONFIG.S03_WRITE_ACCEPTANCE {1} CONFIG.S04_WRITE_ACCEPTANCE {1} CONFIG.S05_WRITE_ACCEPTANCE {1} CONFIG.S06_WRITE_ACCEPTANCE {1} CONFIG.S07_WRITE_ACCEPTANCE {1} CONFIG.S08_WRITE_ACCEPTANCE {1} 
CONFIG.S09_WRITE_ACCEPTANCE {1} CONFIG.S10_WRITE_ACCEPTANCE {1} CONFIG.S11_WRITE_ACCEPTANCE {1} CONFIG.S12_WRITE_ACCEPTANCE {1} CONFIG.S13_WRITE_ACCEPTANCE {1} CONFIG.S14_WRITE_ACCEPTANCE {1} CONFIG.S15_WRITE_ACCEPTANCE {1} \ + CONFIG.S00_READ_ACCEPTANCE {1} CONFIG.S01_READ_ACCEPTANCE {1} CONFIG.S02_READ_ACCEPTANCE {1} CONFIG.S03_READ_ACCEPTANCE {1} CONFIG.S04_READ_ACCEPTANCE {1} CONFIG.S05_READ_ACCEPTANCE {1} CONFIG.S06_READ_ACCEPTANCE {1} CONFIG.S07_READ_ACCEPTANCE {1} CONFIG.S08_READ_ACCEPTANCE {1} CONFIG.S09_READ_ACCEPTANCE {1} CONFIG.S10_READ_ACCEPTANCE {1} CONFIG.S11_READ_ACCEPTANCE {1} CONFIG.S12_READ_ACCEPTANCE {1} CONFIG.S13_READ_ACCEPTANCE {1} CONFIG.S14_READ_ACCEPTANCE {1} CONFIG.S15_READ_ACCEPTANCE {1} \ + CONFIG.M00_WRITE_ISSUING {1} CONFIG.M01_WRITE_ISSUING {1} CONFIG.M02_WRITE_ISSUING {1} CONFIG.M03_WRITE_ISSUING {1} CONFIG.M04_WRITE_ISSUING {1} CONFIG.M05_WRITE_ISSUING {1} CONFIG.M06_WRITE_ISSUING {1} CONFIG.M07_WRITE_ISSUING {1} CONFIG.M08_WRITE_ISSUING {1} CONFIG.M09_WRITE_ISSUING {1} CONFIG.M10_WRITE_ISSUING {1} CONFIG.M11_WRITE_ISSUING {1} CONFIG.M12_WRITE_ISSUING {1} CONFIG.M13_WRITE_ISSUING {1} CONFIG.M14_WRITE_ISSUING {1} CONFIG.M15_WRITE_ISSUING {1} \ + CONFIG.M00_READ_ISSUING {1} CONFIG.M01_READ_ISSUING {1} CONFIG.M02_READ_ISSUING {1} CONFIG.M03_READ_ISSUING {1} CONFIG.M04_READ_ISSUING {1} CONFIG.M05_READ_ISSUING {1} CONFIG.M06_READ_ISSUING {1} CONFIG.M07_READ_ISSUING {1} CONFIG.M08_READ_ISSUING {1} CONFIG.M09_READ_ISSUING {1} CONFIG.M10_READ_ISSUING {1} CONFIG.M11_READ_ISSUING {1} CONFIG.M12_READ_ISSUING {1} CONFIG.M13_READ_ISSUING {1} CONFIG.M14_READ_ISSUING {1} CONFIG.M15_READ_ISSUING {1} \ + CONFIG.S00_SINGLE_THREAD {1} CONFIG.M00_A00_BASE_ADDR {0x0000000000%02x0000} CONFIG.M01_A00_BASE_ADDR {0x0000000000%02x0000} CONFIG.M02_A00_BASE_ADDR {0x0000000000%02x0000} CONFIG.M00_A00_ADDR_WIDTH {16} CONFIG.M01_A00_ADDR_WIDTH {16} CONFIG.M02_A00_ADDR_WIDTH {16} \ + CONFIG.Component_Name {dyn_crossbar_$i}] \[get_ips dyn_crossbar_$i]" $offs [expr 
{$offs + 1}] [expr {$offs + 2}] ] + eval $cmd + } +} else { + for {set i 0} {$i < $cfg(n_reg)} {incr i} { + set cmd "create_ip -name axi_crossbar -vendor xilinx.com -library ip -version 2.1 -module_name dyn_crossbar_$i" + eval $cmd + set offs [expr {0x10 + $i * 4}] + set cmd [format "set_property -dict \[list \ + CONFIG.NUM_MI {4} CONFIG.ADDR_WIDTH {64} CONFIG.PROTOCOL {AXI4LITE} CONFIG.DATA_WIDTH {64} CONFIG.CONNECTIVITY_MODE {SASD} CONFIG.R_REGISTER {1} \ + CONFIG.S00_WRITE_ACCEPTANCE {1} CONFIG.S01_WRITE_ACCEPTANCE {1} CONFIG.S02_WRITE_ACCEPTANCE {1} CONFIG.S03_WRITE_ACCEPTANCE {1} CONFIG.S04_WRITE_ACCEPTANCE {1} CONFIG.S05_WRITE_ACCEPTANCE {1} CONFIG.S06_WRITE_ACCEPTANCE {1} CONFIG.S07_WRITE_ACCEPTANCE {1} CONFIG.S08_WRITE_ACCEPTANCE {1} CONFIG.S09_WRITE_ACCEPTANCE {1} CONFIG.S10_WRITE_ACCEPTANCE {1} CONFIG.S11_WRITE_ACCEPTANCE {1} CONFIG.S12_WRITE_ACCEPTANCE {1} CONFIG.S13_WRITE_ACCEPTANCE {1} CONFIG.S14_WRITE_ACCEPTANCE {1} CONFIG.S15_WRITE_ACCEPTANCE {1} \ + CONFIG.S00_READ_ACCEPTANCE {1} CONFIG.S01_READ_ACCEPTANCE {1} CONFIG.S02_READ_ACCEPTANCE {1} CONFIG.S03_READ_ACCEPTANCE {1} CONFIG.S04_READ_ACCEPTANCE {1} CONFIG.S05_READ_ACCEPTANCE {1} CONFIG.S06_READ_ACCEPTANCE {1} CONFIG.S07_READ_ACCEPTANCE {1} CONFIG.S08_READ_ACCEPTANCE {1} CONFIG.S09_READ_ACCEPTANCE {1} CONFIG.S10_READ_ACCEPTANCE {1} CONFIG.S11_READ_ACCEPTANCE {1} CONFIG.S12_READ_ACCEPTANCE {1} CONFIG.S13_READ_ACCEPTANCE {1} CONFIG.S14_READ_ACCEPTANCE {1} CONFIG.S15_READ_ACCEPTANCE {1} \ + CONFIG.M00_WRITE_ISSUING {1} CONFIG.M01_WRITE_ISSUING {1} CONFIG.M02_WRITE_ISSUING {1} CONFIG.M03_WRITE_ISSUING {1} CONFIG.M04_WRITE_ISSUING {1} CONFIG.M05_WRITE_ISSUING {1} CONFIG.M06_WRITE_ISSUING {1} CONFIG.M07_WRITE_ISSUING {1} CONFIG.M08_WRITE_ISSUING {1} CONFIG.M09_WRITE_ISSUING {1} CONFIG.M10_WRITE_ISSUING {1} CONFIG.M11_WRITE_ISSUING {1} CONFIG.M12_WRITE_ISSUING {1} CONFIG.M13_WRITE_ISSUING {1} CONFIG.M14_WRITE_ISSUING {1} CONFIG.M15_WRITE_ISSUING {1} \ + CONFIG.M00_READ_ISSUING {1} CONFIG.M01_READ_ISSUING 
{1} CONFIG.M02_READ_ISSUING {1} CONFIG.M03_READ_ISSUING {1} CONFIG.M04_READ_ISSUING {1} CONFIG.M05_READ_ISSUING {1} CONFIG.M06_READ_ISSUING {1} CONFIG.M07_READ_ISSUING {1} CONFIG.M08_READ_ISSUING {1} CONFIG.M09_READ_ISSUING {1} CONFIG.M10_READ_ISSUING {1} CONFIG.M11_READ_ISSUING {1} CONFIG.M12_READ_ISSUING {1} CONFIG.M13_READ_ISSUING {1} CONFIG.M14_READ_ISSUING {1} CONFIG.M15_READ_ISSUING {1} \ + CONFIG.S00_SINGLE_THREAD {1} CONFIG.M00_A00_BASE_ADDR {0x0000000000%02x0000} CONFIG.M01_A00_BASE_ADDR {0x0000000000%02x0000} CONFIG.M02_A00_BASE_ADDR {0x0000000000%02x0000} CONFIG.M03_A00_BASE_ADDR {0x0000000000%02x0000} CONFIG.M00_A00_ADDR_WIDTH {16} CONFIG.M01_A00_ADDR_WIDTH {16} CONFIG.M02_A00_ADDR_WIDTH {16} CONFIG.M03_A00_ADDR_WIDTH {16} \ + CONFIG.Component_Name {dyn_crossbar_$i}] \[get_ips dyn_crossbar_$i]" $offs [expr {$offs + 1}] [expr {$offs + 2}] [expr {$offs + 3}] ] + eval $cmd + } +} + +# Bypass ic +if {$cfg(en_bpss) eq 1} { + create_ip -name axis_interconnect -vendor xilinx.com -library ip -version 1.1 -module_name axis_interconnect_cnfg_req_arbiter + set_property -dict [list CONFIG.Component_Name {axis_interconnect_cnfg_req_arbiter} CONFIG.C_NUM_SI_SLOTS {2} CONFIG.SWITCH_TDATA_NUM_BYTES {12} CONFIG.HAS_TSTRB {false} CONFIG.HAS_TKEEP {false} CONFIG.HAS_TLAST {false} CONFIG.HAS_TID {false} CONFIG.HAS_TDEST {false} CONFIG.C_M00_AXIS_REG_CONFIG {1} CONFIG.SWITCH_PACKET_MODE {false} CONFIG.C_SWITCH_MAX_XFERS_PER_ARB {1} CONFIG.C_SWITCH_NUM_CYCLES_TIMEOUT {0} CONFIG.M00_AXIS_TDATA_NUM_BYTES {12} CONFIG.S00_AXIS_TDATA_NUM_BYTES {12} CONFIG.S01_AXIS_TDATA_NUM_BYTES {12} CONFIG.M00_S01_CONNECTIVITY {true}] [get_ips axis_interconnect_cnfg_req_arbiter] +} + +# PR +if {$cfg(en_pr) eq 1} { + create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name pr_clock_converter + set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.Component_Name {pr_clock_converter}] [get_ips pr_clock_converter] 
+ + create_ip -name axis_dwidth_converter -vendor xilinx.com -library ip -version 1.1 -module_name pr_dwidth_converter + set_property -dict [list CONFIG.S_TDATA_NUM_BYTES {64} CONFIG.M_TDATA_NUM_BYTES {4} CONFIG.HAS_TLAST {1} CONFIG.HAS_TKEEP {1} CONFIG.HAS_MI_TKEEP {0} CONFIG.Component_Name {pr_dwidth_converter}] [get_ips pr_dwidth_converter] +} + +# Data queues +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_512 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.FIFO_DEPTH {256} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.Component_Name {axis_data_fifo_512}] [get_ips axis_data_fifo_512] + +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_1k +set_property -dict [list CONFIG.TDATA_NUM_BYTES {128} CONFIG.FIFO_DEPTH {256} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.Component_Name {axis_data_fifo_1k}] [get_ips axis_data_fifo_1k] + +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_2k +set_property -dict [list CONFIG.TDATA_NUM_BYTES {256} CONFIG.FIFO_DEPTH {256} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.Component_Name {axis_data_fifo_2k}] [get_ips axis_data_fifo_2k] + +# Request queues +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_req_96_used +set_property -dict [list CONFIG.TDATA_NUM_BYTES {12} CONFIG.FIFO_DEPTH {64} CONFIG.HAS_WR_DATA_COUNT {1} CONFIG.Component_Name {axis_data_fifo_req_96_used}] [get_ips axis_data_fifo_req_96_used] + +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_req_96 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {12} CONFIG.FIFO_DEPTH {64} CONFIG.Component_Name {axis_data_fifo_req_96}] [get_ips axis_data_fifo_req_96] + +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_req_128 +set_property 
-dict [list CONFIG.TDATA_NUM_BYTES {16} CONFIG.FIFO_DEPTH {64} CONFIG.Component_Name {axis_data_fifo_req_128}] [get_ips axis_data_fifo_req_128] + +# Reg slices +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_512_0 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.REG_CONFIG {8} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1}] [get_ips axis_register_slice_512_0] + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_1k_0 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {128} CONFIG.REG_CONFIG {8} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1}] [get_ips axis_register_slice_1k_0] + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_2k_0 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {256} CONFIG.REG_CONFIG {8} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1}] [get_ips axis_register_slice_2k_0] + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axisr_register_slice_512_0 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.REG_CONFIG {8} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.TDEST_WIDTH {4}] [get_ips axisr_register_slice_512_0] + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axisr_register_slice_1k_0 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {128} CONFIG.REG_CONFIG {8} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.TDEST_WIDTH {4}] [get_ips axisr_register_slice_1k_0] + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axisr_register_slice_2k_0 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {256} CONFIG.REG_CONFIG {8} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.TDEST_WIDTH {4}] [get_ips axisr_register_slice_2k_0] + +create_ip -name axi_register_slice -vendor xilinx.com -library ip -version 2.1 -module_name 
axi_register_slice_0 +set_property -dict [list CONFIG.ADDR_WIDTH {64} CONFIG.DATA_WIDTH {512} CONFIG.REG_AW {1} CONFIG.REG_AR {1} CONFIG.REG_B {1}] [get_ips axi_register_slice_0] + +create_ip -name axi_register_slice -vendor xilinx.com -library ip -version 2.1 -module_name axil_register_slice_0 +set_property -dict [list CONFIG.PROTOCOL {AXI4LITE} CONFIG.ADDR_WIDTH {64} CONFIG.DATA_WIDTH {64} CONFIG.REG_AW {1} CONFIG.REG_AR {1} CONFIG.REG_W {1} CONFIG.REG_R {1} CONFIG.REG_B {1} CONFIG.Component_Name {axil_register_slice_0}] [get_ips axil_register_slice_0] + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_req_96_0 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {12} CONFIG.REG_CONFIG {8} CONFIG.Component_Name {axis_register_slice_req_96_0}] [get_ips axis_register_slice_req_96_0] + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_meta_256_0 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {32} CONFIG.REG_CONFIG {8} CONFIG.Component_Name {axis_register_slice_meta_256_0}] [get_ips axis_register_slice_meta_256_0] + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_meta_56_0 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {7} CONFIG.REG_CONFIG {8} CONFIG.Component_Name {axis_register_slice_meta_56_0}] [get_ips axis_register_slice_meta_56_0] + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_meta_32_0 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {4} CONFIG.REG_CONFIG {8} CONFIG.Component_Name {axis_register_slice_meta_32_0}] [get_ips axis_register_slice_meta_32_0] \ No newline at end of file diff --git a/hw/scripts/ip_inst/network_infrastructure.tcl b/hw/scripts/ip_inst/network_infrastructure.tcl new file mode 100644 index 00000000..725fa381 --- /dev/null +++ b/hw/scripts/ip_inst/network_infrastructure.tcl @@ 
-0,0 +1,184 @@ +#AXI Infrastructure (device independent) + + +#Clock Converters + +create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_clock_converter_32 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {4} CONFIG.Component_Name {axis_clock_converter_32}] [get_ips axis_clock_converter_32] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_clock_converter_32/axis_clock_converter_32.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_clock_converter_64 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {8} CONFIG.Component_Name {axis_clock_converter_64}] [get_ips axis_clock_converter_64] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_clock_converter_64/axis_clock_converter_64.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_clock_converter_96 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {12} CONFIG.Component_Name {axis_clock_converter_96}] [get_ips axis_clock_converter_96] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_clock_converter_96/axis_clock_converter_96.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_clock_converter_136 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {17} CONFIG.Component_Name {axis_clock_converter_136}] [get_ips axis_clock_converter_136] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_clock_converter_136/axis_clock_converter_136.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_clock_converter_144 -dir $device_ip_dir +set_property 
-dict [list CONFIG.TDATA_NUM_BYTES {18} CONFIG.Component_Name {axis_clock_converter_144}] [get_ips axis_clock_converter_144] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_clock_converter_144/axis_clock_converter_144.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_clock_converter_200 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {25} CONFIG.Component_Name {axis_clock_converter_200}] [get_ips axis_clock_converter_200] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_clock_converter_200/axis_clock_converter_200.xci] +update_compile_order -fileset sources_1 + +create_ip -name axi_clock_converter -vendor xilinx.com -library ip -version 2.1 -module_name axil_clock_converter -dir $device_ip_dir +set_property -dict [list CONFIG.Component_Name {axil_clock_converter} CONFIG.PROTOCOL {AXI4LITE} CONFIG.DATA_WIDTH {32} CONFIG.ID_WIDTH {0} CONFIG.AWUSER_WIDTH {0} CONFIG.ARUSER_WIDTH {0} CONFIG.RUSER_WIDTH {0} CONFIG.WUSER_WIDTH {0} CONFIG.BUSER_WIDTH {0}] [get_ips axil_clock_converter] +generate_target {instantiation_template} [get_files $device_ip_dir/axil_clock_converter/axil_clock_converter.xci] +update_compile_order -fileset sources_1 + +create_ip -name axi_clock_converter -vendor xilinx.com -library ip -version 2.1 -module_name axil_net_ctrl_clock_converter -dir $device_ip_dir +set_property -dict [list CONFIG.Component_Name {axil_net_ctrl_clock_converter} CONFIG.PROTOCOL {AXI4LITE} CONFIG.ADDR_WIDTH {64} CONFIG.DATA_WIDTH {64} CONFIG.ID_WIDTH {0} CONFIG.AWUSER_WIDTH {0} CONFIG.ARUSER_WIDTH {0} CONFIG.RUSER_WIDTH {0} CONFIG.WUSER_WIDTH {0} CONFIG.BUSER_WIDTH {0}] [get_ips axil_net_ctrl_clock_converter] +generate_target {instantiation_template} [get_files $device_ip_dir/axil_net_ctrl_clock_converter/axil_net_ctrl_clock_converter.xci] +update_compile_order -fileset sources_1 + +#Data Width Converters + 
+#create_ip -name axis_dwidth_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_256_to_512_converter -dir $device_ip_dir +#set_property -dict [list CONFIG.S_TDATA_NUM_BYTES {32} CONFIG.M_TDATA_NUM_BYTES {64} CONFIG.HAS_TLAST {1} CONFIG.HAS_TKEEP {1} CONFIG.HAS_MI_TKEEP {1} CONFIG.Component_Name {axis_256_to_512_converter}] [get_ips axis_256_to_512_converter] +#generate_target {instantiation_template} [get_files $device_ip_dir/axis_256_to_512_converter/axis_256_to_512_converter.xci] +#update_compile_order -fileset sources_1 + + +#create_ip -name axis_dwidth_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_512_to_256_converter -dir $device_ip_dir +#set_property -dict [list CONFIG.S_TDATA_NUM_BYTES {64} CONFIG.M_TDATA_NUM_BYTES {32} CONFIG.HAS_TLAST {1} CONFIG.HAS_TKEEP {1} CONFIG.HAS_MI_TKEEP {1} CONFIG.Component_Name {axis_512_to_256_converter}] [get_ips axis_512_to_256_converter] +#generate_target {instantiation_template} [get_files $device_ip_dir/axis_512_to_256_converter/axis_512_to_256_converter.xci] +#update_compile_order -fileset sources_1 + + +create_ip -name axi_crossbar -vendor xilinx.com -library ip -version 2.1 -module_name axil_controller_crossbar -dir $device_ip_dir +set_property -dict [list CONFIG.PROTOCOL {AXI4LITE} CONFIG.CONNECTIVITY_MODE {SASD} CONFIG.R_REGISTER {1} CONFIG.NUM_MI {5} CONFIG.M01_A00_BASE_ADDR {0x0000000000001000} CONFIG.M02_A00_BASE_ADDR {0x0000000000002000} CONFIG.M03_A00_BASE_ADDR {0x0000000000003000} CONFIG.M04_A00_BASE_ADDR {0x0000000000004000} CONFIG.S00_WRITE_ACCEPTANCE {1} CONFIG.S01_WRITE_ACCEPTANCE {1} CONFIG.S02_WRITE_ACCEPTANCE {1} CONFIG.S03_WRITE_ACCEPTANCE {1} CONFIG.S04_WRITE_ACCEPTANCE {1} CONFIG.S05_WRITE_ACCEPTANCE {1} CONFIG.S06_WRITE_ACCEPTANCE {1} CONFIG.S07_WRITE_ACCEPTANCE {1} CONFIG.S08_WRITE_ACCEPTANCE {1} CONFIG.S09_WRITE_ACCEPTANCE {1} CONFIG.S10_WRITE_ACCEPTANCE {1} CONFIG.S11_WRITE_ACCEPTANCE {1} CONFIG.S12_WRITE_ACCEPTANCE {1} CONFIG.S13_WRITE_ACCEPTANCE {1} 
CONFIG.S14_WRITE_ACCEPTANCE {1} CONFIG.S15_WRITE_ACCEPTANCE {1} CONFIG.S00_READ_ACCEPTANCE {1} CONFIG.S01_READ_ACCEPTANCE {1} CONFIG.S02_READ_ACCEPTANCE {1} CONFIG.S03_READ_ACCEPTANCE {1} CONFIG.S04_READ_ACCEPTANCE {1} CONFIG.S05_READ_ACCEPTANCE {1} CONFIG.S06_READ_ACCEPTANCE {1} CONFIG.S07_READ_ACCEPTANCE {1} CONFIG.S08_READ_ACCEPTANCE {1} CONFIG.S09_READ_ACCEPTANCE {1} CONFIG.S10_READ_ACCEPTANCE {1} CONFIG.S11_READ_ACCEPTANCE {1} CONFIG.S12_READ_ACCEPTANCE {1} CONFIG.S13_READ_ACCEPTANCE {1} CONFIG.S14_READ_ACCEPTANCE {1} CONFIG.S15_READ_ACCEPTANCE {1} CONFIG.M00_WRITE_ISSUING {1} CONFIG.M01_WRITE_ISSUING {1} CONFIG.M02_WRITE_ISSUING {1} CONFIG.M03_WRITE_ISSUING {1} CONFIG.M04_WRITE_ISSUING {1} CONFIG.M05_WRITE_ISSUING {1} CONFIG.M06_WRITE_ISSUING {1} CONFIG.M07_WRITE_ISSUING {1} CONFIG.M08_WRITE_ISSUING {1} CONFIG.M09_WRITE_ISSUING {1} CONFIG.M10_WRITE_ISSUING {1} CONFIG.M11_WRITE_ISSUING {1} CONFIG.M12_WRITE_ISSUING {1} CONFIG.M13_WRITE_ISSUING {1} CONFIG.M14_WRITE_ISSUING {1} CONFIG.M15_WRITE_ISSUING {1} CONFIG.M00_READ_ISSUING {1} CONFIG.M01_READ_ISSUING {1} CONFIG.M02_READ_ISSUING {1} CONFIG.M03_READ_ISSUING {1} CONFIG.M04_READ_ISSUING {1} CONFIG.M05_READ_ISSUING {1} CONFIG.M06_READ_ISSUING {1} CONFIG.M07_READ_ISSUING {1} CONFIG.M08_READ_ISSUING {1} CONFIG.M09_READ_ISSUING {1} CONFIG.M10_READ_ISSUING {1} CONFIG.M11_READ_ISSUING {1} CONFIG.M12_READ_ISSUING {1} CONFIG.M13_READ_ISSUING {1} CONFIG.M14_READ_ISSUING {1} CONFIG.M15_READ_ISSUING {1} CONFIG.S00_SINGLE_THREAD {1} CONFIG.Component_Name {axil_controller_crossbar}] [get_ips axil_controller_crossbar] +generate_target {instantiation_template} [get_files $device_ip_dir/axil_controller_crossbar/axil_controller_crossbar.xci] +update_compile_order -fileset sources_1 + + +#Register slices +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_8 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {1} CONFIG.Component_Name 
{axis_register_slice_8}] [get_ips axis_register_slice_8] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_register_slice_8/axis_register_slice_8.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_16 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {2} CONFIG.Component_Name {axis_register_slice_16}] [get_ips axis_register_slice_16] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_register_slice_16/axis_register_slice_16.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_24 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {3} CONFIG.Component_Name {axis_register_slice_24}] [get_ips axis_register_slice_24] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_register_slice_24/axis_register_slice_24.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_32 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {4} CONFIG.Component_Name {axis_register_slice_32}] [get_ips axis_register_slice_32] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_register_slice_32/axis_register_slice_32.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_48 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {6} CONFIG.Component_Name {axis_register_slice_48}] [get_ips axis_register_slice_48] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_register_slice_48/axis_register_slice_48.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_register_slice -vendor 
xilinx.com -library ip -version 1.1 -module_name axis_register_slice_88 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {11} CONFIG.Component_Name {axis_register_slice_88}] [get_ips axis_register_slice_88] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_register_slice_88/axis_register_slice_88.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_96 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {12} CONFIG.Component_Name {axis_register_slice_96}] [get_ips axis_register_slice_96] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_register_slice_96/axis_register_slice_96.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_176 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {22} CONFIG.Component_Name {axis_register_slice_176}] [get_ips axis_register_slice_176] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_register_slice_176/axis_register_slice_176.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_64 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {8} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.Component_Name {axis_register_slice_64}] [get_ips axis_register_slice_64] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_register_slice_64/axis_register_slice_64.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_128 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {16} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.Component_Name 
{axis_register_slice_128}] [get_ips axis_register_slice_128] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_register_slice_128/axis_register_slice_128.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_256 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {32} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.Component_Name {axis_register_slice_256}] [get_ips axis_register_slice_256] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_register_slice_256/axis_register_slice_256.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_512 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.Component_Name {axis_register_slice_512}] [get_ips axis_register_slice_512] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_register_slice_512/axis_register_slice_512.xci] +update_compile_order -fileset sources_1 + + +#FIFOs + +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_96 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {12} CONFIG.Component_Name {axis_data_fifo_96}] [get_ips axis_data_fifo_96] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_data_fifo_96/axis_data_fifo_96.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_160 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {20} CONFIG.Component_Name {axis_data_fifo_160} CONFIG.HAS_WR_DATA_COUNT {1} CONFIG.HAS_RD_DATA_COUNT {1}] [get_ips axis_data_fifo_160] +generate_target {instantiation_template} [get_files 
$device_ip_dir/axis_data_fifo_160/axis_data_fifo_160.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_160_cc -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {20} CONFIG.IS_ACLK_ASYNC {1} CONFIG.Component_Name {axis_data_fifo_160_cc} CONFIG.HAS_WR_DATA_COUNT {1} CONFIG.HAS_RD_DATA_COUNT {1}] [get_ips axis_data_fifo_160_cc] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_data_fifo_160_cc/axis_data_fifo_160_cc.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_512_cc -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.IS_ACLK_ASYNC {1} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.Component_Name {axis_data_fifo_512_cc}] [get_ips axis_data_fifo_512_cc] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_data_fifo_512_cc/axis_data_fifo_512_cc.xci] +update_compile_order -fileset sources_1 + +create_ip -name fifo_generator -vendor xilinx.com -library ip -version 13.2 -module_name fifo_generator_rdma_cmd -dir $device_ip_dir +set_property -dict [list CONFIG.Component_Name {fifo_generator_rdma_cmd} CONFIG.INTERFACE_TYPE {AXI_STREAM} CONFIG.Reset_Type {Asynchronous_Reset} CONFIG.Clock_Type_AXI {Independent_Clock} CONFIG.TDATA_NUM_BYTES {16} CONFIG.TUSER_WIDTH {0} CONFIG.TSTRB_WIDTH {16} CONFIG.TKEEP_WIDTH {16} CONFIG.FIFO_Implementation_wach {Independent_Clocks_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_wach {15} CONFIG.Empty_Threshold_Assert_Value_wach {13} CONFIG.FIFO_Implementation_wdch {Independent_Clocks_Builtin_FIFO} CONFIG.Empty_Threshold_Assert_Value_wdch {1018} CONFIG.FIFO_Implementation_wrch {Independent_Clocks_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_wrch {15} CONFIG.Empty_Threshold_Assert_Value_wrch {13} CONFIG.FIFO_Implementation_rach 
{Independent_Clocks_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_rach {15} CONFIG.Empty_Threshold_Assert_Value_rach {13} CONFIG.FIFO_Implementation_rdch {Independent_Clocks_Builtin_FIFO} CONFIG.Empty_Threshold_Assert_Value_rdch {1018} CONFIG.FIFO_Implementation_axis {Independent_Clocks_Block_RAM} CONFIG.Input_Depth_axis {64} CONFIG.Full_Threshold_Assert_Value_axis {63} CONFIG.Empty_Threshold_Assert_Value_axis {61}] [get_ips fifo_generator_rdma_cmd] +generate_target {instantiation_template} [get_files $device_ip_dir/fifo_generator_rdma_cmd/fifo_generator_rdma_cmd.xci] +update_compile_order -fileset sources_1 + +create_ip -name fifo_generator -vendor xilinx.com -library ip -version 13.2 -module_name fifo_generator_rdma_data -dir $device_ip_dir +set_property -dict [list CONFIG.Component_Name {fifo_generator_rdma_data} CONFIG.INTERFACE_TYPE {AXI_STREAM} CONFIG.Reset_Type {Asynchronous_Reset} CONFIG.Clock_Type_AXI {Independent_Clock} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TUSER_WIDTH {0} CONFIG.Enable_TLAST {true} CONFIG.TSTRB_WIDTH {64} CONFIG.HAS_TKEEP {true} CONFIG.TKEEP_WIDTH {64} CONFIG.FIFO_Implementation_wach {Independent_Clocks_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_wach {15} CONFIG.Empty_Threshold_Assert_Value_wach {13} CONFIG.FIFO_Implementation_wdch {Independent_Clocks_Builtin_FIFO} CONFIG.Empty_Threshold_Assert_Value_wdch {1018} CONFIG.FIFO_Implementation_wrch {Independent_Clocks_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_wrch {15} CONFIG.Empty_Threshold_Assert_Value_wrch {13} CONFIG.FIFO_Implementation_rach {Independent_Clocks_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_rach {15} CONFIG.Empty_Threshold_Assert_Value_rach {13} CONFIG.FIFO_Implementation_rdch {Independent_Clocks_Builtin_FIFO} CONFIG.Empty_Threshold_Assert_Value_rdch {1018} CONFIG.FIFO_Implementation_axis {Independent_Clocks_Block_RAM} CONFIG.Input_Depth_axis {128} CONFIG.Full_Threshold_Assert_Value_axis {127} CONFIG.Empty_Threshold_Assert_Value_axis {125}] 
[get_ips fifo_generator_rdma_data] +generate_target {instantiation_template} [get_files $device_ip_dir/fifo_generator_rdma_data/fifo_generator_rdma_data.xci] +update_compile_order -fileset sources_1 + +create_ip -name axi_register_slice -vendor xilinx.com -library ip -version 2.1 -module_name axi_register_slice -dir $device_ip_dir +set_property -dict [list CONFIG.PROTOCOL {AXI4LITE} CONFIG.REG_W {7} CONFIG.REG_R {7} CONFIG.Component_Name {axi_register_slice}] [get_ips axi_register_slice] +generate_target {instantiation_template} [get_files $device_ip_dir/axi_register_slice/axi_register_slice.xci] +update_compile_order -fileset sources_1 + +#Interconnects +create_ip -name axis_interconnect -vendor xilinx.com -library ip -version 1.1 -module_name axis_interconnect_96_1to2 -dir $device_ip_dir +set_property -dict [list CONFIG.Component_Name {axis_interconnect_96_1to2} CONFIG.C_NUM_MI_SLOTS {2} CONFIG.SWITCH_TDATA_NUM_BYTES {12} CONFIG.HAS_TSTRB {false} CONFIG.HAS_TKEEP {false} CONFIG.HAS_TLAST {false} CONFIG.HAS_TID {false} CONFIG.C_M00_AXIS_REG_CONFIG {1} CONFIG.C_S00_AXIS_REG_CONFIG {1} CONFIG.C_M01_AXIS_REG_CONFIG {1} CONFIG.HAS_TDEST {true} CONFIG.C_SWITCH_TDEST_WIDTH {1} CONFIG.SWITCH_PACKET_MODE {false} CONFIG.C_SWITCH_MAX_XFERS_PER_ARB {1} CONFIG.C_SWITCH_NUM_CYCLES_TIMEOUT {0} CONFIG.M00_AXIS_TDATA_NUM_BYTES {12} CONFIG.S00_AXIS_TDATA_NUM_BYTES {12} CONFIG.M01_AXIS_TDATA_NUM_BYTES {12} CONFIG.C_M00_AXIS_BASETDEST {0x00000000} CONFIG.C_M00_AXIS_HIGHTDEST {0x00000000} CONFIG.C_M01_AXIS_BASETDEST {0x00000001} CONFIG.C_M01_AXIS_HIGHTDEST {0x00000001} CONFIG.M01_S00_CONNECTIVITY {true}] [get_ips axis_interconnect_96_1to2] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_interconnect_96_1to2/axis_interconnect_96_1to2.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_interconnect -vendor xilinx.com -library ip -version 1.1 -module_name axis_interconnect_160_2to1 -dir $device_ip_dir +set_property -dict [list 
CONFIG.Component_Name {axis_interconnect_160_2to1} CONFIG.C_NUM_SI_SLOTS {2} CONFIG.SWITCH_TDATA_NUM_BYTES {20} CONFIG.HAS_TSTRB {false} CONFIG.HAS_TKEEP {false} CONFIG.HAS_TLAST {false} CONFIG.HAS_TID {false} CONFIG.HAS_TDEST {false} CONFIG.C_SWITCH_MAX_XFERS_PER_ARB {1} CONFIG.C_SWITCH_NUM_CYCLES_TIMEOUT {0} CONFIG.M00_AXIS_TDATA_NUM_BYTES {20} CONFIG.S00_AXIS_TDATA_NUM_BYTES {20} CONFIG.S01_AXIS_TDATA_NUM_BYTES {20} CONFIG.M00_S01_CONNECTIVITY {true}] [get_ips axis_interconnect_160_2to1] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_interconnect_160_2to1/axis_interconnect_160_2to1.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_interconnect -vendor xilinx.com -library ip -version 1.1 -module_name axis_interconnect_64_1to2 -dir $device_ip_dir +set_property -dict [list CONFIG.Component_Name {axis_interconnect_64_1to2} CONFIG.C_NUM_MI_SLOTS {2} CONFIG.SWITCH_TDATA_NUM_BYTES {8} CONFIG.HAS_TSTRB {false} CONFIG.HAS_TID {false} CONFIG.C_M00_AXIS_REG_CONFIG {1} CONFIG.C_S00_AXIS_REG_CONFIG {1} CONFIG.C_M01_AXIS_REG_CONFIG {1} CONFIG.HAS_TDEST {true} CONFIG.C_SWITCH_TDEST_WIDTH {1} CONFIG.C_SWITCH_NUM_CYCLES_TIMEOUT {0} CONFIG.M00_AXIS_TDATA_NUM_BYTES {8} CONFIG.S00_AXIS_TDATA_NUM_BYTES {8} CONFIG.M01_AXIS_TDATA_NUM_BYTES {8} CONFIG.C_M00_AXIS_BASETDEST {0x00000000} CONFIG.C_M00_AXIS_HIGHTDEST {0x00000000} CONFIG.C_M01_AXIS_BASETDEST {0x00000001} CONFIG.C_M01_AXIS_HIGHTDEST {0x00000001} CONFIG.M01_S00_CONNECTIVITY {true}] [get_ips axis_interconnect_64_1to2] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_interconnect_64_1to2/axis_interconnect_64_1to2.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_interconnect -vendor xilinx.com -library ip -version 1.1 -module_name axis_interconnect_512_1to2 -dir $device_ip_dir +set_property -dict [list CONFIG.Component_Name {axis_interconnect_512_1to2} CONFIG.C_NUM_MI_SLOTS {2} CONFIG.SWITCH_TDATA_NUM_BYTES {64} CONFIG.HAS_TSTRB {false} 
CONFIG.HAS_TID {false} CONFIG.C_M00_AXIS_REG_CONFIG {1} CONFIG.C_S00_AXIS_REG_CONFIG {1} CONFIG.C_M01_AXIS_REG_CONFIG {1} CONFIG.HAS_TDEST {true} CONFIG.C_SWITCH_TDEST_WIDTH {1} CONFIG.C_SWITCH_NUM_CYCLES_TIMEOUT {0} CONFIG.M00_AXIS_TDATA_NUM_BYTES {64} CONFIG.S00_AXIS_TDATA_NUM_BYTES {64} CONFIG.M01_AXIS_TDATA_NUM_BYTES {64} CONFIG.C_M00_AXIS_BASETDEST {0x00000000} CONFIG.C_M00_AXIS_HIGHTDEST {0x00000000} CONFIG.C_M01_AXIS_BASETDEST {0x00000001} CONFIG.C_M01_AXIS_HIGHTDEST {0x00000001} CONFIG.M01_S00_CONNECTIVITY {true}] [get_ips axis_interconnect_512_1to2] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_interconnect_512_1to2/axis_interconnect_512_1to2.xci] +update_compile_order -fileset sources_1 diff --git a/hw/scripts/ip_inst/network_stack.tcl b/hw/scripts/ip_inst/network_stack.tcl new file mode 100644 index 00000000..5b05d870 --- /dev/null +++ b/hw/scripts/ip_inst/network_stack.tcl @@ -0,0 +1,221 @@ +#IPs used in network_stack.sv + + +#Register slices + +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name net_axis_register_slice_64 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {8} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1}] [get_ips net_axis_register_slice_64] +generate_target {instantiation_template} [get_files $device_ip_dir/net_axis_register_slice_64/net_axis_register_slice_64.xci] +update_compile_order -fileset sources_1 + +#FIFO / RX Buffer +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_64_d1024 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {8} CONFIG.FIFO_DEPTH {1024} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_WR_DATA_COUNT {1} CONFIG.HAS_RD_DATA_COUNT {1} CONFIG.Component_Name {axis_data_fifo_64_d1024}] [get_ips axis_data_fifo_64_d1024] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_data_fifo_64_d1024/axis_data_fifo_64_d1024.xci] +update_compile_order 
-fileset sources_1 + +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_128_d1024 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {16} CONFIG.FIFO_DEPTH {1024} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_WR_DATA_COUNT {1} CONFIG.HAS_RD_DATA_COUNT {1} CONFIG.Component_Name {axis_data_fifo_128_d1024}] [get_ips axis_data_fifo_128_d1024] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_data_fifo_128_d1024/axis_data_fifo_128_d1024.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_256_d1024 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {32} CONFIG.FIFO_DEPTH {1024} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_WR_DATA_COUNT {1} CONFIG.HAS_RD_DATA_COUNT {1} CONFIG.Component_Name {axis_data_fifo_256_d1024}] [get_ips axis_data_fifo_256_d1024] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_data_fifo_256_d1024/axis_data_fifo_256_d1024.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_512_d1024 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.FIFO_DEPTH {1024} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_WR_DATA_COUNT {1} CONFIG.HAS_RD_DATA_COUNT {1} CONFIG.Component_Name {axis_data_fifo_512_d1024}] [get_ips axis_data_fifo_512_d1024] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_data_fifo_512_d1024/axis_data_fifo_512_d1024.xci] +update_compile_order -fileset sources_1 + +#Interconnects + +create_ip -name axis_interconnect -vendor xilinx.com -library ip -version 1.1 -module_name axis_interconnect_merger_256 -dir $device_ip_dir +set_property -dict [list CONFIG.Component_Name {axis_interconnect_merger_256} CONFIG.C_NUM_SI_SLOTS {2} 
CONFIG.SWITCH_TDATA_NUM_BYTES {32} CONFIG.HAS_TSTRB {false} CONFIG.HAS_TKEEP {false} CONFIG.HAS_TLAST {false} CONFIG.HAS_TID {false} CONFIG.HAS_TDEST {false} CONFIG.SWITCH_PACKET_MODE {false} CONFIG.C_SWITCH_MAX_XFERS_PER_ARB {1} CONFIG.C_SWITCH_NUM_CYCLES_TIMEOUT {0} CONFIG.M00_AXIS_TDATA_NUM_BYTES {32} CONFIG.S00_AXIS_TDATA_NUM_BYTES {32} CONFIG.S01_AXIS_TDATA_NUM_BYTES {32} CONFIG.M00_S01_CONNECTIVITY {true}] [get_ips axis_interconnect_merger_256] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_interconnect_merger_256/axis_interconnect_merger_256.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_interconnect -vendor xilinx.com -library ip -version 1.1 -module_name axis_interconnect_2to1 -dir $device_ip_dir +set_property -dict [list CONFIG.C_NUM_SI_SLOTS {2} CONFIG.SWITCH_TDATA_NUM_BYTES {8} CONFIG.HAS_TSTRB {false} CONFIG.HAS_TID {false} CONFIG.HAS_TDEST {false} CONFIG.SWITCH_PACKET_MODE {true} CONFIG.C_SWITCH_MAX_XFERS_PER_ARB {0} CONFIG.C_M00_AXIS_REG_CONFIG {1} CONFIG.C_S00_AXIS_REG_CONFIG {1} CONFIG.C_S01_AXIS_REG_CONFIG {1} CONFIG.C_SWITCH_NUM_CYCLES_TIMEOUT {0} CONFIG.M00_AXIS_TDATA_NUM_BYTES {8} CONFIG.S00_AXIS_TDATA_NUM_BYTES {8} CONFIG.S01_AXIS_TDATA_NUM_BYTES {8} CONFIG.M00_S01_CONNECTIVITY {true}] [get_ips axis_interconnect_2to1] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_interconnect_2to1/axis_interconnect_2to1.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_interconnect -vendor xilinx.com -library ip -version 1.1 -module_name axis_interconnect_3to1 -dir $device_ip_dir +set_property -dict [list CONFIG.C_NUM_SI_SLOTS {3} CONFIG.SWITCH_TDATA_NUM_BYTES {8} CONFIG.HAS_TSTRB {false} CONFIG.HAS_TID {false} CONFIG.HAS_TDEST {false} CONFIG.SWITCH_PACKET_MODE {true} CONFIG.C_S00_AXIS_REG_CONFIG {1} CONFIG.C_S01_AXIS_REG_CONFIG {1} CONFIG.C_S02_AXIS_REG_CONFIG {1} CONFIG.C_SWITCH_MAX_XFERS_PER_ARB {0} CONFIG.C_SWITCH_NUM_CYCLES_TIMEOUT {0} 
CONFIG.M00_AXIS_TDATA_NUM_BYTES {8} CONFIG.S00_AXIS_TDATA_NUM_BYTES {8} CONFIG.S01_AXIS_TDATA_NUM_BYTES {8} CONFIG.S02_AXIS_TDATA_NUM_BYTES {8} CONFIG.M00_S01_CONNECTIVITY {true} CONFIG.M00_S02_CONNECTIVITY {true}] [get_ips axis_interconnect_3to1] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_interconnect_3to1/axis_interconnect_3to1.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_interconnect -vendor xilinx.com -library ip -version 1.1 -module_name axis_interconnect_4to1 -dir $device_ip_dir +set_property -dict [list CONFIG.C_NUM_SI_SLOTS {4} CONFIG.SWITCH_TDATA_NUM_BYTES {8} CONFIG.HAS_TSTRB {false} CONFIG.HAS_TID {false} CONFIG.HAS_TDEST {false} CONFIG.SWITCH_PACKET_MODE {true} CONFIG.C_SWITCH_MAX_XFERS_PER_ARB {0} CONFIG.C_M00_AXIS_REG_CONFIG {1} CONFIG.C_S00_AXIS_REG_CONFIG {1} CONFIG.C_S01_AXIS_REG_CONFIG {1} CONFIG.C_S02_AXIS_REG_CONFIG {1} CONFIG.C_S03_AXIS_REG_CONFIG {1} CONFIG.C_SWITCH_NUM_CYCLES_TIMEOUT {0} CONFIG.M00_AXIS_TDATA_NUM_BYTES {8} CONFIG.S00_AXIS_TDATA_NUM_BYTES {8} CONFIG.S01_AXIS_TDATA_NUM_BYTES {8} CONFIG.S02_AXIS_TDATA_NUM_BYTES {8} CONFIG.S03_AXIS_TDATA_NUM_BYTES {8} CONFIG.M00_S01_CONNECTIVITY {true} CONFIG.M00_S02_CONNECTIVITY {true} CONFIG.M00_S03_CONNECTIVITY {true}] [get_ips axis_interconnect_4to1] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_interconnect_4to1/axis_interconnect_4to1.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_interconnect -vendor xilinx.com -library ip -version 1.1 -module_name axis_interconnect_128_2to1 -dir $device_ip_dir +set_property -dict [list CONFIG.C_NUM_SI_SLOTS {2} CONFIG.SWITCH_TDATA_NUM_BYTES {16} CONFIG.HAS_TSTRB {false} CONFIG.HAS_TID {false} CONFIG.HAS_TDEST {false} CONFIG.SWITCH_PACKET_MODE {true} CONFIG.C_SWITCH_MAX_XFERS_PER_ARB {0} CONFIG.C_M00_AXIS_REG_CONFIG {1} CONFIG.C_S00_AXIS_REG_CONFIG {1} CONFIG.C_S01_AXIS_REG_CONFIG {1} CONFIG.C_SWITCH_NUM_CYCLES_TIMEOUT {0} CONFIG.M00_AXIS_TDATA_NUM_BYTES 
{16} CONFIG.S00_AXIS_TDATA_NUM_BYTES {16} CONFIG.S01_AXIS_TDATA_NUM_BYTES {16} CONFIG.M00_S01_CONNECTIVITY {true}] [get_ips axis_interconnect_128_2to1] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_interconnect_128_2to1/axis_interconnect_128_2to1.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_interconnect -vendor xilinx.com -library ip -version 1.1 -module_name axis_interconnect_128_4to1 -dir $device_ip_dir +set_property -dict [list CONFIG.C_NUM_SI_SLOTS {4} CONFIG.SWITCH_TDATA_NUM_BYTES {16} CONFIG.HAS_TSTRB {false} CONFIG.HAS_TID {false} CONFIG.HAS_TDEST {false} CONFIG.SWITCH_PACKET_MODE {true} CONFIG.C_SWITCH_MAX_XFERS_PER_ARB {0} CONFIG.C_M00_AXIS_REG_CONFIG {1} CONFIG.C_S00_AXIS_REG_CONFIG {1} CONFIG.C_S01_AXIS_REG_CONFIG {1} CONFIG.C_S02_AXIS_REG_CONFIG {1} CONFIG.C_S03_AXIS_REG_CONFIG {1} CONFIG.C_SWITCH_NUM_CYCLES_TIMEOUT {0} CONFIG.M00_AXIS_TDATA_NUM_BYTES {16} CONFIG.S00_AXIS_TDATA_NUM_BYTES {16} CONFIG.S01_AXIS_TDATA_NUM_BYTES {16} CONFIG.S02_AXIS_TDATA_NUM_BYTES {16} CONFIG.S03_AXIS_TDATA_NUM_BYTES {16} CONFIG.M00_S01_CONNECTIVITY {true} CONFIG.M00_S02_CONNECTIVITY {true} CONFIG.M00_S03_CONNECTIVITY {true}] [get_ips axis_interconnect_128_4to1] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_interconnect_128_4to1/axis_interconnect_128_4to1.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_interconnect -vendor xilinx.com -library ip -version 1.1 -module_name axis_interconnect_256_2to1 -dir $device_ip_dir +set_property -dict [list CONFIG.C_NUM_SI_SLOTS {2} CONFIG.SWITCH_TDATA_NUM_BYTES {32} CONFIG.HAS_TSTRB {false} CONFIG.HAS_TID {false} CONFIG.HAS_TDEST {false} CONFIG.SWITCH_PACKET_MODE {true} CONFIG.C_SWITCH_MAX_XFERS_PER_ARB {0} CONFIG.C_M00_AXIS_REG_CONFIG {1} CONFIG.C_S00_AXIS_REG_CONFIG {1} CONFIG.C_S01_AXIS_REG_CONFIG {1} CONFIG.C_SWITCH_NUM_CYCLES_TIMEOUT {0} CONFIG.M00_AXIS_TDATA_NUM_BYTES {32} CONFIG.S00_AXIS_TDATA_NUM_BYTES {32} 
CONFIG.S01_AXIS_TDATA_NUM_BYTES {32} CONFIG.M00_S01_CONNECTIVITY {true}] [get_ips axis_interconnect_256_2to1] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_interconnect_256_2to1/axis_interconnect_256_2to1.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_interconnect -vendor xilinx.com -library ip -version 1.1 -module_name axis_interconnect_256_4to1 -dir $device_ip_dir +set_property -dict [list CONFIG.C_NUM_SI_SLOTS {4} CONFIG.SWITCH_TDATA_NUM_BYTES {32} CONFIG.HAS_TSTRB {false} CONFIG.HAS_TID {false} CONFIG.HAS_TDEST {false} CONFIG.SWITCH_PACKET_MODE {true} CONFIG.C_SWITCH_MAX_XFERS_PER_ARB {0} CONFIG.C_M00_AXIS_REG_CONFIG {1} CONFIG.C_S00_AXIS_REG_CONFIG {1} CONFIG.C_S01_AXIS_REG_CONFIG {1} CONFIG.C_S02_AXIS_REG_CONFIG {1} CONFIG.C_S03_AXIS_REG_CONFIG {1} CONFIG.C_SWITCH_NUM_CYCLES_TIMEOUT {0} CONFIG.M00_AXIS_TDATA_NUM_BYTES {32} CONFIG.S00_AXIS_TDATA_NUM_BYTES {32} CONFIG.S01_AXIS_TDATA_NUM_BYTES {32} CONFIG.S02_AXIS_TDATA_NUM_BYTES {32} CONFIG.S03_AXIS_TDATA_NUM_BYTES {32} CONFIG.M00_S01_CONNECTIVITY {true} CONFIG.M00_S02_CONNECTIVITY {true} CONFIG.M00_S03_CONNECTIVITY {true}] [get_ips axis_interconnect_256_4to1] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_interconnect_256_4to1/axis_interconnect_256_4to1.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_interconnect -vendor xilinx.com -library ip -version 1.1 -module_name axis_interconnect_512_2to1 -dir $device_ip_dir +set_property -dict [list CONFIG.C_NUM_SI_SLOTS {2} CONFIG.SWITCH_TDATA_NUM_BYTES {64} CONFIG.HAS_TSTRB {false} CONFIG.HAS_TID {false} CONFIG.HAS_TDEST {false} CONFIG.SWITCH_PACKET_MODE {true} CONFIG.C_SWITCH_MAX_XFERS_PER_ARB {0} CONFIG.C_M00_AXIS_REG_CONFIG {1} CONFIG.C_S00_AXIS_REG_CONFIG {1} CONFIG.C_S01_AXIS_REG_CONFIG {1} CONFIG.C_SWITCH_NUM_CYCLES_TIMEOUT {0} CONFIG.M00_AXIS_TDATA_NUM_BYTES {64} CONFIG.S00_AXIS_TDATA_NUM_BYTES {64} CONFIG.S01_AXIS_TDATA_NUM_BYTES {64} CONFIG.M00_S01_CONNECTIVITY 
{true}] [get_ips axis_interconnect_512_2to1] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_interconnect_512_2to1/axis_interconnect_512_2to1.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_interconnect -vendor xilinx.com -library ip -version 1.1 -module_name axis_interconnect_512_4to1 -dir $device_ip_dir +set_property -dict [list CONFIG.C_NUM_SI_SLOTS {4} CONFIG.SWITCH_TDATA_NUM_BYTES {64} CONFIG.HAS_TSTRB {false} CONFIG.HAS_TID {false} CONFIG.HAS_TDEST {false} CONFIG.SWITCH_PACKET_MODE {true} CONFIG.C_SWITCH_MAX_XFERS_PER_ARB {0} CONFIG.C_M00_AXIS_REG_CONFIG {1} CONFIG.C_S00_AXIS_REG_CONFIG {1} CONFIG.C_S01_AXIS_REG_CONFIG {1} CONFIG.C_S02_AXIS_REG_CONFIG {1} CONFIG.C_S03_AXIS_REG_CONFIG {1} CONFIG.C_SWITCH_NUM_CYCLES_TIMEOUT {0} CONFIG.M00_AXIS_TDATA_NUM_BYTES {64} CONFIG.S00_AXIS_TDATA_NUM_BYTES {64} CONFIG.S01_AXIS_TDATA_NUM_BYTES {64} CONFIG.S02_AXIS_TDATA_NUM_BYTES {64} CONFIG.S03_AXIS_TDATA_NUM_BYTES {64} CONFIG.M00_S01_CONNECTIVITY {true} CONFIG.M00_S02_CONNECTIVITY {true} CONFIG.M00_S03_CONNECTIVITY {true}] [get_ips axis_interconnect_512_4to1] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_interconnect_512_4to1/axis_interconnect_512_4to1.xci] +update_compile_order -fileset sources_1 + +#Data Width Converter + +create_ip -name axis_dwidth_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_64_to_512_converter -dir $device_ip_dir +set_property -dict [list CONFIG.S_TDATA_NUM_BYTES {8} CONFIG.M_TDATA_NUM_BYTES {64} CONFIG.HAS_TLAST {1} CONFIG.HAS_TKEEP {1} CONFIG.HAS_MI_TKEEP {1} CONFIG.TDEST_WIDTH {1} CONFIG.Component_Name {axis_64_to_512_converter}] [get_ips axis_64_to_512_converter] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_64_to_512_converter/axis_64_to_512_converter.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_dwidth_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_512_to_64_converter -dir 
$device_ip_dir +set_property -dict [list CONFIG.S_TDATA_NUM_BYTES {64} CONFIG.M_TDATA_NUM_BYTES {8} CONFIG.HAS_TLAST {1} CONFIG.HAS_TKEEP {1} CONFIG.HAS_MI_TKEEP {1} CONFIG.Component_Name {axis_512_to_64_converter}] [get_ips axis_512_to_64_converter] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_512_to_64_converter/axis_512_to_64_converter.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_dwidth_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_128_to_512_converter -dir $device_ip_dir +set_property -dict [list CONFIG.S_TDATA_NUM_BYTES {16} CONFIG.M_TDATA_NUM_BYTES {64} CONFIG.HAS_TLAST {1} CONFIG.HAS_TKEEP {1} CONFIG.HAS_MI_TKEEP {1} CONFIG.TDEST_WIDTH {1} CONFIG.Component_Name {axis_128_to_512_converter}] [get_ips axis_128_to_512_converter] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_128_to_512_converter/axis_128_to_512_converter.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_dwidth_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_512_to_128_converter -dir $device_ip_dir +set_property -dict [list CONFIG.S_TDATA_NUM_BYTES {64} CONFIG.M_TDATA_NUM_BYTES {16} CONFIG.HAS_TLAST {1} CONFIG.HAS_TKEEP {1} CONFIG.HAS_MI_TKEEP {1} CONFIG.Component_Name {axis_512_to_128_converter}] [get_ips axis_512_to_128_converter] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_512_to_128_converter/axis_512_to_128_converter.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_dwidth_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_256_to_512_converter -dir $device_ip_dir +set_property -dict [list CONFIG.S_TDATA_NUM_BYTES {32} CONFIG.M_TDATA_NUM_BYTES {64} CONFIG.HAS_TLAST {1} CONFIG.HAS_TKEEP {1} CONFIG.HAS_MI_TKEEP {1} CONFIG.TDEST_WIDTH {1} CONFIG.Component_Name {axis_256_to_512_converter}] [get_ips axis_256_to_512_converter] +generate_target {instantiation_template} [get_files 
$device_ip_dir/axis_256_to_512_converter/axis_256_to_512_converter.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_dwidth_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_512_to_256_converter -dir $device_ip_dir +set_property -dict [list CONFIG.S_TDATA_NUM_BYTES {64} CONFIG.M_TDATA_NUM_BYTES {32} CONFIG.HAS_TLAST {1} CONFIG.HAS_TKEEP {1} CONFIG.HAS_MI_TKEEP {1} CONFIG.Component_Name {axis_512_to_256_converter}] [get_ips axis_512_to_256_converter] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_512_to_256_converter/axis_512_to_256_converter.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_dwidth_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_64_to_128_converter -dir $device_ip_dir +set_property -dict [list CONFIG.S_TDATA_NUM_BYTES {8} CONFIG.M_TDATA_NUM_BYTES {16} CONFIG.HAS_TLAST {1} CONFIG.HAS_TKEEP {1} CONFIG.HAS_MI_TKEEP {1} CONFIG.TDEST_WIDTH {1} CONFIG.Component_Name {axis_64_to_128_converter}] [get_ips axis_64_to_128_converter] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_64_to_128_converter/axis_64_to_128_converter.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_dwidth_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_128_to_64_converter -dir $device_ip_dir +set_property -dict [list CONFIG.S_TDATA_NUM_BYTES {16} CONFIG.M_TDATA_NUM_BYTES {8} CONFIG.HAS_TLAST {1} CONFIG.HAS_TKEEP {1} CONFIG.HAS_MI_TKEEP {1} CONFIG.Component_Name {axis_128_to_64_converter}] [get_ips axis_128_to_64_converter] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_128_to_64_converter/axis_128_to_64_converter.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_dwidth_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_64_to_256_converter -dir $device_ip_dir +set_property -dict [list CONFIG.S_TDATA_NUM_BYTES {8} CONFIG.M_TDATA_NUM_BYTES {32} 
CONFIG.HAS_TLAST {1} CONFIG.HAS_TKEEP {1} CONFIG.HAS_MI_TKEEP {1} CONFIG.TDEST_WIDTH {1} CONFIG.Component_Name {axis_64_to_256_converter}] [get_ips axis_64_to_256_converter] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_64_to_256_converter/axis_64_to_256_converter.xci] +update_compile_order -fileset sources_1 + +create_ip -name axis_dwidth_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_256_to_64_converter -dir $device_ip_dir +set_property -dict [list CONFIG.S_TDATA_NUM_BYTES {32} CONFIG.M_TDATA_NUM_BYTES {8} CONFIG.HAS_TLAST {1} CONFIG.HAS_TKEEP {1} CONFIG.HAS_MI_TKEEP {1} CONFIG.Component_Name {axis_256_to_64_converter}] [get_ips axis_256_to_64_converter] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_256_to_64_converter/axis_256_to_64_converter.xci] +update_compile_order -fileset sources_1 + +#HLS IP cores + +#create_ip -name toe -vendor ethz.systems -library hls -version 1.6 -module_name toe_ip -dir $device_ip_dir +#generate_target {instantiation_template} [get_files $device_ip_dir/toe_ip/toe_ip.xci] +#update_compile_order -fileset sources_1 + +create_ip -name ip_handler -vendor ethz.systems.fpga -library hls -version 2.0 -module_name ip_handler_ip -dir $device_ip_dir +generate_target {instantiation_template} [get_files $device_ip_dir/ip_handler_ip/ip_handler_ip.xci] +update_compile_order -fileset sources_1 + +create_ip -name mac_ip_encode -vendor ethz.systems.fpga -library hls -version 2.0 -module_name mac_ip_encode_ip -dir $device_ip_dir +generate_target {instantiation_template} [get_files $device_ip_dir/mac_ip_encode_ip/mac_ip_encode_ip.xci] +update_compile_order -fileset sources_1 + +create_ip -name icmp_server -vendor xilinx.labs -library hls -version 1.67 -module_name icmp_server_ip -dir $device_ip_dir +generate_target {instantiation_template} [get_files $device_ip_dir/icmp_server_ip/icmp_server_ip.xci] +update_compile_order -fileset sources_1 + +#create_ip -name 
echo_server_application -vendor ethz.systems -library hls -version 1.2 -module_name echo_server_application_ip -dir $device_ip_dir +#generate_target {instantiation_template} [get_files $device_ip_dir/echo_server_application_ip/echo_server_application_ip.xci] +#update_compile_order -fileset sources_1 + +#create_ip -name iperf_client -vendor ethz.systems.fpga -library hls -version 1.0 -module_name iperf_client_ip -dir $device_ip_dir +#generate_target {instantiation_template} [get_files $device_ip_dir/iperf_client_ip/iperf_client_ip.xci] +#update_compile_order -fileset sources_1 + +create_ip -name arp_server_subnet -vendor ethz.systems.fpga -library hls -version 1.1 -module_name arp_server_subnet_ip -dir $device_ip_dir +generate_target {instantiation_template} [get_files $device_ip_dir/arp_server_subnet_ip/arp_server_subnet_ip.xci] +update_compile_order -fileset sources_1 + +#create_ip -name ipv4 -vendor ethz.systems.fpga -library hls -version 0.1 -module_name ipv4_ip -dir $device_ip_dir +#generate_target {instantiation_template} [get_files $device_ip_dir/ipv4_ip/ipv4_ip.xci] +#update_compile_order -fileset sources_1 + +#create_ip -name udp -vendor ethz.systems.fpga -library hls -version 0.4 -module_name udp_ip -dir $device_ip_dir +#generate_target {instantiation_template} [get_files $device_ip_dir/udp_ip/udp_ip.xci] +#update_compile_order -fileset sources_1 + +#create_ip -name iperf_udp -vendor ethz.systems.fpga -library hls -version 0.9 -module_name iperf_udp_ip -dir $device_ip_dir +#generate_target {instantiation_template} [get_files $device_ip_dir/iperf_udp_ip/iperf_udp_ip.xci] +#update_compile_order -fileset sources_1 + +#create_ip -name udpAppMux -vendor xilinx.labs -library hls -version 1.05 -module_name udpAppMux_0 -dir $device_ip_dir +#generate_target {instantiation_template} [get_files $device_ip_dir/udpAppMux_0/udpAppMux_0.xci] +#update_compile_order -fileset sources_1 + +#create_ip -name dhcp_client -vendor xilinx.labs -library hls -version 1.05 
-module_name dhcp_client_ip -dir $device_ip_dir +#generate_target {instantiation_template} [get_files $device_ip_dir/dhcp_client_ip/dhcp_client_ip.xci] +#update_compile_order -fileset sources_1 + +create_ip -name rocev2 -vendor ethz.systems.fpga -library hls -version 0.82 -module_name rocev2_ip -dir $device_ip_dir +generate_target {instantiation_template} [get_files $device_ip_dir/rocev2_ip/rocev2_ip.xci] +update_compile_order -fileset sources_1 + +#create_ip -name hash_table -vendor ethz.systems.fpga -library hls -version 1.0 -module_name hash_table_ip -dir $device_ip_dir +#generate_target {instantiation_template} [get_files $device_ip_dir/hash_table_ip/hash_table_ip.xci] +#update_compile_order -fileset sources_1 + +## Crossings +create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_clock_converter_32_0 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {4} CONFIG.Component_Name {axis_clock_converter_32_0}] [get_ips axis_clock_converter_32_0] + +create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_clock_converter_56_0 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {7} CONFIG.Component_Name {axis_clock_converter_56_0}] [get_ips axis_clock_converter_56_0] + +create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_clock_converter_8_0 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {1} CONFIG.Component_Name {axis_clock_converter_8_0}] [get_ips axis_clock_converter_8_0] + +create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_clock_converter_144_0 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {18} CONFIG.Component_Name {axis_clock_converter_144_0}] [get_ips axis_clock_converter_144_0] + +create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_clock_converter_184_0 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {23} CONFIG.Component_Name 
{axis_clock_converter_184_0}] [get_ips axis_clock_converter_184_0] + +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_req_rdma_256 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {32} CONFIG.FIFO_DEPTH {128} CONFIG.IS_ACLK_ASYNC {1} CONFIG.Component_Name {axis_data_fifo_req_rdma_256}] [get_ips axis_data_fifo_req_rdma_256] + +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_cmd_rdma_96 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {12} CONFIG.IS_ACLK_ASYNC {1} CONFIG.FIFO_DEPTH {256} CONFIG.Component_Name {axis_data_fifo_cmd_rdma_96}] [get_ips axis_data_fifo_cmd_rdma_96] + +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_rdma_512 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.IS_ACLK_ASYNC {1} CONFIG.FIFO_DEPTH {1024} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.Component_Name {axis_data_fifo_rdma_512}] [get_ips axis_data_fifo_rdma_512] + +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_cnfg_rdma_256 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {32} CONFIG.HAS_WR_DATA_COUNT {1} CONFIG.FIFO_DEPTH {128} CONFIG.Component_Name {axis_data_fifo_cnfg_rdma_256}] [get_ips axis_data_fifo_cnfg_rdma_256] + diff --git a/hw/scripts/ip_inst/network_ultraplus.tcl b/hw/scripts/ip_inst/network_ultraplus.tcl new file mode 100644 index 00000000..ec7d25ee --- /dev/null +++ b/hw/scripts/ip_inst/network_ultraplus.tcl @@ -0,0 +1,71 @@ +#Network +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_64_cc -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {8} CONFIG.IS_ACLK_ASYNC {1} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.SYNCHRONIZATION_STAGES {3} CONFIG.Component_Name {axis_data_fifo_64_cc}] [get_ips axis_data_fifo_64_cc] +generate_target {instantiation_template} 
[get_files $device_ip_dir/axis_data_fifo_64_cc/axis_data_fifo_64_cc.xci] + +create_ip -name fifo_generator -vendor xilinx.com -library ip -version 13.2 -module_name axis_sync_fifo -dir $device_ip_dir +set_property -dict [list CONFIG.INTERFACE_TYPE {AXI_STREAM} CONFIG.FIFO_Implementation_axis {Common_Clock_Block_RAM} CONFIG.TDATA_NUM_BYTES {8} CONFIG.TUSER_WIDTH {0} CONFIG.Enable_TLAST {true} CONFIG.HAS_TKEEP {true} CONFIG.Enable_Data_Counts_axis {true} CONFIG.Reset_Type {Asynchronous_Reset} CONFIG.Full_Flags_Reset_Value {1} CONFIG.TSTRB_WIDTH {8} CONFIG.TKEEP_WIDTH {8} CONFIG.FIFO_Implementation_wach {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_wach {15} CONFIG.Empty_Threshold_Assert_Value_wach {14} CONFIG.FIFO_Implementation_wrch {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_wrch {15} CONFIG.Empty_Threshold_Assert_Value_wrch {14} CONFIG.FIFO_Implementation_rach {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_rach {15} CONFIG.Empty_Threshold_Assert_Value_rach {14}] [get_ips axis_sync_fifo] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_sync_fifo/axis_sync_fifo.xci] +update_compile_order -fileset sources_1 + +create_ip -name fifo_generator -vendor xilinx.com -library ip -version 13.2 -module_name cmd_fifo_xgemac_rxif -dir $device_ip_dir +set_property -dict [list CONFIG.Fifo_Implementation {Common_Clock_Block_RAM} CONFIG.Input_Data_Width {16} CONFIG.Output_Data_Width {16} CONFIG.Reset_Type {Asynchronous_Reset} CONFIG.Full_Flags_Reset_Value {1} CONFIG.Use_Embedded_Registers {false} CONFIG.Full_Threshold_Assert_Value {1022} CONFIG.Full_Threshold_Negate_Value {1021} CONFIG.Enable_Safety_Circuit {false}] [get_ips cmd_fifo_xgemac_rxif] +generate_target {instantiation_template} [get_files $device_ip_dir/cmd_fifo_xgemac_rxif/cmd_fifo_xgemac_rxif.xci] +update_compile_order -fileset sources_1 + +create_ip -name fifo_generator -vendor xilinx.com -library ip -version 13.2 -module_name 
cmd_fifo_xgemac_txif -dir $device_ip_dir +set_property -dict [list CONFIG.Fifo_Implementation {Common_Clock_Block_RAM} CONFIG.Input_Data_Width {1} CONFIG.Output_Data_Width {1} CONFIG.Reset_Type {Asynchronous_Reset} CONFIG.Full_Flags_Reset_Value {1} CONFIG.Full_Threshold_Assert_Value {1022} CONFIG.Full_Threshold_Negate_Value {1021} CONFIG.Enable_Safety_Circuit {false}] [get_ips cmd_fifo_xgemac_txif] +generate_target {instantiation_template} [get_files $device_ip_dir/cmd_fifo_xgemac_txif/cmd_fifo_xgemac_txif.xci] +update_compile_order -fileset sources_1 + +#create_ip -name ethernet_frame_padding -vendor ethz.systems.fpga -library hls -version 0.1 -module_name ethernet_frame_padding_ip -dir $device_ip_dir +#generate_target {instantiation_template} [get_files $device_ip_dir/ethernet_frame_padding_ip/ethernet_frame_padding_ip.xci] +#update_compile_order -fileset sources_1 + + +#100G +if {$cfg(fdev) eq "vcu118"} { + create_ip -name cmac_usplus -vendor xilinx.com -library ip -version 3.0 -module_name cmac_usplus_axis -dir $device_ip_dir + set_property -dict [list CONFIG.CMAC_CAUI4_MODE {1} CONFIG.NUM_LANES {4x25} CONFIG.GT_REF_CLK_FREQ {156.25} CONFIG.USER_INTERFACE {AXIS} CONFIG.GT_DRP_CLK {125} CONFIG.TX_FLOW_CONTROL {0} CONFIG.RX_FLOW_CONTROL {0} CONFIG.CMAC_CORE_SELECT {CMACE4_X0Y8} CONFIG.GT_GROUP_SELECT {X1Y52~X1Y55} CONFIG.LANE1_GT_LOC {X1Y52} CONFIG.LANE2_GT_LOC {X1Y53} CONFIG.LANE3_GT_LOC {X1Y54} CONFIG.LANE4_GT_LOC {X1Y55} CONFIG.LANE5_GT_LOC {NA} CONFIG.LANE6_GT_LOC {NA} CONFIG.LANE7_GT_LOC {NA} CONFIG.LANE8_GT_LOC {NA} CONFIG.LANE9_GT_LOC {NA} CONFIG.LANE10_GT_LOC {NA} CONFIG.Component_Name {cmac_usplus_axis}] [get_ips cmac_usplus_axis] + generate_target {instantiation_template} [get_files $device_ip_dir/cmac_usplus_axis/cmac_usplus_axis.xci] + update_compile_order -fileset sources_1 +} + +if {$cfg(fdev) eq "u250"} { + if {$cfg(qsfp) eq 0} { + create_ip -name cmac_usplus -vendor xilinx.com -library ip -version 3.0 -module_name cmac_usplus_axis -dir 
$device_ip_dir + set_property -dict [list CONFIG.CMAC_CAUI4_MODE {1} CONFIG.NUM_LANES {4x25} CONFIG.GT_REF_CLK_FREQ {156.25} CONFIG.USER_INTERFACE {AXIS} CONFIG.GT_DRP_CLK {250} CONFIG.TX_FLOW_CONTROL {0} CONFIG.RX_FLOW_CONTROL {0} CONFIG.CMAC_CORE_SELECT {CMACE4_X0Y7} CONFIG.GT_GROUP_SELECT {X1Y44~X1Y47} CONFIG.LANE1_GT_LOC {X1Y44} CONFIG.LANE2_GT_LOC {X1Y45} CONFIG.LANE3_GT_LOC {X1Y46} CONFIG.LANE4_GT_LOC {X1Y47} CONFIG.Component_Name {cmac_usplus_axis}] [get_ips cmac_usplus_axis] + generate_target {instantiation_template} [get_files $device_ip_dir/cmac_usplus_axis/cmac_usplus_axis.xci] + update_compile_order -fileset sources_1 + } else { + create_ip -name cmac_usplus -vendor xilinx.com -library ip -version 3.0 -module_name cmac_usplus_axis -dir $device_ip_dir + set_property -dict [list CONFIG.CMAC_CAUI4_MODE {1} CONFIG.NUM_LANES {4x25} CONFIG.GT_REF_CLK_FREQ {156.25} CONFIG.USER_INTERFACE {AXIS} CONFIG.GT_DRP_CLK {250} CONFIG.TX_FLOW_CONTROL {0} CONFIG.RX_FLOW_CONTROL {0} CONFIG.CMAC_CORE_SELECT {CMACE4_X0Y7} CONFIG.GT_GROUP_SELECT {X1Y40~X1Y43} CONFIG.LANE1_GT_LOC {X1Y40} CONFIG.LANE2_GT_LOC {X1Y41} CONFIG.LANE3_GT_LOC {X1Y42} CONFIG.LANE4_GT_LOC {X1Y43} CONFIG.Component_Name {cmac_usplus_axis}] [get_ips cmac_usplus_axis] + generate_target {instantiation_template} [get_files $device_ip_dir/cmac_usplus_axis/cmac_usplus_axis.xci] + update_compile_order -fileset sources_1 + } +} + +if {$cfg(fdev) eq "u280"} { + if {$cfg(qsfp) eq 0} { + create_ip -name cmac_usplus -vendor xilinx.com -library ip -version 3.0 -module_name cmac_usplus_axis -dir $device_ip_dir + set_property -dict [list CONFIG.CMAC_CAUI4_MODE {1} CONFIG.NUM_LANES {4x25} CONFIG.GT_REF_CLK_FREQ {156.25} CONFIG.USER_INTERFACE {AXIS} CONFIG.GT_DRP_CLK {250} CONFIG.TX_FLOW_CONTROL {0} CONFIG.RX_FLOW_CONTROL {0} CONFIG.CMAC_CORE_SELECT {CMACE4_X0Y6} CONFIG.GT_GROUP_SELECT {X0Y40~X0Y43} CONFIG.LANE1_GT_LOC {X0Y40} CONFIG.LANE2_GT_LOC {X0Y41} CONFIG.LANE3_GT_LOC {X0Y42} CONFIG.LANE4_GT_LOC {X0Y43} 
CONFIG.Component_Name {cmac_usplus_axis} ] [get_ips cmac_usplus_axis] + generate_target {instantiation_template} [get_files $device_ip_dir/cmac_usplus_axis/cmac_usplus_axis.xci] + update_compile_order -fileset sources_1 + } else { + create_ip -name cmac_usplus -vendor xilinx.com -library ip -version 3.0 -module_name cmac_usplus_axis -dir $device_ip_dir + set_property -dict [list CONFIG.CMAC_CAUI4_MODE {1} CONFIG.NUM_LANES {4x25} CONFIG.GT_REF_CLK_FREQ {156.25} CONFIG.USER_INTERFACE {AXIS} CONFIG.GT_DRP_CLK {250} CONFIG.TX_FLOW_CONTROL {0} CONFIG.RX_FLOW_CONTROL {0} CONFIG.CMAC_CORE_SELECT {CMACE4_X0Y6} CONFIG.GT_GROUP_SELECT {X0Y44~X0Y47} CONFIG.LANE1_GT_LOC {X0Y44} CONFIG.LANE2_GT_LOC {X0Y45} CONFIG.LANE3_GT_LOC {X0Y46} CONFIG.LANE4_GT_LOC {X0Y47} CONFIG.Component_Name {cmac_usplus_axis} ] [get_ips cmac_usplus_axis] + generate_target {instantiation_template} [get_files $device_ip_dir/cmac_usplus_axis/cmac_usplus_axis.xci] + update_compile_order -fileset sources_1 + } +} + +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_pkg_fifo_512 -dir $device_ip_dir +set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.FIFO_MODE {2} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.Component_Name {axis_pkg_fifo_512}] [get_ips axis_pkg_fifo_512] +generate_target {instantiation_template} [get_files $device_ip_dir/axis_pkg_fifo_512/axis_pkg_fifo_512.xci] +update_compile_order -fileset sources_1 + + +create_ip -name ethernet_frame_padding_512 -vendor ethz.systems.fpga -library hls -version 0.1 -module_name ethernet_frame_padding_512_ip -dir $device_ip_dir +generate_target {instantiation_template} [get_files $device_ip_dir/ethernet_frame_padding_512_ip/ethernet_frame_padding_512_ip.xci] +update_compile_order -fileset sources_1 + diff --git a/hw/scripts/post.tcl.in b/hw/scripts/post.tcl.in new file mode 100644 index 00000000..c48a219a --- /dev/null +++ b/hw/scripts/post.tcl.in @@ -0,0 +1,106 @@ 
+########################################################################################################
+## Lynx - post-build step: export the generated bitstreams into <build_dir>/bitstreams
+########################################################################################################
+set project "${CMAKE_PROJECT_NAME}"
+set part "${FPGA_PART}"
+#set board_part "${FPGA_BOARD}"
+
+########################################################################################################
+# Source files
+########################################################################################################
+set root_dir "${CMAKE_SOURCE_DIR}"
+set build_dir "${CMAKE_BINARY_DIR}"
+set scripts_dir "$root_dir/scripts"
+set hw_dir "$root_dir"
+set ip_dir "$root_dir/ip"
+set iprepo_dir "$build_dir/iprepo"
+
+set device_ip_dir "$ip_dir/dev"
+file mkdir $device_ip_dir
+
+########################################################################################################
+# Config (upper-case placeholders are presumably filled in by CMake when this template is configured)
+########################################################################################################
+set cfg(fdev) ${FDEV_NAME}
+
+set cfg(n_reg) ${N_REGIONS}
+
+set cfg(en_strm) ${EN_STRM}
+set cfg(en_ddr) ${EN_DDR}
+set cfg(en_pr) ${EN_PR}
+
+set cfg(n_chan) [expr {$cfg(en_strm) + $cfg(en_ddr) + $cfg(en_pr)}]
+set cfg(n_ddr_chan) ${N_DDR_CHAN}
+
+set cfg(en_bpss) ${EN_BPSS}
+set cfg(en_avx) ${EN_AVX}
+
+set cfg(qsfp) ${QSFP_PORT}
+
+set cfg(ddr_0) 0
+set cfg(ddr_1) 0
+set cfg(ddr_2) 0
+set cfg(ddr_3) 0
+
+if {$cfg(n_ddr_chan) >= 1} {
+    set cfg(ddr_0) 1
+}
+if {$cfg(n_ddr_chan) >= 2} {
+    set cfg(ddr_1) 1
+}
+if {$cfg(n_ddr_chan) >= 4} {
+    set cfg(ddr_2) 1
+    set cfg(ddr_3) 1
+}
+
+set cfg(en_fv) ${EN_FV}
+set cfg(en_fvv) ${EN_FVV}
+set cfg(en_tcp) ${EN_TCP}
+
+set nn 0
+if {$cfg(en_strm) eq 1} {
+    set cfg(strm_chan) $nn
+    incr nn
+}
+if {$cfg(en_ddr) eq 1} {
+    set cfg(ddr_chan) $nn
+    incr nn
+}
+if {$cfg(en_pr) eq 1} {
+    set cfg(pr_chan) $nn
+    incr nn
+}
+
+########################################################################################################
+# Project
+########################################################################################################
+set proj_dir "$build_dir/$project"
+
+########################################################################################################
+# Extract the bitstreams
+########################################################################################################
+# Start from an empty output directory so stale bitstreams never survive a rebuild.
+file delete -force -- "$build_dir/bitstreams"
+file mkdir "$build_dir/bitstreams"
+
+# Static image. The runs directory is derived from the project name (<project>.runs) instead of
+# being hard-coded, so the export keeps working when the CMake project is not called "lynx".
+file copy -force -- "$proj_dir/$project.runs/impl_1/top.bit" "$build_dir/bitstreams/top.bit"
+
+if {$cfg(en_pr) eq 1} {
+    # Count the configurations; the scan starts at 1, i.e. hdl/config_0 is assumed to exist.
+    set n_cfg 1
+    while {[file isdirectory "$proj_dir/hdl/config_$n_cfg"]} {
+        incr n_cfg
+    }
+
+    # Partial bitstreams of configuration j are taken from run impl_(j+1), one file per region.
+    # Variable names are ended with an escaped underscore; the braced form is left to the template engine.
+    for {set j 0} {$j < $n_cfg} {incr j} {
+        set t [expr {$j + 1}]
+        file mkdir "$build_dir/bitstreams/config_$j"
+        for {set k 0} {$k < $cfg(n_reg)} {incr k} {
+            file copy -force -- "$proj_dir/$project.runs/impl_$t/inst_dynamic\_inst_user_wrapper_$k\_design_user_wrapper_c$j\_$k\_partial.bin" "$build_dir/bitstreams/config_$j/part_bstream_c$j\_$k.bin"
+        }
+    }
+}
\ No newline at end of file
diff --git a/hw/scripts/shell.tcl.in b/hw/scripts/shell.tcl.in
new file mode 100644
index 00000000..ec8adc0b
--- /dev/null
+++ b/hw/scripts/shell.tcl.in
@@ -0,0 +1,217 @@
+########################################################################################################
+## Lynx - top
+########################################################################################################
+set project "${CMAKE_PROJECT_NAME}"
+set part "${FPGA_PART}"
+#set board_part "${FPGA_BOARD}"
+
+########################################################################################################
+# Source files
+########################################################################################################
+set root_dir "${CMAKE_SOURCE_DIR}"
+set build_dir "${CMAKE_BINARY_DIR}"
+set scripts_dir "$root_dir/scripts"
+set hw_dir "$root_dir"
+set ip_dir "$root_dir/ip"
+set iprepo_dir "$build_dir/iprepo"
+
+set device_ip_dir "$ip_dir/dev"
+file mkdir $device_ip_dir
+
+########################################################################################################
+# Config
+########################################################################################################
+set cfg(fdev) ${FDEV_NAME}
+
+set cfg(n_reg) ${N_REGIONS}
+
+set cfg(en_strm) ${EN_STRM}
+set cfg(en_ddr) ${EN_DDR}
+set cfg(en_pr) ${EN_PR}
+
+set cfg(n_chan) [expr {$cfg(en_strm) + $cfg(en_ddr) + $cfg(en_pr)}]
+set cfg(n_ddr_chan) ${N_DDR_CHAN}
+
+set cfg(en_bpss) ${EN_BPSS}
+set cfg(en_avx) ${EN_AVX}
+
+set cfg(qsfp) ${QSFP_PORT}
+
+set cfg(ddr_0) 0
+set cfg(ddr_1) 0
+set cfg(ddr_2) 0
+set cfg(ddr_3) 0
+
+if {$cfg(n_ddr_chan) >= 1} {
+    set cfg(ddr_0) 1
+}
+if {$cfg(n_ddr_chan) >= 2} {
+    set cfg(ddr_1) 1
+}
+if {$cfg(n_ddr_chan) >= 4} {
+    set cfg(ddr_2) 1
+    set cfg(ddr_3) 1
+}
+
+set cfg(en_fv) ${EN_FV}
+set cfg(en_fvv) ${EN_FVV}
+set cfg(en_tcp) ${EN_TCP}
+
+set nn 0
+if {$cfg(en_strm) eq 1} {
+    set cfg(strm_chan) $nn
+    incr nn
+}
+if {$cfg(en_ddr) eq 1} {
+    set cfg(ddr_chan) $nn
+    incr nn
+}
+if {$cfg(en_pr) eq 1} {
+    set cfg(pr_chan) $nn
+    incr nn
+}
+
+########################################################################################################
+# Project
+########################################################################################################
+set proj_dir "$build_dir/$project"
+
+# Check iprepo
+if { [file isdirectory $iprepo_dir] } {
+    set lib_dir "$iprepo_dir"
+} else {
+    puts "iprepo directory could not be found."
+    set lib_dir "" ;# keep lib_dir defined so the script can continue without a custom IP repository
+}
+
+# Create project (the IP repository path is only registered when the iprepo directory exists)
+create_project $project $proj_dir -part $part
+set proj [current_project]
+if {$lib_dir ne ""} { set_property IP_REPO_PATHS $lib_dir [current_fileset] }
+update_ip_catalog
+
+########################################################################################################
+# Set project properties
+########################################################################################################
+#set_property "board_part" $board_part $proj
+set_property "default_lib" "xil_defaultlib" $proj
+set_property "ip_cache_permissions" "read write" $proj
+set_property "ip_output_repo" "$proj_dir/$project.cache/ip" $proj
+set_property "sim.ip.auto_export_scripts" "1" $proj
+set_property "target_language" "Verilog" $proj
+set_property "simulator_language" "Mixed" $proj
+set_property "xpm_libraries" "XPM_CDC XPM_MEMORY" $proj
+if {$cfg(en_pr) eq 1} {
+    set_property "pr_flow" "1" $proj
+}
+
+########################################################################################################
+# Create and add source files (the wr_hdl_* procs write generated HDL below <proj_dir>/hdl)
+########################################################################################################
+file mkdir "$proj_dir/hdl/wrappers"
+file mkdir "$proj_dir/hdl/wrappers/config_0"
+file mkdir "$proj_dir/hdl/config_0"
+
+# Package
+source "$scripts_dir/wr_hdl/wr_hdl_pkg.tcl" -notrace
+wr_hdl_pkg "$proj_dir/hdl/lynx_pkg.sv"
+
+# Top level (generator script is selected by the target device name)
+source "$scripts_dir/wr_hdl/wr_hdl_top_$cfg(fdev).tcl" -notrace
+wr_hdl_top "$proj_dir/hdl/wrappers/top.sv"
+
+# Dynamic wrapper
+source "$scripts_dir/wr_hdl/wr_hdl_dynamic.tcl" -notrace
+wr_hdl_dynamic "$proj_dir/hdl/wrappers/dynamic_wrapper.sv" 0
+
+# Init config: one user wrapper and one user logic file per region, all for configuration 0
+source "$scripts_dir/wr_hdl/wr_hdl_user.tcl" -notrace
+for {set i 0} {$i < $cfg(n_reg)} {incr i} {
+    wr_hdl_user_wrapper "$proj_dir/hdl/wrappers/config_0/design_user_wrapper_c0_$i.sv" $i
+    wr_hdl_user "$proj_dir/hdl/config_0/design_user_logic_c0_$i.sv" $i
+}
+
+# Add source files
+add_files "$hw_dir/hdl/mmu"
+add_files "$hw_dir/hdl/mux"
+add_files "$hw_dir/hdl/pkg"
+add_files "$hw_dir/hdl/regs"
+add_files "$hw_dir/hdl/slave"
+add_files "$hw_dir/hdl/util"
+
+if {$cfg(en_ddr) eq 1} {
+    #add_files "$hw_dir/hdl/cdma"
+    add_files "$hw_dir/hdl/cdma_unaglined"
+}
+
+if {$cfg(en_fv) eq 1} {
+    add_files "$hw_dir/hdl/network"
+}
+
+# Add wrappers and user logic
+add_files "$proj_dir/hdl"
+
+# Top level
+set_property "top" "top" [current_fileset]
+
+# Constraints
+add_files -norecurse -fileset [get_filesets constrs_1] "$hw_dir/constraints/$cfg(fdev)"
+
+# Create a project-local constraint file to take debugging constraints that we
+# don't want to propagate to the repository.
+file mkdir "$proj_dir/$project.srcs/constrs_1"
+close [ open "$proj_dir/$project.srcs/constrs_1/local.xdc" w ]
+
+set_property target_constrs_file "$proj_dir/$project.srcs/constrs_1/local.xdc" [current_fileset -constrset]
+
+########################################################################################################
+# IPs
+########################################################################################################
+source "$scripts_dir/ip_inst/base_infrastructure.tcl" -notrace
+
+########################################################################################################
+# RDMA
+########################################################################################################
+if {$cfg(en_fv) eq 1} {
+    add_files $ip_dir/SmartCamCtl.dcp
+
+    source "$scripts_dir/ip_inst/network_infrastructure.tcl"
+    source "$scripts_dir/ip_inst/network_ultraplus.tcl"
+    source "$scripts_dir/ip_inst/network_stack.tcl"
+}
+
+########################################################################################################
+# STATIC LAYER
+########################################################################################################
+source "$scripts_dir/bd/cr_static_$cfg(fdev).tcl" -notrace
+
+cr_bd_design_static ""
+set_property SYNTH_CHECKPOINT_MODE "Hierarchical" [get_files design_static.bd ]
+
+########################################################################################################
+# DYNAMIC LAYER
+########################################################################################################
+# Create dynamic regions: one partition definition and one initial reconfigurable module per region
+if {$cfg(en_pr) eq 1} {
+    for {set i 0} {$i < $cfg(n_reg)} {incr i} {
+        create_partition_def -name "dynamic_$i" -module "design_user_wrapper_$i"
+        create_reconfig_module -name "design_user_wrapper_c0_$i" -partition_def [get_partition_defs "dynamic_$i" ] -define_from "design_user_wrapper_$i"
+        set obj [get_partition_defs "dynamic_$i"]
+        set_property -name "name" -value "dynamic_$i" -objects $obj
+        set_property -name "use_blackbox_stub" -value "1" -objects $obj
+    }
+
+    # Collect the cell:module pairs as a real list; no command string has to be assembled and eval'ed
+    set partitions [list]
+    for {set i 0} {$i < $cfg(n_reg)} {incr i} {
+        lappend partitions "inst_dynamic/inst_user_wrapper_$i:design_user_wrapper_c0_$i"
+    }
+    create_pr_configuration -name config_0 -partitions $partitions
+    set_property PR_CONFIGURATION config_0 [get_runs impl_1]
+    set_property STEPS.WRITE_BITSTREAM.ARGS.BIN_FILE true [get_runs impl_1]
+}
+
+
+
+
+
diff --git a/hw/scripts/util_scripts/cr_hdl.tcl b/hw/scripts/util_scripts/cr_hdl.tcl
new file mode 100644
index 00000000..401f6f6e
--- /dev/null
+++ b/hw/scripts/util_scripts/cr_hdl.tcl
@@ -0,0 +1,47 @@
+# Adds the block of code to generation script
+proc add_block {target} {
+    upvar #1 contents cont
+    upvar #1 template temp
+
+    set l_idx [lsearch $cont $target]
+    set curr [lrange $cont 0 [expr $l_idx-1]]
+    set cont [lrange $cont [expr $l_idx+1] end]
+
+    foreach s $curr {
+        set s_tmp [concat "append entity \"$s\\n\""]
+        lappend temp $s_tmp
+    }
+}
+
+# Create hdl from template
+proc create_hdl_script {f_in} {
+    set f [open $f_in]
+    set contents [split [read $f] "\n"]
+    close $f
+
+    # Template
+    set template {}
+
+    #
+    add_block "-- eof"
+
+    # Write out
+    set ent_write {}
+    append ent_write "lappend template \$entity\n"
+    append ent_write "set vho_file \[open hdl.v
w]\n"
+    append ent_write "foreach line \$template {\n"
+    append ent_write " puts \$vho_file \$line\n"
+    append ent_write "}\n"
+    append ent_write "close \$vho_file\n"
+    lappend template $ent_write
+
+    # Write the generated script, one template entry per line, to created.v
+    set out_file [open "created.v" w]
+
+    foreach line $template {
+        puts $out_file $line
+    }
+    close $out_file
+}
+
+create_hdl_script hdl.sv
\ No newline at end of file
diff --git a/hw/scripts/util_scripts/ila.tcl b/hw/scripts/util_scripts/ila.tcl
new file mode 100644
index 00000000..a8198546
--- /dev/null
+++ b/hw/scripts/util_scripts/ila.tcl
@@ -0,0 +1,39 @@
+# CMAC TX - ILA core ila_cmac_tx, 15 probes, storage qualification and advanced trigger enabled
+create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_cmac_tx
+set_property -dict [list CONFIG.C_PROBE10_WIDTH {32} CONFIG.C_PROBE9_WIDTH {32} CONFIG.C_PROBE8_WIDTH {32} CONFIG.C_PROBE7_WIDTH {32} CONFIG.C_PROBE5_WIDTH {4} CONFIG.C_NUM_OF_PROBES {15} CONFIG.Component_Name {ila_cmac_tx} CONFIG.C_EN_STRG_QUAL {1} CONFIG.C_ADV_TRIGGER {true} CONFIG.C_PROBE14_MU_CNT {2} CONFIG.C_PROBE13_MU_CNT {2} CONFIG.C_PROBE12_MU_CNT {2} CONFIG.C_PROBE11_MU_CNT {2} CONFIG.C_PROBE10_MU_CNT {2} CONFIG.C_PROBE9_MU_CNT {2} CONFIG.C_PROBE8_MU_CNT {2} CONFIG.C_PROBE7_MU_CNT {2} CONFIG.C_PROBE6_MU_CNT {2} CONFIG.C_PROBE5_MU_CNT {2} CONFIG.C_PROBE4_MU_CNT {2} CONFIG.C_PROBE3_MU_CNT {2} CONFIG.C_PROBE2_MU_CNT {2} CONFIG.C_PROBE1_MU_CNT {2} CONFIG.C_PROBE0_MU_CNT {2} CONFIG.ALL_PROBE_SAME_MU_CNT {2}] [get_ips ila_cmac_tx]
+
+# CMAC RX - ILA core ila_cmac_rx, 19 probes
+create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_cmac_rx
+set_property -dict [list CONFIG.C_PROBE17_WIDTH {6} CONFIG.C_PROBE16_WIDTH {7} CONFIG.C_PROBE15_WIDTH {20} CONFIG.C_PROBE14_WIDTH {32} CONFIG.C_PROBE13_WIDTH {32} CONFIG.C_PROBE12_WIDTH {32} CONFIG.C_PROBE11_WIDTH {32} CONFIG.C_PROBE10_WIDTH {80} CONFIG.C_PROBE9_WIDTH {3} CONFIG.C_PROBE5_WIDTH {4} CONFIG.C_NUM_OF_PROBES {19} CONFIG.Component_Name {ila_cmac_rx} CONFIG.C_EN_STRG_QUAL {1} CONFIG.C_ADV_TRIGGER {true} CONFIG.C_PROBE18_MU_CNT {2} CONFIG.C_PROBE17_MU_CNT {2} CONFIG.C_PROBE16_MU_CNT {2} CONFIG.C_PROBE15_MU_CNT {2} CONFIG.C_PROBE14_MU_CNT {2} CONFIG.C_PROBE13_MU_CNT {2} CONFIG.C_PROBE12_MU_CNT {2} CONFIG.C_PROBE11_MU_CNT {2} CONFIG.C_PROBE10_MU_CNT {2} CONFIG.C_PROBE9_MU_CNT {2} CONFIG.C_PROBE8_MU_CNT {2} CONFIG.C_PROBE7_MU_CNT {2} CONFIG.C_PROBE6_MU_CNT {2} CONFIG.C_PROBE5_MU_CNT {2} CONFIG.C_PROBE4_MU_CNT {2} CONFIG.C_PROBE3_MU_CNT {2} CONFIG.C_PROBE2_MU_CNT {2} CONFIG.C_PROBE1_MU_CNT {2} CONFIG.C_PROBE0_MU_CNT {2} CONFIG.ALL_PROBE_SAME_MU_CNT {2}] [get_ips ila_cmac_rx]
+
+# NETWORK STACK - ILA core ila_network_stack, 36 probes
+create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_network_stack
+set_property -dict [list CONFIG.C_PROBE35_WIDTH {256} CONFIG.C_PROBE30_WIDTH {256} CONFIG.C_PROBE29_WIDTH {512} CONFIG.C_PROBE21_WIDTH {4} CONFIG.C_PROBE19_WIDTH {28} CONFIG.C_PROBE18_WIDTH {48} CONFIG.C_PROBE14_WIDTH {4} CONFIG.C_PROBE12_WIDTH {28} CONFIG.C_PROBE11_WIDTH {48} CONFIG.C_NUM_OF_PROBES {36} CONFIG.Component_Name {ila_network_stack} CONFIG.C_EN_STRG_QUAL {1} CONFIG.C_ADV_TRIGGER {true} CONFIG.C_PROBE35_MU_CNT {2} CONFIG.C_PROBE34_MU_CNT {2} CONFIG.C_PROBE33_MU_CNT {2} CONFIG.C_PROBE32_MU_CNT {2} CONFIG.C_PROBE31_MU_CNT {2} CONFIG.C_PROBE30_MU_CNT {2} CONFIG.C_PROBE29_MU_CNT {2} CONFIG.C_PROBE28_MU_CNT {2} CONFIG.C_PROBE27_MU_CNT {2} CONFIG.C_PROBE26_MU_CNT {2} CONFIG.C_PROBE25_MU_CNT {2} CONFIG.C_PROBE24_MU_CNT {2} CONFIG.C_PROBE23_MU_CNT {2} CONFIG.C_PROBE22_MU_CNT {2} CONFIG.C_PROBE21_MU_CNT {2} CONFIG.C_PROBE20_MU_CNT {2} CONFIG.C_PROBE19_MU_CNT {2} CONFIG.C_PROBE18_MU_CNT {2} CONFIG.C_PROBE17_MU_CNT {2} CONFIG.C_PROBE16_MU_CNT {2} CONFIG.C_PROBE15_MU_CNT {2} CONFIG.C_PROBE14_MU_CNT {2} CONFIG.C_PROBE13_MU_CNT {2} CONFIG.C_PROBE12_MU_CNT {2} CONFIG.C_PROBE11_MU_CNT {2} CONFIG.C_PROBE10_MU_CNT {2} CONFIG.C_PROBE9_MU_CNT {2} CONFIG.C_PROBE8_MU_CNT {2} CONFIG.C_PROBE7_MU_CNT {2} CONFIG.C_PROBE6_MU_CNT {2} CONFIG.C_PROBE5_MU_CNT {2} CONFIG.C_PROBE4_MU_CNT {2} CONFIG.C_PROBE3_MU_CNT {2} CONFIG.C_PROBE2_MU_CNT {2} CONFIG.C_PROBE1_MU_CNT {2} CONFIG.C_PROBE0_MU_CNT {2} CONFIG.ALL_PROBE_SAME_MU_CNT {2}] [get_ips ila_network_stack]
+
+# CDMA RD unaligned - ILA core ila_ddr_rd, 12 probes
+create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_ddr_rd
+set_property -dict [list CONFIG.C_PROBE10_WIDTH {512} CONFIG.C_PROBE9_WIDTH {64} CONFIG.C_PROBE5_WIDTH {28} CONFIG.C_PROBE4_WIDTH {28} CONFIG.C_PROBE3_WIDTH {23} CONFIG.C_PROBE2_WIDTH {23} CONFIG.C_NUM_OF_PROBES {12} CONFIG.Component_Name {ila_ddr_rd} CONFIG.C_EN_STRG_QUAL {1} CONFIG.C_ADV_TRIGGER {true} CONFIG.C_PROBE11_MU_CNT {2} CONFIG.C_PROBE10_MU_CNT {2} CONFIG.C_PROBE9_MU_CNT {2} CONFIG.C_PROBE8_MU_CNT {2} CONFIG.C_PROBE7_MU_CNT {2} CONFIG.C_PROBE6_MU_CNT {2} CONFIG.C_PROBE5_MU_CNT {2} CONFIG.C_PROBE4_MU_CNT {2} CONFIG.C_PROBE3_MU_CNT {2} CONFIG.C_PROBE2_MU_CNT {2} CONFIG.C_PROBE1_MU_CNT {2} CONFIG.C_PROBE0_MU_CNT {2} CONFIG.ALL_PROBE_SAME_MU_CNT {2}] [get_ips ila_ddr_rd]
+
+# CDMA WR unaligned - ILA core ila_ddr_wr, 11 probes
+create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_ddr_wr
+set_property -dict [list CONFIG.C_PROBE9_WIDTH {512} CONFIG.C_PROBE8_WIDTH {64} CONFIG.C_PROBE4_WIDTH {28} CONFIG.C_PROBE3_WIDTH {28} CONFIG.C_PROBE2_WIDTH {23} CONFIG.C_PROBE1_WIDTH {23} CONFIG.C_PROBE0_WIDTH {3} CONFIG.C_NUM_OF_PROBES {11} CONFIG.Component_Name {ila_ddr_wr} CONFIG.C_EN_STRG_QUAL {1} CONFIG.C_ADV_TRIGGER {true} CONFIG.C_PROBE10_MU_CNT {2} CONFIG.C_PROBE9_MU_CNT {2} CONFIG.C_PROBE8_MU_CNT {2} CONFIG.C_PROBE7_MU_CNT {2} CONFIG.C_PROBE6_MU_CNT {2} CONFIG.C_PROBE5_MU_CNT {2} CONFIG.C_PROBE4_MU_CNT {2} CONFIG.C_PROBE3_MU_CNT {2} CONFIG.C_PROBE2_MU_CNT {2} CONFIG.C_PROBE1_MU_CNT {2} CONFIG.C_PROBE0_MU_CNT {2} CONFIG.ALL_PROBE_SAME_MU_CNT {2}] [get_ips ila_ddr_wr]
+
+# RD CREDITS - ILA core ila_rd_cred, 13 probes
+create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_rd_cred
+set_property -dict [list CONFIG.C_PROBE12_WIDTH {16} CONFIG.C_PROBE11_WIDTH {16} CONFIG.C_PROBE6_WIDTH {23} CONFIG.C_PROBE5_WIDTH {23} CONFIG.C_PROBE4_WIDTH {8} CONFIG.C_PROBE3_WIDTH {28} CONFIG.C_NUM_OF_PROBES {13} CONFIG.Component_Name {ila_rd_cred} CONFIG.C_EN_STRG_QUAL {1} CONFIG.C_ADV_TRIGGER {true} CONFIG.C_PROBE12_MU_CNT {2} CONFIG.C_PROBE11_MU_CNT {2} CONFIG.C_PROBE10_MU_CNT {2} CONFIG.C_PROBE9_MU_CNT {2} CONFIG.C_PROBE8_MU_CNT {2} CONFIG.C_PROBE7_MU_CNT {2} CONFIG.C_PROBE6_MU_CNT {2} CONFIG.C_PROBE5_MU_CNT {2} CONFIG.C_PROBE4_MU_CNT {2} CONFIG.C_PROBE3_MU_CNT {2} CONFIG.C_PROBE2_MU_CNT {2} CONFIG.C_PROBE1_MU_CNT {2} CONFIG.C_PROBE0_MU_CNT {2} CONFIG.ALL_PROBE_SAME_MU_CNT {2}] [get_ips ila_rd_cred]
+
+# WR CREDITS - ILA core ila_wr_cred, 10 probes
+create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_wr_cred
+set_property -dict [list CONFIG.C_PROBE9_WIDTH {16} CONFIG.C_PROBE8_WIDTH {16} CONFIG.C_PROBE6_WIDTH {23} CONFIG.C_PROBE5_WIDTH {23} CONFIG.C_PROBE2_WIDTH {28} CONFIG.C_NUM_OF_PROBES {10} CONFIG.Component_Name {ila_wr_cred} CONFIG.C_EN_STRG_QUAL {1} CONFIG.C_ADV_TRIGGER {true} CONFIG.C_PROBE9_MU_CNT {2} CONFIG.C_PROBE8_MU_CNT {2} CONFIG.C_PROBE7_MU_CNT {2} CONFIG.C_PROBE6_MU_CNT {2} CONFIG.C_PROBE5_MU_CNT {2} CONFIG.C_PROBE4_MU_CNT {2} CONFIG.C_PROBE3_MU_CNT {2} CONFIG.C_PROBE2_MU_CNT {2} CONFIG.C_PROBE1_MU_CNT {2} CONFIG.C_PROBE0_MU_CNT {2} CONFIG.ALL_PROBE_SAME_MU_CNT {2}] [get_ips ila_wr_cred]
+
+# TLB RD - ILA core ila_fsm_rd, 22 probes
+create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_fsm_rd
+set_property -dict [list CONFIG.C_PROBE21_WIDTH {16} CONFIG.C_PROBE14_WIDTH {40} CONFIG.C_PROBE13_WIDTH {40} CONFIG.C_PROBE12_WIDTH {28} CONFIG.C_PROBE8_WIDTH {28} CONFIG.C_PROBE7_WIDTH {48} CONFIG.C_PROBE6_WIDTH {64} CONFIG.C_PROBE5_WIDTH {64} CONFIG.C_PROBE2_WIDTH {48} CONFIG.C_PROBE1_WIDTH {28} CONFIG.C_PROBE0_WIDTH {4} CONFIG.C_NUM_OF_PROBES {22} CONFIG.Component_Name {ila_fsm_rd} CONFIG.C_EN_STRG_QUAL {1} CONFIG.C_ADV_TRIGGER {true} CONFIG.C_PROBE21_MU_CNT {2} CONFIG.C_PROBE20_MU_CNT {2} CONFIG.C_PROBE19_MU_CNT {2} CONFIG.C_PROBE18_MU_CNT {2} CONFIG.C_PROBE17_MU_CNT {2} CONFIG.C_PROBE16_MU_CNT {2} CONFIG.C_PROBE15_MU_CNT {2} CONFIG.C_PROBE14_MU_CNT {2} CONFIG.C_PROBE13_MU_CNT {2} CONFIG.C_PROBE12_MU_CNT {2} CONFIG.C_PROBE11_MU_CNT {2} CONFIG.C_PROBE10_MU_CNT {2} CONFIG.C_PROBE9_MU_CNT {2} CONFIG.C_PROBE8_MU_CNT {2} CONFIG.C_PROBE7_MU_CNT {2} CONFIG.C_PROBE6_MU_CNT {2} CONFIG.C_PROBE5_MU_CNT {2} CONFIG.C_PROBE4_MU_CNT {2} CONFIG.C_PROBE3_MU_CNT {2} CONFIG.C_PROBE2_MU_CNT {2} CONFIG.C_PROBE1_MU_CNT {2} CONFIG.C_PROBE0_MU_CNT {2} CONFIG.ALL_PROBE_SAME_MU_CNT {2}] [get_ips ila_fsm_rd]
+
+# TLB WR - ILA core ila_fsm_wr, 25 probes
+create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_fsm_wr
+set_property -dict [list CONFIG.C_PROBE24_WIDTH {16} CONFIG.C_PROBE14_WIDTH {40} CONFIG.C_PROBE13_WIDTH {40} CONFIG.C_PROBE12_WIDTH {28} CONFIG.C_PROBE8_WIDTH {28} CONFIG.C_PROBE7_WIDTH {48} CONFIG.C_PROBE6_WIDTH {64} CONFIG.C_PROBE5_WIDTH {64} CONFIG.C_PROBE2_WIDTH {48} CONFIG.C_PROBE1_WIDTH {28} CONFIG.C_PROBE0_WIDTH {4} CONFIG.C_NUM_OF_PROBES {25} CONFIG.Component_Name {ila_fsm_wr} CONFIG.C_EN_STRG_QUAL {1} CONFIG.C_ADV_TRIGGER {true} CONFIG.C_PROBE24_MU_CNT {2} CONFIG.C_PROBE23_MU_CNT {2} CONFIG.C_PROBE22_MU_CNT {2} CONFIG.C_PROBE21_MU_CNT {2} CONFIG.C_PROBE20_MU_CNT {2} CONFIG.C_PROBE19_MU_CNT {2} CONFIG.C_PROBE18_MU_CNT {2} CONFIG.C_PROBE17_MU_CNT {2} CONFIG.C_PROBE16_MU_CNT {2} CONFIG.C_PROBE15_MU_CNT {2} CONFIG.C_PROBE14_MU_CNT {2} CONFIG.C_PROBE13_MU_CNT {2} CONFIG.C_PROBE12_MU_CNT {2} CONFIG.C_PROBE11_MU_CNT {2} CONFIG.C_PROBE10_MU_CNT {2} CONFIG.C_PROBE9_MU_CNT {2} CONFIG.C_PROBE8_MU_CNT {2} CONFIG.C_PROBE7_MU_CNT {2} CONFIG.C_PROBE6_MU_CNT {2} CONFIG.C_PROBE5_MU_CNT {2} CONFIG.C_PROBE4_MU_CNT {2} CONFIG.C_PROBE3_MU_CNT {2} CONFIG.C_PROBE2_MU_CNT {2} CONFIG.C_PROBE1_MU_CNT {2} CONFIG.C_PROBE0_MU_CNT {2} CONFIG.ALL_PROBE_SAME_MU_CNT {2}] [get_ips ila_fsm_wr]
+
+# LINK - ILA core ila_link, 2 probes
+create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_link
+set_property -dict [list CONFIG.C_PROBE1_WIDTH {4} CONFIG.C_NUM_OF_PROBES {2} CONFIG.Component_Name {ila_link} CONFIG.C_EN_STRG_QUAL {1} CONFIG.C_ADV_TRIGGER {true} CONFIG.C_PROBE1_MU_CNT {2} CONFIG.C_PROBE0_MU_CNT {2} CONFIG.ALL_PROBE_SAME_MU_CNT {2}] [get_ips ila_link]
\ No newline at end of file
diff --git a/hw/scripts/util_scripts/load_jtag.tcl b/hw/scripts/util_scripts/load_jtag.tcl
new file mode 100644
index 00000000..b7a8cc35
--- /dev/null
+++ b/hw/scripts/util_scripts/load_jtag.tcl
@@ -0,0 +1,4 @@
+set_property PROBES.FILE {/tmp/bstreams/top.ltx} [get_hw_devices xcu250_0]
+set_property FULL_PROBES.FILE {/tmp/bstreams/top.ltx} [get_hw_devices xcu250_0]
+set_property PROGRAM.FILE {/tmp/bstreams/top.bit} [get_hw_devices xcu250_0]
+program_hw_devices [get_hw_devices xcu250_0]
\ No newline at end of file
diff --git a/hw/scripts/wr_hdl/wr_hdl_dynamic.tcl b/hw/scripts/wr_hdl/wr_hdl_dynamic.tcl
new file mode 100644
index 00000000..febcd76a
--- /dev/null
+++ b/hw/scripts/wr_hdl/wr_hdl_dynamic.tcl
@@ -0,0 +1,830 @@
+#########################################################################################
+# Dynamic wrapper generator. NOTE(review): shell.tcl.in calls wr_hdl_dynamic, not wr_hdl_dynamic_wrapper - confirm
+#########################################################################################
+proc wr_hdl_dynamic_wrapper {f_out c_reg} {
+    upvar #0 cfg cnfg
+
+    set template {}
+    set entity {}
+    append entity "`timescale 1ns / 1ps\n"
+    append entity "\n"
+    append entity "import lynxTypes::*;\n"
+    append entity "\n"
+    append entity "`include \"axi_macros.svh\"\n"
+    append entity "`include \"lynx_macros.svh\"\n"
+    append entity "\n"
+    append entity "module design_dynamic_wrapper #(\n"
+    append entity " parameter integer ID_DYN = 0\n"
+    append entity ") (\n"
+    append entity " // Clock and reset\n"
+    append entity " input logic sys_rst,\n"
+    append entity " input logic aresetn,\n"
+    append entity " input logic aclk,\n"
+    append entity "\n"
+    append entity " // AXI4 Lite control\n"
+    append entity " AXI4L.s axi_ctrl \[N_REGIONS],\n"
+    append entity " \n"
+    if {$cnfg(en_avx) eq 1} {
+        append entity " // AXI4 AVX control\n"
+        append entity " AXI4.s axim_ctrl
\[N_REGIONS],\n" + append entity " \n" + } + if {$cnfg(en_ddr) eq 1} { + append entity " // AXI4 DDR \n" + append entity " AXI4.m axi_ddr_in \[N_DDR_CHAN*2],\n" + append entity "\n" + } + if {$cnfg(en_strm) eq 1} { + append entity " // AXI4S host\n" + append entity " AXI4S.m axis_host_in,\n" + append entity " AXI4S.s axis_host_out,\n" + append entity " xdmaIntf.m host_xdma_req,\n" + append entity " \n" + } + if {$cnfg(en_ddr) eq 1} { + append entity " // AXI4S card\n" + append entity " AXI4S.m axis_card_in,\n" + append entity " AXI4S.s axis_card_out,\n" + append entity " xdmaIntf.m card_xdma_req,\n" + append entity " \n" + } + if {$cnfg(en_fv) eq 1} { + append entity " metaIntf.m rdma_req_host \[N_REGIONS],\n" + if {$cnfg(en_fvv) eq 1} { + append entity " metaIntf.m rdma_req_card \[N_REGIONS],\n" + append entity " metaIntf.s rdma_req_fv \[N_REGIONS],\n" + } + append entity " reqIntf.s rdma_rd_cmd \[N_REGIONS],\n" + append entity " reqIntf.s rdma_wr_cmd \[N_REGIONS],\n" + append entity " AXI4S.m axis_rdma_rd_data \[N_REGIONS],\n" + append entity " AXI4S.s axis_rdma_wr_data \[N_REGIONS],\n" + append entity " \n" + } + if {$cnfg(en_tcp) eq 1} { + append entity " metaIntf.s tcp_req_in \[N_REGIONS],\n" + append entity " metaIntf.m tcp_req_out \[N_REGIONS],\n" + } + append entity " // IRQ\n" + append entity " output logic\[N_REGIONS-1:0] usr_irq,\n" + append entity "\n" + append entity " // BSCAN\n" + append entity " input logic S_BSCAN_drck,\n" + append entity " input logic S_BSCAN_shift,\n" + append entity " input logic S_BSCAN_tdi,\n" + append entity " input logic S_BSCAN_update,\n" + append entity " input logic S_BSCAN_sel,\n" + append entity " output logic S_BSCAN_tdo,\n" + append entity " input logic S_BSCAN_tms,\n" + append entity " input logic S_BSCAN_tck,\n" + append entity " input logic S_BSCAN_runtest,\n" + append entity " input logic S_BSCAN_reset,\n" + append entity " input logic S_BSCAN_capture,\n" + append entity " input logic S_BSCAN_bscanid_en\n" + 
append entity ");\n" + append entity "\n" + append entity "// Control lTLB\n" + append entity "AXI4L axi_ctrl_lTlb \[N_REGIONS] ();\n" + append entity "\n" + append entity "// Control sTLB\n" + append entity "AXI4L axi_ctrl_sTlb \[N_REGIONS] ();\n" + append entity "\n" + append entity "// Control config\n" + append entity "AXI4L axi_ctrl_cnfg \[N_REGIONS] ();\n" + append entity "\n" + append entity "// Control user logic\n" + append entity "AXI4L axi_ctrl_user \[N_REGIONS] ();\n" + append entity "\n" + append entity "// Decoupling signals\n" + append entity "logic \[N_REGIONS-1:0] decouple;\n" + append entity "\n" + if {$cnfg(en_strm) eq 1} { + append entity "// ----------------------------------------------------------------------\n" + append entity "// HOST \n" + append entity "// ----------------------------------------------------------------------\n" + append entity "// XDMA host sync\n" + append entity "dmaIntf rdXDMA_host();\n" + append entity "dmaIntf wrXDMA_host();\n" + append entity "\n" + append entity "assign host_xdma_req.h2c_ctl = {{11{1'b0}}, rdXDMA_host.req.ctl, {2{1'b0}}, {2{rdXDMA_host.req.ctl}}};\n" + append entity "assign host_xdma_req.h2c_addr = rdXDMA_host.req.paddr;\n" + append entity "assign host_xdma_req.h2c_len = rdXDMA_host.req.len;\n" + append entity "assign host_xdma_req.h2c_valid = rdXDMA_host.valid;\n" + append entity "\n" + append entity "assign host_xdma_req.c2h_ctl = {{11{1'b0}}, wrXDMA_host.req.ctl, {2{1'b0}}, {2{wrXDMA_host.req.ctl}}};\n" + append entity "assign host_xdma_req.c2h_addr = wrXDMA_host.req.paddr;\n" + append entity "assign host_xdma_req.c2h_len = wrXDMA_host.req.len;\n" + append entity "assign host_xdma_req.c2h_valid = wrXDMA_host.valid;\n" + append entity "\n" + append entity "assign rdXDMA_host.ready = host_xdma_req.h2c_ready;\n" + append entity "assign wrXDMA_host.ready = host_xdma_req.c2h_ready;\n" + append entity "assign rdXDMA_host.done = host_xdma_req.h2c_status\[1];\n" + append entity "assign wrXDMA_host.done 
= host_xdma_req.c2h_status\[1];\n" + append entity "\n" + append entity "// Slice host 0 \n" + append entity "// ----------------------------------------------------------------------\n" + append entity "AXI4S axis_host_s0_in();\n" + append entity "AXI4S axis_host_s0_out();\n" + append entity "axis_reg_array #(.N_STAGES(N_REG_HOST_S0)) inst_host_reg_s0_out (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_host_out), .axis_out(axis_host_s0_out));\n" + append entity "axis_reg_array #(.N_STAGES(N_REG_HOST_S0)) inst_host_reg_s0_in (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_host_s0_in), .axis_out(axis_host_in));\n" + append entity "\n" + append entity "// Multiplexing \n" + append entity "// ----------------------------------------------------------------------\n" + append entity "AXI4S axis_host_s1_in \[N_REGIONS] ();\n" + append entity "AXI4S axis_host_s1_out \[N_REGIONS] ();\n" + if {$cnfg(n_reg) > 1} { + append entity "muxUserIntf mux_host_rd_user ();\n" + append entity "muxUserIntf mux_host_wr_user ();\n" + append entity "axis_mux_user_sink (.aclk(aclk), .aresetn(aresetn), .mux(mux_host_wr_user), .axis_in(axis_host_s1_in), .axis_out(axis_host_s0_in));\n" + append entity "axis_mux_user_src (.aclk(aclk), .aresetn(aresetn), .mux(mux_host_rd_user), .axis_in(axis_host_s0_out), .axis_out(axis_host_s1_out));\n" + append entity "\n" + } else { + append entity "`AXIS_ASSIGN(axis_host_s0_out, axis_host_s1_out\[0])\n" + append entity "`AXIS_ASSIGN(axis_host_s1_in\[0], axis_host_s0_in)\n" + append entity "\n" + } + append entity "// Credits \n" + append entity "// ----------------------------------------------------------------------\n" + append entity "AXI4SR axis_host_s2_in \[N_REGIONS] ();\n" + append entity "AXI4SR axis_host_s2_out \[N_REGIONS] ();\n" + append entity "logic \[N_REGIONS-1:0] rxfer_host;\n" + append entity "logic \[N_REGIONS-1:0] wxfer_host;\n" + append entity "logic \[N_REGIONS-1:0]\[3:0] rd_dest_host;\n" + append entity "for(genvar i = 0; i < N_REGIONS; 
i++) begin\n" + append entity " data_queue_credits_src inst_cred_que_host_out (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_host_s1_out\[i]), .axis_out(axis_host_s2_out\[i]), .rd_dest(rd_dest_host\[i]));\n" + append entity " data_queue_credits_sink inst_cred_que_host_in (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_host_s2_in\[i]), .axis_out(axis_host_s1_in\[i]));\n" + append entity " assign rxfer_host\[i] = axis_host_s2_out\[i].tvalid & axis_host_s2_out\[i].tready;\n" + append entity " assign wxfer_host\[i] = axis_host_s2_in\[i].tvalid & axis_host_s2_in\[i].tready;\n" + append entity "end\n" + append entity "\n" + append entity "// Slice host 1 \n" + append entity "// ----------------------------------------------------------------------\n" + append entity "AXI4SR axis_host_s3_in \[N_REGIONS] ();\n" + append entity "AXI4SR axis_host_s3_out \[N_REGIONS] ();\n" + append entity "for(genvar i = 0; i < N_REGIONS; i++) begin\n" + append entity " axisr_reg_array #(.N_STAGES(N_REG_HOST_S1)) inst_host_reg_s1_out (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_host_s2_out\[i]), .axis_out(axis_host_s3_out\[i]));\n" + append entity " axisr_reg_array #(.N_STAGES(N_REG_HOST_S1)) inst_host_reg_s1_in (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_host_s3_in\[i]), .axis_out(axis_host_s2_in\[i]));\n" + append entity "end\n" + append entity "\n" + append entity "// Decoupling \n" + append entity "// ----------------------------------------------------------------------\n" + append entity "AXI4SR axis_host_dcpl_in \[N_REGIONS] ();\n" + append entity "AXI4SR axis_host_dcpl_out \[N_REGIONS] ();\n" + if {$cnfg(en_pr) eq 1} { + append entity "axisr_decoupler (.decouple(decouple), .axis_in(axis_host_s3_out), .axis_out(axis_host_dcpl_out));\n" + append entity "axisr_decoupler (.decouple(decouple), .axis_in(axis_host_dcpl_in), .axis_out(axis_host_s3_in));\n" + } else { + append entity "for(genvar i = 0; i < N_REGIONS; i++) begin\n" + append entity " `AXISR_ASSIGN(axis_host_s3_out\[i], 
axis_host_dcpl_out\[i])\n" + append entity " `AXISR_ASSIGN(axis_host_dcpl_in\[i], axis_host_s3_in\[i])\n" + append entity "end\n" + } + append entity "\n" + } + if {$cnfg(en_ddr) eq 1} { + append entity "// ----------------------------------------------------------------------\n" + append entity "// CARD \n" + append entity "// ----------------------------------------------------------------------\n" + append entity "// XDMA card sync\n" + append entity "dmaIntf rdXDMA_sync();\n" + append entity "dmaIntf wrXDMA_sync();\n" + append entity "\n" + append entity "assign card_xdma_req.h2c_ctl = {{11{1'b0}}, rdXDMA_sync.req.ctl, {2{1'b0}}, {2{rdXDMA_sync.req.ctl}}};\n" + append entity "assign card_xdma_req.h2c_addr = rdXDMA_sync.req.paddr;\n" + append entity "assign card_xdma_req.h2c_len = rdXDMA_sync.req.len;\n" + append entity "assign card_xdma_req.h2c_valid = rdXDMA_sync.valid;\n" + append entity "\n" + append entity "assign card_xdma_req.c2h_ctl = {{11{1'b0}}, wrXDMA_sync.req.ctl, {2{1'b0}}, {2{wrXDMA_sync.req.ctl}}};\n" + append entity "assign card_xdma_req.c2h_addr = wrXDMA_sync.req.paddr;\n" + append entity "assign card_xdma_req.c2h_len = wrXDMA_sync.req.len;\n" + append entity "assign card_xdma_req.c2h_valid = wrXDMA_sync.valid;\n" + append entity "\n" + append entity "assign rdXDMA_sync.ready = card_xdma_req.h2c_ready;\n" + append entity "assign wrXDMA_sync.ready = card_xdma_req.c2h_ready;\n" + append entity "assign rdXDMA_sync.done = card_xdma_req.h2c_status\[1];\n" + append entity "assign wrXDMA_sync.done = card_xdma_req.c2h_status\[1];\n" + append entity "\n" + append entity "// Slice card 0 \n" + append entity "// ----------------------------------------------------------------------\n" + append entity "AXI4S axis_card_s0_in();\n" + append entity "AXI4S axis_card_s0_out();\n" + append entity "axis_reg_array #(.N_STAGES(N_REG_CARD_S0)) inst_card_reg_s0_out (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_card_out), .axis_out(axis_card_s0_out));\n" + append 
entity "axis_reg_array #(.N_STAGES(N_REG_CARD_S0)) inst_card_reg_s0_in (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_card_s0_in), .axis_out(axis_card_in));\n" + append entity "\n" + append entity "// Card memory\n" + append entity "// ----------------------------------------------------------------------\n" + append entity "dmaIntf rdCDMA_sync ();\n" + append entity "dmaIntf wrCDMA_sync ();\n" + append entity "dmaIntf rdCDMA_sync_adj \[N_DDR_CHAN] ();\n" + append entity "dmaIntf wrCDMA_sync_adj \[N_DDR_CHAN] ();\n" + append entity "dmaIntf rdCDMA_card ();\n" + append entity "dmaIntf wrCDMA_card ();\n" + append entity "dmaIntf rdCDMA_card_adj \[N_DDR_CHAN] ();\n" + append entity "dmaIntf wrCDMA_card_adj \[N_DDR_CHAN] ();\n" + append entity "\n" + append entity "AXI4S #(.AXI4S_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) axis_card_s1_in();\n" + append entity "AXI4S #(.AXI4S_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) axis_card_s1_out();\n" + append entity "AXI4S axis_ddr_in \[N_DDR_CHAN*2] ();\n" + append entity "AXI4S axis_ddr_out \[N_DDR_CHAN*2] ();\n" + append entity "\n" + if {$cnfg(n_ddr_chan) > 1} { + append entity "axis_mux_ddr_host (.aclk(aclk), .aresetn(aresetn), .axis_in_host(axis_card_s0_out), .axis_out_host(axis_card_s0_in), .axis_in_card(axis_ddr_out\[0+:N_DDR_CHAN]), .axis_out_card(axis_ddr_in\[0+:N_DDR_CHAN]));\n" + append entity "axis_mux_ddr_user (.aclk(aclk), .aresetn(aresetn), .axis_in_user(axis_card_s1_in), .axis_out_user(axis_card_s1_out), .axis_in_card(axis_ddr_out\[N_DDR_CHAN+:N_DDR_CHAN]), .axis_out_card(axis_ddr_in\[N_DDR_CHAN+:N_DDR_CHAN]));\n" + } else { + append entity "`AXIS_ASSIGN(axis_card_s0_out, axis_ddr_in\[0])\n" + append entity "`AXIS_ASSIGN(axis_ddr_out\[0], axis_card_s0_in)\n" + append entity "`AXIS_ASSIGN(axis_card_s1_in, axis_ddr_in\[N_DDR_CHAN])\n" + append entity "`AXIS_ASSIGN(axis_ddr_out\[N_DDR_CHAN], axis_card_s1_out)\n" + } + append entity "\n" + if {$cnfg(n_ddr_chan) > 1} { + append entity "axis_mux_cdma (.aclk(aclk), 
.aresetn(aresetn), .CDMA(rdCDMA_sync), .CDMA_adj(rdCDMA_sync_adj));\n" + append entity "axis_mux_cdma (.aclk(aclk), .aresetn(aresetn), .CDMA(wrCDMA_sync), .CDMA_adj(wrCDMA_sync_adj));\n" + append entity "axis_mux_cdma (.aclk(aclk), .aresetn(aresetn), .CDMA(rdCDMA_card), .CDMA_adj(rdCDMA_card_adj));\n" + append entity "axis_mux_cdma (.aclk(aclk), .aresetn(aresetn), .CDMA(wrCDMA_card), .CDMA_adj(wrCDMA_card_adj));\n" + } else { + append entity "`DMA_REQ_ASSIGN(rdCDMA_sync, rdCDMA_sync_adj\[0])\n" + append entity "`DMA_REQ_ASSIGN(wrCDMA_sync, wrCDMA_sync_adj\[0])\n" + append entity "`DMA_REQ_ASSIGN(rdCDMA_card, rdCDMA_card_adj\[0])\n" + append entity "`DMA_REQ_ASSIGN(wrCDMA_card, wrCDMA_card_adj\[0])\n" + + } + append entity "\n" + append entity "for(genvar i = 0; i < N_DDR_CHAN; i++) begin\n" + append entity " // CDMA sync\n" + append entity " cdma inst_cdma_sync_engine (\n" + append entity " .aclk(aclk),\n" + append entity " .aresetn(aresetn),\n" + append entity " .rdCDMA(rdCDMA_sync_adj\[i]),\n" + append entity " .wrCDMA(wrCDMA_sync_adj\[i]),\n" + append entity " .axi_ddr_in(axi_ddr_in\[i]),\n" + append entity " .axis_ddr_in(axis_ddr_in\[i]),\n" + append entity " .axis_ddr_out(axis_ddr_out\[i])\n" + append entity " );\n" + append entity "\n" + append entity " // CDMA user\n" + append entity " cdma inst_cdma_user_engine (\n" + append entity " .aclk(aclk),\n" + append entity " .aresetn(aresetn),\n" + append entity " .rdCDMA(rdCDMA_card_adj\[i]),\n" + append entity " .wrCDMA(wrCDMA_card_adj\[i]),\n" + append entity " .axi_ddr_in(axi_ddr_in\[N_DDR_CHAN+i]),\n" + append entity " .axis_ddr_in(axis_ddr_in\[N_DDR_CHAN+i]),\n" + append entity " .axis_ddr_out(axis_ddr_out\[N_DDR_CHAN+i])\n" + append entity " );\n" + append entity "end\n" + append entity "\n" + append entity "// Slice card 1 \n" + append entity "// ----------------------------------------------------------------------\n" + append entity "AXI4S #(.AXI4S_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) 
axis_card_s2_in();\n" + append entity "AXI4S #(.AXI4S_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) axis_card_s2_out();\n" + append entity "axis_reg_array #(.N_STAGES(N_REG_CARD_S1), .DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) inst_card_reg_s1_out (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_card_s1_out), .axis_out(axis_card_s2_out));\n" + append entity "axis_reg_array #(.N_STAGES(N_REG_CARD_S1), .DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) inst_card_reg_s1_in (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_card_s2_in), .axis_out(axis_card_s1_in));\n" + append entity "\n" + append entity "// Multiplexing \n" + append entity "// ----------------------------------------------------------------------\n" + append entity "AXI4S #(.AXI4S_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) axis_card_s3_in \[N_REGIONS] ();\n" + append entity "AXI4S #(.AXI4S_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) axis_card_s3_out \[N_REGIONS] ();\n" + if {$cnfg(n_reg) > 1} { + append entity "muxUserIntf mux_card_rd_user ();\n" + append entity "muxUserIntf mux_card_wr_user ();\n" + append entity "axis_mux_user_sink #(.MUX_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) (.aclk(aclk), .aresetn(aresetn), .mux(mux_card_wr_user), .axis_in(axis_card_s3_in), .axis_out(axis_card_s2_in));\n" + append entity "axis_mux_user_src #(.MUX_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) (.aclk(aclk), .aresetn(aresetn), .mux(mux_card_rd_user), .axis_in(axis_card_s2_out), .axis_out(axis_card_s3_out));\n" + append entity "\n" + } else { + append entity "`AXIS_ASSIGN(axis_card_s2_out, axis_card_s3_out\[0])\n" + append entity "`AXIS_ASSIGN(axis_card_s3_in\[0], axis_card_s2_in)\n" + append entity "\n" + } + append entity "// Credits \n" + append entity "// ----------------------------------------------------------------------\n" + append entity "AXI4SR #(.AXI4S_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) axis_card_s4_in \[N_REGIONS] ();\n" + append entity "AXI4SR #(.AXI4S_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) axis_card_s4_out \[N_REGIONS] ();\n" + append entity "logic 
\[N_REGIONS-1:0] rxfer_card;\n" + append entity "logic \[N_REGIONS-1:0] wxfer_card;\n" + append entity "logic \[N_REGIONS-1:0]\[3:0] rd_dest_card;\n" + append entity "for(genvar i = 0; i < N_REGIONS; i++) begin\n" + append entity " data_queue_credits_src #(.DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) inst_cred_que_card_out (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_card_s3_out\[i]), .axis_out(axis_card_s4_out\[i]), .rd_dest(rd_dest_card\[i]));\n" + append entity " data_queue_credits_sink #(.DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) inst_cred_que_card_in (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_card_s4_in\[i]), .axis_out(axis_card_s3_in\[i]));\n" + append entity " assign rxfer_card\[i] = axis_card_s4_out\[i].tvalid & axis_card_s4_out\[i].tready;\n" + append entity " assign wxfer_card\[i] = axis_card_s4_in\[i].tvalid & axis_card_s4_in\[i].tready;\n" + append entity "end\n" + append entity "\n" + append entity "// Slice card 2 \n" + append entity "// ----------------------------------------------------------------------\n" + append entity "AXI4SR #(.AXI4S_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) axis_card_s5_in \[N_REGIONS] ();\n" + append entity "AXI4SR #(.AXI4S_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) axis_card_s5_out \[N_REGIONS] ();\n" + append entity "for(genvar i = 0; i < N_REGIONS; i++) begin\n" + append entity " axisr_reg_array #(.N_STAGES(N_REG_CARD_S2), .DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) inst_card_reg_s2_out (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_card_s4_out\[i]), .axis_out(axis_card_s5_out\[i]));\n" + append entity " axisr_reg_array #(.N_STAGES(N_REG_CARD_S2), .DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) inst_card_reg_s2_in (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_card_s5_in\[i]), .axis_out(axis_card_s4_in\[i]));\n" + append entity "end\n" + append entity "\n" + append entity "// Decoupling \n" + append entity "// ----------------------------------------------------------------------\n" + append entity "AXI4SR #(.AXI4S_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) 
axis_card_dcpl_in \[N_REGIONS] ();\n" + append entity "AXI4SR #(.AXI4S_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) axis_card_dcpl_out \[N_REGIONS] ();\n" + if {$cnfg(en_pr) eq 1} { + append entity "axisr_decoupler (.decouple(decouple), .axis_in(axis_card_s5_out), .axis_out(axis_card_dcpl_out));\n" + append entity "axisr_decoupler (.decouple(decouple), .axis_in(axis_card_dcpl_in), .axis_out(axis_card_s5_in));\n" + } else { + append entity "for(genvar i = 0; i < N_REGIONS; i++) begin\n" + append entity " `AXISR_ASSIGN(axis_card_s5_out\[i], axis_card_dcpl_out\[i])\n" + append entity " `AXISR_ASSIGN(axis_card_dcpl_in\[i], axis_card_s5_in\[i])\n" + append entity "end\n" + } + append entity "\n" + } + if {$cnfg(en_fv) eq 1} { + append entity "// ----------------------------------------------------------------------\n" + append entity "// RDMA \n" + append entity "// ----------------------------------------------------------------------\n" + if {$cnfg(en_fvv) eq 1} { + append entity "metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_s0_fv \[N_REGIONS] ();\n" + append entity "metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_s0_card \[N_REGIONS] ();\n" + append entity "\n" + append entity "for(genvar i = 0; i < N_REGIONS; i++) begin\n" + append entity " meta_reg_array #(.N_STAGES(N_REG_HOST_S0)) inst_reg_s0_rdma_fv (.aclk(aclk), .aresetn(aresetn), .meta_in(rdma_req_fv\[i]), .meta_out(rdma_req_s0_fv\[i]));\n" + append entity " network_req_parser #(.ID_REG(ID_DYN*N_REGIONS+i), .HOST(0)) inst_fv_parser (.aclk(aclk), .aresetn(aresetn), .req_in(rdma_req_s0_card\[i]), .req_out(rdma_req_card\[i]), .used());\n" + append entity "end\n" + append entity "\n" + } + append entity "AXI4S axis_rdma_s0_rd \[N_REGIONS] ();\n" + append entity "AXI4S axis_rdma_s0_wr \[N_REGIONS] ();\n" + append entity "reqIntf rdma_cmd_s0_rd \[N_REGIONS] ();\n" + append entity "reqIntf rdma_cmd_s0_wr \[N_REGIONS] ();\n" + append entity "\n" + append entity "for(genvar i = 0; i < N_REGIONS; i++) begin\n" + append entity " 
axis_reg_array #(.N_STAGES(N_REG_HOST_S0)) inst_reg_s0_rdma_rd (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_rdma_s0_rd\[i]), .axis_out(axis_rdma_rd_data\[i]));\n" + append entity " axis_reg_array #(.N_STAGES(N_REG_HOST_S0)) inst_reg_s0_rdma_wr (.aclk(aclk), .aresetn(aresetn), .axis_in(axis_rdma_wr_data\[i]), .axis_out(axis_rdma_s0_wr\[i]));\n" + append entity " req_reg_array #(.N_STAGES(N_REG_HOST_S0)) inst_reg_s0_rdma_cmd_rd (.aclk(aclk), .aresetn(aresetn), .req_in(rdma_rd_cmd\[i]), .req_out(rdma_cmd_s0_rd\[i]));\n" + append entity " req_reg_array #(.N_STAGES(N_REG_HOST_S0)) inst_reg_s0_rdma_cmd_wr (.aclk(aclk), .aresetn(aresetn), .req_in(rdma_wr_cmd\[i]), .req_out(rdma_cmd_s0_wr\[i]));\n" + append entity "end\n" + append entity "\n" + append entity "// Decoupling \n" + append entity "// ----------------------------------------------------------------------\n" + if {$cnfg(en_fv) eq 1} { + if {$cnfg(en_fvv) eq 1} { + append entity "metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_dcpl_fv \[N_REGIONS] ();\n" + append entity "metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_dcpl_card \[N_REGIONS] (); \n" + } + append entity "reqIntf rdma_cmd_dcpl_rd \[N_REGIONS] ();\n" + append entity "reqIntf rdma_cmd_dcpl_wr \[N_REGIONS] ();\n" + append entity "AXI4S axis_rdma_dcpl_wr \[N_REGIONS] ();\n" + append entity "AXI4S axis_rdma_dcpl_rd \[N_REGIONS] ();\n" + } + if {$cnfg(en_pr) eq 1} { + if {$cnfg(en_fvv) eq 1} { + append entity "meta_decoupler (.decouple(decouple), .meta_in(rdma_req_s0_fv), .meta_out(rdma_req_dcpl_fv));\n" + append entity "meta_decoupler (.decouple(decouple), .meta_in(rdma_req_dcpl_card), .meta_out(rdma_req_s0_card));\n" + } + append entity "req_decoupler (.decouple(decouple), .req_in(rdma_cmd_s0_rd), .req_out(rdma_cmd_dcpl_rd));\n" + append entity "req_decoupler (.decouple(decouple), .req_in(rdma_cmd_s0_wr), .req_out(rdma_cmd_dcpl_wr));\n" + append entity "axis_decoupler (.decouple(decouple), .axis_in(axis_rdma_s0_wr), .axis_out(axis_rdma_dcpl_wr));\n" + append
entity "axis_decoupler (.decouple(decouple), .axis_in(axis_rdma_dcpl_rd), .axis_out(axis_rdma_s0_rd));\n" + } else { + append entity "for(genvar i = 0; i < N_REGIONS; i++) begin\n" + if {$cnfg(en_fvv) eq 1} { + append entity " `META_ASSIGN(rdma_req_s0_fv\[i], rdma_req_dcpl_fv\[i])\n" + append entity " `META_ASSIGN(rdma_req_dcpl_card\[i], rdma_req_s0_card\[i])\n" + } + append entity " `REQ_ASSIGN(rdma_cmd_s0_rd\[i], rdma_cmd_dcpl_rd\[i])\n" + append entity " `REQ_ASSIGN(rdma_cmd_s0_wr\[i], rdma_cmd_dcpl_wr\[i])\n" + append entity " `AXIS_ASSIGN(axis_rdma_s0_wr\[i], axis_rdma_dcpl_wr\[i])\n" + append entity " `AXIS_ASSIGN(axis_rdma_dcpl_rd\[i], axis_rdma_s0_rd\[i])\n" + append entity "end\n" + } + append entity "\n" + } + append entity "// ----------------------------------------------------------------------\n" + append entity "// Rest of decoupling \n" + append entity "// ----------------------------------------------------------------------\n" + append entity "AXI4L axi_ctrl_dcpl \[N_REGIONS] ();\n" + if {$cnfg(en_bpss) eq 1} { + append entity "reqIntf rd_req_user \[N_REGIONS] ();\n" + append entity "reqIntf wr_req_user \[N_REGIONS] ();\n" + append entity "reqIntf rd_req_dcpl_user \[N_REGIONS] ();\n" + append entity "reqIntf wr_req_dcpl_user \[N_REGIONS] ();\n" + } + if {$cnfg(en_pr) eq 1} { + append entity "axil_decoupler inst_dcpl_ctrl (.decouple(decouple), .axi_in(axi_ctrl_user), .axi_out(axi_ctrl_dcpl));\n" + if {$cnfg(en_bpss) eq 1} { + append entity "req_decoupler (.decouple(decouple), .req_in(rd_req_dcpl_user), .req_out(rd_req_user));\n" + append entity "req_decoupler (.decouple(decouple), .req_in(wr_req_dcpl_user), .req_out(wr_req_user));\n" + } + } else { + append entity "for(genvar i = 0; i < N_REGIONS; i++) begin\n" + append entity " `AXIL_ASSIGN(axi_ctrl_user\[i], axi_ctrl_dcpl\[i])\n" + if {$cnfg(en_bpss) eq 1} { + append entity " `REQ_ASSIGN(rd_req_dcpl_user\[i], rd_req_user\[i])\n" + append entity " `REQ_ASSIGN(wr_req_dcpl_user\[i], 
wr_req_user\[i])\n" + } + append entity "end\n" + } + append entity "\n" + append entity "// ----------------------------------------------------------------------\n" + append entity "// MMU \n" + append entity "// ----------------------------------------------------------------------\n" + append entity "tlb_top #(\n" + append entity " .ID_DYN(ID_DYN)\n" + append entity ") inst_tlb_top (\n" + append entity " .aclk(aclk),\n" + append entity " .aresetn(aresetn),\n" + append entity " .axi_ctrl_lTlb(axi_ctrl_lTlb),\n" + append entity " .axi_ctrl_sTlb(axi_ctrl_sTlb),\n" + if {$cnfg(en_avx) eq 1} { + append entity " .axim_ctrl_cnfg(axim_ctrl),\n" + } else { + append entity " .axi_ctrl_cnfg(axi_ctrl_cnfg),\n" + } + if {$cnfg(en_bpss) eq 1} { + append entity " .rd_req_user(rd_req_user),\n" + append entity " .wr_req_user(wr_req_user),\n" + } + if {$cnfg(en_fv) eq 1} { + append entity " .rdma_req(rdma_req_host),\n" + } + if {$cnfg(en_strm) eq 1} { + append entity " .rdXDMA_host(rdXDMA_host),\n" + append entity " .wrXDMA_host(wrXDMA_host),\n" + append entity " .rxfer_host(rxfer_host),\n" + append entity " .wxfer_host(wxfer_host),\n" + if {$cnfg(n_reg) > 1} { + append entity " .mux_host_rd_user(mux_host_rd_user),\n" + append entity " .mux_host_wr_user(mux_host_wr_user),\n" + } + append entity " .rd_dest_host(rd_dest_host),\n" + } + if {$cnfg(en_ddr) eq 1} { + append entity " .rdXDMA_sync(rdXDMA_sync),\n" + append entity " .wrXDMA_sync(wrXDMA_sync),\n" + append entity " .rdCDMA_sync(rdCDMA_sync),\n" + append entity " .wrCDMA_sync(wrCDMA_sync),\n" + append entity " .rdCDMA_card(rdCDMA_card),\n" + append entity " .wrCDMA_card(wrCDMA_card),\n" + append entity " .rxfer_card(rxfer_card),\n" + append entity " .wxfer_card(wxfer_card),\n" + if {$cnfg(n_reg) > 1} { + append entity " .mux_card_rd_user(mux_card_rd_user),\n" + append entity " .mux_card_wr_user(mux_card_wr_user),\n" + } + append entity " .rd_dest_card(rd_dest_card),\n" + } + append entity " .decouple(decouple),\n" + append 
entity " .pf_irq(usr_irq)\n" + append entity ");\n" + append entity "\n" + append entity "// ----------------------------------------------------------------------\n" + append entity "// USER \n" + append entity "// ----------------------------------------------------------------------\n" + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + append entity "// User logic wrappers \n" + append entity "design_user_wrapper_$i inst_user_wrapper_$i (\n" + append entity " .axi_ctrl_araddr (axi_ctrl_dcpl\[$i].araddr),\n" + append entity " .axi_ctrl_arprot (axi_ctrl_dcpl\[$i].arprot),\n" + append entity " .axi_ctrl_arready (axi_ctrl_dcpl\[$i].arready),\n" + append entity " .axi_ctrl_arvalid (axi_ctrl_dcpl\[$i].arvalid),\n" + append entity " .axi_ctrl_awaddr (axi_ctrl_dcpl\[$i].awaddr),\n" + append entity " .axi_ctrl_awprot (axi_ctrl_dcpl\[$i].awprot),\n" + append entity " .axi_ctrl_awready (axi_ctrl_dcpl\[$i].awready),\n" + append entity " .axi_ctrl_awvalid (axi_ctrl_dcpl\[$i].awvalid),\n" + append entity " .axi_ctrl_bready (axi_ctrl_dcpl\[$i].bready),\n" + append entity " .axi_ctrl_bresp (axi_ctrl_dcpl\[$i].bresp),\n" + append entity " .axi_ctrl_bvalid (axi_ctrl_dcpl\[$i].bvalid),\n" + append entity " .axi_ctrl_rdata (axi_ctrl_dcpl\[$i].rdata),\n" + append entity " .axi_ctrl_rready (axi_ctrl_dcpl\[$i].rready),\n" + append entity " .axi_ctrl_rresp (axi_ctrl_dcpl\[$i].rresp),\n" + append entity " .axi_ctrl_rvalid (axi_ctrl_dcpl\[$i].rvalid),\n" + append entity " .axi_ctrl_wdata (axi_ctrl_dcpl\[$i].wdata),\n" + append entity " .axi_ctrl_wready (axi_ctrl_dcpl\[$i].wready),\n" + append entity " .axi_ctrl_wstrb (axi_ctrl_dcpl\[$i].wstrb),\n" + append entity " .axi_ctrl_wvalid (axi_ctrl_dcpl\[$i].wvalid),\n" + if {$cnfg(en_bpss) eq 1} { + append entity " .rd_req_user_valid (rd_req_dcpl_user\[$i].valid),\n" + append entity " .rd_req_user_ready (rd_req_dcpl_user\[$i].ready),\n" + append entity " .rd_req_user_req (rd_req_dcpl_user\[$i].req),\n" + append entity " .wr_req_user_valid 
(wr_req_dcpl_user\[$i].valid),\n" + append entity " .wr_req_user_ready (wr_req_dcpl_user\[$i].ready),\n" + append entity " .wr_req_user_req (wr_req_dcpl_user\[$i].req),\n" + } + if {$cnfg(en_fv) eq 1} { + if {$cnfg(en_fvv) eq 1} { + append entity " .fv_req_valid (rdma_req_dcpl_fv\[$i].valid),\n" + append entity " .fv_req_ready (rdma_req_dcpl_fv\[$i].ready),\n" + append entity " .fv_req_data (rdma_req_dcpl_fv\[$i].data),\n" + append entity " .fv_cmd_valid (rdma_req_dcpl_card\[$i].valid),\n" + append entity " .fv_cmd_ready (rdma_req_dcpl_card\[$i].ready),\n" + append entity " .fv_cmd_data (rdma_req_dcpl_card\[$i].data),\n" + } + append entity " .rd_req_rdma_valid (rdma_cmd_dcpl_rd\[$i].valid),\n" + append entity " .rd_req_rdma_ready (rdma_cmd_dcpl_rd\[$i].ready),\n" + append entity " .rd_req_rdma_req (rdma_cmd_dcpl_rd\[$i].req),\n" + append entity " .wr_req_rdma_valid (rdma_cmd_dcpl_wr\[$i].valid),\n" + append entity " .wr_req_rdma_ready (rdma_cmd_dcpl_wr\[$i].ready),\n" + append entity " .wr_req_rdma_req (rdma_cmd_dcpl_wr\[$i].req),\n" + append entity " .axis_rdma_src_tdata (axis_rdma_dcpl_rd\[$i].tdata),\n" + append entity " .axis_rdma_src_tkeep (axis_rdma_dcpl_rd\[$i].tkeep),\n" + append entity " .axis_rdma_src_tlast (axis_rdma_dcpl_rd\[$i].tlast),\n" + append entity " .axis_rdma_src_tready (axis_rdma_dcpl_rd\[$i].tready),\n" + append entity " .axis_rdma_src_tvalid (axis_rdma_dcpl_rd\[$i].tvalid),\n" + append entity " .axis_rdma_sink_tdata (axis_rdma_dcpl_wr\[$i].tdata),\n" + append entity " .axis_rdma_sink_tkeep (axis_rdma_dcpl_wr\[$i].tkeep),\n" + append entity " .axis_rdma_sink_tlast (axis_rdma_dcpl_wr\[$i].tlast),\n" + append entity " .axis_rdma_sink_tready(axis_rdma_dcpl_wr\[$i].tready),\n" + append entity " .axis_rdma_sink_tvalid(axis_rdma_dcpl_wr\[$i].tvalid),\n" + } + if {$cnfg(en_strm) eq 1} { + append entity " .axis_host_src_tdata (axis_host_dcpl_in\[$i].tdata),\n" + append entity " .axis_host_src_tkeep (axis_host_dcpl_in\[$i].tkeep),\n" + append entity 
" .axis_host_src_tlast (axis_host_dcpl_in\[$i].tlast),\n" + append entity " .axis_host_src_tdest (axis_host_dcpl_in\[$i].tdest),\n" + append entity " .axis_host_src_tready (axis_host_dcpl_in\[$i].tready),\n" + append entity " .axis_host_src_tvalid (axis_host_dcpl_in\[$i].tvalid),\n" + append entity " .axis_host_sink_tdata (axis_host_dcpl_out\[$i].tdata),\n" + append entity " .axis_host_sink_tkeep (axis_host_dcpl_out\[$i].tkeep),\n" + append entity " .axis_host_sink_tlast (axis_host_dcpl_out\[$i].tlast),\n" + append entity " .axis_host_sink_tdest (axis_host_dcpl_out\[$i].tdest),\n" + append entity " .axis_host_sink_tready (axis_host_dcpl_out\[$i].tready),\n" + append entity " .axis_host_sink_tvalid (axis_host_dcpl_out\[$i].tvalid),\n" + } + if {$cnfg(en_ddr) eq 1} { + append entity " .axis_card_src_tdata (axis_card_dcpl_in\[$i].tdata),\n" + append entity " .axis_card_src_tkeep (axis_card_dcpl_in\[$i].tkeep),\n" + append entity " .axis_card_src_tlast (axis_card_dcpl_in\[$i].tlast),\n" + append entity " .axis_card_src_tdest (axis_card_dcpl_in\[$i].tdest),\n" + append entity " .axis_card_src_tready (axis_card_dcpl_in\[$i].tready),\n" + append entity " .axis_card_src_tvalid (axis_card_dcpl_in\[$i].tvalid),\n" + append entity " .axis_card_sink_tdata (axis_card_dcpl_out\[$i].tdata),\n" + append entity " .axis_card_sink_tkeep (axis_card_dcpl_out\[$i].tkeep),\n" + append entity " .axis_card_sink_tlast (axis_card_dcpl_out\[$i].tlast),\n" + append entity " .axis_card_sink_tdest (axis_card_dcpl_out\[$i].tdest),\n" + append entity " .axis_card_sink_tready (axis_card_dcpl_out\[$i].tready),\n" + append entity " .axis_card_sink_tvalid (axis_card_dcpl_out\[$i].tvalid),\n" + } + append entity " .aclk (aclk),\n" + append entity " .aresetn (aresetn)\n" + append entity ");\n" + append entity "\n" + } + append entity "// ----------------------------------------------------------------------\n" + append entity "// Control crossbar - move to new file maybe \n" + append entity "// 
----------------------------------------------------------------------\n" + if {$cnfg(en_avx) eq 1} { + append entity "// Crossbar out\n" + append entity "logic\[N_REGIONS-1:0]\[3*AXI_ADDR_BITS-1:0] axi_xbar_araddr;\n" + append entity "logic\[N_REGIONS-1:0]\[8:0] axi_xbar_arprot;\n" + append entity "logic\[N_REGIONS-1:0]\[2:0] axi_xbar_arready;\n" + append entity "logic\[N_REGIONS-1:0]\[2:0] axi_xbar_arvalid;\n" + append entity "logic\[N_REGIONS-1:0]\[3*AXI_ADDR_BITS-1:0] axi_xbar_awaddr;\n" + append entity "logic\[N_REGIONS-1:0]\[8:0] axi_xbar_awprot;\n" + append entity "logic\[N_REGIONS-1:0]\[2:0] axi_xbar_awready;\n" + append entity "logic\[N_REGIONS-1:0]\[2:0] axi_xbar_awvalid;\n" + append entity "logic\[N_REGIONS-1:0]\[2:0] axi_xbar_bready;\n" + append entity "logic\[N_REGIONS-1:0]\[5:0] axi_xbar_bresp;\n" + append entity "logic\[N_REGIONS-1:0]\[2:0] axi_xbar_bvalid;\n" + append entity "logic\[N_REGIONS-1:0]\[3*AXIL_DATA_BITS-1:0] axi_xbar_rdata;\n" + append entity "logic\[N_REGIONS-1:0]\[2:0] axi_xbar_rready;\n" + append entity "logic\[N_REGIONS-1:0]\[5:0] axi_xbar_rresp;\n" + append entity "logic\[N_REGIONS-1:0]\[2:0] axi_xbar_rvalid;\n" + append entity "logic\[N_REGIONS-1:0]\[3*AXIL_DATA_BITS-1:0] axi_xbar_wdata;\n" + append entity "logic\[N_REGIONS-1:0]\[2:0] axi_xbar_wready;\n" + append entity "logic\[N_REGIONS-1:0]\[3*(AXIL_DATA_BITS/8)-1:0] axi_xbar_wstrb;\n" + append entity "logic\[N_REGIONS-1:0]\[2:0] axi_xbar_wvalid;\n" + append entity "\n" + append entity "for(genvar i = 0; i < N_REGIONS; i++) begin\n" + append entity "\n" + append entity "// lTlb\n" + append entity "assign axi_ctrl_lTlb\[i].araddr = axi_xbar_araddr\[i]\[AXI_ADDR_BITS-1:0];\n" + append entity "assign axi_ctrl_lTlb\[i].arprot = axi_xbar_arprot\[i]\[2:0];\n" + append entity "assign axi_ctrl_lTlb\[i].arvalid = axi_xbar_arvalid\[i]\[0];\n" + append entity "assign axi_ctrl_lTlb\[i].awaddr = axi_xbar_awaddr\[i]\[AXI_ADDR_BITS-1:0];\n" + append entity "assign axi_ctrl_lTlb\[i].awprot = 
axi_xbar_awprot\[i]\[2:0];\n" + append entity "assign axi_ctrl_lTlb\[i].awvalid = axi_xbar_awvalid\[i]\[0];\n" + append entity "assign axi_ctrl_lTlb\[i].bready = axi_xbar_bready\[i]\[0];\n" + append entity "assign axi_ctrl_lTlb\[i].rready = axi_xbar_rready\[i]\[0];\n" + append entity "assign axi_ctrl_lTlb\[i].wdata = axi_xbar_wdata\[i]\[AXIL_DATA_BITS-1:0];\n" + append entity "assign axi_ctrl_lTlb\[i].wstrb = axi_xbar_wstrb\[i]\[(AXIL_DATA_BITS/8)-1:0];\n" + append entity "assign axi_ctrl_lTlb\[i].wvalid = axi_xbar_wvalid\[i]\[0];\n" + append entity "\n" + append entity "assign axi_xbar_arready\[i]\[0] = axi_ctrl_lTlb\[i].arready;\n" + append entity "assign axi_xbar_awready\[i]\[0] = axi_ctrl_lTlb\[i].awready;\n" + append entity "assign axi_xbar_bresp\[i]\[1:0] = axi_ctrl_lTlb\[i].bresp;\n" + append entity "assign axi_xbar_bvalid\[i]\[0] = axi_ctrl_lTlb\[i].bvalid;\n" + append entity "assign axi_xbar_rdata\[i]\[AXIL_DATA_BITS-1:0] = axi_ctrl_lTlb\[i].rdata;\n" + append entity "assign axi_xbar_rresp\[i]\[1:0] = axi_ctrl_lTlb\[i].rresp;\n" + append entity "assign axi_xbar_rvalid\[i]\[0] = axi_ctrl_lTlb\[i].rvalid;\n" + append entity "assign axi_xbar_wready\[i]\[0] = axi_ctrl_lTlb\[i].wready;\n" + append entity "\n" + append entity "// sTlb\n" + append entity "assign axi_ctrl_sTlb\[i].araddr = axi_xbar_araddr\[i]\[2*AXI_ADDR_BITS-1:AXI_ADDR_BITS];\n" + append entity "assign axi_ctrl_sTlb\[i].arprot = axi_xbar_arprot\[i]\[5:3];\n" + append entity "assign axi_ctrl_sTlb\[i].arvalid = axi_xbar_arvalid\[i]\[1];\n" + append entity "assign axi_ctrl_sTlb\[i].awaddr = axi_xbar_awaddr\[i]\[2*AXI_ADDR_BITS-1:AXI_ADDR_BITS];\n" + append entity "assign axi_ctrl_sTlb\[i].awprot = axi_xbar_awprot\[i]\[5:3];\n" + append entity "assign axi_ctrl_sTlb\[i].awvalid = axi_xbar_awvalid\[i]\[1];\n" + append entity "assign axi_ctrl_sTlb\[i].bready = axi_xbar_bready\[i]\[1];\n" + append entity "assign axi_ctrl_sTlb\[i].rready = axi_xbar_rready\[i]\[1];\n" + append entity "assign 
axi_ctrl_sTlb\[i].wdata = axi_xbar_wdata\[i]\[2*AXIL_DATA_BITS-1:AXIL_DATA_BITS];\n" + append entity "assign axi_ctrl_sTlb\[i].wstrb = axi_xbar_wstrb\[i]\[2*(AXIL_DATA_BITS/8)-1:AXIL_DATA_BITS/8];\n" + append entity "assign axi_ctrl_sTlb\[i].wvalid = axi_xbar_wvalid\[i]\[1];\n" + append entity "\n" + append entity "assign axi_xbar_arready\[i]\[1] = axi_ctrl_sTlb\[i].arready;\n" + append entity "assign axi_xbar_awready\[i]\[1] = axi_ctrl_sTlb\[i].awready;\n" + append entity "assign axi_xbar_bresp\[i]\[3:2] = axi_ctrl_sTlb\[i].bresp;\n" + append entity "assign axi_xbar_bvalid\[i]\[1] = axi_ctrl_sTlb\[i].bvalid;\n" + append entity "assign axi_xbar_rdata\[i]\[2*AXIL_DATA_BITS-1:AXIL_DATA_BITS] = axi_ctrl_sTlb\[i].rdata;\n" + append entity "assign axi_xbar_rresp\[i]\[3:2] = axi_ctrl_sTlb\[i].rresp;\n" + append entity "assign axi_xbar_rvalid\[i]\[1] = axi_ctrl_sTlb\[i].rvalid;\n" + append entity "assign axi_xbar_wready\[i]\[1] = axi_ctrl_sTlb\[i].wready;\n" + append entity "\n" + append entity "// User logic\n" + append entity "assign axi_ctrl_user\[i].araddr = axi_xbar_araddr\[i]\[3*AXI_ADDR_BITS-1:2*AXI_ADDR_BITS];\n" + append entity "assign axi_ctrl_user\[i].arprot = axi_xbar_arprot\[i]\[8:6];\n" + append entity "assign axi_ctrl_user\[i].arvalid = axi_xbar_arvalid\[i]\[2];\n" + append entity "assign axi_ctrl_user\[i].awaddr = axi_xbar_awaddr\[i]\[3*AXI_ADDR_BITS-1:2*AXI_ADDR_BITS];\n" + append entity "assign axi_ctrl_user\[i].awprot = axi_xbar_awprot\[i]\[8:6];\n" + append entity "assign axi_ctrl_user\[i].awvalid = axi_xbar_awvalid\[i]\[2];\n" + append entity "assign axi_ctrl_user\[i].bready = axi_xbar_bready\[i]\[2];\n" + append entity "assign axi_ctrl_user\[i].rready = axi_xbar_rready\[i]\[2];\n" + append entity "assign axi_ctrl_user\[i].wdata = axi_xbar_wdata\[i]\[3*AXIL_DATA_BITS-1:2*AXIL_DATA_BITS];\n" + append entity "assign axi_ctrl_user\[i].wstrb = axi_xbar_wstrb\[i]\[3*(AXIL_DATA_BITS/8)-1:2*(AXIL_DATA_BITS/8)];\n" + append entity "assign 
axi_ctrl_user\[i].wvalid = axi_xbar_wvalid\[i]\[2];\n" + append entity "\n" + append entity "assign axi_xbar_arready\[i]\[2] = axi_ctrl_user\[i].arready;\n" + append entity "assign axi_xbar_awready\[i]\[2] = axi_ctrl_user\[i].awready;\n" + append entity "assign axi_xbar_bresp\[i]\[5:4] = axi_ctrl_user\[i].bresp;\n" + append entity "assign axi_xbar_bvalid\[i]\[2] = axi_ctrl_user\[i].bvalid;\n" + append entity "assign axi_xbar_rdata\[i]\[3*AXIL_DATA_BITS-1:2*AXIL_DATA_BITS] = axi_ctrl_user\[i].rdata;\n" + append entity "assign axi_xbar_rresp\[i]\[5:4] = axi_ctrl_user\[i].rresp;\n" + append entity "assign axi_xbar_rvalid\[i]\[2] = axi_ctrl_user\[i].rvalid;\n" + append entity "assign axi_xbar_wready\[i]\[2] = axi_ctrl_user\[i].wready;\n" + append entity "\n" + append entity "end\n" + append entity "\n" + } else { + append entity "// Crossbar out\n" + append entity "logic\[N_REGIONS-1:0]\[4*AXI_ADDR_BITS-1:0] axi_xbar_araddr;\n" + append entity "logic\[N_REGIONS-1:0]\[11:0] axi_xbar_arprot;\n" + append entity "logic\[N_REGIONS-1:0]\[3:0] axi_xbar_arready;\n" + append entity "logic\[N_REGIONS-1:0]\[3:0] axi_xbar_arvalid;\n" + append entity "logic\[N_REGIONS-1:0]\[4*AXI_ADDR_BITS-1:0] axi_xbar_awaddr;\n" + append entity "logic\[N_REGIONS-1:0]\[11:0] axi_xbar_awprot;\n" + append entity "logic\[N_REGIONS-1:0]\[3:0] axi_xbar_awready;\n" + append entity "logic\[N_REGIONS-1:0]\[3:0] axi_xbar_awvalid;\n" + append entity "logic\[N_REGIONS-1:0]\[3:0] axi_xbar_bready;\n" + append entity "logic\[N_REGIONS-1:0]\[7:0] axi_xbar_bresp;\n" + append entity "logic\[N_REGIONS-1:0]\[3:0] axi_xbar_bvalid;\n" + append entity "logic\[N_REGIONS-1:0]\[4*AXIL_DATA_BITS-1:0] axi_xbar_rdata;\n" + append entity "logic\[N_REGIONS-1:0]\[3:0] axi_xbar_rready;\n" + append entity "logic\[N_REGIONS-1:0]\[7:0] axi_xbar_rresp;\n" + append entity "logic\[N_REGIONS-1:0]\[3:0] axi_xbar_rvalid;\n" + append entity "logic\[N_REGIONS-1:0]\[4*AXIL_DATA_BITS-1:0] axi_xbar_wdata;\n" + append entity 
"logic\[N_REGIONS-1:0]\[3:0] axi_xbar_wready;\n" + append entity "logic\[N_REGIONS-1:0]\[4*(AXIL_DATA_BITS/8)-1:0] axi_xbar_wstrb;\n" + append entity "logic\[N_REGIONS-1:0]\[3:0] axi_xbar_wvalid;\n" + append entity "\n" + append entity "for(genvar i = 0; i < N_REGIONS; i++) begin\n" + append entity "\n" + append entity "// lTlb\n" + append entity "assign axi_ctrl_lTlb\[i].araddr = axi_xbar_araddr\[i]\[AXI_ADDR_BITS-1:0];\n" + append entity "assign axi_ctrl_lTlb\[i].arprot = axi_xbar_arprot\[i]\[2:0];\n" + append entity "assign axi_ctrl_lTlb\[i].arvalid = axi_xbar_arvalid\[i]\[0];\n" + append entity "assign axi_ctrl_lTlb\[i].awaddr = axi_xbar_awaddr\[i]\[AXI_ADDR_BITS-1:0];\n" + append entity "assign axi_ctrl_lTlb\[i].awprot = axi_xbar_awprot\[i]\[2:0];\n" + append entity "assign axi_ctrl_lTlb\[i].awvalid = axi_xbar_awvalid\[i]\[0];\n" + append entity "assign axi_ctrl_lTlb\[i].bready = axi_xbar_bready\[i]\[0];\n" + append entity "assign axi_ctrl_lTlb\[i].rready = axi_xbar_rready\[i]\[0];\n" + append entity "assign axi_ctrl_lTlb\[i].wdata = axi_xbar_wdata\[i]\[AXIL_DATA_BITS-1:0];\n" + append entity "assign axi_ctrl_lTlb\[i].wstrb = axi_xbar_wstrb\[i]\[(AXIL_DATA_BITS/8)-1:0];\n" + append entity "assign axi_ctrl_lTlb\[i].wvalid = axi_xbar_wvalid\[i]\[0];\n" + append entity "\n" + append entity "assign axi_xbar_arready\[i]\[0] = axi_ctrl_lTlb\[i].arready;\n" + append entity "assign axi_xbar_awready\[i]\[0] = axi_ctrl_lTlb\[i].awready;\n" + append entity "assign axi_xbar_bresp\[i]\[1:0] = axi_ctrl_lTlb\[i].bresp;\n" + append entity "assign axi_xbar_bvalid\[i]\[0] = axi_ctrl_lTlb\[i].bvalid;\n" + append entity "assign axi_xbar_rdata\[i]\[AXIL_DATA_BITS-1:0] = axi_ctrl_lTlb\[i].rdata;\n" + append entity "assign axi_xbar_rresp\[i]\[1:0] = axi_ctrl_lTlb\[i].rresp;\n" + append entity "assign axi_xbar_rvalid\[i]\[0] = axi_ctrl_lTlb\[i].rvalid;\n" + append entity "assign axi_xbar_wready\[i]\[0] = axi_ctrl_lTlb\[i].wready;\n" + append entity "\n" + append entity "// sTlb\n" + 
append entity "assign axi_ctrl_sTlb\[i].araddr = axi_xbar_araddr\[i]\[2*AXI_ADDR_BITS-1:AXI_ADDR_BITS];\n" + append entity "assign axi_ctrl_sTlb\[i].arprot = axi_xbar_arprot\[i]\[5:3];\n" + append entity "assign axi_ctrl_sTlb\[i].arvalid = axi_xbar_arvalid\[i]\[1];\n" + append entity "assign axi_ctrl_sTlb\[i].awaddr = axi_xbar_awaddr\[i]\[2*AXI_ADDR_BITS-1:AXI_ADDR_BITS];\n" + append entity "assign axi_ctrl_sTlb\[i].awprot = axi_xbar_awprot\[i]\[5:3];\n" + append entity "assign axi_ctrl_sTlb\[i].awvalid = axi_xbar_awvalid\[i]\[1];\n" + append entity "assign axi_ctrl_sTlb\[i].bready = axi_xbar_bready\[i]\[1];\n" + append entity "assign axi_ctrl_sTlb\[i].rready = axi_xbar_rready\[i]\[1];\n" + append entity "assign axi_ctrl_sTlb\[i].wdata = axi_xbar_wdata\[i]\[2*AXIL_DATA_BITS-1:AXIL_DATA_BITS];\n" + append entity "assign axi_ctrl_sTlb\[i].wstrb = axi_xbar_wstrb\[i]\[2*(AXIL_DATA_BITS/8)-1:AXIL_DATA_BITS/8];\n" + append entity "assign axi_ctrl_sTlb\[i].wvalid = axi_xbar_wvalid\[i]\[1];\n" + append entity "\n" + append entity "assign axi_xbar_arready\[i]\[1] = axi_ctrl_sTlb\[i].arready;\n" + append entity "assign axi_xbar_awready\[i]\[1] = axi_ctrl_sTlb\[i].awready;\n" + append entity "assign axi_xbar_bresp\[i]\[3:2] = axi_ctrl_sTlb\[i].bresp;\n" + append entity "assign axi_xbar_bvalid\[i]\[1] = axi_ctrl_sTlb\[i].bvalid;\n" + append entity "assign axi_xbar_rdata\[i]\[2*AXIL_DATA_BITS-1:AXIL_DATA_BITS] = axi_ctrl_sTlb\[i].rdata;\n" + append entity "assign axi_xbar_rresp\[i]\[3:2] = axi_ctrl_sTlb\[i].rresp;\n" + append entity "assign axi_xbar_rvalid\[i]\[1] = axi_ctrl_sTlb\[i].rvalid;\n" + append entity "assign axi_xbar_wready\[i]\[1] = axi_ctrl_sTlb\[i].wready;\n" + append entity "\n" + append entity "// User logic\n" + append entity "assign axi_ctrl_user\[i].araddr = axi_xbar_araddr\[i]\[3*AXI_ADDR_BITS-1:2*AXI_ADDR_BITS];\n" + append entity "assign axi_ctrl_user\[i].arprot = axi_xbar_arprot\[i]\[8:6];\n" + append entity "assign axi_ctrl_user\[i].arvalid = 
axi_xbar_arvalid\[i]\[2];\n" + append entity "assign axi_ctrl_user\[i].awaddr = axi_xbar_awaddr\[i]\[3*AXI_ADDR_BITS-1:2*AXI_ADDR_BITS];\n" + append entity "assign axi_ctrl_user\[i].awprot = axi_xbar_awprot\[i]\[8:6];\n" + append entity "assign axi_ctrl_user\[i].awvalid = axi_xbar_awvalid\[i]\[2];\n" + append entity "assign axi_ctrl_user\[i].bready = axi_xbar_bready\[i]\[2];\n" + append entity "assign axi_ctrl_user\[i].rready = axi_xbar_rready\[i]\[2];\n" + append entity "assign axi_ctrl_user\[i].wdata = axi_xbar_wdata\[i]\[3*AXIL_DATA_BITS-1:2*AXIL_DATA_BITS];\n" + append entity "assign axi_ctrl_user\[i].wstrb = axi_xbar_wstrb\[i]\[3*(AXIL_DATA_BITS/8)-1:2*(AXIL_DATA_BITS/8)];\n" + append entity "assign axi_ctrl_user\[i].wvalid = axi_xbar_wvalid\[i]\[2];\n" + append entity "\n" + append entity "assign axi_xbar_arready\[i]\[2] = axi_ctrl_user\[i].arready;\n" + append entity "assign axi_xbar_awready\[i]\[2] = axi_ctrl_user\[i].awready;\n" + append entity "assign axi_xbar_bresp\[i]\[5:4] = axi_ctrl_user\[i].bresp;\n" + append entity "assign axi_xbar_bvalid\[i]\[2] = axi_ctrl_user\[i].bvalid;\n" + append entity "assign axi_xbar_rdata\[i]\[3*AXIL_DATA_BITS-1:2*AXIL_DATA_BITS] = axi_ctrl_user\[i].rdata;\n" + append entity "assign axi_xbar_rresp\[i]\[5:4] = axi_ctrl_user\[i].rresp;\n" + append entity "assign axi_xbar_rvalid\[i]\[2] = axi_ctrl_user\[i].rvalid;\n" + append entity "assign axi_xbar_wready\[i]\[2] = axi_ctrl_user\[i].wready;\n" + append entity "\n" + append entity "// Config\n" + append entity "assign axi_ctrl_cnfg\[i].araddr = axi_xbar_araddr\[i]\[4*AXI_ADDR_BITS-1:3*AXI_ADDR_BITS];\n" + append entity "assign axi_ctrl_cnfg\[i].arprot = axi_xbar_arprot\[i]\[11:9];\n" + append entity "assign axi_ctrl_cnfg\[i].arvalid = axi_xbar_arvalid\[i]\[3];\n" + append entity "assign axi_ctrl_cnfg\[i].awaddr = axi_xbar_awaddr\[i]\[4*AXI_ADDR_BITS-1:3*AXI_ADDR_BITS];\n" + append entity "assign axi_ctrl_cnfg\[i].awprot = axi_xbar_awprot\[i]\[11:9];\n" + append entity "assign 
axi_ctrl_cnfg\[i].awvalid = axi_xbar_awvalid\[i]\[3];\n" + append entity "assign axi_ctrl_cnfg\[i].bready = axi_xbar_bready\[i]\[3];\n" + append entity "assign axi_ctrl_cnfg\[i].rready = axi_xbar_rready\[i]\[3];\n" + append entity "assign axi_ctrl_cnfg\[i].wdata = axi_xbar_wdata\[i]\[4*AXIL_DATA_BITS-1:3*AXIL_DATA_BITS];\n" + append entity "assign axi_ctrl_cnfg\[i].wstrb = axi_xbar_wstrb\[i]\[4*(AXIL_DATA_BITS/8)-1:3*(AXIL_DATA_BITS/8)];\n" + append entity "assign axi_ctrl_cnfg\[i].wvalid = axi_xbar_wvalid\[i]\[3];\n" + append entity "\n" + append entity "assign axi_xbar_arready\[i]\[3] = axi_ctrl_cnfg\[i].arready;\n" + append entity "assign axi_xbar_awready\[i]\[3] = axi_ctrl_cnfg\[i].awready;\n" + append entity "assign axi_xbar_bresp\[i]\[7:6] = axi_ctrl_cnfg\[i].bresp;\n" + append entity "assign axi_xbar_bvalid\[i]\[3] = axi_ctrl_cnfg\[i].bvalid;\n" + append entity "assign axi_xbar_rdata\[i]\[4*AXIL_DATA_BITS-1:3*AXIL_DATA_BITS] = axi_ctrl_cnfg\[i].rdata;\n" + append entity "assign axi_xbar_rresp\[i]\[7:6] = axi_ctrl_cnfg\[i].rresp;\n" + append entity "assign axi_xbar_rvalid\[i]\[3] = axi_ctrl_cnfg\[i].rvalid;\n" + append entity "assign axi_xbar_wready\[i]\[3] = axi_ctrl_cnfg\[i].wready;\n" + append entity "\n" + append entity "end\n" + append entity "\n" + } + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + append entity "dyn_crossbar_$i inst_dyn_crossbar_$i (\n" + append entity " .aclk(aclk), \n" + append entity " .aresetn(aresetn), \n" + append entity " .s_axi_awaddr(axi_ctrl\[$i].awaddr), \n" + append entity " .s_axi_awprot(axi_ctrl\[$i].awprot), \n" + append entity " .s_axi_awvalid(axi_ctrl\[$i].awvalid), \n" + append entity " .s_axi_awready(axi_ctrl\[$i].awready), \n" + append entity " .s_axi_wdata(axi_ctrl\[$i].wdata), \n" + append entity " .s_axi_wstrb(axi_ctrl\[$i].wstrb), \n" + append entity " .s_axi_wvalid(axi_ctrl\[$i].wvalid), \n" + append entity " .s_axi_wready(axi_ctrl\[$i].wready), \n" + append entity " .s_axi_bresp(axi_ctrl\[$i].bresp), \n" + 
append entity " .s_axi_bvalid(axi_ctrl\[$i].bvalid), \n" + append entity " .s_axi_bready(axi_ctrl\[$i].bready), \n" + append entity " .s_axi_araddr(axi_ctrl\[$i].araddr), \n" + append entity " .s_axi_arprot(axi_ctrl\[$i].arprot), \n" + append entity " .s_axi_arvalid(axi_ctrl\[$i].arvalid), \n" + append entity " .s_axi_arready(axi_ctrl\[$i].arready), \n" + append entity " .s_axi_rdata(axi_ctrl\[$i].rdata), \n" + append entity " .s_axi_rresp(axi_ctrl\[$i].rresp), \n" + append entity " .s_axi_rvalid(axi_ctrl\[$i].rvalid), \n" + append entity " .s_axi_rready(axi_ctrl\[$i].rready), \n" + append entity " .m_axi_awaddr(axi_xbar_awaddr\[$i]), \n" + append entity " .m_axi_awprot(axi_xbar_awprot\[$i]), \n" + append entity " .m_axi_awvalid(axi_xbar_awvalid\[$i]), \n" + append entity " .m_axi_awready(axi_xbar_awready\[$i]), \n" + append entity " .m_axi_wdata(axi_xbar_wdata\[$i]), \n" + append entity " .m_axi_wstrb(axi_xbar_wstrb\[$i]), \n" + append entity " .m_axi_wvalid(axi_xbar_wvalid\[$i]), \n" + append entity " .m_axi_wready(axi_xbar_wready\[$i]), \n" + append entity " .m_axi_bresp(axi_xbar_bresp\[$i]), \n" + append entity " .m_axi_bvalid(axi_xbar_bvalid\[$i]), \n" + append entity " .m_axi_bready(axi_xbar_bready\[$i]), \n" + append entity " .m_axi_araddr(axi_xbar_araddr\[$i]), \n" + append entity " .m_axi_arprot(axi_xbar_arprot\[$i]), \n" + append entity " .m_axi_arvalid(axi_xbar_arvalid\[$i]), \n" + append entity " .m_axi_arready(axi_xbar_arready\[$i]), \n" + append entity " .m_axi_rdata(axi_xbar_rdata\[$i]), \n" + append entity " .m_axi_rresp(axi_xbar_rresp\[$i]), \n" + append entity " .m_axi_rvalid(axi_xbar_rvalid\[$i]), \n" + append entity " .m_axi_rready(axi_xbar_rready\[$i])\n" + append entity ");\n\n" + } + append entity "\n" + append entity "endmodule\n" + append entity "\n" + lappend template $entity + set vho_file [open $f_out w] + foreach line $template { + puts $vho_file $line + } + close $vho_file +} \ No newline at end of file diff --git 
a/hw/scripts/wr_hdl/wr_hdl_pkg.tcl b/hw/scripts/wr_hdl/wr_hdl_pkg.tcl new file mode 100644 index 00000000..1e65a5a5 --- /dev/null +++ b/hw/scripts/wr_hdl/wr_hdl_pkg.tcl @@ -0,0 +1,157 @@ +######################################################################################### +# Package: wr_hdl_pkg f_out writes the SystemVerilog package file f_out. It reads the global cfg array, emits one `define per enabled en_* flag (plus MULT_REGIONS when n_reg > 1), then package lynxTypes with its fixed parameters, the cfg-derived parameters and the packed struct typedefs. +######################################################################################### +proc wr_hdl_pkg {f_out} { + upvar #0 cfg cnfg + + set template {} + set entity {} + if {$cnfg(en_strm) eq 1} { + append entity "`define EN_STRM\n" + } + if {$cnfg(en_ddr) eq 1} { + append entity "`define EN_DDR\n" + } + if {$cnfg(en_pr) eq 1} { + append entity "`define EN_PR\n" + } + if {$cnfg(en_bpss) eq 1} { + append entity "`define EN_BPSS\n" + } + if {$cnfg(en_avx) eq 1} { + append entity "`define EN_AVX\n" + } + if {$cnfg(en_fv) eq 1} { + append entity "`define EN_FV\n" + } + if {$cnfg(en_fvv) eq 1} { + append entity "`define EN_FVV\n" + } + if {$cnfg(n_reg) > 1} { + append entity "`define MULT_REGIONS\n" + } + append entity "\n" + append entity "package lynxTypes;\n" + append entity "\n" + append entity " // AXI\n" + append entity " parameter integer AXIL_DATA_BITS = 64;\n" + append entity " parameter integer AVX_DATA_BITS = 256;\n" + append entity " parameter integer AXI_DATA_BITS = 512;\n" + append entity " parameter integer AXI_ADDR_BITS = 64;\n" + append entity "\n" + append entity " // TLB ram\n" + append entity " parameter integer TLB_S_ORDER = 10;\n" + append entity " parameter integer PG_S_BITS = 12;\n" + append entity " parameter integer N_S_ASSOC = 4;\n" + append entity "\n" + append entity " parameter integer TLB_L_ORDER = 6;\n" + append entity " parameter integer PG_L_BITS = 21;\n" + append entity " parameter integer N_L_ASSOC = 2;\n" + append entity "\n" + append entity " // Data\n" + append entity " parameter integer ADDR_BITS = 64;\n" + append entity " parameter integer PADDR_BITS = 40;\n" + append entity " parameter integer VADDR_BITS = 48;\n" + append entity "
parameter integer LEN_BITS = 28;\n" + append entity " parameter integer TLB_DATA_BITS = 64;\n" + append entity "\n" + append entity " // Queue depth\n" + append entity " parameter integer QUEUE_DEPTH = 8;\n" + append entity " parameter integer N_OUTSTANDING = 8;\n" + append entity "\n" + append entity " // Slices\n" + append entity " parameter integer N_REG_HOST_S0 = 2;\n" + append entity " parameter integer N_REG_HOST_S1 = 2;\n" + append entity " parameter integer N_REG_HOST_S2 = 2;\n" + append entity " parameter integer N_REG_CARD_S0 = 2;\n" + append entity " parameter integer N_REG_CARD_S1 = 2;\n" + append entity " parameter integer N_REG_CARD_S2 = 2;\n" + append entity "\n" + append entity " // Network\n" + append entity " parameter integer FV_REQ_BITS = 256;\n" + append entity " parameter integer PMTU_BITS = 1408;\n" + append entity "\n" + append entity " // -----------------------------------------------------------------\n" + append entity " // Dynamic\n" + append entity " // -----------------------------------------------------------------\n" + append entity "\n" + append entity " // Flow\n" + append entity " parameter integer N_DDR_CHAN = $cnfg(n_ddr_chan);\n" + append entity " parameter integer N_CHAN = $cnfg(n_chan); \n" + append entity " parameter integer N_REGIONS = $cnfg(n_reg);\n" + append entity " parameter integer PR_FLOW = $cnfg(en_pr);\n" + append entity " parameter integer AVX_FLOW = $cnfg(en_avx);\n" + append entity " parameter integer BPSS_FLOW = $cnfg(en_bpss);\n" + append entity " parameter integer DDR_FLOW = $cnfg(en_ddr);\n" + append entity " parameter integer FV_FLOW = $cnfg(en_fv);\n" + append entity " parameter integer FV_VERBS = $cnfg(en_fvv);\n" + if {$cnfg(n_reg) == 1} { + set nn 2 + } else { + set nn $cnfg(n_reg) + } + append entity " parameter integer N_REGIONS_BITS = \$clog2($nn);\n" + append entity " parameter integer N_REQUEST_BITS = 4;\n" + append entity "\n" + append entity "//
----------------------------------------------------------------------------\n" + append entity "// -- Structs\n" + append entity "// ----------------------------------------------------------------------------\n" + append entity "typedef struct packed {\n" + append entity " logic \[VADDR_BITS-1:0] vaddr;\n" + append entity " logic \[LEN_BITS-1:0] len;\n" + append entity " logic stream;\n" + append entity " logic sync;\n" + append entity " logic ctl;\n" + append entity " logic \[3:0] dest;\n" + append entity " logic \[12:0] rsrvd;\n" + append entity "} req_t;\n" + append entity "\n" + append entity "typedef struct packed {\n" + append entity " logic \[VADDR_BITS-1:0] vaddr;\n" + append entity " logic \[LEN_BITS-1:0] len;\n" + append entity " logic stream;\n" + append entity " logic sync;\n" + append entity " logic ctl;\n" + append entity " logic \[3:0] dest;\n" + append entity " logic \[N_REQUEST_BITS-1:0] id;\n" + append entity " logic host;\n" + append entity " logic \[7:0] rsrvd;\n" + append entity "} rdma_req_t;\n" + append entity "\n" + append entity "typedef struct packed {\n" + append entity " logic \[PADDR_BITS-1:0] paddr;\n" + append entity " logic \[LEN_BITS-1:0] len;\n" + append entity " logic ctl;\n" + append entity " logic \[3:0] dest;\n" + append entity " logic \[22:0] rsrvd;\n" + append entity "} dma_req_t;\n" + append entity "\n" + append entity "typedef struct packed {\n" + append entity " logic \[PADDR_BITS-1:0] paddr_card;\n" + append entity " logic \[PADDR_BITS-1:0] paddr_host;\n" + append entity " logic \[LEN_BITS-1:0] len;\n" + append entity " logic ctl;\n" + append entity " logic \[3:0] dest;\n" + append entity " logic isr;\n" + append entity " logic \[13:0] rsrvd;\n" + append entity "} dma_isr_req_t;\n" + append entity "\n" + append entity "typedef struct packed {\n" + append entity " logic miss;\n" + append entity " logic \[VADDR_BITS-1:0] vaddr;\n" + append entity " logic \[LEN_BITS-1:0] len;\n" + append entity "} pf_t;\n" + append entity
"\n" + append entity "typedef struct packed {\n" + append entity " logic \[N_REGIONS_BITS-1:0] id;\n" + append entity " logic \[LEN_BITS-1:0] len;\n" + append entity "} mux_t;\n" + append entity "\n" + append entity "endpackage\n" + lappend template $entity + set vho_file [open $f_out w] + foreach line $template { + puts $vho_file $line + } + close $vho_file +} \ No newline at end of file diff --git a/hw/scripts/wr_hdl/wr_hdl_top_u250.tcl b/hw/scripts/wr_hdl/wr_hdl_top_u250.tcl new file mode 100644 index 00000000..ad206ab8 --- /dev/null +++ b/hw/scripts/wr_hdl/wr_hdl_top_u250.tcl @@ -0,0 +1,692 @@ +######################################################################################### +# Write top level file +######################################################################################### +proc wr_hdl_top {f_out} { + upvar #0 cfg cnfg + + set template {} + set entity {} + append entity "`timescale 1ns / 1ps\n" + append entity "\n" + append entity "import lynxTypes::*;\n" + append entity "\n" + append entity "`include \"axi_macros.svh\"\n" + append entity "`include \"lynx_macros.svh\"\n" + append entity "//\n" + append entity "// Top Level\n" + append entity "//\n" + append entity "module top (\n" + if {$cnfg(en_fv) eq 1} { + append entity " output wire qsfp$cnfg(qsfp)_resetn,\n" + append entity " output wire qsfp$cnfg(qsfp)_lpmode,\n" + append entity " output wire qsfp$cnfg(qsfp)_modseln,\n" + append entity " input wire\[3:0] gt$cnfg(qsfp)_rxp_in,\n" + append entity " input wire\[3:0] gt$cnfg(qsfp)_rxn_in,\n" + append entity " output wire\[3:0] gt$cnfg(qsfp)_txp_out,\n" + append entity " output wire\[3:0] gt$cnfg(qsfp)_txn_out,\n" + append entity " input wire gt$cnfg(qsfp)_refclk_p,\n" + append entity " input wire gt$cnfg(qsfp)_refclk_n,\n" + } + if {$cnfg(en_ddr) eq 1} { + if {$cnfg(ddr_0) eq 1} { + append entity " output wire c0_ddr4_act_n,\n" + append entity " output wire\[16:0] c0_ddr4_adr,\n" + append entity " output wire\[1:0] c0_ddr4_ba,\n" +
append entity " output wire\[1:0] c0_ddr4_bg,\n" + append entity " output wire\[0:0] c0_ddr4_ck_c,\n" + append entity " output wire\[0:0] c0_ddr4_ck_t,\n" + append entity " output wire\[0:0] c0_ddr4_cke,\n" + append entity " output wire\[0:0] c0_ddr4_cs_n,\n" + append entity " inout wire\[71:0] c0_ddr4_dq,\n" + append entity " inout wire\[17:0] c0_ddr4_dqs_c,\n" + append entity " inout wire\[17:0] c0_ddr4_dqs_t,\n" + append entity " output wire\[0:0] c0_ddr4_odt,\n" + append entity " output wire c0_ddr4_par,\n" + append entity " output wire c0_ddr4_reset_n,\n" + append entity " input wire c0_sys_clk_p,\n" + append entity " input wire c0_sys_clk_n,\n" + } + if {$cnfg(ddr_1) eq 1} { + append entity " output wire c1_ddr4_act_n,\n" + append entity " output wire\[16:0] c1_ddr4_adr,\n" + append entity " output wire\[1:0] c1_ddr4_ba,\n" + append entity " output wire\[1:0] c1_ddr4_bg,\n" + append entity " output wire\[0:0] c1_ddr4_ck_c,\n" + append entity " output wire\[0:0] c1_ddr4_ck_t,\n" + append entity " output wire\[0:0] c1_ddr4_cke,\n" + append entity " output wire\[0:0] c1_ddr4_cs_n,\n" + append entity " inout wire\[71:0] c1_ddr4_dq,\n" + append entity " inout wire\[17:0] c1_ddr4_dqs_c,\n" + append entity " inout wire\[17:0] c1_ddr4_dqs_t,\n" + append entity " output wire\[0:0] c1_ddr4_odt,\n" + append entity " output wire c1_ddr4_par,\n" + append entity " output wire c1_ddr4_reset_n,\n" + append entity " input wire c1_sys_clk_p,\n" + append entity " input wire c1_sys_clk_n,\n" + } + if {$cnfg(ddr_2) eq 1} { + append entity " output wire c2_ddr4_act_n,\n" + append entity " output wire\[16:0] c2_ddr4_adr,\n" + append entity " output wire\[1:0] c2_ddr4_ba,\n" + append entity " output wire\[1:0] c2_ddr4_bg,\n" + append entity " output wire\[0:0] c2_ddr4_ck_c,\n" + append entity " output wire\[0:0] c2_ddr4_ck_t,\n" + append entity " output wire\[0:0] c2_ddr4_cke,\n" + append entity " output wire\[0:0] c2_ddr4_cs_n,\n" + append entity " inout wire\[71:0] c2_ddr4_dq,\n" 
+ append entity " inout wire\[17:0] c2_ddr4_dqs_c,\n" + append entity " inout wire\[17:0] c2_ddr4_dqs_t,\n" + append entity " output wire\[0:0] c2_ddr4_odt,\n" + append entity " output wire c2_ddr4_par,\n" + append entity " output wire c2_ddr4_reset_n,\n" + append entity " input wire c2_sys_clk_p,\n" + append entity " input wire c2_sys_clk_n,\n" + } + if {$cnfg(ddr_3) eq 1} { + append entity " output wire c3_ddr4_act_n,\n" + append entity " output wire\[16:0] c3_ddr4_adr,\n" + append entity " output wire\[1:0] c3_ddr4_ba,\n" + append entity " output wire\[1:0] c3_ddr4_bg,\n" + append entity " output wire\[0:0] c3_ddr4_ck_c,\n" + append entity " output wire\[0:0] c3_ddr4_ck_t,\n" + append entity " output wire\[0:0] c3_ddr4_cke,\n" + append entity " output wire\[0:0] c3_ddr4_cs_n,\n" + append entity " inout wire\[71:0] c3_ddr4_dq,\n" + append entity " inout wire\[17:0] c3_ddr4_dqs_c,\n" + append entity " inout wire\[17:0] c3_ddr4_dqs_t,\n" + append entity " output wire\[0:0] c3_ddr4_odt,\n" + append entity " output wire c3_ddr4_par,\n" + append entity " output wire c3_ddr4_reset_n,\n" + append entity " input wire c3_sys_clk_p,\n" + append entity " input wire c3_sys_clk_n,\n" + } + } + append entity " input wire\[0:0] pcie_clk_clk_n,\n" + append entity " input wire\[0:0] pcie_clk_clk_p,\n" + append entity " input wire\[15:0] pcie_x16_rxn,\n" + append entity " input wire\[15:0] pcie_x16_rxp,\n" + append entity " output wire\[15:0] pcie_x16_txn,\n" + append entity " output wire\[15:0] pcie_x16_txp,\n" + append entity " input wire perst_n_nb,\n" + append entity " input wire resetn_0_nb\n" + append entity ");\n" + append entity "\n" + append entity " \n" + append entity " // AXI resetn\n" + append entity " wire\[0:0] aresetn;\n" + append entity " // AXI clk (250 MHz)\n" + append entity " wire aclk;\n" + append entity "\n" + append entity " // IRQ\n" + append entity " wire\[N_REGIONS-1:0] usr_irq;\n" + append entity "\n" + append entity " wire resetn_0;\n" + append entity 
" wire perst_n;\n" + append entity "\n" + append entity " // Static config\n" + append entity " AXI4L axi_cnfg ();\n" + append entity "\n" + append entity " // Application control\n" + append entity " AXI4L axi_ctrl \[N_REGIONS] ();\n" + append entity "\n" + if {$cnfg(en_avx) eq 1} { + append entity " // Application control AVX\n" + append entity " AXI4 #(.AXI4_DATA_BITS(AVX_DATA_BITS)) axim_ctrl \[N_REGIONS] ();\n" + } + append entity "\n" + append entity " // Stream to application\n" + append entity " AXI4S axis_dyn_out\[N_CHAN] ();\n" + append entity "\n" + append entity " // Stream from application\n" + append entity " AXI4S axis_dyn_in\[N_CHAN] ();\n" + append entity "\n" + append entity " // Descriptor bypass\n" + append entity " xdmaIntf xdma_req \[N_CHAN] ();\n" + append entity "\n" + if {$cnfg(en_ddr) eq 1} { + append entity " // DDR AXI mm\n" + append entity " AXI4 axi_ddr_in\[2*N_DDR_CHAN] ();\n" + append entity "\n" + } + if {$cnfg(en_fv) eq 1} { + append entity "\n" + append entity " // RDMA\n" + append entity " metaIntf #(.DATA_BITS(32)) arp_lookup_request();\n" + append entity " metaIntf #(.DATA_BITS(56)) arp_lookup_reply();\n" + append entity " metaIntf #(.DATA_BITS(32)) set_ip_addr();\n" + append entity " metaIntf #(.DATA_BITS(4)) set_board_number();\n" + append entity " metaIntf #(.DATA_BITS(144)) qp_interface ();\n" + append entity " metaIntf #(.DATA_BITS(184)) conn_interface ();\n" + append entity "\n" + append entity " // FV requests\n" + append entity " metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_host \[N_REGIONS] ();\n" + if {$cnfg(en_fvv) eq 1} { + append entity " metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_card \[N_REGIONS] ();\n" + append entity " metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_fv \[N_REGIONS] ();\n" + } + append entity "\n" + append entity " // RDMA interface\n" + append entity " reqIntf rdma_rd_cmd \[N_REGIONS] ();\n" + append entity " reqIntf rdma_wr_cmd \[N_REGIONS] ();\n" + append entity " AXI4S axis_rdma_rd_data 
\[N_REGIONS] ();\n" + append entity " AXI4S axis_rdma_wr_data \[N_REGIONS] ();\n" + append entity "\n" + append entity " logic \[2:0] lowspeed_ctrl;\n" + append entity "\n" + } + if {$cnfg(en_pr) eq 1} { + append entity " // PR clock\n" + append entity " wire pclk;\n" + append entity " wire presetn;\n" + append entity "\n" + append entity " // PR streams\n" + append entity " AXI4S axis_pr();\n" + append entity "\n" + append entity " wire\[31:0] axis_pr_data;\n" + append entity " wire axis_pr_ready;\n" + append entity " wire axis_pr_valid;\n" + append entity " \n" + append entity " // PR ICAP\n" + append entity " wire\[31:0] icap_out;\n" + append entity " wire icap_avail;\n" + append entity " wire icap_error;\n" + append entity " wire icap_done;\n" + append entity " wire icap_csn;\n" + append entity " wire\[31:0] icap_data;\n" + append entity "\n" + } + append entity " // IO buffers\n" + append entity " IBUF rst_IBUF_inst (\n" + append entity " .O(resetn_0), // Buffer output\n" + append entity " .I(resetn_0_nb) // Buffer input (connect directly to top-level port)\n" + append entity " );\n" + append entity "\n" + append entity " IBUF perst_n_IBUF_inst (\n" + append entity " .O(perst_n),\n" + append entity " .I(perst_n_nb)\n" + append entity " );\n" + append entity "\n" + append entity " // -----------------------------------------------------------------\n" + append entity " // STATIC LAYER \n" + append entity " // -----------------------------------------------------------------\n" + append entity " design_static design_static_i\n" + append entity " (.aclk(aclk),\n" + append entity " .aresetn(aresetn),\n" + append entity " .axi_cnfg_araddr(axi_cnfg.araddr),\n" + append entity " .axi_cnfg_arprot(axi_cnfg.arprot),\n" + append entity " .axi_cnfg_arready(axi_cnfg.arready),\n" + append entity " .axi_cnfg_arvalid(axi_cnfg.arvalid),\n" + append entity " .axi_cnfg_awaddr(axi_cnfg.awaddr),\n" + append entity " .axi_cnfg_awprot(axi_cnfg.awprot),\n" + append entity "
.axi_cnfg_awready(axi_cnfg.awready),\n" + append entity " .axi_cnfg_awvalid(axi_cnfg.awvalid),\n" + append entity " .axi_cnfg_bready(axi_cnfg.bready),\n" + append entity " .axi_cnfg_bresp(axi_cnfg.bresp),\n" + append entity " .axi_cnfg_bvalid(axi_cnfg.bvalid),\n" + append entity " .axi_cnfg_rdata(axi_cnfg.rdata),\n" + append entity " .axi_cnfg_rready(axi_cnfg.rready),\n" + append entity " .axi_cnfg_rresp(axi_cnfg.rresp),\n" + append entity " .axi_cnfg_rvalid(axi_cnfg.rvalid),\n" + append entity " .axi_cnfg_wdata(axi_cnfg.wdata),\n" + append entity " .axi_cnfg_wready(axi_cnfg.wready),\n" + append entity " .axi_cnfg_wstrb(axi_cnfg.wstrb),\n" + append entity " .axi_cnfg_wvalid(axi_cnfg.wvalid),\n" + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + append entity " .axi_ctrl_$i\_araddr(axi_ctrl\[$i].araddr),\n" + append entity " .axi_ctrl_$i\_arprot(axi_ctrl\[$i].arprot),\n" + append entity " .axi_ctrl_$i\_arready(axi_ctrl\[$i].arready),\n" + append entity " .axi_ctrl_$i\_arvalid(axi_ctrl\[$i].arvalid),\n" + append entity " .axi_ctrl_$i\_awaddr(axi_ctrl\[$i].awaddr),\n" + append entity " .axi_ctrl_$i\_awprot(axi_ctrl\[$i].awprot),\n" + append entity " .axi_ctrl_$i\_awready(axi_ctrl\[$i].awready),\n" + append entity " .axi_ctrl_$i\_awvalid(axi_ctrl\[$i].awvalid),\n" + append entity " .axi_ctrl_$i\_bready(axi_ctrl\[$i].bready),\n" + append entity " .axi_ctrl_$i\_bresp(axi_ctrl\[$i].bresp),\n" + append entity " .axi_ctrl_$i\_bvalid(axi_ctrl\[$i].bvalid),\n" + append entity " .axi_ctrl_$i\_rdata(axi_ctrl\[$i].rdata),\n" + append entity " .axi_ctrl_$i\_rready(axi_ctrl\[$i].rready),\n" + append entity " .axi_ctrl_$i\_rresp(axi_ctrl\[$i].rresp),\n" + append entity " .axi_ctrl_$i\_rvalid(axi_ctrl\[$i].rvalid),\n" + append entity " .axi_ctrl_$i\_wdata(axi_ctrl\[$i].wdata),\n" + append entity " .axi_ctrl_$i\_wready(axi_ctrl\[$i].wready),\n" + append entity " .axi_ctrl_$i\_wstrb(axi_ctrl\[$i].wstrb),\n" + append entity " .axi_ctrl_$i\_wvalid(axi_ctrl\[$i].wvalid),\n" + } + if 
{$cnfg(en_avx) eq 1} { + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + append entity " .axim_ctrl_$i\_araddr(axim_ctrl\[$i].araddr),\n" + append entity " .axim_ctrl_$i\_arburst(axim_ctrl\[$i].arburst),\n" + append entity " .axim_ctrl_$i\_arcache(axim_ctrl\[$i].arcache),\n" + append entity " .axim_ctrl_$i\_arlen(axim_ctrl\[$i].arlen),\n" + append entity " .axim_ctrl_$i\_arlock(axim_ctrl\[$i].arlock),\n" + append entity " .axim_ctrl_$i\_arprot(axim_ctrl\[$i].arprot),\n" + append entity " .axim_ctrl_$i\_arqos(axim_ctrl\[$i].arqos),\n" + append entity " .axim_ctrl_$i\_arready(axim_ctrl\[$i].arready),\n" + append entity " .axim_ctrl_$i\_arregion(axim_ctrl\[$i].arregion),\n" + append entity " .axim_ctrl_$i\_arsize(axim_ctrl\[$i].arsize),\n" + append entity " .axim_ctrl_$i\_arvalid(axim_ctrl\[$i].arvalid),\n" + append entity " .axim_ctrl_$i\_awaddr(axim_ctrl\[$i].awaddr),\n" + append entity " .axim_ctrl_$i\_awburst(axim_ctrl\[$i].awburst),\n" + append entity " .axim_ctrl_$i\_awcache(axim_ctrl\[$i].awcache),\n" + append entity " .axim_ctrl_$i\_awlen(axim_ctrl\[$i].awlen),\n" + append entity " .axim_ctrl_$i\_awlock(axim_ctrl\[$i].awlock),\n" + append entity " .axim_ctrl_$i\_awprot(axim_ctrl\[$i].awprot),\n" + append entity " .axim_ctrl_$i\_awqos(axim_ctrl\[$i].awqos),\n" + append entity " .axim_ctrl_$i\_awready(axim_ctrl\[$i].awready),\n" + append entity " .axim_ctrl_$i\_awregion(axim_ctrl\[$i].awregion),\n" + append entity " .axim_ctrl_$i\_awsize(axim_ctrl\[$i].awsize),\n" + append entity " .axim_ctrl_$i\_awvalid(axim_ctrl\[$i].awvalid),\n" + append entity " .axim_ctrl_$i\_bready(axim_ctrl\[$i].bready),\n" + append entity " .axim_ctrl_$i\_bresp(axim_ctrl\[$i].bresp),\n" + append entity " .axim_ctrl_$i\_bvalid(axim_ctrl\[$i].bvalid),\n" + append entity " .axim_ctrl_$i\_rdata(axim_ctrl\[$i].rdata),\n" + append entity " .axim_ctrl_$i\_rlast(axim_ctrl\[$i].rlast),\n" + append entity " .axim_ctrl_$i\_rready(axim_ctrl\[$i].rready),\n" + append entity " 
.axim_ctrl_$i\_rresp(axim_ctrl\[$i].rresp),\n" + append entity " .axim_ctrl_$i\_rvalid(axim_ctrl\[$i].rvalid),\n" + append entity " .axim_ctrl_$i\_wdata(axim_ctrl\[$i].wdata),\n" + append entity " .axim_ctrl_$i\_wlast(axim_ctrl\[$i].wlast),\n" + append entity " .axim_ctrl_$i\_wready(axim_ctrl\[$i].wready),\n" + append entity " .axim_ctrl_$i\_wstrb(axim_ctrl\[$i].wstrb),\n" + append entity " .axim_ctrl_$i\_wvalid(axim_ctrl\[$i].wvalid),\n" + } + } + for {set i 0} {$i < $cnfg(n_chan)} {incr i} { + append entity " .axis_dyn_in_$i\_tdata(axis_dyn_in\[$i].tdata),\n" + append entity " .axis_dyn_in_$i\_tkeep(axis_dyn_in\[$i].tkeep),\n" + append entity " .axis_dyn_in_$i\_tlast(axis_dyn_in\[$i].tlast),\n" + append entity " .axis_dyn_in_$i\_tready(axis_dyn_in\[$i].tready),\n" + append entity " .axis_dyn_in_$i\_tvalid(axis_dyn_in\[$i].tvalid),\n" + append entity " .axis_dyn_out_$i\_tdata(axis_dyn_out\[$i].tdata),\n" + append entity " .axis_dyn_out_$i\_tkeep(axis_dyn_out\[$i].tkeep),\n" + append entity " .axis_dyn_out_$i\_tlast(axis_dyn_out\[$i].tlast),\n" + append entity " .axis_dyn_out_$i\_tready(axis_dyn_out\[$i].tready),\n" + append entity " .axis_dyn_out_$i\_tvalid(axis_dyn_out\[$i].tvalid),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_ctl(xdma_req\[$i].c2h_ctl),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_dst_addr(xdma_req\[$i].c2h_addr),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_len(xdma_req\[$i].c2h_len),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_load(xdma_req\[$i].c2h_valid),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_ready(xdma_req\[$i].c2h_ready),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_src_addr(0),\n" + append entity " .dsc_bypass_h2c_$i\_dsc_byp_ctl(xdma_req\[$i].h2c_ctl),\n" + append entity " .dsc_bypass_h2c_$i\_dsc_byp_dst_addr(0),\n" + append entity " .dsc_bypass_h2c_$i\_dsc_byp_len(xdma_req\[$i].h2c_len),\n" + append entity " .dsc_bypass_h2c_$i\_dsc_byp_load(xdma_req\[$i].h2c_valid),\n" + append entity " 
.dsc_bypass_h2c_$i\_dsc_byp_ready(xdma_req\[$i].h2c_ready),\n" + append entity " .dsc_bypass_h2c_$i\_dsc_byp_src_addr(xdma_req\[$i].h2c_addr),\n" + append entity " .dsc_status_c2h_sts$i\(xdma_req\[$i].c2h_status),\n" + append entity " .dsc_status_h2c_sts$i\(xdma_req\[$i].h2c_status),\n" + } + if {$cnfg(en_ddr) eq 1} { + if {$cnfg(ddr_0) eq 1} { + append entity " .c0_ddr4_act_n(c0_ddr4_act_n),\n" + append entity " .c0_ddr4_adr(c0_ddr4_adr),\n" + append entity " .c0_ddr4_ba(c0_ddr4_ba),\n" + append entity " .c0_ddr4_bg(c0_ddr4_bg),\n" + append entity " .c0_ddr4_ck_c(c0_ddr4_ck_c),\n" + append entity " .c0_ddr4_ck_t(c0_ddr4_ck_t),\n" + append entity " .c0_ddr4_cke(c0_ddr4_cke),\n" + append entity " .c0_ddr4_cs_n(c0_ddr4_cs_n),\n" + append entity " .c0_ddr4_dq(c0_ddr4_dq),\n" + append entity " .c0_ddr4_dqs_c(c0_ddr4_dqs_c),\n" + append entity " .c0_ddr4_dqs_t(c0_ddr4_dqs_t),\n" + append entity " .c0_ddr4_odt(c0_ddr4_odt),\n" + append entity " .c0_ddr4_par(c0_ddr4_par),\n" + append entity " .c0_ddr4_reset_n(c0_ddr4_reset_n),\n" + append entity " .c0_sys_clk_0_clk_n(c0_sys_clk_n),\n" + append entity " .c0_sys_clk_0_clk_p(c0_sys_clk_p),\n" + append entity " .axi_ctrl_ddr_0_araddr(0),\n" + append entity " .axi_ctrl_ddr_0_arready(),\n" + append entity " .axi_ctrl_ddr_0_arvalid(1'b0),\n" + append entity " .axi_ctrl_ddr_0_awaddr(0),\n" + append entity " .axi_ctrl_ddr_0_awready(),\n" + append entity " .axi_ctrl_ddr_0_awvalid(1'b0),\n" + append entity " .axi_ctrl_ddr_0_bready(1'b0),\n" + append entity " .axi_ctrl_ddr_0_bresp(),\n" + append entity " .axi_ctrl_ddr_0_bvalid(),\n" + append entity " .axi_ctrl_ddr_0_rdata(),\n" + append entity " .axi_ctrl_ddr_0_rready(1'b0),\n" + append entity " .axi_ctrl_ddr_0_rresp(),\n" + append entity " .axi_ctrl_ddr_0_rvalid(),\n" + append entity " .axi_ctrl_ddr_0_wdata(0),\n" + append entity " .axi_ctrl_ddr_0_wready(),\n" + append entity " .axi_ctrl_ddr_0_wvalid(1'b0),\n" + } + if {$cnfg(ddr_1) eq 1} { + append entity " 
.c1_ddr4_act_n(c1_ddr4_act_n),\n" + append entity " .c1_ddr4_adr(c1_ddr4_adr),\n" + append entity " .c1_ddr4_ba(c1_ddr4_ba),\n" + append entity " .c1_ddr4_bg(c1_ddr4_bg),\n" + append entity " .c1_ddr4_ck_c(c1_ddr4_ck_c),\n" + append entity " .c1_ddr4_ck_t(c1_ddr4_ck_t),\n" + append entity " .c1_ddr4_cke(c1_ddr4_cke),\n" + append entity " .c1_ddr4_cs_n(c1_ddr4_cs_n),\n" + append entity " .c1_ddr4_dq(c1_ddr4_dq),\n" + append entity " .c1_ddr4_dqs_c(c1_ddr4_dqs_c),\n" + append entity " .c1_ddr4_dqs_t(c1_ddr4_dqs_t),\n" + append entity " .c1_ddr4_odt(c1_ddr4_odt),\n" + append entity " .c1_ddr4_par(c1_ddr4_par),\n" + append entity " .c1_ddr4_reset_n(c1_ddr4_reset_n),\n" + append entity " .c1_sys_clk_0_clk_n(c1_sys_clk_n),\n" + append entity " .c1_sys_clk_0_clk_p(c1_sys_clk_p),\n" + append entity " .axi_ctrl_ddr_1_araddr(0),\n" + append entity " .axi_ctrl_ddr_1_arready(),\n" + append entity " .axi_ctrl_ddr_1_arvalid(1'b0),\n" + append entity " .axi_ctrl_ddr_1_awaddr(0),\n" + append entity " .axi_ctrl_ddr_1_awready(),\n" + append entity " .axi_ctrl_ddr_1_awvalid(1'b0),\n" + append entity " .axi_ctrl_ddr_1_bready(1'b0),\n" + append entity " .axi_ctrl_ddr_1_bresp(),\n" + append entity " .axi_ctrl_ddr_1_bvalid(),\n" + append entity " .axi_ctrl_ddr_1_rdata(),\n" + append entity " .axi_ctrl_ddr_1_rready(1'b0),\n" + append entity " .axi_ctrl_ddr_1_rresp(),\n" + append entity " .axi_ctrl_ddr_1_rvalid(),\n" + append entity " .axi_ctrl_ddr_1_wdata(0),\n" + append entity " .axi_ctrl_ddr_1_wready(),\n" + append entity " .axi_ctrl_ddr_1_wvalid(1'b0),\n" + } + if {$cnfg(ddr_2) eq 1} { + append entity " .c2_ddr4_act_n(c2_ddr4_act_n),\n" + append entity " .c2_ddr4_adr(c2_ddr4_adr),\n" + append entity " .c2_ddr4_ba(c2_ddr4_ba),\n" + append entity " .c2_ddr4_bg(c2_ddr4_bg),\n" + append entity " .c2_ddr4_ck_c(c2_ddr4_ck_c),\n" + append entity " .c2_ddr4_ck_t(c2_ddr4_ck_t),\n" + append entity " .c2_ddr4_cke(c2_ddr4_cke),\n" + append entity " .c2_ddr4_cs_n(c2_ddr4_cs_n),\n" + append entity " 
.c2_ddr4_dq(c2_ddr4_dq),\n" + append entity " .c2_ddr4_dqs_c(c2_ddr4_dqs_c),\n" + append entity " .c2_ddr4_dqs_t(c2_ddr4_dqs_t),\n" + append entity " .c2_ddr4_odt(c2_ddr4_odt),\n" + append entity " .c2_ddr4_par(c2_ddr4_par),\n" + append entity " .c2_ddr4_reset_n(c2_ddr4_reset_n),\n" + append entity " .c2_sys_clk_0_clk_n(c2_sys_clk_n),\n" + append entity " .c2_sys_clk_0_clk_p(c2_sys_clk_p),\n" + append entity " .axi_ctrl_ddr_2_araddr(0),\n" + append entity " .axi_ctrl_ddr_2_arready(),\n" + append entity " .axi_ctrl_ddr_2_arvalid(1'b0),\n" + append entity " .axi_ctrl_ddr_2_awaddr(0),\n" + append entity " .axi_ctrl_ddr_2_awready(),\n" + append entity " .axi_ctrl_ddr_2_awvalid(1'b0),\n" + append entity " .axi_ctrl_ddr_2_bready(1'b0),\n" + append entity " .axi_ctrl_ddr_2_bresp(),\n" + append entity " .axi_ctrl_ddr_2_bvalid(),\n" + append entity " .axi_ctrl_ddr_2_rdata(),\n" + append entity " .axi_ctrl_ddr_2_rready(1'b0),\n" + append entity " .axi_ctrl_ddr_2_rresp(),\n" + append entity " .axi_ctrl_ddr_2_rvalid(),\n" + append entity " .axi_ctrl_ddr_2_wdata(0),\n" + append entity " .axi_ctrl_ddr_2_wready(),\n" + append entity " .axi_ctrl_ddr_2_wvalid(1'b0),\n" + } + if {$cnfg(ddr_3) eq 1} { + append entity " .c3_ddr4_act_n(c3_ddr4_act_n),\n" + append entity " .c3_ddr4_adr(c3_ddr4_adr),\n" + append entity " .c3_ddr4_ba(c3_ddr4_ba),\n" + append entity " .c3_ddr4_bg(c3_ddr4_bg),\n" + append entity " .c3_ddr4_ck_c(c3_ddr4_ck_c),\n" + append entity " .c3_ddr4_ck_t(c3_ddr4_ck_t),\n" + append entity " .c3_ddr4_cke(c3_ddr4_cke),\n" + append entity " .c3_ddr4_cs_n(c3_ddr4_cs_n),\n" + append entity " .c3_ddr4_dq(c3_ddr4_dq),\n" + append entity " .c3_ddr4_dqs_c(c3_ddr4_dqs_c),\n" + append entity " .c3_ddr4_dqs_t(c3_ddr4_dqs_t),\n" + append entity " .c3_ddr4_odt(c3_ddr4_odt),\n" + append entity " .c3_ddr4_par(c3_ddr4_par),\n" + append entity " .c3_ddr4_reset_n(c3_ddr4_reset_n),\n" + append entity " .c3_sys_clk_0_clk_n(c3_sys_clk_n),\n" + append entity " 
.c3_sys_clk_0_clk_p(c3_sys_clk_p),\n" + append entity " .axi_ctrl_ddr_3_araddr(0),\n" + append entity " .axi_ctrl_ddr_3_arready(),\n" + append entity " .axi_ctrl_ddr_3_arvalid(1'b0),\n" + append entity " .axi_ctrl_ddr_3_awaddr(0),\n" + append entity " .axi_ctrl_ddr_3_awready(),\n" + append entity " .axi_ctrl_ddr_3_awvalid(1'b0),\n" + append entity " .axi_ctrl_ddr_3_bready(1'b0),\n" + append entity " .axi_ctrl_ddr_3_bresp(),\n" + append entity " .axi_ctrl_ddr_3_bvalid(),\n" + append entity " .axi_ctrl_ddr_3_rdata(),\n" + append entity " .axi_ctrl_ddr_3_rready(1'b0),\n" + append entity " .axi_ctrl_ddr_3_rresp(),\n" + append entity " .axi_ctrl_ddr_3_rvalid(),\n" + append entity " .axi_ctrl_ddr_3_wdata(0),\n" + append entity " .axi_ctrl_ddr_3_wready(),\n" + append entity " .axi_ctrl_ddr_3_wvalid(1'b0),\n" + } + for {set i 0} {$i < 2 * $cnfg(n_ddr_chan)} {incr i} { + append entity " .axi_ddr_in_$i\_araddr(axi_ddr_in\[$i].araddr),\n" + append entity " .axi_ddr_in_$i\_arburst(axi_ddr_in\[$i].arburst),\n" + append entity " .axi_ddr_in_$i\_arcache(axi_ddr_in\[$i].arcache),\n" + append entity " .axi_ddr_in_$i\_arid(axi_ddr_in\[$i].arid),\n" + append entity " .axi_ddr_in_$i\_arlen(axi_ddr_in\[$i].arlen),\n" + append entity " .axi_ddr_in_$i\_arlock(axi_ddr_in\[$i].arlock),\n" + append entity " .axi_ddr_in_$i\_arprot(axi_ddr_in\[$i].arprot),\n" + append entity " .axi_ddr_in_$i\_arqos(axi_ddr_in\[$i].arqos),\n" + append entity " .axi_ddr_in_$i\_arready(axi_ddr_in\[$i].arready),\n" + append entity " .axi_ddr_in_$i\_arregion(axi_ddr_in\[$i].arregion),\n" + append entity " .axi_ddr_in_$i\_arsize(axi_ddr_in\[$i].arsize),\n" + append entity " .axi_ddr_in_$i\_arvalid(axi_ddr_in\[$i].arvalid),\n" + append entity " .axi_ddr_in_$i\_awaddr(axi_ddr_in\[$i].awaddr),\n" + append entity " .axi_ddr_in_$i\_awburst(axi_ddr_in\[$i].awburst),\n" + append entity " .axi_ddr_in_$i\_awcache(axi_ddr_in\[$i].awcache),\n" + append entity " .axi_ddr_in_$i\_awid(axi_ddr_in\[$i].awid),\n" + append entity " 
.axi_ddr_in_$i\_awlen(axi_ddr_in\[$i].awlen),\n" + append entity " .axi_ddr_in_$i\_awlock(axi_ddr_in\[$i].awlock),\n" + append entity " .axi_ddr_in_$i\_awprot(axi_ddr_in\[$i].awprot),\n" + append entity " .axi_ddr_in_$i\_awqos(axi_ddr_in\[$i].awqos),\n" + append entity " .axi_ddr_in_$i\_awready(axi_ddr_in\[$i].awready),\n" + append entity " .axi_ddr_in_$i\_awregion(axi_ddr_in\[$i].awregion),\n" + append entity " .axi_ddr_in_$i\_awsize(axi_ddr_in\[$i].awsize),\n" + append entity " .axi_ddr_in_$i\_awvalid(axi_ddr_in\[$i].awvalid),\n" + append entity " .axi_ddr_in_$i\_bid(axi_ddr_in\[$i].bid),\n" + append entity " .axi_ddr_in_$i\_bready(axi_ddr_in\[$i].bready),\n" + append entity " .axi_ddr_in_$i\_bresp(axi_ddr_in\[$i].bresp),\n" + append entity " .axi_ddr_in_$i\_bvalid(axi_ddr_in\[$i].bvalid),\n" + append entity " .axi_ddr_in_$i\_rdata(axi_ddr_in\[$i].rdata),\n" + append entity " .axi_ddr_in_$i\_rid(axi_ddr_in\[$i].rid),\n" + append entity " .axi_ddr_in_$i\_rlast(axi_ddr_in\[$i].rlast),\n" + append entity " .axi_ddr_in_$i\_rready(axi_ddr_in\[$i].rready),\n" + append entity " .axi_ddr_in_$i\_rresp(axi_ddr_in\[$i].rresp),\n" + append entity " .axi_ddr_in_$i\_rvalid(axi_ddr_in\[$i].rvalid),\n" + append entity " .axi_ddr_in_$i\_wdata(axi_ddr_in\[$i].wdata),\n" + append entity " .axi_ddr_in_$i\_wlast(axi_ddr_in\[$i].wlast),\n" + append entity " .axi_ddr_in_$i\_wready(axi_ddr_in\[$i].wready),\n" + append entity " .axi_ddr_in_$i\_wstrb(axi_ddr_in\[$i].wstrb),\n" + append entity " .axi_ddr_in_$i\_wvalid(axi_ddr_in\[$i].wvalid),\n" + } + } + append entity " .pcie_clk_clk_n(pcie_clk_clk_n),\n" + append entity " .pcie_clk_clk_p(pcie_clk_clk_p),\n" + append entity " .pcie_x16_rxn(pcie_x16_rxn),\n" + append entity " .pcie_x16_rxp(pcie_x16_rxp),\n" + append entity " .pcie_x16_txn(pcie_x16_txn),\n" + append entity " .pcie_x16_txp(pcie_x16_txp),\n" + if {$cnfg(en_pr) eq 1} { + append entity " .pclk(pclk),\n" + append entity " .presetn(presetn),\n" + } + append entity " 
.perst_n(perst_n),\n" + append entity " .reset_0(~resetn_0),\n" + append entity " .usr_irq(usr_irq)\n" + append entity " );\n" + append entity " \n" + append entity " // -----------------------------------------------------------------\n" + append entity " // STATIC CONFIG \n" + append entity " // -----------------------------------------------------------------\n" + append entity " static_slave inst_static_slave (\n" + append entity " .aclk(aclk),\n" + append entity " .aresetn(aresetn),\n" + if {$cnfg(en_pr) eq 1} { + append entity " .pr_xdma_req(xdma_req\[$cnfg(pr_chan)]),\n" + } + if {$cnfg(en_fv) eq 1} { + append entity " .arp_lookup_request(arp_lookup_request),\n" + append entity " .arp_lookup_reply(arp_lookup_reply),\n" + append entity " .set_ip_addr(set_ip_addr),\n" + append entity " .set_board_number(set_board_number),\n" + append entity " .qp_interface(qp_interface),\n" + append entity " .conn_interface(conn_interface),\n" + append entity " .lowspeed_ctrl(lowspeed_ctrl),\n" + } else { + append entity " .lowspeed_ctrl(),\n" + } + append entity " .axi_ctrl(axi_cnfg)\n" + append entity " );\n" + append entity "\n" + if {$cnfg(en_fv) eq 1} { + append entity " // -----------------------------------------------------------------\n" + append entity " // RDMA \n" + append entity " // -----------------------------------------------------------------\n" + append entity " assign qsfp$cnfg(qsfp)_resetn = lowspeed_ctrl\[0];\n" + append entity " assign qsfp$cnfg(qsfp)_lpmode = lowspeed_ctrl\[1];\n" + append entity " assign qsfp$cnfg(qsfp)_modseln = lowspeed_ctrl\[2];\n" + append entity "\n" + append entity " network_top inst_network_top (\n" + append entity " .aclk(aclk),\n" + append entity " .aresetn(aresetn),\n" + append entity " .sys_reset(~resetn_0),\n" + append entity " .dclk(aclk),\n" + append entity " .gt_refclk_p(gt$cnfg(qsfp)_refclk_p),\n" + append entity " .gt_refclk_n(gt$cnfg(qsfp)_refclk_n),\n" + append entity " .gt_rxp_in(gt$cnfg(qsfp)_rxp_in),\n" + append 
entity " .gt_rxn_in(gt$cnfg(qsfp)_rxn_in),\n" + append entity " .gt_txp_out(gt$cnfg(qsfp)_txp_out),\n" + append entity " .gt_txn_out(gt$cnfg(qsfp)_txn_out),\n" + append entity " .arp_lookup_request(arp_lookup_request),\n" + append entity " .arp_lookup_reply(arp_lookup_reply),\n" + append entity " .set_ip_addr(set_ip_addr),\n" + append entity " .set_board_number(set_board_number),\n" + append entity " .qp_interface(qp_interface),\n" + append entity " .conn_interface(conn_interface),\n" + append entity " .rdma_req_host(rdma_req_host),\n" + if {$cnfg(en_fvv) eq 1} { + append entity " .rdma_req_card(rdma_req_card),\n" + append entity " .rdma_req_fv(rdma_req_fv),\n" + } + append entity " .rdma_rd_cmd(rdma_rd_cmd),\n" + append entity " .rdma_wr_cmd(rdma_wr_cmd),\n" + append entity " .axis_rdma_rd_data(axis_rdma_rd_data),\n" + append entity " .axis_rdma_wr_data(axis_rdma_wr_data)\n" + append entity " );\n" + append entity "\n" + } + if {$cnfg(en_pr) eq 1} { + append entity " // -----------------------------------------------------------------\n" + append entity " // PR \n" + append entity " // -----------------------------------------------------------------\n" + append entity " pr_clock_converter inst_clk_cnvrt_pr (\n" + append entity " .s_axis_aresetn(aresetn),\n" + append entity " .m_axis_aresetn(presetn),\n" + append entity " .s_axis_aclk(aclk),\n" + append entity " .s_axis_tvalid(axis_dyn_out\[$cnfg(pr_chan)].tvalid),\n" + append entity " .s_axis_tready(axis_dyn_out\[$cnfg(pr_chan)].tready),\n" + append entity " .s_axis_tdata(axis_dyn_out\[$cnfg(pr_chan)].tdata),\n" + append entity " .s_axis_tkeep(axis_dyn_out\[$cnfg(pr_chan)].tkeep),\n" + append entity " .s_axis_tlast(axis_dyn_out\[$cnfg(pr_chan)].tlast),\n" + append entity " .m_axis_aclk(pclk),\n" + append entity " .m_axis_tvalid(axis_pr.tvalid),\n" + append entity " .m_axis_tready(axis_pr.tready),\n" + append entity " .m_axis_tdata(axis_pr.tdata),\n" + append entity " .m_axis_tkeep(axis_pr.tkeep),\n" + append 
entity " .m_axis_tlast(axis_pr.tlast)\n" + append entity " );\n" + append entity " \n" + append entity " pr_dwidth_converter inst_dwidth_cnvrt_pr (\n" + append entity " .aclk(pclk),\n" + append entity " .aresetn(presetn),\n" + append entity " .s_axis_tvalid(axis_pr.tvalid),\n" + append entity " .s_axis_tready(axis_pr.tready),\n" + append entity " .s_axis_tdata(axis_pr.tdata),\n" + append entity " .s_axis_tkeep(axis_pr.tkeep),\n" + append entity " .s_axis_tlast(axis_pr.tlast),\n" + append entity " .m_axis_tvalid(axis_pr_valid),\n" + append entity " .m_axis_tready(axis_pr_ready),\n" + append entity " .m_axis_tdata(axis_pr_data),\n" + append entity " .m_axis_tkeep(),\n" + append entity " .m_axis_tlast()\n" + append entity " );\n" + append entity " \n" + append entity " // Partial reconfiguration\n" + append entity " assign icap_csn = ~axis_pr_valid;\n" + append entity " assign axis_pr_ready = 1'b1;\n" + append entity " assign icap_data = {axis_pr_data\[24], axis_pr_data\[25], axis_pr_data\[26], axis_pr_data\[27], axis_pr_data\[28], axis_pr_data\[29], axis_pr_data\[30], axis_pr_data\[31],\n" + append entity " axis_pr_data\[16], axis_pr_data\[17], axis_pr_data\[18], axis_pr_data\[19], axis_pr_data\[20], axis_pr_data\[21], axis_pr_data\[22], axis_pr_data\[23],\n" + append entity " axis_pr_data\[8], axis_pr_data\[9], axis_pr_data\[10], axis_pr_data\[11], axis_pr_data\[12], axis_pr_data\[13], axis_pr_data\[14], axis_pr_data\[15],\n" + append entity " axis_pr_data\[0], axis_pr_data\[1], axis_pr_data\[2], axis_pr_data\[3], axis_pr_data\[4], axis_pr_data\[5], axis_pr_data\[6], axis_pr_data\[7]};\n" + append entity " \n" + append entity " ICAPE3 #(\n" + append entity " .ICAP_AUTO_SWITCH(\"DISABLE\"),\n" + append entity " .SIM_CFG_FILE_NAME(\"NONE\")\n" + append entity " )\n" + append entity " ICAPE3_inst (\n" + append entity " .AVAIL(icap_avail), // 1-bit output: Availability status of ICAP\n" + append entity " .O(icap_out), // 32-bit output: Configuration data output bus\n" + 
append entity " .PRDONE(icap_done), // 1-bit output: Indicates completion of Partial Reconfiguration\n" + append entity " .PRERROR(icap_error), // 1-bit output: Indicates Error during Partial Reconfiguration\n" + append entity " .CLK(pclk), // 1-bit input: Clock input\n" + append entity " .CSIB(icap_csn), // 1-bit input: Active-Low ICAP enable\n" + append entity " .I(icap_data), // 32-bit input: Configuration data input bus\n" + append entity " .RDWRB(1'b0) // 1-bit input: Read/Write Select input\n" + append entity " );\n" + append entity " \n" + } + append entity " // -----------------------------------------------------------------\n" + append entity " // DYNAMIC LAYER \n" + append entity " // -----------------------------------------------------------------\n" + append entity " design_dynamic_wrapper inst_dynamic (\n" + append entity " .sys_rst(~resetn_0),\n" + append entity " .aresetn(aresetn),\n" + append entity " .aclk(aclk),\n" + append entity " .axi_ctrl(axi_ctrl\[0+:$cnfg(n_reg)]),\n" + if {$cnfg(en_avx) eq 1} { + append entity " .axim_ctrl(axim_ctrl\[0+:$cnfg(n_reg)]),\n" + } + if {$cnfg(en_ddr) eq 1} { + append entity " .axi_ddr_in(axi_ddr_in\[0+:$cnfg(n_ddr_chan)*2]),\n" + } + if {$cnfg(en_strm) eq 1} { + append entity " .axis_host_in(axis_dyn_in\[$cnfg(strm_chan)]),\n" + append entity " .axis_host_out(axis_dyn_out\[$cnfg(strm_chan)]),\n" + append entity " .host_xdma_req(xdma_req\[$cnfg(strm_chan)]),\n" + } + if {$cnfg(en_ddr) eq 1} { + append entity " .axis_card_in(axis_dyn_in\[$cnfg(ddr_chan)]),\n" + append entity " .axis_card_out(axis_dyn_out\[$cnfg(ddr_chan)]),\n" + append entity " .card_xdma_req(xdma_req\[$cnfg(ddr_chan)]),\n" + } + if {$cnfg(en_fv) eq 1} { + append entity " .rdma_req_host(rdma_req_host\[0+:$cnfg(n_reg)]),\n" + if {$cnfg(en_fvv) eq 1} { + append entity " .rdma_req_card(rdma_req_card\[0+:$cnfg(n_reg)]),\n" + append entity " .rdma_req_fv(rdma_req_fv\[0+:$cnfg(n_reg)]),\n" + } + append entity " 
.rdma_rd_cmd(rdma_rd_cmd\[0+:$cnfg(n_reg)]),\n" + append entity " .rdma_wr_cmd(rdma_wr_cmd\[0+:$cnfg(n_reg)]),\n" + append entity " .axis_rdma_rd_data(axis_rdma_rd_data\[0+:$cnfg(n_reg)]),\n" + append entity " .axis_rdma_wr_data(axis_rdma_wr_data\[0+:$cnfg(n_reg)]),\n" + } + append entity " .usr_irq(usr_irq\[0+:$cnfg(n_reg)]),\n" + append entity " .S_BSCAN_drck(),\n" + append entity " .S_BSCAN_shift(),\n" + append entity " .S_BSCAN_tdi(),\n" + append entity " .S_BSCAN_update(),\n" + append entity " .S_BSCAN_sel(),\n" + append entity " .S_BSCAN_tdo(),\n" + append entity " .S_BSCAN_tms(),\n" + append entity " .S_BSCAN_tck(),\n" + append entity " .S_BSCAN_runtest(),\n" + append entity " .S_BSCAN_reset(),\n" + append entity " .S_BSCAN_capture(),\n" + append entity " .S_BSCAN_bscanid_en() \n" + append entity " );\n" + append entity " \n" + append entity "endmodule\n" + append entity "\n" + lappend template $entity + set vho_file [open $f_out w] + foreach line $template { + puts $vho_file $line + } + close $vho_file +} \ No newline at end of file diff --git a/hw/scripts/wr_hdl/wr_hdl_top_u280.tcl b/hw/scripts/wr_hdl/wr_hdl_top_u280.tcl new file mode 100644 index 00000000..c60587ed --- /dev/null +++ b/hw/scripts/wr_hdl/wr_hdl_top_u280.tcl @@ -0,0 +1,581 @@ +######################################################################################### +# Write top level file +######################################################################################### +proc wr_hdl_top {f_out} { + upvar #0 cfg cnfg + + set template {} + set entity {} + append entity "`timescale 1ns / 1ps\n" + append entity "\n" + append entity "import lynxTypes::*;\n" + append entity "\n" + append entity "`include \"axi_macros.svh\"\n" + append entity "`include \"lynx_macros.svh\"\n" + append entity "//\n" + append entity "// Top Level\n" + append entity "//\n" + append entity "module top (\n" + if {$cnfg(en_fv) eq 1} { + append entity " input wire\[3:0] gt$cnfg(qsfp)_rxp_in,\n" + append entity " 
input wire\[3:0] gt$cnfg(qsfp)_rxn_in,\n" + append entity " output wire\[3:0] gt$cnfg(qsfp)_txp_out,\n" + append entity " output wire\[3:0] gt$cnfg(qsfp)_txn_out,\n" + append entity " input wire gt$cnfg(qsfp)_refclk_p,\n" + append entity " input wire gt$cnfg(qsfp)_refclk_n,\n" + } + if {$cnfg(en_ddr) eq 1} { + if {$cnfg(ddr_0) eq 1} { + append entity " output wire c0_ddr4_act_n,\n" + append entity " output wire\[16:0] c0_ddr4_adr,\n" + append entity " output wire\[1:0] c0_ddr4_ba,\n" + append entity " output wire\[1:0] c0_ddr4_bg,\n" + append entity " output wire\[0:0] c0_ddr4_ck_c,\n" + append entity " output wire\[0:0] c0_ddr4_ck_t,\n" + append entity " output wire\[0:0] c0_ddr4_cke,\n" + append entity " output wire\[0:0] c0_ddr4_cs_n,\n" + append entity " inout wire\[71:0] c0_ddr4_dq,\n" + append entity " inout wire\[17:0] c0_ddr4_dqs_c,\n" + append entity " inout wire\[17:0] c0_ddr4_dqs_t,\n" + append entity " output wire\[0:0] c0_ddr4_odt,\n" + append entity " output wire c0_ddr4_par,\n" + append entity " output wire c0_ddr4_reset_n,\n" + append entity " input wire c0_sys_clk_p,\n" + append entity " input wire c0_sys_clk_n,\n" + } + if {$cnfg(ddr_1) eq 1} { + append entity " output wire c1_ddr4_act_n,\n" + append entity " output wire\[16:0] c1_ddr4_adr,\n" + append entity " output wire\[1:0] c1_ddr4_ba,\n" + append entity " output wire\[1:0] c1_ddr4_bg,\n" + append entity " output wire\[0:0] c1_ddr4_ck_c,\n" + append entity " output wire\[0:0] c1_ddr4_ck_t,\n" + append entity " output wire\[0:0] c1_ddr4_cke,\n" + append entity " output wire\[0:0] c1_ddr4_cs_n,\n" + append entity " inout wire\[71:0] c1_ddr4_dq,\n" + append entity " inout wire\[17:0] c1_ddr4_dqs_c,\n" + append entity " inout wire\[17:0] c1_ddr4_dqs_t,\n" + append entity " output wire\[0:0] c1_ddr4_odt,\n" + append entity " output wire c1_ddr4_par,\n" + append entity " output wire c1_ddr4_reset_n,\n" + append entity " input wire c1_sys_clk_p,\n" + append entity " input wire c1_sys_clk_n,\n" + } + 
} + append entity " output wire fpga_burn,\n" + append entity " input wire\[0:0] pcie_clk_clk_n,\n" + append entity " input wire\[0:0] pcie_clk_clk_p,\n" + append entity " input wire\[15:0] pcie_x16_rxn,\n" + append entity " input wire\[15:0] pcie_x16_rxp,\n" + append entity " output wire\[15:0] pcie_x16_txn,\n" + append entity " output wire\[15:0] pcie_x16_txp,\n" + append entity " input wire perst_n_nb,\n" + append entity " input wire resetn_0_nb\n" + append entity ");\n" + append entity "\n" + append entity " \n" + append entity " // AXI resetn\n" + append entity " wire\[0:0] aresetn;\n" + append entity " // AXI clk (250 MHz)\n" + append entity " wire aclk;\n" + append entity "\n" + append entity " // IRQ\n" + append entity " wire\[N_REGIONS-1:0] usr_irq;\n" + append entity "\n" + append entity " wire resetn_0;\n" + append entity " wire perst_n;\n" + append entity "\n" + append entity " // Static config\n" + append entity " AXI4L axi_cnfg ();\n" + append entity "\n" + append entity " // Application control\n" + append entity " AXI4L axi_ctrl \[N_REGIONS] ();\n" + append entity "\n" + if {$cnfg(en_avx) eq 1} { + append entity " // Application control AVX\n" + append entity " AXI4 #(.AXI4_DATA_BITS(AVX_DATA_BITS)) axim_ctrl \[N_REGIONS] ();\n" + } + append entity "\n" + append entity " // Stream to application\n" + append entity " AXI4S axis_dyn_out\[N_CHAN] ();\n" + append entity "\n" + append entity " // Stream from application\n" + append entity " AXI4S axis_dyn_in\[N_CHAN] ();\n" + append entity "\n" + append entity " // Descriptor bypass\n" + append entity " xdmaIntf xdma_req \[N_CHAN] ();\n" + append entity "\n" + if {$cnfg(en_ddr) eq 1} { + append entity " // DDR AXI mm\n" + append entity " AXI4 axi_ddr_in\[2*N_DDR_CHAN] ();\n" + append entity "\n" + } + if {$cnfg(en_fv) eq 1} { + append entity "\n" + append entity " // RDMA\n" + append entity " metaIntf #(.DATA_BITS(32)) arp_lookup_request();\n" + append entity " metaIntf #(.DATA_BITS(56)) 
arp_lookup_reply();\n" + append entity " metaIntf #(.DATA_BITS(32)) set_ip_addr();\n" + append entity " metaIntf #(.DATA_BITS(4)) set_board_number();\n" + append entity " metaIntf #(.DATA_BITS(144)) qp_interface ();\n" + append entity " metaIntf #(.DATA_BITS(184)) conn_interface ();\n" + append entity "\n" + append entity " // FV requests\n" + append entity " metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_host \[N_REGIONS] ();\n" + if {$cnfg(en_fvv) eq 1} { + append entity " metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_card \[N_REGIONS] ();\n" + append entity " metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_fv \[N_REGIONS] ();\n" + } + append entity "\n" + append entity " // RDMA interface\n" + append entity " reqIntf rdma_rd_cmd \[N_REGIONS] ();\n" + append entity " reqIntf rdma_wr_cmd \[N_REGIONS] ();\n" + append entity " AXI4S axis_rdma_rd_data \[N_REGIONS] ();\n" + append entity " AXI4S axis_rdma_wr_data \[N_REGIONS] ();\n" + append entity "\n" + } + if {$cnfg(en_pr) eq 1} { + append entity " // PR clock\n" + append entity " wire pclk;\n" + append entity " wire presetn;\n" + append entity "\n" + append entity " // PR streams\n" + append entity " AXI4S axis_pr();\n" + append entity "\n" + append entity " wire\[31:0] axis_pr_data;\n" + append entity " wire axis_pr_ready;\n" + append entity " wire axis_pr_valid;\n" + append entity " \n" + append entity " // PR ICAP\n" + append entity " wire\[31:0] icap_out;\n" + append entity " wire icap_avail;\n" + append entity " wire icap_error;\n" + append entity " wire icap_done;\n" + append entity " wire icap_csn;\n" + append entity " wire\[31:0] icap_data;\n" + append entity "\n" + } + append entity " // IO buffers\n" + append entity " IBUF rst_IBUF_inst (\n" + append entity " .O(resetn_0), // Buffer output\n" + append entity " .I(resetn_0_nb) // Buffer input (connect directly to top-level port)\n" + append entity " );\n" + append entity "\n" + append entity " IBUF perst_n_IBUF_inst (\n" + append entity " .O(perst_n),\n" + 
append entity " .I(perst_n_nb)\n" + append entity " );\n" + append entity "\n" + append entity " // The notorius D32 pin\n" + append entity " assign fpga_burn = 1'b0;\n" + append entity "\n" + append entity " // -----------------------------------------------------------------\n" + append entity " // STATIC LAYER \n" + append entity " // -----------------------------------------------------------------\n" + append entity " design_static design_static_i\n" + append entity " (.aclk(aclk),\n" + append entity " .aresetn(aresetn),\n" + append entity " .axi_cnfg_araddr(axi_cnfg.araddr),\n" + append entity " .axi_cnfg_arprot(axi_cnfg.arprot),\n" + append entity " .axi_cnfg_arready(axi_cnfg.arready),\n" + append entity " .axi_cnfg_arvalid(axi_cnfg.arvalid),\n" + append entity " .axi_cnfg_awaddr(axi_cnfg.awaddr),\n" + append entity " .axi_cnfg_awprot(axi_cnfg.awprot),\n" + append entity " .axi_cnfg_awready(axi_cnfg.awready),\n" + append entity " .axi_cnfg_awvalid(axi_cnfg.awvalid),\n" + append entity " .axi_cnfg_bready(axi_cnfg.bready),\n" + append entity " .axi_cnfg_bresp(axi_cnfg.bresp),\n" + append entity " .axi_cnfg_bvalid(axi_cnfg.bvalid),\n" + append entity " .axi_cnfg_rdata(axi_cnfg.rdata),\n" + append entity " .axi_cnfg_rready(axi_cnfg.rready),\n" + append entity " .axi_cnfg_rresp(axi_cnfg.rresp),\n" + append entity " .axi_cnfg_rvalid(axi_cnfg.rvalid),\n" + append entity " .axi_cnfg_wdata(axi_cnfg.wdata),\n" + append entity " .axi_cnfg_wready(axi_cnfg.wready),\n" + append entity " .axi_cnfg_wstrb(axi_cnfg.wstrb),\n" + append entity " .axi_cnfg_wvalid(axi_cnfg.wvalid),\n" + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + append entity " .axi_ctrl_$i\_araddr(axi_ctrl\[$i].araddr),\n" + append entity " .axi_ctrl_$i\_arprot(axi_ctrl\[$i].arprot),\n" + append entity " .axi_ctrl_$i\_arready(axi_ctrl\[$i].arready),\n" + append entity " .axi_ctrl_$i\_arvalid(axi_ctrl\[$i].arvalid),\n" + append entity " .axi_ctrl_$i\_awaddr(axi_ctrl\[$i].awaddr),\n" + append entity " 
.axi_ctrl_$i\_awprot(axi_ctrl\[$i].awprot),\n" + append entity " .axi_ctrl_$i\_awready(axi_ctrl\[$i].awready),\n" + append entity " .axi_ctrl_$i\_awvalid(axi_ctrl\[$i].awvalid),\n" + append entity " .axi_ctrl_$i\_bready(axi_ctrl\[$i].bready),\n" + append entity " .axi_ctrl_$i\_bresp(axi_ctrl\[$i].bresp),\n" + append entity " .axi_ctrl_$i\_bvalid(axi_ctrl\[$i].bvalid),\n" + append entity " .axi_ctrl_$i\_rdata(axi_ctrl\[$i].rdata),\n" + append entity " .axi_ctrl_$i\_rready(axi_ctrl\[$i].rready),\n" + append entity " .axi_ctrl_$i\_rresp(axi_ctrl\[$i].rresp),\n" + append entity " .axi_ctrl_$i\_rvalid(axi_ctrl\[$i].rvalid),\n" + append entity " .axi_ctrl_$i\_wdata(axi_ctrl\[$i].wdata),\n" + append entity " .axi_ctrl_$i\_wready(axi_ctrl\[$i].wready),\n" + append entity " .axi_ctrl_$i\_wstrb(axi_ctrl\[$i].wstrb),\n" + append entity " .axi_ctrl_$i\_wvalid(axi_ctrl\[$i].wvalid),\n" + } + if {$cnfg(en_avx) eq 1} { + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + append entity " .axim_ctrl_$i\_araddr(axim_ctrl\[$i].araddr),\n" + append entity " .axim_ctrl_$i\_arburst(axim_ctrl\[$i].arburst),\n" + append entity " .axim_ctrl_$i\_arcache(axim_ctrl\[$i].arcache),\n" + append entity " .axim_ctrl_$i\_arlen(axim_ctrl\[$i].arlen),\n" + append entity " .axim_ctrl_$i\_arlock(axim_ctrl\[$i].arlock),\n" + append entity " .axim_ctrl_$i\_arprot(axim_ctrl\[$i].arprot),\n" + append entity " .axim_ctrl_$i\_arqos(axim_ctrl\[$i].arqos),\n" + append entity " .axim_ctrl_$i\_arready(axim_ctrl\[$i].arready),\n" + append entity " .axim_ctrl_$i\_arregion(axim_ctrl\[$i].arregion),\n" + append entity " .axim_ctrl_$i\_arsize(axim_ctrl\[$i].arsize),\n" + append entity " .axim_ctrl_$i\_arvalid(axim_ctrl\[$i].arvalid),\n" + append entity " .axim_ctrl_$i\_awaddr(axim_ctrl\[$i].awaddr),\n" + append entity " .axim_ctrl_$i\_awburst(axim_ctrl\[$i].awburst),\n" + append entity " .axim_ctrl_$i\_awcache(axim_ctrl\[$i].awcache),\n" + append entity " .axim_ctrl_$i\_awlen(axim_ctrl\[$i].awlen),\n" + append entity " 
.axim_ctrl_$i\_awlock(axim_ctrl\[$i].awlock),\n" + append entity " .axim_ctrl_$i\_awprot(axim_ctrl\[$i].awprot),\n" + append entity " .axim_ctrl_$i\_awqos(axim_ctrl\[$i].awqos),\n" + append entity " .axim_ctrl_$i\_awready(axim_ctrl\[$i].awready),\n" + append entity " .axim_ctrl_$i\_awregion(axim_ctrl\[$i].awregion),\n" + append entity " .axim_ctrl_$i\_awsize(axim_ctrl\[$i].awsize),\n" + append entity " .axim_ctrl_$i\_awvalid(axim_ctrl\[$i].awvalid),\n" + append entity " .axim_ctrl_$i\_bready(axim_ctrl\[$i].bready),\n" + append entity " .axim_ctrl_$i\_bresp(axim_ctrl\[$i].bresp),\n" + append entity " .axim_ctrl_$i\_bvalid(axim_ctrl\[$i].bvalid),\n" + append entity " .axim_ctrl_$i\_rdata(axim_ctrl\[$i].rdata),\n" + append entity " .axim_ctrl_$i\_rlast(axim_ctrl\[$i].rlast),\n" + append entity " .axim_ctrl_$i\_rready(axim_ctrl\[$i].rready),\n" + append entity " .axim_ctrl_$i\_rresp(axim_ctrl\[$i].rresp),\n" + append entity " .axim_ctrl_$i\_rvalid(axim_ctrl\[$i].rvalid),\n" + append entity " .axim_ctrl_$i\_wdata(axim_ctrl\[$i].wdata),\n" + append entity " .axim_ctrl_$i\_wlast(axim_ctrl\[$i].wlast),\n" + append entity " .axim_ctrl_$i\_wready(axim_ctrl\[$i].wready),\n" + append entity " .axim_ctrl_$i\_wstrb(axim_ctrl\[$i].wstrb),\n" + append entity " .axim_ctrl_$i\_wvalid(axim_ctrl\[$i].wvalid),\n" + } + } + for {set i 0} {$i < $cnfg(n_chan)} {incr i} { + append entity " .axis_dyn_in_$i\_tdata(axis_dyn_in\[$i].tdata),\n" + append entity " .axis_dyn_in_$i\_tkeep(axis_dyn_in\[$i].tkeep),\n" + append entity " .axis_dyn_in_$i\_tlast(axis_dyn_in\[$i].tlast),\n" + append entity " .axis_dyn_in_$i\_tready(axis_dyn_in\[$i].tready),\n" + append entity " .axis_dyn_in_$i\_tvalid(axis_dyn_in\[$i].tvalid),\n" + append entity " .axis_dyn_out_$i\_tdata(axis_dyn_out\[$i].tdata),\n" + append entity " .axis_dyn_out_$i\_tkeep(axis_dyn_out\[$i].tkeep),\n" + append entity " .axis_dyn_out_$i\_tlast(axis_dyn_out\[$i].tlast),\n" + append entity " 
.axis_dyn_out_$i\_tready(axis_dyn_out\[$i].tready),\n" + append entity " .axis_dyn_out_$i\_tvalid(axis_dyn_out\[$i].tvalid),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_ctl(xdma_req\[$i].c2h_ctl),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_dst_addr(xdma_req\[$i].c2h_addr),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_len(xdma_req\[$i].c2h_len),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_load(xdma_req\[$i].c2h_valid),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_ready(xdma_req\[$i].c2h_ready),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_src_addr(0),\n" + append entity " .dsc_bypass_h2c_$i\_dsc_byp_ctl(xdma_req\[$i].h2c_ctl),\n" + append entity " .dsc_bypass_h2c_$i\_dsc_byp_dst_addr(0),\n" + append entity " .dsc_bypass_h2c_$i\_dsc_byp_len(xdma_req\[$i].h2c_len),\n" + append entity " .dsc_bypass_h2c_$i\_dsc_byp_load(xdma_req\[$i].h2c_valid),\n" + append entity " .dsc_bypass_h2c_$i\_dsc_byp_ready(xdma_req\[$i].h2c_ready),\n" + append entity " .dsc_bypass_h2c_$i\_dsc_byp_src_addr(xdma_req\[$i].h2c_addr),\n" + append entity " .dsc_status_c2h_sts$i\(xdma_req\[$i].c2h_status),\n" + append entity " .dsc_status_h2c_sts$i\(xdma_req\[$i].h2c_status),\n" + } + if {$cnfg(en_ddr) eq 1} { + if {$cnfg(ddr_0) eq 1} { + append entity " .c0_ddr4_act_n(c0_ddr4_act_n),\n" + append entity " .c0_ddr4_adr(c0_ddr4_adr),\n" + append entity " .c0_ddr4_ba(c0_ddr4_ba),\n" + append entity " .c0_ddr4_bg(c0_ddr4_bg),\n" + append entity " .c0_ddr4_ck_c(c0_ddr4_ck_c),\n" + append entity " .c0_ddr4_ck_t(c0_ddr4_ck_t),\n" + append entity " .c0_ddr4_cke(c0_ddr4_cke),\n" + append entity " .c0_ddr4_cs_n(c0_ddr4_cs_n),\n" + append entity " .c0_ddr4_dq(c0_ddr4_dq),\n" + append entity " .c0_ddr4_dqs_c(c0_ddr4_dqs_c),\n" + append entity " .c0_ddr4_dqs_t(c0_ddr4_dqs_t),\n" + append entity " .c0_ddr4_odt(c0_ddr4_odt),\n" + append entity " .c0_ddr4_par(c0_ddr4_par),\n" + append entity " .c0_ddr4_reset_n(c0_ddr4_reset_n),\n" + append entity " .c0_sys_clk_0_clk_n(c0_sys_clk_n),\n" + 
append entity " .c0_sys_clk_0_clk_p(c0_sys_clk_p),\n" + append entity " .axi_ctrl_ddr_0_araddr(0),\n" + append entity " .axi_ctrl_ddr_0_arready(),\n" + append entity " .axi_ctrl_ddr_0_arvalid(1'b0),\n" + append entity " .axi_ctrl_ddr_0_awaddr(0),\n" + append entity " .axi_ctrl_ddr_0_awready(),\n" + append entity " .axi_ctrl_ddr_0_awvalid(1'b0),\n" + append entity " .axi_ctrl_ddr_0_bready(1'b0),\n" + append entity " .axi_ctrl_ddr_0_bresp(),\n" + append entity " .axi_ctrl_ddr_0_bvalid(),\n" + append entity " .axi_ctrl_ddr_0_rdata(),\n" + append entity " .axi_ctrl_ddr_0_rready(1'b0),\n" + append entity " .axi_ctrl_ddr_0_rresp(),\n" + append entity " .axi_ctrl_ddr_0_rvalid(),\n" + append entity " .axi_ctrl_ddr_0_wdata(0),\n" + append entity " .axi_ctrl_ddr_0_wready(),\n" + append entity " .axi_ctrl_ddr_0_wvalid(1'b0),\n" + } + if {$cnfg(ddr_1) eq 1} { + append entity " .c1_ddr4_act_n(c1_ddr4_act_n),\n" + append entity " .c1_ddr4_adr(c1_ddr4_adr),\n" + append entity " .c1_ddr4_ba(c1_ddr4_ba),\n" + append entity " .c1_ddr4_bg(c1_ddr4_bg),\n" + append entity " .c1_ddr4_ck_c(c1_ddr4_ck_c),\n" + append entity " .c1_ddr4_ck_t(c1_ddr4_ck_t),\n" + append entity " .c1_ddr4_cke(c1_ddr4_cke),\n" + append entity " .c1_ddr4_cs_n(c1_ddr4_cs_n),\n" + append entity " .c1_ddr4_dq(c1_ddr4_dq),\n" + append entity " .c1_ddr4_dqs_c(c1_ddr4_dqs_c),\n" + append entity " .c1_ddr4_dqs_t(c1_ddr4_dqs_t),\n" + append entity " .c1_ddr4_odt(c1_ddr4_odt),\n" + append entity " .c1_ddr4_par(c1_ddr4_par),\n" + append entity " .c1_ddr4_reset_n(c1_ddr4_reset_n),\n" + append entity " .c1_sys_clk_0_clk_n(c1_sys_clk_n),\n" + append entity " .c1_sys_clk_0_clk_p(c1_sys_clk_p),\n" + append entity " .axi_ctrl_ddr_1_araddr(0),\n" + append entity " .axi_ctrl_ddr_1_arready(),\n" + append entity " .axi_ctrl_ddr_1_arvalid(1'b0),\n" + append entity " .axi_ctrl_ddr_1_awaddr(0),\n" + append entity " .axi_ctrl_ddr_1_awready(),\n" + append entity " .axi_ctrl_ddr_1_awvalid(1'b0),\n" + append entity " 
.axi_ctrl_ddr_1_bready(1'b0),\n" + append entity " .axi_ctrl_ddr_1_bresp(),\n" + append entity " .axi_ctrl_ddr_1_bvalid(),\n" + append entity " .axi_ctrl_ddr_1_rdata(),\n" + append entity " .axi_ctrl_ddr_1_rready(1'b0),\n" + append entity " .axi_ctrl_ddr_1_rresp(),\n" + append entity " .axi_ctrl_ddr_1_rvalid(),\n" + append entity " .axi_ctrl_ddr_1_wdata(0),\n" + append entity " .axi_ctrl_ddr_1_wready(),\n" + append entity " .axi_ctrl_ddr_1_wvalid(1'b0),\n" + } + for {set i 0} {$i < 2 * $cnfg(n_ddr_chan)} {incr i} { + append entity " .axi_ddr_in_$i\_araddr(axi_ddr_in\[$i].araddr),\n" + append entity " .axi_ddr_in_$i\_arburst(axi_ddr_in\[$i].arburst),\n" + append entity " .axi_ddr_in_$i\_arcache(axi_ddr_in\[$i].arcache),\n" + append entity " .axi_ddr_in_$i\_arid(axi_ddr_in\[$i].arid),\n" + append entity " .axi_ddr_in_$i\_arlen(axi_ddr_in\[$i].arlen),\n" + append entity " .axi_ddr_in_$i\_arlock(axi_ddr_in\[$i].arlock),\n" + append entity " .axi_ddr_in_$i\_arprot(axi_ddr_in\[$i].arprot),\n" + append entity " .axi_ddr_in_$i\_arqos(axi_ddr_in\[$i].arqos),\n" + append entity " .axi_ddr_in_$i\_arready(axi_ddr_in\[$i].arready),\n" + append entity " .axi_ddr_in_$i\_arregion(axi_ddr_in\[$i].arregion),\n" + append entity " .axi_ddr_in_$i\_arsize(axi_ddr_in\[$i].arsize),\n" + append entity " .axi_ddr_in_$i\_arvalid(axi_ddr_in\[$i].arvalid),\n" + append entity " .axi_ddr_in_$i\_awaddr(axi_ddr_in\[$i].awaddr),\n" + append entity " .axi_ddr_in_$i\_awburst(axi_ddr_in\[$i].awburst),\n" + append entity " .axi_ddr_in_$i\_awcache(axi_ddr_in\[$i].awcache),\n" + append entity " .axi_ddr_in_$i\_awid(axi_ddr_in\[$i].awid),\n" + append entity " .axi_ddr_in_$i\_awlen(axi_ddr_in\[$i].awlen),\n" + append entity " .axi_ddr_in_$i\_awlock(axi_ddr_in\[$i].awlock),\n" + append entity " .axi_ddr_in_$i\_awprot(axi_ddr_in\[$i].awprot),\n" + append entity " .axi_ddr_in_$i\_awqos(axi_ddr_in\[$i].awqos),\n" + append entity " .axi_ddr_in_$i\_awready(axi_ddr_in\[$i].awready),\n" + append entity " 
.axi_ddr_in_$i\_awregion(axi_ddr_in\[$i].awregion),\n" + append entity " .axi_ddr_in_$i\_awsize(axi_ddr_in\[$i].awsize),\n" + append entity " .axi_ddr_in_$i\_awvalid(axi_ddr_in\[$i].awvalid),\n" + append entity " .axi_ddr_in_$i\_bid(axi_ddr_in\[$i].bid),\n" + append entity " .axi_ddr_in_$i\_bready(axi_ddr_in\[$i].bready),\n" + append entity " .axi_ddr_in_$i\_bresp(axi_ddr_in\[$i].bresp),\n" + append entity " .axi_ddr_in_$i\_bvalid(axi_ddr_in\[$i].bvalid),\n" + append entity " .axi_ddr_in_$i\_rdata(axi_ddr_in\[$i].rdata),\n" + append entity " .axi_ddr_in_$i\_rid(axi_ddr_in\[$i].rid),\n" + append entity " .axi_ddr_in_$i\_rlast(axi_ddr_in\[$i].rlast),\n" + append entity " .axi_ddr_in_$i\_rready(axi_ddr_in\[$i].rready),\n" + append entity " .axi_ddr_in_$i\_rresp(axi_ddr_in\[$i].rresp),\n" + append entity " .axi_ddr_in_$i\_rvalid(axi_ddr_in\[$i].rvalid),\n" + append entity " .axi_ddr_in_$i\_wdata(axi_ddr_in\[$i].wdata),\n" + append entity " .axi_ddr_in_$i\_wlast(axi_ddr_in\[$i].wlast),\n" + append entity " .axi_ddr_in_$i\_wready(axi_ddr_in\[$i].wready),\n" + append entity " .axi_ddr_in_$i\_wstrb(axi_ddr_in\[$i].wstrb),\n" + append entity " .axi_ddr_in_$i\_wvalid(axi_ddr_in\[$i].wvalid),\n" + } + } + append entity " .pcie_clk_clk_n(pcie_clk_clk_n),\n" + append entity " .pcie_clk_clk_p(pcie_clk_clk_p),\n" + append entity " .pcie_x16_rxn(pcie_x16_rxn),\n" + append entity " .pcie_x16_rxp(pcie_x16_rxp),\n" + append entity " .pcie_x16_txn(pcie_x16_txn),\n" + append entity " .pcie_x16_txp(pcie_x16_txp),\n" + if {$cnfg(en_pr) eq 1} { + append entity " .pclk(pclk),\n" + append entity " .presetn(presetn),\n" + } + append entity " .perst_n(perst_n),\n" + append entity " .reset_0(~resetn_0),\n" + append entity " .usr_irq(usr_irq)\n" + append entity " );\n" + append entity " \n" + append entity " // -----------------------------------------------------------------\n" + append entity " // STATIC CONFIG \n" + append entity " // 
-----------------------------------------------------------------\n" + append entity " static_slave inst_static_slave (\n" + append entity " .aclk(aclk),\n" + append entity " .aresetn(aresetn),\n" + if {$cnfg(en_pr) eq 1} { + append entity " .pr_xdma_req(xdma_req\[$cnfg(pr_chan)]),\n" + } + if {$cnfg(en_fv) eq 1} { + append entity " .arp_lookup_request(arp_lookup_request),\n" + append entity " .arp_lookup_reply(arp_lookup_reply),\n" + append entity " .set_ip_addr(set_ip_addr),\n" + append entity " .set_board_number(set_board_number),\n" + append entity " .qp_interface(qp_interface),\n" + append entity " .conn_interface(conn_interface),\n" + } + append entity " .lowspeed_ctrl(),\n" + append entity " .axi_ctrl(axi_cnfg)\n" + append entity " );\n" + append entity "\n" + if {$cnfg(en_fv) eq 1} { + append entity " // -----------------------------------------------------------------\n" + append entity " // RDMA \n" + append entity " // -----------------------------------------------------------------\n" + append entity " network_top inst_network_top (\n" + append entity " .aclk(aclk),\n" + append entity " .aresetn(aresetn),\n" + append entity " .sys_reset(~resetn_0),\n" + append entity " .dclk(aclk),\n" + append entity " .gt_refclk_p(gt$cnfg(qsfp)_refclk_p),\n" + append entity " .gt_refclk_n(gt$cnfg(qsfp)_refclk_n),\n" + append entity " .gt_rxp_in(gt$cnfg(qsfp)_rxp_in),\n" + append entity " .gt_rxn_in(gt$cnfg(qsfp)_rxn_in),\n" + append entity " .gt_txp_out(gt$cnfg(qsfp)_txp_out),\n" + append entity " .gt_txn_out(gt$cnfg(qsfp)_txn_out),\n" + append entity " .arp_lookup_request(arp_lookup_request),\n" + append entity " .arp_lookup_reply(arp_lookup_reply),\n" + append entity " .set_ip_addr(set_ip_addr),\n" + append entity " .set_board_number(set_board_number),\n" + append entity " .qp_interface(qp_interface),\n" + append entity " .conn_interface(conn_interface),\n" + append entity " .rdma_req_host(rdma_req_host),\n" + if {$cnfg(en_fvv) eq 1} { + append entity " 
.rdma_req_card(rdma_req_card),\n" + append entity " .rdma_req_fv(rdma_req_fv),\n" + } + append entity " .rdma_rd_cmd(rdma_rd_cmd),\n" + append entity " .rdma_wr_cmd(rdma_wr_cmd),\n" + append entity " .axis_rdma_rd_data(axis_rdma_rd_data),\n" + append entity " .axis_rdma_wr_data(axis_rdma_wr_data)\n" + append entity " );\n" + append entity "\n" + } + if {$cnfg(en_pr) eq 1} { + append entity " // -----------------------------------------------------------------\n" + append entity " // PR \n" + append entity " // -----------------------------------------------------------------\n" + append entity " pr_clock_converter inst_clk_cnvrt_pr (\n" + append entity " .s_axis_aresetn(aresetn),\n" + append entity " .m_axis_aresetn(presetn),\n" + append entity " .s_axis_aclk(aclk),\n" + append entity " .s_axis_tvalid(axis_dyn_out\[$cnfg(pr_chan)].tvalid),\n" + append entity " .s_axis_tready(axis_dyn_out\[$cnfg(pr_chan)].tready),\n" + append entity " .s_axis_tdata(axis_dyn_out\[$cnfg(pr_chan)].tdata),\n" + append entity " .s_axis_tkeep(axis_dyn_out\[$cnfg(pr_chan)].tkeep),\n" + append entity " .s_axis_tlast(axis_dyn_out\[$cnfg(pr_chan)].tlast),\n" + append entity " .m_axis_aclk(pclk),\n" + append entity " .m_axis_tvalid(axis_pr.tvalid),\n" + append entity " .m_axis_tready(axis_pr.tready),\n" + append entity " .m_axis_tdata(axis_pr.tdata),\n" + append entity " .m_axis_tkeep(axis_pr.tkeep),\n" + append entity " .m_axis_tlast(axis_pr.tlast)\n" + append entity " );\n" + append entity " \n" + append entity " pr_dwidth_converter inst_dwidth_cnvrt_pr (\n" + append entity " .aclk(pclk),\n" + append entity " .aresetn(presetn),\n" + append entity " .s_axis_tvalid(axis_pr.tvalid),\n" + append entity " .s_axis_tready(axis_pr.tready),\n" + append entity " .s_axis_tdata(axis_pr.tdata),\n" + append entity " .s_axis_tkeep(axis_pr.tkeep),\n" + append entity " .s_axis_tlast(axis_pr.tlast),\n" + append entity " .m_axis_tvalid(axis_pr_valid),\n" + append entity " .m_axis_tready(axis_pr_ready),\n" + 
append entity " .m_axis_tdata(axis_pr_data),\n" + append entity " .m_axis_tkeep(),\n" + append entity " .m_axis_tlast()\n" + append entity " );\n" + append entity " \n" + append entity " // Partial reconfiguration\n" + append entity " assign icap_csn = ~axis_pr_valid;\n" + append entity " assign axis_pr_ready = 1'b1;\n" + append entity " assign icap_data = {axis_pr_data\[24], axis_pr_data\[25], axis_pr_data\[26], axis_pr_data\[27], axis_pr_data\[28], axis_pr_data\[29], axis_pr_data\[30], axis_pr_data\[31],\n" + append entity " axis_pr_data\[16], axis_pr_data\[17], axis_pr_data\[18], axis_pr_data\[19], axis_pr_data\[20], axis_pr_data\[21], axis_pr_data\[22], axis_pr_data\[23],\n" + append entity " axis_pr_data\[8], axis_pr_data\[9], axis_pr_data\[10], axis_pr_data\[11], axis_pr_data\[12], axis_pr_data\[13], axis_pr_data\[14], axis_pr_data\[15],\n" + append entity " axis_pr_data\[0], axis_pr_data\[1], axis_pr_data\[2], axis_pr_data\[3], axis_pr_data\[4], axis_pr_data\[5], axis_pr_data\[6], axis_pr_data\[7]};\n" + append entity " \n" + append entity " ICAPE3 #(\n" + append entity " .ICAP_AUTO_SWITCH(\"DISABLE\"),\n" + append entity " .SIM_CFG_FILE_NAME(\"NONE\")\n" + append entity " )\n" + append entity " ICAPE3_inst (\n" + append entity " .AVAIL(icap_avail), // 1-bit output: Availability status of ICAP\n" + append entity " .O(icap_out), // 32-bit output: Configuration data output bus\n" + append entity " .PRDONE(icap_done), // 1-bit output: Indicates completion of Partial Reconfiguration\n" + append entity " .PRERROR(icap_error), // 1-bit output: Indicates Error during Partial Reconfiguration\n" + append entity " .CLK(pclk), // 1-bit input: Clock input\n" + append entity " .CSIB(icap_csn), // 1-bit input: Active-Low ICAP enable\n" + append entity " .I(icap_data), // 32-bit input: Configuration data input bus\n" + append entity " .RDWRB(1'b0) // 1-bit input: Read/Write Select input\n" + append entity " );\n" + append entity " \n" + } + append entity " // 
-----------------------------------------------------------------\n" + append entity " // DYNAMIC LAYER \n" + append entity " // -----------------------------------------------------------------\n" + append entity " design_dynamic_wrapper inst_dynamic (\n" + append entity " .sys_rst(~resetn_0),\n" + append entity " .aresetn(aresetn),\n" + append entity " .aclk(aclk),\n" + append entity " .axi_ctrl(axi_ctrl\[0+:$cnfg(n_reg)]),\n" + if {$cnfg(en_avx) eq 1} { + append entity " .axim_ctrl(axim_ctrl\[0+:$cnfg(n_reg)]),\n" + } + if {$cnfg(en_ddr) eq 1} { + append entity " .axi_ddr_in(axi_ddr_in\[0+:$cnfg(n_ddr_chan)*2]),\n" + } + if {$cnfg(en_strm) eq 1} { + append entity " .axis_host_in(axis_dyn_in\[$cnfg(strm_chan)]),\n" + append entity " .axis_host_out(axis_dyn_out\[$cnfg(strm_chan)]),\n" + append entity " .host_xdma_req(xdma_req\[$cnfg(strm_chan)]),\n" + } + if {$cnfg(en_ddr) eq 1} { + append entity " .axis_card_in(axis_dyn_in\[$cnfg(ddr_chan)]),\n" + append entity " .axis_card_out(axis_dyn_out\[$cnfg(ddr_chan)]),\n" + append entity " .card_xdma_req(xdma_req\[$cnfg(ddr_chan)]),\n" + } + if {$cnfg(en_fv) eq 1} { + append entity " .rdma_req_host(rdma_req_host\[0+:$cnfg(n_reg)]),\n" + if {$cnfg(en_fvv) eq 1} { + append entity " .rdma_req_card(rdma_req_card\[0+:$cnfg(n_reg)]),\n" + append entity " .rdma_req_fv(rdma_req_fv\[0+:$cnfg(n_reg)]),\n" + } + append entity " .rdma_rd_cmd(rdma_rd_cmd\[0+:$cnfg(n_reg)]),\n" + append entity " .rdma_wr_cmd(rdma_wr_cmd\[0+:$cnfg(n_reg)]),\n" + append entity " .axis_rdma_rd_data(axis_rdma_rd_data\[0+:$cnfg(n_reg)]),\n" + append entity " .axis_rdma_wr_data(axis_rdma_wr_data\[0+:$cnfg(n_reg)]),\n" + } + append entity " .usr_irq(usr_irq\[0+:$cnfg(n_reg)]),\n" + append entity " .S_BSCAN_drck(),\n" + append entity " .S_BSCAN_shift(),\n" + append entity " .S_BSCAN_tdi(),\n" + append entity " .S_BSCAN_update(),\n" + append entity " .S_BSCAN_sel(),\n" + append entity " .S_BSCAN_tdo(),\n" + append entity " .S_BSCAN_tms(),\n" + append entity " 
.S_BSCAN_tck(),\n" + append entity " .S_BSCAN_runtest(),\n" + append entity " .S_BSCAN_reset(),\n" + append entity " .S_BSCAN_capture(),\n" + append entity " .S_BSCAN_bscanid_en() \n" + append entity " );\n" + append entity " \n" + append entity "endmodule\n" + append entity "\n" + lappend template $entity + set vho_file [open $f_out w] + foreach line $template { + puts $vho_file $line + } + close $vho_file +} \ No newline at end of file diff --git a/hw/scripts/wr_hdl/wr_hdl_top_vcu118.tcl b/hw/scripts/wr_hdl/wr_hdl_top_vcu118.tcl new file mode 100644 index 00000000..02c450e4 --- /dev/null +++ b/hw/scripts/wr_hdl/wr_hdl_top_vcu118.tcl @@ -0,0 +1,562 @@ +######################################################################################### +# Write top level file +######################################################################################### +proc wr_hdl_top {f_out} { + upvar #0 cfg cnfg + + set template {} + set entity {} + append entity "`timescale 1ns / 1ps\n" + append entity "\n" + append entity "import lynxTypes::*;\n" + append entity "\n" + append entity "`include \"axi_macros.svh\"\n" + append entity "`include \"lynx_macros.svh\"\n" + append entity "//\n" + append entity "// Top Level\n" + append entity "//\n" + append entity "module top (\n" + if {$cnfg(en_fv) eq 1} { + append entity " input wire\[3:0] gt_rxp_in,\n" + append entity " input wire\[3:0] gt_rxn_in,\n" + append entity " output wire\[3:0] gt_txp_out,\n" + append entity " output wire\[3:0] gt_txn_out,\n" + append entity " input wire gt_refclk_p,\n" + append entity " input wire gt_refclk_n,\n" + append entity " input wire dclk_p,\n" + append entity " input wire dclk_n,\n" + } + if {$cnfg(en_ddr) eq 1} { + if {$cnfg(ddr_0) eq 1} { + append entity " input wire c0_sys_clk_p,\n" + append entity " input wire c0_sys_clk_n,\n" + append entity " output wire c0_ddr4_act_n,\n" + append entity " output wire\[16:0] c0_ddr4_adr,\n" + append entity " output wire\[1:0] c0_ddr4_ba,\n" + append entity 
" output wire\[0:0] c0_ddr4_bg,\n" + append entity " output wire\[0:0] c0_ddr4_cke,\n" + append entity " output wire\[0:0] c0_ddr4_odt,\n" + append entity " output wire\[0:0] c0_ddr4_cs_n,\n" + append entity " output wire\[0:0] c0_ddr4_ck_t,\n" + append entity " output wire\[0:0] c0_ddr4_ck_c,\n" + append entity " output wire c0_ddr4_reset_n,\n" + append entity " inout wire\[7:0] c0_ddr4_dm_dbi_n,\n" + append entity " inout wire\[63:0] c0_ddr4_dq,\n" + append entity " inout wire\[7:0] c0_ddr4_dqs_t,\n" + append entity " inout wire\[7:0] c0_ddr4_dqs_c,\n" + } + if {$cnfg(ddr_1) eq 1} { + append entity " input wire c1_sys_clk_p,\n" + append entity " input wire c1_sys_clk_n,\n" + append entity " output wire c1_ddr4_act_n,\n" + append entity " output wire\[16:0] c1_ddr4_adr,\n" + append entity " output wire\[1:0] c1_ddr4_ba,\n" + append entity " output wire\[0:0] c1_ddr4_bg,\n" + append entity " output wire\[0:0] c1_ddr4_cke,\n" + append entity " output wire\[0:0] c1_ddr4_odt,\n" + append entity " output wire\[0:0] c1_ddr4_cs_n,\n" + append entity " output wire\[0:0] c1_ddr4_ck_t,\n" + append entity " output wire\[0:0] c1_ddr4_ck_c,\n" + append entity " output wire c1_ddr4_reset_n,\n" + append entity " inout wire\[7:0] c1_ddr4_dm_dbi_n,\n" + append entity " inout wire\[63:0] c1_ddr4_dq,\n" + append entity " inout wire\[7:0] c1_ddr4_dqs_t,\n" + append entity " inout wire\[7:0] c1_ddr4_dqs_c,\n" + } + } + append entity " input wire\[0:0] pcie_clk_clk_n,\n" + append entity " input wire\[0:0] pcie_clk_clk_p,\n" + append entity " input wire\[15:0] pcie_x16_rxn,\n" + append entity " input wire\[15:0] pcie_x16_rxp,\n" + append entity " output wire\[15:0] pcie_x16_txn,\n" + append entity " output wire\[15:0] pcie_x16_txp,\n" + append entity " input wire perst_n_nb,\n" + append entity " input wire reset_0_nb\n" + append entity ");\n" + append entity "\n" + append entity " \n" + append entity " // AXI resetn\n" + append entity " wire\[0:0] aresetn;\n" + append entity " // AXI 
clk (250 MHz)\n" + append entity " wire aclk;\n" + append entity "\n" + append entity " // IRQ\n" + append entity " wire\[N_REGIONS-1:0] usr_irq;\n" + append entity "\n" + append entity " wire reset_0;\n" + append entity " wire perst_n;\n" + append entity "\n" + append entity " // Static config\n" + append entity " AXI4L axi_cnfg ();\n" + append entity "\n" + append entity " // Application control\n" + append entity " AXI4L axi_ctrl \[N_REGIONS] ();\n" + append entity "\n" + if {$cnfg(en_avx) eq 1} { + append entity " // Application control AVX\n" + append entity " AXI4 #(.AXI4_DATA_BITS(AVX_DATA_BITS)) axim_ctrl \[N_REGIONS] ();\n" + } + append entity "\n" + append entity " // Stream to application\n" + append entity " AXI4S axis_dyn_out\[N_CHAN] ();\n" + append entity "\n" + append entity " // Stream from application\n" + append entity " AXI4S axis_dyn_in\[N_CHAN] ();\n" + append entity "\n" + append entity " // Descriptor bypass\n" + append entity " xdmaIntf xdma_req \[N_CHAN] ();\n" + append entity "\n" + if {$cnfg(en_ddr) eq 1} { + append entity " // DDR AXI mm\n" + append entity " AXI4 axi_ddr_in\[2*N_DDR_CHAN] ();\n" + append entity "\n" + } + if {$cnfg(en_fv) eq 1} { + append entity "\n" + append entity " // RDMA clk\n" + append entity " wire dclk;\n" + append entity "\n" + append entity " metaIntf #(.DATA_BITS(32)) arp_lookup_request();\n" + append entity " metaIntf #(.DATA_BITS(56)) arp_lookup_reply();\n" + append entity " metaIntf #(.DATA_BITS(32)) set_ip_addr();\n" + append entity " metaIntf #(.DATA_BITS(4)) set_board_number();\n" + append entity " metaIntf #(.DATA_BITS(144)) qp_interface ();\n" + append entity " metaIntf #(.DATA_BITS(184)) conn_interface ();\n" + append entity "\n" + append entity " // FV requests\n" + append entity " metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_host \[N_REGIONS] ();\n" + if {$cnfg(en_fvv) eq 1} { + append entity " metaIntf #(.DATA_BITS(FV_REQ_BITS)) rdma_req_card \[N_REGIONS] ();\n" + append entity " metaIntf 
#(.DATA_BITS(FV_REQ_BITS)) rdma_req_rpc \[N_REGIONS] ();\n" + } + append entity "\n" + append entity " // RDMA interface\n" + append entity " reqIntf rdma_rd_cmd \[N_REGIONS] ();\n" + append entity " reqIntf rdma_wr_cmd \[N_REGIONS] ();\n" + append entity " AXI4S axis_rdma_rd_data \[N_REGIONS] ();\n" + append entity " AXI4S axis_rdma_wr_data \[N_REGIONS] ();\n" + append entity "\n" + } + if {$cnfg(en_pr) eq 1} { + append entity " // PR clock\n" + append entity " wire pclk;\n" + append entity " wire presetn;\n" + append entity "\n" + append entity " // PR streams\n" + append entity " AXI4S axis_pr();\n" + append entity "\n" + append entity " wire\[31:0] axis_pr_data;\n" + append entity " wire axis_pr_ready;\n" + append entity " wire axis_pr_valid;\n" + append entity " \n" + append entity " // PR ICAP\n" + append entity " wire\[31:0] icap_out;\n" + append entity " wire icap_avail;\n" + append entity " wire icap_error;\n" + append entity " wire icap_done;\n" + append entity " wire icap_csn;\n" + append entity " wire\[31:0] icap_data;\n" + append entity "\n" + } + append entity " // IO buffers\n" + append entity " IBUF rst_IBUF_inst (\n" + append entity " .O(reset_0), // Buffer output\n" + append entity " .I(reset_0_nb) // Buffer input (connect directly to top-level port)\n" + append entity " );\n" + append entity "\n" + append entity " IBUF perst_n_IBUF_inst (\n" + append entity " .O(perst_n),\n" + append entity " .I(perst_n_nb)\n" + append entity " );\n" + append entity "\n" + append entity " // -----------------------------------------------------------------\n" + append entity " // STATIC LAYER \n" + append entity " // -----------------------------------------------------------------\n" + append entity " design_static design_static_i\n" + append entity " (.aclk(aclk),\n" + append entity " .aresetn(aresetn),\n" + append entity " .axi_cnfg_araddr(axi_cnfg.araddr),\n" + append entity " .axi_cnfg_arprot(axi_cnfg.arprot),\n" + append entity " 
.axi_cnfg_arready(axi_cnfg.arready),\n" + append entity " .axi_cnfg_arvalid(axi_cnfg.arvalid),\n" + append entity " .axi_cnfg_awaddr(axi_cnfg.awaddr),\n" + append entity " .axi_cnfg_awprot(axi_cnfg.awprot),\n" + append entity " .axi_cnfg_awready(axi_cnfg.awready),\n" + append entity " .axi_cnfg_awvalid(axi_cnfg.awvalid),\n" + append entity " .axi_cnfg_bready(axi_cnfg.bready),\n" + append entity " .axi_cnfg_bresp(axi_cnfg.bresp),\n" + append entity " .axi_cnfg_bvalid(axi_cnfg.bvalid),\n" + append entity " .axi_cnfg_rdata(axi_cnfg.rdata),\n" + append entity " .axi_cnfg_rready(axi_cnfg.rready),\n" + append entity " .axi_cnfg_rresp(axi_cnfg.rresp),\n" + append entity " .axi_cnfg_rvalid(axi_cnfg.rvalid),\n" + append entity " .axi_cnfg_wdata(axi_cnfg.wdata),\n" + append entity " .axi_cnfg_wready(axi_cnfg.wready),\n" + append entity " .axi_cnfg_wstrb(axi_cnfg.wstrb),\n" + append entity " .axi_cnfg_wvalid(axi_cnfg.wvalid),\n" + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + append entity " .axi_ctrl_$i\_araddr(axi_ctrl\[$i].araddr),\n" + append entity " .axi_ctrl_$i\_arprot(axi_ctrl\[$i].arprot),\n" + append entity " .axi_ctrl_$i\_arready(axi_ctrl\[$i].arready),\n" + append entity " .axi_ctrl_$i\_arvalid(axi_ctrl\[$i].arvalid),\n" + append entity " .axi_ctrl_$i\_awaddr(axi_ctrl\[$i].awaddr),\n" + append entity " .axi_ctrl_$i\_awprot(axi_ctrl\[$i].awprot),\n" + append entity " .axi_ctrl_$i\_awready(axi_ctrl\[$i].awready),\n" + append entity " .axi_ctrl_$i\_awvalid(axi_ctrl\[$i].awvalid),\n" + append entity " .axi_ctrl_$i\_bready(axi_ctrl\[$i].bready),\n" + append entity " .axi_ctrl_$i\_bresp(axi_ctrl\[$i].bresp),\n" + append entity " .axi_ctrl_$i\_bvalid(axi_ctrl\[$i].bvalid),\n" + append entity " .axi_ctrl_$i\_rdata(axi_ctrl\[$i].rdata),\n" + append entity " .axi_ctrl_$i\_rready(axi_ctrl\[$i].rready),\n" + append entity " .axi_ctrl_$i\_rresp(axi_ctrl\[$i].rresp),\n" + append entity " .axi_ctrl_$i\_rvalid(axi_ctrl\[$i].rvalid),\n" + append entity " 
.axi_ctrl_$i\_wdata(axi_ctrl\[$i].wdata),\n" + append entity " .axi_ctrl_$i\_wready(axi_ctrl\[$i].wready),\n" + append entity " .axi_ctrl_$i\_wstrb(axi_ctrl\[$i].wstrb),\n" + append entity " .axi_ctrl_$i\_wvalid(axi_ctrl\[$i].wvalid),\n" + } + if {$cnfg(en_avx) eq 1} { + for {set i 0} {$i < $cnfg(n_reg)} {incr i} { + append entity " .axim_ctrl_$i\_araddr(axim_ctrl\[$i].araddr),\n" + append entity " .axim_ctrl_$i\_arburst(axim_ctrl\[$i].arburst),\n" + append entity " .axim_ctrl_$i\_arcache(axim_ctrl\[$i].arcache),\n" + append entity " .axim_ctrl_$i\_arlen(axim_ctrl\[$i].arlen),\n" + append entity " .axim_ctrl_$i\_arlock(axim_ctrl\[$i].arlock),\n" + append entity " .axim_ctrl_$i\_arprot(axim_ctrl\[$i].arprot),\n" + append entity " .axim_ctrl_$i\_arqos(axim_ctrl\[$i].arqos),\n" + append entity " .axim_ctrl_$i\_arready(axim_ctrl\[$i].arready),\n" + append entity " .axim_ctrl_$i\_arregion(axim_ctrl\[$i].arregion),\n" + append entity " .axim_ctrl_$i\_arsize(axim_ctrl\[$i].arsize),\n" + append entity " .axim_ctrl_$i\_arvalid(axim_ctrl\[$i].arvalid),\n" + append entity " .axim_ctrl_$i\_awaddr(axim_ctrl\[$i].awaddr),\n" + append entity " .axim_ctrl_$i\_awburst(axim_ctrl\[$i].awburst),\n" + append entity " .axim_ctrl_$i\_awcache(axim_ctrl\[$i].awcache),\n" + append entity " .axim_ctrl_$i\_awlen(axim_ctrl\[$i].awlen),\n" + append entity " .axim_ctrl_$i\_awlock(axim_ctrl\[$i].awlock),\n" + append entity " .axim_ctrl_$i\_awprot(axim_ctrl\[$i].awprot),\n" + append entity " .axim_ctrl_$i\_awqos(axim_ctrl\[$i].awqos),\n" + append entity " .axim_ctrl_$i\_awready(axim_ctrl\[$i].awready),\n" + append entity " .axim_ctrl_$i\_awregion(axim_ctrl\[$i].awregion),\n" + append entity " .axim_ctrl_$i\_awsize(axim_ctrl\[$i].awsize),\n" + append entity " .axim_ctrl_$i\_awvalid(axim_ctrl\[$i].awvalid),\n" + append entity " .axim_ctrl_$i\_bready(axim_ctrl\[$i].bready),\n" + append entity " .axim_ctrl_$i\_bresp(axim_ctrl\[$i].bresp),\n" + append entity " 
.axim_ctrl_$i\_bvalid(axim_ctrl\[$i].bvalid),\n" + append entity " .axim_ctrl_$i\_rdata(axim_ctrl\[$i].rdata),\n" + append entity " .axim_ctrl_$i\_rlast(axim_ctrl\[$i].rlast),\n" + append entity " .axim_ctrl_$i\_rready(axim_ctrl\[$i].rready),\n" + append entity " .axim_ctrl_$i\_rresp(axim_ctrl\[$i].rresp),\n" + append entity " .axim_ctrl_$i\_rvalid(axim_ctrl\[$i].rvalid),\n" + append entity " .axim_ctrl_$i\_wdata(axim_ctrl\[$i].wdata),\n" + append entity " .axim_ctrl_$i\_wlast(axim_ctrl\[$i].wlast),\n" + append entity " .axim_ctrl_$i\_wready(axim_ctrl\[$i].wready),\n" + append entity " .axim_ctrl_$i\_wstrb(axim_ctrl\[$i].wstrb),\n" + append entity " .axim_ctrl_$i\_wvalid(axim_ctrl\[$i].wvalid),\n" + } + } + for {set i 0} {$i < $cnfg(n_chan)} {incr i} { + append entity " .axis_dyn_in_$i\_tdata(axis_dyn_in\[$i].tdata),\n" + append entity " .axis_dyn_in_$i\_tkeep(axis_dyn_in\[$i].tkeep),\n" + append entity " .axis_dyn_in_$i\_tlast(axis_dyn_in\[$i].tlast),\n" + append entity " .axis_dyn_in_$i\_tready(axis_dyn_in\[$i].tready),\n" + append entity " .axis_dyn_in_$i\_tvalid(axis_dyn_in\[$i].tvalid),\n" + append entity " .axis_dyn_out_$i\_tdata(axis_dyn_out\[$i].tdata),\n" + append entity " .axis_dyn_out_$i\_tkeep(axis_dyn_out\[$i].tkeep),\n" + append entity " .axis_dyn_out_$i\_tlast(axis_dyn_out\[$i].tlast),\n" + append entity " .axis_dyn_out_$i\_tready(axis_dyn_out\[$i].tready),\n" + append entity " .axis_dyn_out_$i\_tvalid(axis_dyn_out\[$i].tvalid),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_ctl(xdma_req\[$i].c2h_ctl),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_dst_addr(xdma_req\[$i].c2h_addr),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_len(xdma_req\[$i].c2h_len),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_load(xdma_req\[$i].c2h_valid),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_ready(xdma_req\[$i].c2h_ready),\n" + append entity " .dsc_bypass_c2h_$i\_dsc_byp_src_addr(0),\n" + append entity " 
.dsc_bypass_h2c_$i\_dsc_byp_ctl(xdma_req\[$i].h2c_ctl),\n" + append entity " .dsc_bypass_h2c_$i\_dsc_byp_dst_addr(0),\n" + append entity " .dsc_bypass_h2c_$i\_dsc_byp_len(xdma_req\[$i].h2c_len),\n" + append entity " .dsc_bypass_h2c_$i\_dsc_byp_load(xdma_req\[$i].h2c_valid),\n" + append entity " .dsc_bypass_h2c_$i\_dsc_byp_ready(xdma_req\[$i].h2c_ready),\n" + append entity " .dsc_bypass_h2c_$i\_dsc_byp_src_addr(xdma_req\[$i].h2c_addr),\n" + append entity " .dsc_status_c2h_sts$i\(xdma_req\[$i].c2h_status),\n" + append entity " .dsc_status_h2c_sts$i\(xdma_req\[$i].h2c_status),\n" + } + if {$cnfg(en_ddr) eq 1} { + if {$cnfg(ddr_0) eq 1} { + append entity " .c0_ddr4_act_n(c0_ddr4_act_n),\n" + append entity " .c0_ddr4_adr(c0_ddr4_adr),\n" + append entity " .c0_ddr4_ba(c0_ddr4_ba),\n" + append entity " .c0_ddr4_bg(c0_ddr4_bg),\n" + append entity " .c0_ddr4_ck_c(c0_ddr4_ck_c),\n" + append entity " .c0_ddr4_ck_t(c0_ddr4_ck_t),\n" + append entity " .c0_ddr4_cke(c0_ddr4_cke),\n" + append entity " .c0_ddr4_cs_n(c0_ddr4_cs_n),\n" + append entity " .c0_ddr4_dm_n(c0_ddr4_dm_dbi_n),\n" + append entity " .c0_ddr4_dq(c0_ddr4_dq),\n" + append entity " .c0_ddr4_dqs_c(c0_ddr4_dqs_c),\n" + append entity " .c0_ddr4_dqs_t(c0_ddr4_dqs_t),\n" + append entity " .c0_ddr4_odt(c0_ddr4_odt),\n" + append entity " .c0_ddr4_reset_n(c0_ddr4_reset_n),\n" + append entity " .c0_sys_clk_0_clk_n(c0_sys_clk_n),\n" + append entity " .c0_sys_clk_0_clk_p(c0_sys_clk_p),\n" + } + if {$cnfg(ddr_1) eq 1} { + append entity " .c1_ddr4_act_n(c1_ddr4_act_n),\n" + append entity " .c1_ddr4_adr(c1_ddr4_adr),\n" + append entity " .c1_ddr4_ba(c1_ddr4_ba),\n" + append entity " .c1_ddr4_bg(c1_ddr4_bg),\n" + append entity " .c1_ddr4_ck_c(c1_ddr4_ck_c),\n" + append entity " .c1_ddr4_ck_t(c1_ddr4_ck_t),\n" + append entity " .c1_ddr4_cke(c1_ddr4_cke),\n" + append entity " .c1_ddr4_cs_n(c1_ddr4_cs_n),\n" + append entity " .c1_ddr4_dm_n(c1_ddr4_dm_dbi_n),\n" + append entity " .c1_ddr4_dq(c1_ddr4_dq),\n" + append entity " 
.c1_ddr4_dqs_c(c1_ddr4_dqs_c),\n" + append entity " .c1_ddr4_dqs_t(c1_ddr4_dqs_t),\n" + append entity " .c1_ddr4_odt(c1_ddr4_odt),\n" + append entity " .c1_ddr4_reset_n(c1_ddr4_reset_n),\n" + append entity " .c1_sys_clk_0_clk_n(c1_sys_clk_n),\n" + append entity " .c1_sys_clk_0_clk_p(c1_sys_clk_p),\n" + } + for {set i 0} {$i < 2 * $cnfg(n_ddr_chan)} {incr i} { + append entity " .axi_ddr_in_$i\_araddr(axi_ddr_in\[$i].araddr),\n" + append entity " .axi_ddr_in_$i\_arburst(axi_ddr_in\[$i].arburst),\n" + append entity " .axi_ddr_in_$i\_arcache(axi_ddr_in\[$i].arcache),\n" + append entity " .axi_ddr_in_$i\_arid(axi_ddr_in\[$i].arid),\n" + append entity " .axi_ddr_in_$i\_arlen(axi_ddr_in\[$i].arlen),\n" + append entity " .axi_ddr_in_$i\_arlock(axi_ddr_in\[$i].arlock),\n" + append entity " .axi_ddr_in_$i\_arprot(axi_ddr_in\[$i].arprot),\n" + append entity " .axi_ddr_in_$i\_arqos(axi_ddr_in\[$i].arqos),\n" + append entity " .axi_ddr_in_$i\_arready(axi_ddr_in\[$i].arready),\n" + append entity " .axi_ddr_in_$i\_arregion(axi_ddr_in\[$i].arregion),\n" + append entity " .axi_ddr_in_$i\_arsize(axi_ddr_in\[$i].arsize),\n" + append entity " .axi_ddr_in_$i\_arvalid(axi_ddr_in\[$i].arvalid),\n" + append entity " .axi_ddr_in_$i\_awaddr(axi_ddr_in\[$i].awaddr),\n" + append entity " .axi_ddr_in_$i\_awburst(axi_ddr_in\[$i].awburst),\n" + append entity " .axi_ddr_in_$i\_awcache(axi_ddr_in\[$i].awcache),\n" + append entity " .axi_ddr_in_$i\_awid(axi_ddr_in\[$i].awid),\n" + append entity " .axi_ddr_in_$i\_awlen(axi_ddr_in\[$i].awlen),\n" + append entity " .axi_ddr_in_$i\_awlock(axi_ddr_in\[$i].awlock),\n" + append entity " .axi_ddr_in_$i\_awprot(axi_ddr_in\[$i].awprot),\n" + append entity " .axi_ddr_in_$i\_awqos(axi_ddr_in\[$i].awqos),\n" + append entity " .axi_ddr_in_$i\_awready(axi_ddr_in\[$i].awready),\n" + append entity " .axi_ddr_in_$i\_awregion(axi_ddr_in\[$i].awregion),\n" + append entity " .axi_ddr_in_$i\_awsize(axi_ddr_in\[$i].awsize),\n" + append entity " 
.axi_ddr_in_$i\_awvalid(axi_ddr_in\[$i].awvalid),\n" + append entity " .axi_ddr_in_$i\_bid(axi_ddr_in\[$i].bid),\n" + append entity " .axi_ddr_in_$i\_bready(axi_ddr_in\[$i].bready),\n" + append entity " .axi_ddr_in_$i\_bresp(axi_ddr_in\[$i].bresp),\n" + append entity " .axi_ddr_in_$i\_bvalid(axi_ddr_in\[$i].bvalid),\n" + append entity " .axi_ddr_in_$i\_rdata(axi_ddr_in\[$i].rdata),\n" + append entity " .axi_ddr_in_$i\_rid(axi_ddr_in\[$i].rid),\n" + append entity " .axi_ddr_in_$i\_rlast(axi_ddr_in\[$i].rlast),\n" + append entity " .axi_ddr_in_$i\_rready(axi_ddr_in\[$i].rready),\n" + append entity " .axi_ddr_in_$i\_rresp(axi_ddr_in\[$i].rresp),\n" + append entity " .axi_ddr_in_$i\_rvalid(axi_ddr_in\[$i].rvalid),\n" + append entity " .axi_ddr_in_$i\_wdata(axi_ddr_in\[$i].wdata),\n" + append entity " .axi_ddr_in_$i\_wlast(axi_ddr_in\[$i].wlast),\n" + append entity " .axi_ddr_in_$i\_wready(axi_ddr_in\[$i].wready),\n" + append entity " .axi_ddr_in_$i\_wstrb(axi_ddr_in\[$i].wstrb),\n" + append entity " .axi_ddr_in_$i\_wvalid(axi_ddr_in\[$i].wvalid),\n" + } + } + append entity " .pcie_clk_clk_n(pcie_clk_clk_n),\n" + append entity " .pcie_clk_clk_p(pcie_clk_clk_p),\n" + append entity " .pcie_x16_rxn(pcie_x16_rxn),\n" + append entity " .pcie_x16_rxp(pcie_x16_rxp),\n" + append entity " .pcie_x16_txn(pcie_x16_txn),\n" + append entity " .pcie_x16_txp(pcie_x16_txp),\n" + if {$cnfg(en_pr) eq 1} { + append entity " .pclk(pclk),\n" + append entity " .presetn(presetn),\n" + } + append entity " .perst_n(perst_n),\n" + append entity " .reset_0(reset_0),\n" + append entity " .usr_irq(usr_irq)\n" + append entity " );\n" + append entity " \n" + append entity " // -----------------------------------------------------------------\n" + append entity " // STATIC CONFIG \n" + append entity " // -----------------------------------------------------------------\n" + append entity " static_slave inst_static_slave (\n" + append entity " .aclk(aclk),\n" + append entity " .aresetn(aresetn),\n" + if 
{$cnfg(en_pr) eq 1} { + append entity " .pr_xdma_req(xdma_req\[$cnfg(pr_chan)]),\n" + } + if {$cnfg(en_fv) eq 1} { + append entity " .arp_lookup_request(arp_lookup_request),\n" + append entity " .arp_lookup_reply(arp_lookup_reply),\n" + append entity " .set_ip_addr(set_ip_addr),\n" + append entity " .set_board_number(set_board_number),\n" + append entity " .qp_interface(qp_interface),\n" + append entity " .conn_interface(conn_interface),\n" + } + append entity " .lowspeed_ctrl(),\n" + append entity " .axi_ctrl(axi_cnfg)\n" + append entity " );\n" + append entity "\n" + if {$cnfg(en_fv) eq 1} { + append entity " // -----------------------------------------------------------------\n" + append entity " // RDMA \n" + append entity " // -----------------------------------------------------------------\n" + append entity " // Clock buffer\n" + append entity " IBUFDS #(\n" + append entity " .DQS_BIAS(\"FALSE\") // (FALSE, TRUE)\n" + append entity " ) dclk_BUFG_inst (\n" + append entity " .O(dclk), // 1-bit output: Buffer output\n" + append entity " .I(dclk_p), // 1-bit input: Diff_p buffer input (connect directly to top-level port)\n" + append entity " .IB(dclk_n) // 1-bit input: Diff_n buffer input (connect directly to top-level port)\n" + append entity " );\n" + append entity "\n" + append entity " network_top inst_network_top (\n" + append entity " .aclk(aclk),\n" + append entity " .aresetn(aresetn),\n" + append entity " .sys_reset(reset_0),\n" + append entity " .dclk(dclk),\n" + append entity " .gt_refclk_p(gt_refclk_p),\n" + append entity " .gt_refclk_n(gt_refclk_n),\n" + append entity " .gt_rxp_in(gt_rxp_in),\n" + append entity " .gt_rxn_in(gt_rxn_in),\n" + append entity " .gt_txp_out(gt_txp_out),\n" + append entity " .gt_txn_out(gt_txn_out),\n" + append entity " .arp_lookup_request(arp_lookup_request),\n" + append entity " .arp_lookup_reply(arp_lookup_reply),\n" + append entity " .set_ip_addr(set_ip_addr),\n" + append entity " 
.set_board_number(set_board_number),\n" + append entity " .qp_interface(qp_interface),\n" + append entity " .conn_interface(conn_interface),\n" + append entity " .rdma_req_host(rdma_req_host),\n" + if {$cnfg(en_fvv) eq 1} { + append entity " .rdma_req_card(rdma_req_card),\n" + append entity " .rdma_req_rpc(rdma_req_rpc),\n" + } + append entity " .rdma_rd_cmd(rdma_rd_cmd),\n" + append entity " .rdma_wr_cmd(rdma_wr_cmd),\n" + append entity " .axis_rdma_rd_data(axis_rdma_rd_data),\n" + append entity " .axis_rdma_wr_data(axis_rdma_wr_data)\n" + append entity " );\n" + append entity "\n" + } + if {$cnfg(en_pr) eq 1} { + append entity " // -----------------------------------------------------------------\n" + append entity " // PR \n" + append entity " // -----------------------------------------------------------------\n" + append entity " pr_clock_converter inst_clk_cnvrt_pr (\n" + append entity " .s_axis_aresetn(aresetn),\n" + append entity " .m_axis_aresetn(presetn),\n" + append entity " .s_axis_aclk(aclk),\n" + append entity " .s_axis_tvalid(axis_dyn_out\[$cnfg(pr_chan)].tvalid),\n" + append entity " .s_axis_tready(axis_dyn_out\[$cnfg(pr_chan)].tready),\n" + append entity " .s_axis_tdata(axis_dyn_out\[$cnfg(pr_chan)].tdata),\n" + append entity " .s_axis_tkeep(axis_dyn_out\[$cnfg(pr_chan)].tkeep),\n" + append entity " .s_axis_tlast(axis_dyn_out\[$cnfg(pr_chan)].tlast),\n" + append entity " .m_axis_aclk(pclk),\n" + append entity " .m_axis_tvalid(axis_pr.tvalid),\n" + append entity " .m_axis_tready(axis_pr.tready),\n" + append entity " .m_axis_tdata(axis_pr.tdata),\n" + append entity " .m_axis_tkeep(axis_pr.tkeep),\n" + append entity " .m_axis_tlast(axis_pr.tlast)\n" + append entity " );\n" + append entity " \n" + append entity " pr_dwidth_converter inst_dwidth_cnvrt_pr (\n" + append entity " .aclk(pclk),\n" + append entity " .aresetn(presetn),\n" + append entity " .s_axis_tvalid(axis_pr.tvalid),\n" + append entity " .s_axis_tready(axis_pr.tready),\n" + append entity " 
.s_axis_tdata(axis_pr.tdata),\n" + append entity " .s_axis_tkeep(axis_pr.tkeep),\n" + append entity " .s_axis_tlast(axis_pr.tlast),\n" + append entity " .m_axis_tvalid(axis_pr_valid),\n" + append entity " .m_axis_tready(axis_pr_ready),\n" + append entity " .m_axis_tdata(axis_pr_data),\n" + append entity " .m_axis_tkeep(),\n" + append entity " .m_axis_tlast()\n" + append entity " );\n" + append entity " \n" + append entity " // Partial reconfiguration\n" + append entity " assign icap_csn = ~axis_pr_valid;\n" + append entity " assign axis_pr_ready = 1'b1;\n" + append entity " assign icap_data = {axis_pr_data\[24], axis_pr_data\[25], axis_pr_data\[26], axis_pr_data\[27], axis_pr_data\[28], axis_pr_data\[29], axis_pr_data\[30], axis_pr_data\[31],\n" + append entity " axis_pr_data\[16], axis_pr_data\[17], axis_pr_data\[18], axis_pr_data\[19], axis_pr_data\[20], axis_pr_data\[21], axis_pr_data\[22], axis_pr_data\[23],\n" + append entity " axis_pr_data\[8], axis_pr_data\[9], axis_pr_data\[10], axis_pr_data\[11], axis_pr_data\[12], axis_pr_data\[13], axis_pr_data\[14], axis_pr_data\[15],\n" + append entity " axis_pr_data\[0], axis_pr_data\[1], axis_pr_data\[2], axis_pr_data\[3], axis_pr_data\[4], axis_pr_data\[5], axis_pr_data\[6], axis_pr_data\[7]};\n" + append entity " \n" + append entity " ICAPE3 #(\n" + append entity " .ICAP_AUTO_SWITCH(\"DISABLE\"),\n" + append entity " .SIM_CFG_FILE_NAME(\"NONE\")\n" + append entity " )\n" + append entity " ICAPE3_inst (\n" + append entity " .AVAIL(icap_avail), // 1-bit output: Availability status of ICAP\n" + append entity " .O(icap_out), // 32-bit output: Configuration data output bus\n" + append entity " .PRDONE(icap_done), // 1-bit output: Indicates completion of Partial Reconfiguration\n" + append entity " .PRERROR(icap_error), // 1-bit output: Indicates Error during Partial Reconfiguration\n" + append entity " .CLK(pclk), // 1-bit input: Clock input\n" + append entity " .CSIB(icap_csn), // 1-bit input: Active-Low ICAP enable\n" 
+ append entity " .I(icap_data), // 32-bit input: Configuration data input bus\n" + append entity " .RDWRB(1'b0) // 1-bit input: Read/Write Select input\n" + append entity " );\n" + append entity " \n" + } + append entity " // -----------------------------------------------------------------\n" + append entity " // DYNAMIC LAYER \n" + append entity " // -----------------------------------------------------------------\n" + append entity " design_dynamic_wrapper inst_dynamic (\n" + append entity " .sys_rst(reset_0),\n" + append entity " .aresetn(aresetn),\n" + append entity " .aclk(aclk),\n" + append entity " .axi_ctrl(axi_ctrl\[0+:$cnfg(n_reg)]),\n" + if {$cnfg(en_avx) eq 1} { + append entity " .axim_ctrl(axim_ctrl\[0+:$cnfg(n_reg)]),\n" + } + if {$cnfg(en_ddr) eq 1} { + if {$cnfg(ddr_cnfg) eq 1} { + append entity " .axi_ddr_in(axi_ddr_in\[0+:$cnfg(n_ddr_chan)*2]),\n" + } else { + append entity " .axi_ddr_in(axi_ddr_in\[0+:2]),\n" + } + } + if {$cnfg(en_strm) eq 1} { + append entity " .axis_host_in(axis_dyn_in\[$cnfg(strm_chan)]),\n" + append entity " .axis_host_out(axis_dyn_out\[$cnfg(strm_chan)]),\n" + append entity " .host_xdma_req(xdma_req\[$cnfg(strm_chan)]),\n" + } + if {$cnfg(en_ddr) eq 1} { + append entity " .axis_card_in(axis_dyn_in\[$cnfg(ddr_chan)]),\n" + append entity " .axis_card_out(axis_dyn_out\[$cnfg(ddr_chan)]),\n" + append entity " .card_xdma_req(xdma_req\[$cnfg(ddr_chan)]),\n" + } + if {$cnfg(en_fv) eq 1} { + append entity " .rdma_req_host(rdma_req_host\[0+:$cnfg(n_reg)]),\n" + if {$cnfg(en_fvv) eq 1} { + append entity " .rdma_req_card(rdma_req_card\[0+:$cnfg(n_reg)]),\n" + append entity " .rdma_req_rpc(rdma_req_rpc\[0+:$cnfg(n_reg)]),\n" + } + append entity " .rdma_rd_cmd(rdma_rd_cmd\[0+:$cnfg(n_reg)]),\n" + append entity " .rdma_wr_cmd(rdma_wr_cmd\[0+:$cnfg(n_reg)]),\n" + append entity " .axis_rdma_rd_data(axis_rdma_rd_data\[0+:$cnfg(n_reg)]),\n" + append entity " .axis_rdma_wr_data(axis_rdma_wr_data\[0+:$cnfg(n_reg)]),\n" + } + append entity " 
.usr_irq(usr_irq\[0+:$cnfg(n_reg)]),\n" + append entity " .S_BSCAN_drck(),\n" + append entity " .S_BSCAN_shift(),\n" + append entity " .S_BSCAN_tdi(),\n" + append entity " .S_BSCAN_update(),\n" + append entity " .S_BSCAN_sel(),\n" + append entity " .S_BSCAN_tdo(),\n" + append entity " .S_BSCAN_tms(),\n" + append entity " .S_BSCAN_tck(),\n" + append entity " .S_BSCAN_runtest(),\n" + append entity " .S_BSCAN_reset(),\n" + append entity " .S_BSCAN_capture(),\n" + append entity " .S_BSCAN_bscanid_en() \n" + append entity " );\n" + append entity " \n" + append entity "endmodule\n" + append entity "\n" + lappend template $entity + set vho_file [open $f_out w] + foreach line $template { + puts $vho_file $line + } + close $vho_file +} \ No newline at end of file diff --git a/hw/scripts/wr_hdl/wr_hdl_user.tcl b/hw/scripts/wr_hdl/wr_hdl_user.tcl new file mode 100644 index 00000000..2630e23d --- /dev/null +++ b/hw/scripts/wr_hdl/wr_hdl_user.tcl @@ -0,0 +1,400 @@ +######################################################################################### +# User shell wrapper (Needed because of PR) +######################################################################################### +proc wr_hdl_user_wrapper {f_out c_reg} { + upvar #0 cfg cnfg + + set template {} + set entity {} + append entity "`timescale 1ns / 1ps\n" + append entity "\n" + append entity "import lynxTypes::*;\n" + append entity "\n" + append entity "/**\n" + append entity " * User logic wrapper\n" + append entity " * \n" + append entity " */\n" + append entity "module design_user_wrapper_$c_reg #(\n" + append entity ") (\n" + append entity " // AXI4 control\n" + append entity " input logic\[AXI_ADDR_BITS-1:0] axi_ctrl_araddr,\n" + append entity " input logic\[2:0] axi_ctrl_arprot,\n" + append entity " output logic axi_ctrl_arready,\n" + append entity " input logic axi_ctrl_arvalid,\n" + append entity " input logic\[AXI_ADDR_BITS-1:0] axi_ctrl_awaddr,\n" + append entity " input logic\[2:0] 
axi_ctrl_awprot,\n" + append entity " output logic axi_ctrl_awready,\n" + append entity " input logic axi_ctrl_awvalid, \n" + append entity " input logic axi_ctrl_bready,\n" + append entity " output logic\[1:0] axi_ctrl_bresp,\n" + append entity " output logic axi_ctrl_bvalid,\n" + append entity " output logic\[AXI_ADDR_BITS-1:0] axi_ctrl_rdata,\n" + append entity " input logic axi_ctrl_rready,\n" + append entity " output logic\[1:0] axi_ctrl_rresp,\n" + append entity " output logic axi_ctrl_rvalid,\n" + append entity " input logic\[AXIL_DATA_BITS-1:0] axi_ctrl_wdata,\n" + append entity " output logic axi_ctrl_wready,\n" + append entity " input logic\[(AXIL_DATA_BITS/8)-1:0] axi_ctrl_wstrb,\n" + append entity " input logic axi_ctrl_wvalid,\n" + append entity "\n" + if {$cnfg(en_bpss) eq 1} { + append entity " // Descriptor bypass\n" + append entity " output logic rd_req_user_valid,\n" + append entity " input logic rd_req_user_ready,\n" + append entity " output req_t rd_req_user_req,\n" + append entity " output logic wr_req_user_valid,\n" + append entity " input logic wr_req_user_ready,\n" + append entity " output req_t wr_req_user_req,\n" + append entity "\n" + } + if {$cnfg(en_fv) eq 1} { + if {$cnfg(en_fvv) eq 1} { + append entity " // RDMA Farview\n" + append entity " input logic fv_req_valid,\n" + append entity " output logic fv_req_ready,\n" + append entity " input logic\[FV_REQ_BITS-1:0] fv_req_data,\n" + append entity " output logic fv_cmd_valid,\n" + append entity " input logic fv_cmd_ready,\n" + append entity " output logic\[FV_REQ_BITS-1:0] fv_cmd_data,\n" + append entity "\n" + } + append entity " // RDMA mem\n" + append entity " input logic rd_req_rdma_valid,\n" + append entity " output logic rd_req_rdma_ready,\n" + append entity " input req_t rd_req_rdma_req,\n" + append entity " input logic wr_req_rdma_valid,\n" + append entity " output logic wr_req_rdma_ready,\n" + append entity " input req_t wr_req_rdma_req,\n" + append entity "\n" + append entity " 
// RDMA DATA\n" + append entity " output logic axis_rdma_src_tlast,\n" + append entity " input logic axis_rdma_src_tready,\n" + append entity " output logic axis_rdma_src_tvalid,\n" + append entity " output logic\[AXI_DATA_BITS-1:0] axis_rdma_src_tdata,\n" + append entity " output logic\[AXI_DATA_BITS/8-1:0] axis_rdma_src_tkeep,\n" + append entity " input logic axis_rdma_sink_tlast,\n" + append entity " output logic axis_rdma_sink_tready,\n" + append entity " input logic axis_rdma_sink_tvalid,\n" + append entity " input logic\[AXI_DATA_BITS-1:0] axis_rdma_sink_tdata,\n" + append entity " input logic\[AXI_DATA_BITS/8-1:0] axis_rdma_sink_tkeep,\n" + append entity "\n" + } + if {$cnfg(en_strm) eq 1} { + append entity " // AXI4S HOST src\n" + append entity " output logic\[AXI_DATA_BITS-1:0] axis_host_src_tdata,\n" + append entity " output logic\[AXI_DATA_BITS/8-1:0] axis_host_src_tkeep,\n" + append entity " output logic axis_host_src_tlast,\n" + append entity " output logic\[3:0] axis_host_src_tdest,\n" + append entity " input logic axis_host_src_tready,\n" + append entity " output logic axis_host_src_tvalid,\n" + append entity "\n" + append entity " // AXI4S HOST sink\n" + append entity " input logic\[AXI_DATA_BITS-1:0] axis_host_sink_tdata,\n" + append entity " input logic\[AXI_DATA_BITS/8-1:0] axis_host_sink_tkeep,\n" + append entity " input logic axis_host_sink_tlast,\n" + append entity " input logic\[3:0] axis_host_sink_tdest,\n" + append entity " output logic axis_host_sink_tready,\n" + append entity " input logic axis_host_sink_tvalid,\n" + append entity "\n" + } + if {$cnfg(en_ddr) eq 1} { + append entity " // AXI4S CARD src\n" + append entity " output logic\[N_DDR_CHAN*AXI_DATA_BITS-1:0] axis_card_src_tdata,\n" + append entity " output logic\[N_DDR_CHAN*AXI_DATA_BITS/8-1:0] axis_card_src_tkeep,\n" + append entity " output logic axis_card_src_tlast,\n" + append entity " output logic\[3:0] axis_card_src_tdest,\n" + append entity " input logic 
axis_card_src_tready,\n" + append entity " output logic axis_card_src_tvalid,\n" + append entity "\n" + append entity " // AXI4S CARD sink\n" + append entity " input logic\[N_DDR_CHAN*AXI_DATA_BITS-1:0] axis_card_sink_tdata,\n" + append entity " input logic\[N_DDR_CHAN*AXI_DATA_BITS/8-1:0] axis_card_sink_tkeep,\n" + append entity " input logic axis_card_sink_tlast,\n" + append entity " input logic\[3:0] axis_card_sink_tdest,\n" + append entity " output logic axis_card_sink_tready,\n" + append entity " input logic axis_card_sink_tvalid,\n" + append entity "\n" + } + append entity " // Clock and reset\n" + append entity " input logic aclk,\n" + append entity " input logic\[0:0] aresetn\n" + append entity ");\n" + append entity "\n" + append entity "// Control\n" + append entity "AXI4L axi_ctrl_user();\n" + append entity "\n" + append entity "assign axi_ctrl_user.araddr = axi_ctrl_araddr;\n" + append entity "assign axi_ctrl_user.arprot = axi_ctrl_arprot;\n" + append entity "assign axi_ctrl_user.arvalid = axi_ctrl_arvalid;\n" + append entity "assign axi_ctrl_user.awaddr = axi_ctrl_awaddr;\n" + append entity "assign axi_ctrl_user.awprot = axi_ctrl_awprot;\n" + append entity "assign axi_ctrl_user.awvalid = axi_ctrl_awvalid;\n" + append entity "assign axi_ctrl_user.bready = axi_ctrl_bready;\n" + append entity "assign axi_ctrl_user.rready = axi_ctrl_rready;\n" + append entity "assign axi_ctrl_user.wdata = axi_ctrl_wdata;\n" + append entity "assign axi_ctrl_user.wstrb = axi_ctrl_wstrb;\n" + append entity "assign axi_ctrl_user.wvalid = axi_ctrl_wvalid;\n" + append entity "\n" + append entity "assign axi_ctrl_arready = axi_ctrl_user.arready;\n" + append entity "assign axi_ctrl_awready = axi_ctrl_user.awready;\n" + append entity "assign axi_ctrl_bresp = axi_ctrl_user.bresp;\n" + append entity "assign axi_ctrl_bvalid = axi_ctrl_user.bvalid;\n" + append entity "assign axi_ctrl_rdata = axi_ctrl_user.rdata;\n" + append entity "assign axi_ctrl_rresp = axi_ctrl_user.rresp;\n" + 
append entity "assign axi_ctrl_rvalid = axi_ctrl_user.rvalid;\n" + append entity "assign axi_ctrl_wready = axi_ctrl_user.wready;\n" + append entity "\n" + if {$cnfg(en_bpss) eq 1} { + append entity "// Descriptor bypass\n" + append entity "reqIntf rd_req_user();\n" + append entity "reqIntf wr_req_user();\n" + append entity "\n" + append entity "assign rd_req_user_valid = rd_req_user.valid;\n" + append entity "assign rd_req_user.ready = rd_req_user_ready;\n" + append entity "assign rd_req_user_req = rd_req_user.req;\n" + append entity "assign wr_req_user_valid = wr_req_user.valid;\n" + append entity "assign wr_req_user.ready = wr_req_user_ready;\n" + append entity "assign wr_req_user_req = wr_req_user.req;\n" + append entity "\n" + } + if {$cnfg(en_fv) eq 1} { + if {$cnfg(en_fvv) eq 1} { + append entity "// RDMA Farview\n" + append entity "metaIntf #(.DATA_BITS(FV_REQ_BITS)) fv_req();\n" + append entity "metaIntf #(.DATA_BITS(FV_REQ_BITS)) fv_cmd();\n" + append entity "\n" + append entity "assign fv_req.valid = fv_req_valid;\n" + append entity "assign fv_req_ready = fv_req.ready;\n" + append entity "assign fv_req.data = fv_req_data;\n" + append entity "assign fv_cmd_valid = fv_cmd.valid;\n" + append entity "assign fv_cmd.ready = fv_cmd_ready;\n" + append entity "assign fv_cmd_data = fv_cmd.data;\n" + append entity "\n" + } + append entity "// RDMA commands\n" + append entity "reqIntf rd_req_rdma();\n" + append entity "reqIntf wr_req_rdma();\n" + append entity "\n" + append entity "assign rd_req_rdma.valid = rd_req_rdma_valid;\n" + append entity "assign rd_req_rdma_ready = rd_req_rdma.ready;\n" + append entity "assign rd_req_rdma.req = rd_req_rdma_req;\n" + append entity "assign wr_req_rdma.valid = wr_req_rdma_valid;\n" + append entity "assign wr_req_rdma_ready = wr_req_rdma.ready;\n" + append entity "assign wr_req_rdma.req = wr_req_rdma_req;\n" + append entity "\n" + append entity "// AXIS RDMA source\n" + append entity "AXI4S axis_rdma_src();\n" + append entity 
"\n" + append entity "assign axis_rdma_src_tdata = axis_rdma_src.tdata;\n" + append entity "assign axis_rdma_src_tkeep = axis_rdma_src.tkeep;\n" + append entity "assign axis_rdma_src_tlast = axis_rdma_src.tlast;\n" + append entity "assign axis_rdma_src_tvalid = axis_rdma_src.tvalid;\n" + append entity "\n" + append entity "assign axis_rdma_src.tready = axis_rdma_src_tready;\n" + append entity "\n" + append entity "// AXIS RDMA sink\n" + append entity "AXI4S axis_rdma_sink();\n" + append entity "\n" + append entity "assign axis_rdma_sink.tdata = axis_rdma_sink_tdata;\n" + append entity "assign axis_rdma_sink.tkeep = axis_rdma_sink_tkeep;\n" + append entity "assign axis_rdma_sink.tlast = axis_rdma_sink_tlast;\n" + append entity "assign axis_rdma_sink.tvalid = axis_rdma_sink_tvalid;\n" + append entity "\n" + append entity "assign axis_rdma_sink_tready = axis_rdma_sink.tready;\n" + append entity "\n" + } + if {$cnfg(en_strm) eq 1} { + append entity "// AXIS host source\n" + append entity "AXI4SR axis_host_src();\n" + append entity "\n" + append entity "assign axis_host_src_tdata = axis_host_src.tdata;\n" + append entity "assign axis_host_src_tkeep = axis_host_src.tkeep;\n" + append entity "assign axis_host_src_tlast = axis_host_src.tlast;\n" + append entity "assign axis_host_src_tdest = axis_host_src.tdest;\n" + append entity "assign axis_host_src_tvalid = axis_host_src.tvalid;\n" + append entity "\n" + append entity "assign axis_host_src.tready = axis_host_src_tready;\n" + append entity "\n" + append entity "// AXIS host sink\n" + append entity "AXI4SR axis_host_sink();\n" + append entity "\n" + append entity "assign axis_host_sink.tdata = axis_host_sink_tdata;\n" + append entity "assign axis_host_sink.tkeep = axis_host_sink_tkeep;\n" + append entity "assign axis_host_sink.tlast = axis_host_sink_tlast;\n" + append entity "assign axis_host_sink.tdest = axis_host_sink_tdest;\n" + append entity "assign axis_host_sink.tvalid = axis_host_sink_tvalid;\n" + append entity 
"\n" + append entity "assign axis_host_sink_tready = axis_host_sink.tready;\n" + append entity "\n" + } + if {$cnfg(en_ddr) eq 1} { + append entity "// AXIS card source\n" + append entity "AXI4SR #(.AXI4S_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) axis_card_src();\n" + append entity "\n" + append entity "assign axis_card_src_tdata = axis_card_src.tdata;\n" + append entity "assign axis_card_src_tkeep = axis_card_src.tkeep;\n" + append entity "assign axis_card_src_tlast = axis_card_src.tlast;\n" + append entity "assign axis_card_src_tdest = axis_card_src.tdest;\n" + append entity "assign axis_card_src_tvalid = axis_card_src.tvalid;\n" + append entity "\n" + append entity "assign axis_card_src.tready = axis_card_src_tready;\n" + append entity "\n" + append entity "// AXIS card sink\n" + append entity "AXI4SR #(.AXI4S_DATA_BITS(N_DDR_CHAN*AXI_DATA_BITS)) axis_card_sink();\n" + append entity "\n" + append entity "assign axis_card_sink.tdata = axis_card_sink_tdata;\n" + append entity "assign axis_card_sink.tkeep = axis_card_sink_tkeep;\n" + append entity "assign axis_card_sink.tlast = axis_card_sink_tlast;\n" + append entity "assign axis_card_sink.tdest = axis_card_sink_tdest;\n" + append entity "assign axis_card_sink.tvalid = axis_card_sink_tvalid;\n" + append entity "\n" + append entity "assign axis_card_sink_tready = axis_card_sink.tready;\n" + append entity "\n" + } + append entity "// USER LOGIC\n" + append entity "design_user_logic_$c_reg inst_user_$c_reg (\n" + append entity " .axi_ctrl(axi_ctrl_user),\n" + if {$cnfg(en_bpss) eq 1} { + append entity " .rd_req_user(rd_req_user),\n" + append entity " .wr_req_user(wr_req_user),\n" + } + if {$cnfg(en_fv) eq 1} { + if {$cnfg(en_fvv) eq 1} { + append entity " .fv_src(fv_cmd),\n" + append entity " .fv_sink(fv_req),\n" + } + append entity " .rd_req_rdma(rd_req_rdma),\n" + append entity " .wr_req_rdma(wr_req_rdma),\n" + append entity " .axis_rdma_src(axis_rdma_src),\n" + append entity " .axis_rdma_sink(axis_rdma_sink),\n" + } + 
if {$cnfg(en_strm) eq 1} {
+        append entity " .axis_host_src(axis_host_src),\n"
+        append entity " .axis_host_sink(axis_host_sink),\n"
+    }
+    if {$cnfg(en_ddr) eq 1} {
+        append entity " .axis_card_src(axis_card_src),\n"
+        append entity " .axis_card_sink(axis_card_sink),\n"
+    }
+    append entity " .aclk(aclk),\n"
+    append entity " .aresetn(aresetn)\n"
+    append entity ");\n"
+    append entity "\n"
+    append entity "\n"
+    append entity "endmodule\n"
+    append entity "\n"
+    lappend template $entity
+    set vho_file [open $f_out w]
+    foreach line $template {
+        puts $vho_file $line
+    }
+    close $vho_file
+}
+
+#########################################################################################
+# User logic shell: wr_hdl_user f_out c_reg writes the SystemVerilog skeleton module design_user_logic_<c_reg> to file f_out. Ports and tie-offs are emitted according to the en_bpss, en_fv, en_fvv, en_strm and en_ddr entries of the global cfg array.
+#########################################################################################
+proc wr_hdl_user {f_out c_reg} {
+    upvar #0 cfg cnfg ;# cnfg is a local alias of the global cfg array
+
+    set template {}
+    set entity {}
+    append entity "`timescale 1ns / 1ps\n"
+    append entity "\n"
+    append entity "`include \"axi_macros.svh\"\n"
+    append entity "`include \"lynx_macros.svh\"\n"
+    append entity "\n"
+    append entity "import lynxTypes::*;\n"
+    append entity "\n"
+    append entity "/**\n"
+    append entity " * User logic\n"
+    append entity " * \n"
+    append entity " */\n"
+    append entity "module design_user_logic_$c_reg (\n"
+    append entity " // AXI4L CONTROL\n"
+    append entity " // Slave control. Utilize this interface for any kind of CSR implementation.\n"
+    append entity " AXI4L.s axi_ctrl,\n"
+    append entity "\n"
+    if {$cnfg(en_bpss) eq 1} {
+        append entity " // DESCRIPTOR BYPASS\n"
+        append entity " // vaddr\[48] - virt. address, len\[28] - length, ctl\[1] - final, stream\[1], sync\[1] - explicit move\n" ;# open brackets are escaped, otherwise Tcl performs command substitution inside the quoted string and fails on the unknown command 48
+        append entity " // Explicit transfer requests from user logic.\n"
+        append entity " reqIntf.m rd_req_user,\n"
+        append entity " reqIntf.m wr_req_user,\n"
+        append entity "\n"
+    }
+    if {$cnfg(en_fv) eq 1} {
+        append entity " // RDMA\n"
+        append entity " // vaddr\[48] - virtual address, len\[28] - length, ctl\[1] - final transfer, sync\[1] - host synchronization\n" ;# same escaping as above
+        append entity " // Read and write descriptors arriving from the network stack.\n"
+        append entity " reqIntf.s rd_req_rdma,\n"
+        append entity " reqIntf.s wr_req_rdma,\n"
+        append entity "\n"
+        if {$cnfg(en_fvv) eq 1} {
+            append entity " // FARVIEW\n"
+            append entity " // Remote one-sided RPC calls and response commands \[256]-req, \[256]-cmd. bits.\n"
+            append entity " metaIntf.m fv_src,\n"
+            append entity " metaIntf.s fv_sink,\n"
+            append entity "\n"
+        }
+        append entity " // AXI4S RDMA DATA\n"
+        append entity " // Network data.\n"
+        append entity " AXI4S.m axis_rdma_src,\n"
+        append entity " AXI4S.s axis_rdma_sink,\n"
+        append entity "\n"
+    }
+    if {$cnfg(en_strm) eq 1} {
+        append entity " // AXI4S host\n"
+        append entity " // Host streams.\n"
+        append entity " AXI4SR.m axis_host_src,\n"
+        append entity " AXI4SR.s axis_host_sink,\n"
+    }
+    if {$cnfg(en_ddr) eq 1} {
+        append entity " // AXI4S card\n"
+        append entity " // Card streams.\n"
+        append entity " AXI4SR.m axis_card_src,\n"
+        append entity " AXI4SR.s axis_card_sink,\n"
+    }
+    append entity "\n"
+    append entity " // Clock and reset\n"
+    append entity " input wire aclk,\n"
+    append entity " input wire\[0:0] aresetn\n"
+    append entity ");\n"
+    append entity "\n"
+    append entity "/* -- Tie-off unused interfaces and signals ----------------------------- */\n"
+    append entity "always_comb axi_ctrl.tie_off_s();\n"
+    if {$cnfg(en_bpss) eq 1} {
+        append entity "always_comb rd_req_user.tie_off_m();\n"
+        append entity "always_comb wr_req_user.tie_off_m();\n"
+    }
+    if {$cnfg(en_fv)
eq 1} { + append entity "always_comb rd_req_rdma.tie_off_s();\n" + append entity "always_comb wr_req_rdma.tie_off_s();\n" + if {$cnfg(en_fvv) eq 1} { + append entity "always_comb fv_src.tie_off_m();\n" + append entity "always_comb fv_sink.tie_off_s();\n" + } + append entity "always_comb axis_rdma_src.tie_off_m();\n" + append entity "always_comb axis_rdma_sink.tie_off_s();\n" + } + if {$cnfg(en_strm) eq 1} { + append entity "always_comb axis_host_src.tie_off_m();\n" + append entity "always_comb axis_host_sink.tie_off_s();\n" + } + if {$cnfg(en_ddr) eq 1} { + append entity "always_comb axis_card_src.tie_off_m();\n" + append entity "always_comb axis_card_sink.tie_off_s();\n" + } + append entity "\n" + append entity "/* -- USER LOGIC -------------------------------------------------------- */\n" + append entity "\n" + append entity "\n" + append entity "\n" + append entity "endmodule\n" + lappend template $entity + set vho_file [open $f_out w] + foreach line $template { + puts $vho_file $line + } + close $vho_file +} \ No newline at end of file diff --git a/hw/sim/axi_intf_sim.sv b/hw/sim/axi_intf_sim.sv new file mode 100644 index 00000000..d3f5a0ba --- /dev/null +++ b/hw/sim/axi_intf_sim.sv @@ -0,0 +1,446 @@ +package axiSimTypes; + + import lynxTypes::*; + + // + // AXI4 Stream driver + // + class AXI4Sdrv; + + // Interface handle + virtual AXI4S axis; + + // ID + integer id; + + // Constructor + function new(virtual AXI4S axis, input integer id); + this.axis = axis; + this.id = id; + endfunction + + // Cycle wait + task cycle_wait; + @(posedge axis.aclk); + endtask + + // Reset + task reset_m; + axis.tvalid <= 1'b0; + axis.tdata <= 0; + axis.tkeep <= 0; + axis.tlast <= 1'b0; + endtask + + task reset_s; + axis.tready <= 1'b0; + endtask + + // Drive + task send_data_incr ( + input logic [AXI_DATA_BITS-1:0] tdata, + input integer n_tr + ); + for(int i = 0; i < 8; i++) begin + axis.tdata[i*64+:64] <= tdata+i; + end + axis.tkeep <= ~0; + axis.tlast <= 1'b0; + axis.tvalid 
<= 1'b1; + for(int i = 0; i < n_tr; i++) begin + if(i == n_tr-1) axis.tlast <= 1'b1; + cycle_wait(); + while(axis.tready != 1'b1) begin cycle_wait(); end + for(int j = 0; j < 8; j++) begin + axis.tdata[j*64+:64] <= axis.tdata[j*64+:64] + 8; + end + end + axis.tdata <= 0; + axis.tkeep <= 0; + axis.tlast <= 1'b0; + axis.tvalid <= 1'b0; + //cycle_wait(); + endtask + + task send_data ( + input logic [AXI_DATA_BITS-1:0] tdata, + input integer n_tr + ); + axis.tdata <= tdata; + axis.tkeep <= ~0; + axis.tlast <= 1'b0; + axis.tvalid <= 1'b1; + for(int i = 0; i < n_tr; i++) begin + if(i == n_tr-1) axis.tlast <= 1'b1; + cycle_wait(); + while(axis.tready != 1'b1) begin cycle_wait(); end + end + axis.tdata <= 0; + axis.tkeep <= 0; + axis.tlast <= 1'b0; + axis.tvalid <= 1'b0; + //cycle_wait(); + endtask + + task recv ( + input integer n_tr + ); + axis.tready <= 1'b1; + for(int i = 0; i < n_tr; i++) begin + cycle_wait(); + while(axis.tvalid != 1'b1) begin cycle_wait(); end + end + axis.tready <= 1'b0; + //cycle_wait(); + endtask + + endclass; + + // + // AXI4 Lite driver + // + class AXI4Ldrv; + + // Interface handle + virtual AXI4L axi; + + // ID + integer id; + + // Constructor + function new(virtual AXI4L axi, input integer id); + this.axi = axi; + this.id = id; + endfunction + + // Cycle wait + task cycle_wait; + @(posedge axi.aclk); + endtask + + // Reset + task reset_m; + axi.araddr <= 0; + axi.arprot <= 0; + axi.arqos <= 0; + axi.arregion <= 0; + axi.arvalid <= 0; + axi.awaddr <= 0; + axi.awprot <= 0; + axi.awqos <= 0; + axi.awregion <= 0; + axi.awvalid <= 0; + axi.bready <= 0; + axi.rready <= 0; + axi.wdata <= 0; + axi.wstrb <= 0; + axi.wvalid <= 0; + endtask + + task reset_s; + axi.arready <= 0; + axi.awready <= 0; + axi.bresp <= 0; + axi.bvalid <= 0; + axi.rdata <= 0; + axi.rresp <= 0; + axi.rvalid <= 0; + axi.wready <= 0; + endtask + + // Write + task write ( + input logic [AXI_ADDR_BITS-1:0] addr, + input logic [AXIL_DATA_BITS-1:0] data + ); + // Request + axi.awaddr 
<= addr;
+            axi.awvalid <= 1'b1;
+            axi.wdata <= data;
+            axi.wstrb <= ~0;
+            axi.wvalid <= 1'b1;
+            cycle_wait();
+            while(axi.awready != 1'b1 || axi.wready != 1'b1) begin cycle_wait(); end // keep AW and W asserted until a clock edge where BOTH are ready; leaving on a single ready would drop the other channel's transfer
+            axi.awaddr <= 0;
+            axi.awvalid <= 1'b0;
+            axi.wdata <= 0;
+            axi.wstrb <= 0;
+            axi.wvalid <= 1'b0;
+            // Response: hold BREADY high until the slave presents BVALID
+            axi.bready <= 1'b1;
+            cycle_wait();
+            while(axi.bvalid != 1) begin cycle_wait(); end
+            axi.bready <= 1'b0;
+            $display("AXIL: Data %x written at addr %x, id %d", data, addr, id);
+        endtask
+
+        // Read: issue one read request for 'addr', wait for the R beat and print the returned data
+        task read (
+            input logic [AXI_ADDR_BITS-1:0] addr
+        );
+            // Request: hold ARVALID until ARREADY is seen at a clock edge
+            axi.araddr <= addr;
+            axi.arvalid <= 1'b1;
+            cycle_wait();
+            while(axi.arready != 1'b1) begin cycle_wait(); end
+            axi.araddr <= 0;
+            axi.arvalid <= 1'b0;
+            // Response: hold RREADY high until the slave presents RVALID
+            axi.rready <= 1'b1;
+            cycle_wait();
+            while(axi.rvalid != 1) begin cycle_wait(); end
+            axi.rready <= 1'b0;
+            $display("AXIL: Data %x read at addr %x, id %d", axi.rdata, addr, id);
+        endtask
+
+    endclass;
+
+    //
+    // AXI4 driver: per-channel tasks that drive or accept single beats on a virtual AXI4 interface
+    //
+    class AXI4drv;
+
+        // Interface handle
+        virtual AXI4 axi;
+
+        // ID
+        integer id;
+
+        // Constructor
+        function new(virtual AXI4 axi, input integer id);
+            this.axi = axi;
+            this.id = id;
+        endfunction
+
+        // Cycle wait: block until the next rising edge of the interface clock
+        task cycle_wait;
+            @(posedge axi.aclk);
+        endtask
+
+        // Reset: reset_m zeroes the signals this driver sources as a master, reset_s those it sources as a slave
+        task reset_m;
+            axi.araddr <= 0;
+            axi.arburst <= 0;
+            axi.arcache <= 0;
+            axi.arid <= 0;
+            axi.arlen <= 0;
+            axi.arlock <= 0;
+            axi.arprot <= 0;
+            axi.arqos <= 0;
+            axi.arregion <= 0;
+            axi.arsize <= 0;
+            axi.arvalid <= 0;
+            axi.awaddr <= 0;
+            axi.awburst <= 0;
+            axi.awcache <= 0;
+            axi.awid <= 0;
+            axi.awlen <= 0;
+            axi.awlock <= 0;
+            axi.awprot <= 0;
+            axi.awqos <= 0;
+            axi.awregion <= 0;
+            axi.awsize <= 0;
+            axi.awvalid <= 0;
+            axi.wdata <= 0;
+            axi.wlast <= 0;
+            axi.wstrb <= 0;
+            axi.wvalid <= 0;
+            axi.rready <= 0;
+            axi.bready <= 0;
+        endtask
+
+        task reset_s;
+            axi.arready <= 0;
+            axi.awready <= 0;
+            axi.bresp <= 0;
+            axi.bvalid <= 0;
+            axi.bid <= 0;
+            axi.rdata <= 0;
+            axi.rid <= 0;
+            axi.rresp <= 0;
+            axi.rlast <=
0; + axi.rvalid <= 0; + axi.wready <= 0; + endtask + + // Write AW + task write_aw ( + input logic [AXI_ADDR_BITS-1:0] addr, + input logic [LEN_BITS-1:0] len, + input logic [2:0] size + ); + axi.awaddr <= addr; + axi.awburst <= 2'b01; + axi.awcache <= 0; + axi.awid <= 0; + axi.awlen <= len; + axi.awlock <= 0; + axi.awprot <= 0; + axi.awqos <= 0; + axi.awregion <= 0; + axi.awsize <= size; + axi.awvalid <= 1'b1; + cycle_wait(); + while(axi.awready != 1'b1) begin cycle_wait(); end + axi.awaddr <= 0; + axi.awburst <= 0; + axi.awcache <= 0; + axi.awid <= 0; + axi.awlen <= 0; + axi.awlock <= 0; + axi.awprot <= 0; + axi.awqos <= 0; + axi.awregion <= 0; + axi.awsize <= 0; + axi.awvalid <= 1'b0; + endtask + + // Write AR + task write_ar ( + input logic [AXI_ADDR_BITS-1:0] addr, + input logic [LEN_BITS-1:0] len, + input logic [2:0] size + ); + axi.araddr <= addr; + axi.arburst <= 2'b01; + axi.arcache <= 0; + axi.arid <= 0; + axi.arlen <= len; + axi.arlock <= 0; + axi.arprot <= 0; + axi.arqos <= 0; + axi.arregion <= 0; + axi.arsize <= size; + axi.arvalid <= 1'b1; + cycle_wait(); + while(axi.arready != 1'b1) begin cycle_wait(); end + axi.araddr <= 0; + axi.arburst <= 0; + axi.arcache <= 0; + axi.arid <= 0; + axi.arlen <= 0; + axi.arlock <= 0; + axi.arprot <= 0; + axi.arqos <= 0; + axi.arregion <= 0; + axi.arsize <= 0; + axi.arvalid <= 1'b0; + endtask + + // Write W + task write_w ( + input logic [AXI_DATA_BITS-1:0] wdata, + input logic [AXI_DATA_BITS/8-1:0] wstrb, + input logic wlast + ); + axi.wdata <= wdata; + axi.wlast <= wlast; + axi.wstrb <= wstrb; + axi.wvalid <= 1'b1; + cycle_wait(); + while(axi.wready != 1'b1) begin cycle_wait(); end + axi.wdata <= 0; + axi.wlast <= 0; + axi.wstrb <= 0; + axi.wvalid <= 1'b0; + endtask + + // Write r + task write_r ( + input logic [AXI_DATA_BITS-1:0] rdata, + input logic rlast + ); + axi.rid <= 0; + axi.rresp <= 0; + axi.rdata <= rdata; + axi.rlast <= rlast; + axi.rvalid <= 1'b1; + cycle_wait(); + while(axi.rready != 1'b1) begin 
cycle_wait(); end
+            axi.rid <= 0;
+            axi.rresp <= 0;
+            axi.rdata <= 0;
+            axi.rlast <= 0;
+            axi.rvalid <= 1'b0;
+        endtask
+
+        // Write B: present a zero-valued write response and hold BVALID until BREADY is seen
+        task write_b ();
+            axi.bid <= 0;
+            axi.bresp <= 0;
+            axi.bvalid <= 1'b1;
+            cycle_wait();
+            while(axi.bready != 1'b1) begin cycle_wait(); end
+            axi.bid <= 0;
+            axi.bresp <= 0;
+            axi.bvalid <= 1'b0;
+        endtask
+
+        // Read AW: raise AWREADY, wait for AWVALID, return the sampled address and length, then drop AWREADY one cycle later
+        task read_aw (
+            output logic [AXI_ADDR_BITS-1:0] addr,
+            output logic [LEN_BITS-1:0] len
+        );
+            axi.awready <= 1'b1;
+            cycle_wait();
+            while(axi.awvalid != 1'b1) begin cycle_wait(); end
+            addr = axi.awaddr;
+            len = axi.awlen;
+            cycle_wait();
+            axi.awready <= 1'b0; // non-blocking, like the other read_* tasks, so the signal is not changed ahead of logic sampling it on this edge
+        endtask
+
+        // Read AR: raise ARREADY, wait for ARVALID, print the sampled address and length, then drop ARREADY one cycle later
+        task read_ar ();
+            axi.arready <= 1'b1;
+            cycle_wait();
+            while(axi.arvalid != 1'b1) begin cycle_wait(); end
+            $display("AR - addr: %x, len: %x", axi.araddr, axi.arlen);
+            cycle_wait();
+            axi.arready <= 1'b0; // non-blocking, like the other read_* tasks
+        endtask
+
+        // Read W: accept one write-data beat and return its data, strobe and last flag
+        task read_w (
+            output logic [AXI_DATA_BITS-1:0] wdata,
+            output logic [AXI_DATA_BITS/8-1:0] wstrb,
+            output logic wlast
+        );
+            axi.wready <= 1'b1;
+            cycle_wait();
+            while(axi.wvalid != 1'b1) begin cycle_wait(); end
+            wdata = axi.wdata;
+            wstrb = axi.wstrb;
+            wlast = axi.wlast;
+            cycle_wait();
+            axi.wready <= 1'b0;
+        endtask
+
+        // Read R: accept one read-data beat and return its data and last flag
+        task read_r (
+            output logic [AXI_DATA_BITS-1:0] rdata,
+            output logic rlast
+        );
+            axi.rready <= 1'b1;
+            cycle_wait();
+            while(axi.rvalid != 1'b1) begin cycle_wait(); end
+            rdata = axi.rdata;
+            rlast = axi.rlast;
+            cycle_wait();
+            axi.rready <= 1'b0;
+        endtask
+
+        // Read B: accept one write response; its contents are not returned
+        task read_b ();
+            axi.bready <= 1'b1;
+            cycle_wait();
+            while(axi.bvalid != 1'b1) begin cycle_wait(); end
+            cycle_wait();
+            axi.bready <= 1'b0;
+        endtask
+
+    endclass;
+
+
+
+endpackage
\ No newline at end of file
diff --git a/hw/sim/lynx_intf_sim.sv b/hw/sim/lynx_intf_sim.sv
new file mode 100644
index 00000000..1b2c2cdf
--- /dev/null
+++ b/hw/sim/lynx_intf_sim.sv
@@ -0,0 +1,227 @@
+package lynxSimTypes;
+
+    import lynxTypes::*;
+
+    //
+    // Request driver
+    //
+    class REQdrv;
+
+        //
Interface handle;
+        virtual reqIntf req;
+
+        // ID
+        integer id;
+
+        // Constructor
+        function new(virtual reqIntf req, input integer id);
+            this.req = req;
+            this.id = id;
+        endfunction
+
+        // Cycle wait: block until the next rising edge of the interface clock (cycle_n_wait repeats it n_cyc times)
+        task cycle_wait;
+            @(posedge req.aclk);
+        endtask
+
+        task cycle_n_wait(input integer n_cyc);
+            for(int i = 0; i < n_cyc; i++) cycle_wait();
+        endtask
+
+        // Reset: reset_m clears the master-driven valid and payload, reset_s clears the slave-driven ready
+        task reset_m;
+            req.valid <= 1'b0;
+            req.req <= 0;
+        endtask
+
+        task reset_s;
+            req.ready <= 1'b0;
+        endtask
+
+        // Drive: present one request built from the arguments and keep valid asserted until ready has been seen on n_tr clock edges
+        task send (
+            input logic [63:0] len,
+            input logic [47:0] vaddr,
+            input logic sync,
+            input logic ctl,
+            input logic stream,
+            input integer n_tr
+        );
+            req.req.len <= len;
+            req.req.vaddr <= vaddr;
+            req.req.sync <= sync;
+            req.req.ctl <= ctl;
+            req.req.stream <= stream;
+            req.req.rsrvd <= 0; // non-blocking like the other fields, so the whole request updates in the same region
+            req.valid <= 1'b1;
+            for(int i = 0; i < n_tr; i++) begin
+                cycle_wait();
+                while(req.ready != 1'b1) begin cycle_wait(); end
+            end
+            req.valid <= 1'b0;
+            req.req <= 0; // non-blocking: the payload is cleared together with valid, not ahead of it
+            //$display("REQ: Request sent %d", id);
+        endtask
+
+        // Receive: keep ready asserted until valid has been seen on n_tr clock edges; the payload is not captured
+        task recv (
+            input integer n_tr
+        );
+            req.ready <= 1'b1;
+            for(int i = 0; i < n_tr; i++) begin
+                cycle_wait();
+                while(req.valid != 1'b1) begin cycle_wait(); end
+            end
+            req.ready <= 1'b0;
+            //$display("REQ: Request received %d", id);
+        endtask
+
+    endclass
+
+    //
+    // Meta driver: same valid/ready driving as REQdrv for a metaIntf of DB data bits
+    //
+    class METAdrv #(
+        parameter integer DB = 128
+    );
+
+        // Interface handle;
+        virtual metaIntf #(.DATA_BITS(DB)) meta;
+
+        // ID
+        integer id;
+
+        // Constructor
+        function new(virtual metaIntf #(.DATA_BITS(DB)) meta, input integer id);
+            this.meta = meta;
+            this.id = id;
+        endfunction
+
+        // Cycle wait
+        task cycle_wait;
+            @(posedge meta.aclk);
+        endtask
+
+        task cycle_n_wait(input integer n_cyc);
+            for(int i = 0; i < n_cyc; i++) cycle_wait();
+        endtask
+
+        // Reset
+        task reset_m;
+            meta.valid <= 1'b0;
+            meta.data <= 0;
+        endtask
+
+        task reset_s;
+            meta.ready <= 1'b0;
+        endtask
+
+        // Drive
+        task send (
+            input logic [DB-1:0] data,
+            input integer n_tr
+        );
+            meta.data <= data;
+ meta.valid <= 1'b1; + for(int i = 0; i < n_tr; i++) begin + cycle_wait(); + while(meta.ready != 1'b1) begin cycle_wait(); end + end + meta.valid <= 1'b0; + //$display("META: Request sent %d", id); + endtask + + task recv ( + input integer n_tr + ); + meta.ready <= 1'b1; + for(int i = 0; i < n_tr; i++) begin + cycle_wait(); + while(meta.valid != 1'b1) begin cycle_wait(); end + end + meta.ready <= 1'b0; + //$display("META: Request received %d", id); + endtask + + endclass + + // + // DMA drivers + // + class DMAdrv; + + // Interface handle; + virtual dmaIntf req; + + // ID + integer id; + + // Constructor + function new(virtual dmaIntf req, input integer id); + this.req = req; + this.id = id; + endfunction + + // Cycle wait + task cycle_wait; + @(posedge req.aclk); + endtask + + task cycle_n_wait(input integer n_cyc); + for(int i = 0; i < n_cyc; i++) cycle_wait(); + endtask + + // Reset + task reset_m; + req.valid <= 1'b0; + req.req <= 0; + endtask + + task reset_s; + req.ready <= 1'b0; + req.done <= 0; + endtask + + // Recv request + task recv_dma ( + output logic [PADDR_BITS-1:0] paddr, + output logic [LEN_BITS-1:0] len, + output logic ctl + ); + req.ready <= 1'b1; + cycle_wait(); + while(req.valid != 1'b1) begin cycle_wait(); end + paddr = req.req.paddr; + len = req.req.len; + ctl = req.req.ctl; + cycle_wait(); + req.ready <= 1'b0; + endtask + + // Send request + task send_dma ( + input logic [PADDR_BITS-1:0] paddr, + input logic [LEN_BITS-1:0] len, + input logic ctl + ); + req.req.paddr <= paddr; + req.req.len <= len; + req.req.ctl <= ctl; + req.valid <= 1'b1; + cycle_wait(); + while(req.ready != 1'b1) begin cycle_wait(); end + req.req.paddr <= 0; + req.req.len <= 0; + req.req.ctl <= 0; + req.valid <= 1'b0; + endtask + + // Send done + task send_done (); + req.done <= 1'b1; + cycle_wait(); + req.done <= 1'b0; + endtask + + endclass + +endpackage \ No newline at end of file diff --git a/hw/sim/lynx_pkg.sv b/hw/sim/lynx_pkg.sv new file mode 100644 index 
00000000..015ad099 --- /dev/null +++ b/hw/sim/lynx_pkg.sv @@ -0,0 +1,113 @@ +`define EN_STRM +`define EN_BPSS + +package lynxTypes; + + // AXI + parameter integer AXIL_DATA_BITS = 64; + parameter integer AVX_DATA_BITS = 256; + parameter integer AXI_DATA_BITS = 512; + parameter integer AXI_ADDR_BITS = 64; + + // TLB ram + parameter integer TLB_S_ORDER = 10; + parameter integer PG_S_BITS = 12; + parameter integer N_S_ASSOC = 4; + + parameter integer TLB_L_ORDER = 6; + parameter integer PG_L_BITS = 21; + parameter integer N_L_ASSOC = 2; + + // Data + parameter integer ADDR_BITS = 64; + parameter integer PADDR_BITS = 40; + parameter integer VADDR_BITS = 48; + parameter integer LEN_BITS = 28; + parameter integer TLB_DATA_BITS = 64; + + // Queue depth + parameter integer QUEUE_DEPTH = 8; + parameter integer N_OUTSTANDING = 8; + + // Slices + parameter integer N_REG_HOST_S0 = 2; + parameter integer N_REG_HOST_S1 = 2; + parameter integer N_REG_HOST_S2 = 2; + parameter integer N_REG_CARD_S0 = 2; + parameter integer N_REG_CARD_S1 = 2; + parameter integer N_REG_CARD_S2 = 2; + + // Network + parameter integer FV_REQ_BITS = 256; + parameter integer PMTU_BITS = 1408; + + // ----------------------------------------------------------------- + // Dynamic + // ----------------------------------------------------------------- + + // Flow + parameter integer N_DDR_CHAN = 2; + parameter integer N_CHAN = 1; + parameter integer N_REGIONS = 1; + parameter integer PR_FLOW = 0; + parameter integer AVX_FLOW = 0; + parameter integer BPSS_FLOW = 1; + parameter integer DDR_FLOW = 0; + parameter integer DDR_CONFIG = 0; + parameter integer FV_FLOW = 0; + parameter integer FV_VERBS = 0; + parameter integer N_REGIONS_BITS = $clog2(2); + parameter integer N_REQUEST_BITS = 4; + +// ---------------------------------------------------------------------------- +// -- Structs +// ---------------------------------------------------------------------------- +typedef struct packed { + logic [VADDR_BITS-1:0] 
vaddr; + logic [LEN_BITS-1:0] len; + logic stream; + logic sync; + logic ctl; + logic [16:0] rsrvd; +} req_t; + +typedef struct packed { + logic [VADDR_BITS-1:0] vaddr; + logic [LEN_BITS-1:0] len; + logic stream; + logic sync; + logic ctl; + logic [N_REQUEST_BITS-1:0] id; + logic host; + logic [11:0] rsrvd; +} rdma_req_t; + +typedef struct packed { + logic [PADDR_BITS-1:0] paddr; + logic [LEN_BITS-1:0] len; + logic ctl; + logic [26:0] rsrvd; +} dma_req_t; + +typedef struct packed { + logic [PADDR_BITS-1:0] paddr_card; + logic [PADDR_BITS-1:0] paddr_host; + logic [LEN_BITS-1:0] len; + logic ctl; + logic isr; + logic [17:0] rsrvd; +} dma_isr_req_t; + +typedef struct packed { + logic miss; + logic [VADDR_BITS-1:0] vaddr; + logic [LEN_BITS-1:0] len; +} pf_t; + +typedef struct packed { + logic [N_REGIONS_BITS-1:0] id; + logic [LEN_BITS-1:0] len; +} mux_t; + +endpackage + diff --git a/hw/sim/scripts_sim/tb.tcl b/hw/sim/scripts_sim/tb.tcl new file mode 100644 index 00000000..726a701e --- /dev/null +++ b/hw/sim/scripts_sim/tb.tcl @@ -0,0 +1,168 @@ +#***************************************************************************************** +# Vivado (TM) v2019.1 (64-bit) +# +# tb.tcl: Tcl script for re-creating project 'tb_user' +# +# Generated by Vivado on Fri May 15 23:36:34 CEST 2020 +# IP Build 2548770 on Fri May 24 18:01:18 MDT 2019 +# +# This file contains the Vivado Tcl commands for re-creating the project to the state* +# when this script was generated. In order to re-create the project, please source this +# file in the Vivado Tcl Shell. +# +# * Note that the runs in the created project will be configured the same way as the +# original project, however they will not be launched automatically. To regenerate the +# run results please launch the synthesis/implementation runs as needed. 
+# +#***************************************************************************************** +# NOTE: In order to use this script for source control purposes, please make sure that the +# following files are added to the source control system:- +# +# 1. This project restoration tcl script (tb.tcl) that was generated. +# +# 2. The following source(s) files that were local or imported into the original project. +# (Please see the '$orig_proj_dir' and '$origin_dir' variable setting below at the start of the script) +# +# +# +# 3. The following remote source files that were added to the original project:- +# +# "/home/dk23/dev/lynx/hw/sim/lynx_pkg.sv" +# "/home/dk23/dev/lynx/hw/hdl/pkg/axi_intf.sv" +# "/home/dk23/dev/lynx/hw/sim/axi_intf_sim.sv" +# "/home/dk23/dev/lynx/hw/hdl/pkg/lynx_intf.sv" +# "/home/dk23/dev/lynx/hw/sim/lynx_intf_sim.sv" +# "/home/dk23/dev/lynx/hw/hdl/util/fifo.sv" +# "/home/dk23/dev/lynx/hw/hdl/pkg/axi_macros.svh" +# "/home/dk23/dev/lynx/hw/hdl/pkg/lynx_macros.svh" +# +#***************************************************************************************** + +# Set the reference directory for source file relative paths (by default the value is script directory path) +set origin_dir "." + +# Use origin directory path location variable, if specified in the tcl shell +if { [info exists ::origin_dir_loc] } { + set origin_dir $::origin_dir_loc +} + +# Set the project name +set _xil_proj_name_ "tb_user" + +# Use project name variable, if specified in the tcl shell +if { [info exists ::user_project_name] } { + set _xil_proj_name_ $::user_project_name +} + +variable script_file +set script_file "tb.tcl" + +# Help information for this script +proc print_help {} { + variable script_file + puts "\nDescription:" + puts "Recreate a Vivado project from this script. The created project will be" + puts "functionally equivalent to the original project for which this script was" + puts "generated. 
The script contains commands for creating a project, filesets," + puts "runs, adding/importing sources and setting properties on various objects.\n" + puts "Syntax:" + puts "$script_file" + puts "$script_file -tclargs \[--origin_dir \]" + puts "$script_file -tclargs \[--project_name \]" + puts "$script_file -tclargs \[--help\]\n" + puts "Usage:" + puts "Name Description" + puts "-------------------------------------------------------------------------" + puts "\[--origin_dir \] Determine source file paths wrt this path. Default" + puts " origin_dir path value is \".\", otherwise, the value" + puts " that was set with the \"-paths_relative_to\" switch" + puts " when this script was generated.\n" + puts "\[--project_name \] Create project with the specified name. Default" + puts " name is the name of the project from where this" + puts " script was generated.\n" + puts "\[--help\] Print help information for this script" + puts "-------------------------------------------------------------------------\n" + exit 0 +} + +if { $::argc > 0 } { + for {set i 0} {$i < $::argc} {incr i} { + set option [string trim [lindex $::argv $i]] + switch -regexp -- $option { + "--origin_dir" { incr i; set origin_dir [lindex $::argv $i] } + "--project_name" { incr i; set _xil_proj_name_ [lindex $::argv $i] } + "--help" { print_help } + default { + if { [regexp {^-} $option] } { + puts "ERROR: Unknown option '$option' specified, please type '$script_file -tclargs --help' for usage info.\n" + return 1 + } + } + } + } +} + +# Set the directory path for the original project from where this script was exported +set orig_proj_dir "[file normalize "$origin_dir/tb_user"]" + +# Create project +create_project ${_xil_proj_name_} ./${_xil_proj_name_} -part xc7vx485tffg1157-1 + +# Set the directory path for the new project +set proj_dir [get_property directory [current_project]] + +# Set project properties +set obj [current_project] +set_property -name "board_part_repo_paths" -value 
"/home/dk23/.Xilinx/Vivado/2019.1/xhub/board_store" -objects $obj +set_property -name "default_lib" -value "xil_defaultlib" -objects $obj +set_property -name "dsa.accelerator_binary_content" -value "bitstream" -objects $obj +set_property -name "dsa.accelerator_binary_format" -value "xclbin2" -objects $obj +set_property -name "dsa.description" -value "Vivado generated DSA" -objects $obj +set_property -name "dsa.dr_bd_base_address" -value "0" -objects $obj +set_property -name "dsa.emu_dir" -value "emu" -objects $obj +set_property -name "dsa.flash_interface_type" -value "bpix16" -objects $obj +set_property -name "dsa.flash_offset_address" -value "0" -objects $obj +set_property -name "dsa.flash_size" -value "1024" -objects $obj +set_property -name "dsa.host_architecture" -value "x86_64" -objects $obj +set_property -name "dsa.host_interface" -value "pcie" -objects $obj +set_property -name "dsa.num_compute_units" -value "60" -objects $obj +set_property -name "dsa.platform_state" -value "pre_synth" -objects $obj +set_property -name "dsa.vendor" -value "xilinx" -objects $obj +set_property -name "dsa.version" -value "0.0" -objects $obj +set_property -name "enable_vhdl_2008" -value "1" -objects $obj +set_property -name "ip_cache_permissions" -value "read write" -objects $obj +set_property -name "ip_output_repo" -value "$proj_dir/${_xil_proj_name_}.cache/ip" -objects $obj +set_property -name "mem.enable_memory_map_generation" -value "1" -objects $obj +set_property -name "part" -value "xc7vx485tffg1157-1" -objects $obj +set_property -name "sim.central_dir" -value "$proj_dir/${_xil_proj_name_}.ip_user_files" -objects $obj +set_property -name "sim.ip.auto_export_scripts" -value "1" -objects $obj +set_property -name "simulator_language" -value "Mixed" -objects $obj +set_property -name "webtalk.xsim_launch_sim" -value "20" -objects $obj + +# Create 'sources_1' fileset (if not found) +if {[string equal [get_filesets -quiet sources_1] ""]} { + create_fileset -srcset sources_1 +} + 
+# Copy base +file mkdir "tb_user/hdl" + +exec cp "../tb/tb_user.sv" "tb_user/hdl" +exec cp "../tb/tb_design_user_logic.sv" "tb_user/hdl" + +# Set 'sources_1' fileset object +set obj [get_filesets sources_1] +set files [list \ + [file normalize "${origin_dir}/../../sim/lynx_pkg.sv"] \ + [file normalize "${origin_dir}/../../hdl/pkg/axi_intf.sv"] \ + [file normalize "${origin_dir}/../../sim/axi_intf_sim.sv"] \ + [file normalize "${origin_dir}/../../hdl/pkg/lynx_intf.sv"] \ + [file normalize "${origin_dir}/../../sim/lynx_intf_sim.sv"] \ + [file normalize "${origin_dir}/../../hdl/util/fifo.sv"] \ + [file normalize "${origin_dir}/../../hdl/pkg/axi_macros.svh"] \ + [file normalize "${origin_dir}/../../hdl/pkg/lynx_macros.svh"] \ + [file normalize "${origin_dir}/tb_user/hdl/tb_user.sv"] \ + [file normalize "${origin_dir}/tb_user/hdl/tb_design_user_logic.sv"] \ +] +add_files -norecurse -fileset $obj $files + diff --git a/hw/sim/tb/axis_mux_ddr_sink_tb.sv b/hw/sim/tb/axis_mux_ddr_sink_tb.sv new file mode 100644 index 00000000..84dc6430 --- /dev/null +++ b/hw/sim/tb/axis_mux_ddr_sink_tb.sv @@ -0,0 +1,115 @@ +import lynxTypes::*; + +module axis_mux_ddr_sink_tb; + + localparam CLK_PERIOD = 5ns; + + logic aclk = 1'b0; + logic aresetn = 1'b1; + + logic done = 0; + + // Sink + AXI4S #(.AXI4S_DATA_BITS(512)) axis_in_host (aclk); + AXI4S #(.AXI4S_DATA_BITS(512)) axis_in_card (aclk); + AXI4S #(.AXI4S_DATA_BITS(512)) axis_out (aclk); + + // Memory subsystem + muxCardIntf mux_sink (); + + // Drivers + axiSimTypes::AXI4Sdrv #(.AXIS_DATA_BITS(512)) axis_drv_host = new(axis_in_host, 1); + axiSimTypes::AXI4Sdrv #(.AXIS_DATA_BITS(512)) axis_drv_card = new(axis_in_card, 2); + axiSimTypes::AXI4Sdrv axis_drv_out = new(axis_out, 3); + + // Clock gen + initial begin + while (!done) begin + aclk <= 1; + #(CLK_PERIOD/2); + aclk <= 0; + #(CLK_PERIOD/2); + end + end + + // Reset gen + initial begin + aresetn = 0; + #CLK_PERIOD aresetn = 1; + end + + // DUTs + axis_mux_ddr_sink inst_DUT_sink ( + 
.aclk(aclk), + .aresetn(aresetn), + .mux(mux_sink), + .axis_in_host(axis_in_host), + .axis_in_card(axis_in_card), + .axis_out(axis_out) + ); + + logic mux_load_sink; + logic mux_load_card_sink; + logic [N_REGIONS_BITS-1:0] mux_load_id_sink; + logic [LEN_BITS-1:0] mux_load_len_sink; + + // Memory subsystem queues + queue #( + .QTYPE(logic[1+N_REGIONS_BITS+LEN_BITS-1:0]) + ) inst_mux_que_sink ( + .aclk(aclk), + .aresetn(aresetn), + .val_snk(mux_load_sink), + .rdy_snk(), + .data_snk({mux_load_card_sink, mux_load_id_sink, mux_load_len_sink}), + .val_src(mux_sink.valid), + .rdy_src(mux_sink.ready), + .data_src({mux_sink.card, mux_sink.id_in, mux_sink.len}) + ); + + /* Sink */ + initial begin + mux_load_sink = 1'b0; + mux_load_card_sink = 1'b0; + mux_load_id_sink = 0; + mux_load_len_sink = 0; + #(2*CLK_PERIOD) + mux_load_sink = 1'b1; + mux_load_card_sink = 1'b1; + mux_load_id_sink = 2; + mux_load_len_sink = 28'h180; + #(CLK_PERIOD) + mux_load_id_sink = 1'b1; + mux_load_card_sink = 1'b0; + mux_load_len_sink = 28'h200; + #(CLK_PERIOD) + mux_load_sink = 1'b0; + end + + /* Card */ + initial begin + axis_drv_card.reset_m(); + #(4*CLK_PERIOD) + @(posedge aclk); + axis_drv_card.send(44, 6); + end + + /* Host */ + initial begin + axis_drv_host.reset_m(); + #(4*CLK_PERIOD) + @(posedge aclk); + axis_drv_host.send(33, 8); + end + + /* Out */ + initial begin + axis_drv_out.reset_s(); + #(4*CLK_PERIOD) + @(posedge aclk); + axis_drv_out.recv(6); + axis_drv_out.recv(8); + done = 1; + end + +endmodule \ No newline at end of file diff --git a/hw/sim/tb/axis_mux_ddr_sink_wide_tb.sv b/hw/sim/tb/axis_mux_ddr_sink_wide_tb.sv new file mode 100644 index 00000000..df34f573 --- /dev/null +++ b/hw/sim/tb/axis_mux_ddr_sink_wide_tb.sv @@ -0,0 +1,127 @@ +import lynxTypes::*; + +module axis_mux_ddr_sink_wide_tb; + + localparam CLK_PERIOD = 5ns; + + logic aclk = 1'b0; + logic aresetn = 1'b1; + + logic done_out_0 = 0; + logic done_out_1 = 0; + + // Sink + AXI4S #(.AXI4S_DATA_BITS(512)) axis_in_host 
(aclk); + AXI4S #(.AXI4S_DATA_BITS(1024)) axis_in_card (aclk); + AXI4S #(.AXI4S_DATA_BITS(512)) axis_out [2] (aclk); + + // Memory subsystem + muxCardIntf mux_sink (); + + // Drivers + axiSimTypes::AXI4Sdrv #(.AXIS_DATA_BITS(512)) axis_drv_host = new(axis_in_host, 1); + axiSimTypes::AXI4Sdrv #(.AXIS_DATA_BITS(1024)) axis_drv_card = new(axis_in_card, 2); + axiSimTypes::AXI4Sdrv axis_out_0 = new(axis_out[0], 3); + axiSimTypes::AXI4Sdrv axis_out_1 = new(axis_out[1], 4); + + // Clock gen + initial begin + while (!done_out_0 || !done_out_1) begin + aclk <= 1; + #(CLK_PERIOD/2); + aclk <= 0; + #(CLK_PERIOD/2); + end + end + + // Reset gen + initial begin + aresetn = 0; + #CLK_PERIOD aresetn = 1; + end + + // DUTs + axis_mux_ddr_sink_wide inst_DUT_sink ( + .aclk(aclk), + .aresetn(aresetn), + .mux(mux_sink), + .axis_in_host(axis_in_host), + .axis_in_card(axis_in_card), + .axis_out(axis_out) + ); + + logic mux_load_sink; + logic mux_load_card_sink; + logic [N_REGIONS_BITS-1:0] mux_load_id_sink; + logic [LEN_BITS-1:0] mux_load_len_sink; + + // Memory subsystem queues + queue #( + .QTYPE(logic[1+N_REGIONS_BITS+LEN_BITS-1:0]) + ) inst_mux_que_sink ( + .aclk(aclk), + .aresetn(aresetn), + .val_snk(mux_load_sink), + .rdy_snk(), + .data_snk({mux_load_card_sink, mux_load_id_sink, mux_load_len_sink}), + .val_src(mux_sink.valid), + .rdy_src(mux_sink.ready), + .data_src({mux_sink.card, mux_sink.id_in, mux_sink.len}) + ); + + /* Sink */ + initial begin + mux_load_sink = 1'b0; + mux_load_card_sink = 1'b0; + mux_load_id_sink = 0; + mux_load_len_sink = 0; + #(2*CLK_PERIOD) + mux_load_sink = 1'b1; + mux_load_card_sink = 1'b1; + mux_load_id_sink = 2; + mux_load_len_sink = 28'h180; + #(CLK_PERIOD) + mux_load_id_sink = 1'b1; + mux_load_card_sink = 1'b0; + mux_load_len_sink = 28'h200; + #(CLK_PERIOD) + mux_load_sink = 1'b0; + end + + /* Host */ + initial begin + axis_drv_host.reset_m(); + #(4*CLK_PERIOD) + @(posedge aclk); + axis_drv_host.send(33, 8); + end + + /* Card */ + initial begin + 
axis_drv_card.reset_m(); + #(4*CLK_PERIOD) + @(posedge aclk); + axis_drv_card.send(44, 3); + end + + /* Out 0 */ + initial begin + axis_out_0.reset_s(); + #(4*CLK_PERIOD) + @(posedge aclk); + axis_out_0.recv(3); + axis_out_0.recv(4); + done_out_0 = 1; + end + + /* Out 1 */ + initial begin + axis_out_1.reset_s(); + #(4*CLK_PERIOD) + @(posedge aclk); + axis_out_1.recv(3); + axis_out_1.recv(4); + done_out_1 = 1; + end + +endmodule \ No newline at end of file diff --git a/hw/sim/tb/axis_mux_ddr_src_tb.sv b/hw/sim/tb/axis_mux_ddr_src_tb.sv new file mode 100644 index 00000000..b1e2c9f9 --- /dev/null +++ b/hw/sim/tb/axis_mux_ddr_src_tb.sv @@ -0,0 +1,117 @@ +import lynxTypes::*; + +module axis_mux_ddr_src_tb; + + localparam CLK_PERIOD = 5ns; + + logic aclk = 1'b0; + logic aresetn = 1'b1; + + logic done_0 = 0; + logic done_1 = 0; + + // Src + AXI4S #(.AXI4S_DATA_BITS(512)) axis_in (aclk); + AXI4S #(.AXI4S_DATA_BITS(512)) axis_out_host (aclk); + AXI4S #(.AXI4S_DATA_BITS(512)) axis_out_card (aclk); + + // Memory subsystem + muxCardIntf mux_src (); + + // Drivers + axiSimTypes::AXI4Sdrv axis_drv_in = new(axis_in, 3); + axiSimTypes::AXI4Sdrv #(.AXIS_DATA_BITS(512)) axis_drv_host = new(axis_out_host, 1); + axiSimTypes::AXI4Sdrv #(.AXIS_DATA_BITS(512)) axis_drv_card = new(axis_out_card, 2); + + // Clock gen + initial begin + while (!done_0 || !done_1) begin + aclk <= 1; + #(CLK_PERIOD/2); + aclk <= 0; + #(CLK_PERIOD/2); + end + end + + // Reset gen + initial begin + aresetn = 0; + #CLK_PERIOD aresetn = 1; + end + + // DUTs + axis_mux_ddr_src inst_DUT_src ( + .aclk(aclk), + .aresetn(aresetn), + .mux(mux_src), + .axis_in(axis_in), + .axis_out_host(axis_out_host), + .axis_out_card(axis_out_card) + ); + + logic mux_load_src; + logic mux_load_card_src; + logic [N_REGIONS_BITS-1:0] mux_load_id_src; + logic [LEN_BITS-1:0] mux_load_len_src; + + // Memory subsystem queues + queue #( + .QTYPE(logic[1+N_REGIONS_BITS+LEN_BITS-1:0]) + ) inst_mux_que_src ( + .aclk(aclk), + .aresetn(aresetn), + 
.val_snk(mux_load_src),
+        .rdy_snk(),
+        .data_snk({mux_load_card_src, mux_load_id_src, mux_load_len_src}),
+        .val_src(mux_src.valid),
+        .rdy_src(mux_src.ready),
+        .data_src({mux_src.card, mux_src.id_in, mux_src.len})
+    );
+
+    /* src */
+    initial begin
+        mux_load_src = 1'b0;
+        mux_load_card_src = 1'b0;
+        mux_load_id_src = 0;
+        mux_load_len_src = 0;
+        #(2*CLK_PERIOD)
+        mux_load_src = 1'b1;
+        mux_load_card_src = 1'b1;
+        mux_load_id_src = 2;
+        mux_load_len_src = 28'h180;
+        #(CLK_PERIOD)
+        mux_load_id_src = 1'b1;
+        mux_load_card_src = 1'b0;
+        mux_load_len_src = 28'h200;
+        #(CLK_PERIOD)
+        mux_load_src = 1'b0;
+    end
+
+    /* Card */
+    initial begin
+        axis_drv_card.reset_s();
+        #(4*CLK_PERIOD)
+        @(posedge aclk);
+        axis_drv_card.recv(6);
+        done_0 = 1;
+    end
+
+    /* Host */
+    initial begin
+        axis_drv_host.reset_s();
+        #(4*CLK_PERIOD)
+        @(posedge aclk);
+        axis_drv_host.recv(8);
+        done_1 = 1;
+    end
+
+    /* Out */
+    initial begin
+        axis_drv_in.reset_m();
+        #(4*CLK_PERIOD)
+        @(posedge aclk);
+        axis_drv_in.send(33, 6);
+        axis_drv_in.send(44, 8);
+    end
+
+endmodule
\ No newline at end of file
diff --git a/hw/sim/tb/axis_mux_ddr_src_wide_tb.sv b/hw/sim/tb/axis_mux_ddr_src_wide_tb.sv
new file mode 100644
index 00000000..07aacac4
--- /dev/null
+++ b/hw/sim/tb/axis_mux_ddr_src_wide_tb.sv
@@ -0,0 +1,127 @@
+import lynxTypes::*;
+
+module axis_mux_ddr_src_wide_tb; // testbench for axis_mux_ddr_src_wide: two 512-bit inputs, one 512-bit host and one 1024-bit card output
+
+    localparam CLK_PERIOD = 5ns;
+
+    logic aclk = 1'b0;
+    logic aresetn = 1'b1;
+
+    logic done_0 = 0; // set when the host-side receiver has finished
+    logic done_1 = 0; // set when the card-side receiver has finished
+
+    // src
+    AXI4S #(.AXI4S_DATA_BITS(512)) axis_out_host (aclk);
+    AXI4S #(.AXI4S_DATA_BITS(1024)) axis_out_card (aclk);
+    AXI4S #(.AXI4S_DATA_BITS(512)) axis_in [2] (aclk);
+
+    // Memory subsystem
+    muxCardIntf mux_src ();
+
+    // Drivers
+    axiSimTypes::AXI4Sdrv #(.AXIS_DATA_BITS(512)) axis_drv_host = new(axis_out_host, 1);
+    axiSimTypes::AXI4Sdrv #(.AXIS_DATA_BITS(1024)) axis_drv_card = new(axis_out_card, 2);
+    axiSimTypes::AXI4Sdrv axis_in_0 = new(axis_in[0], 3);
+    axiSimTypes::AXI4Sdrv axis_in_1 = new(axis_in[1], 4);
+
+    // Clock gen: keeps toggling until both receivers have reported completion
+    initial begin
+        while (!done_0 || !done_1) begin
+            aclk <= 1;
+            #(CLK_PERIOD/2);
+            aclk <= 0;
+            #(CLK_PERIOD/2);
+        end
+    end
+
+    // Reset gen: active-low reset held for one clock period
+    initial begin
+        aresetn = 0;
+        #CLK_PERIOD aresetn = 1;
+    end
+
+    // DUTs
+    axis_mux_ddr_src_wide inst_DUT_src (
+        .aclk(aclk),
+        .aresetn(aresetn),
+        .mux(mux_src),
+        .axis_in(axis_in),
+        .axis_out_host(axis_out_host),
+        .axis_out_card(axis_out_card)
+    );
+
+    logic mux_load_src;
+    logic mux_load_card_src;
+    logic [N_REGIONS_BITS-1:0] mux_load_id_src;
+    logic [LEN_BITS-1:0] mux_load_len_src;
+
+    // Memory subsystem queues: buffers {card, id, len} descriptors between the stimulus below and the DUT mux port
+    queue #(
+        .QTYPE(logic[1+N_REGIONS_BITS+LEN_BITS-1:0])
+    ) inst_mux_que_src (
+        .aclk(aclk),
+        .aresetn(aresetn),
+        .val_snk(mux_load_src),
+        .rdy_snk(),
+        .data_snk({mux_load_card_src, mux_load_id_src, mux_load_len_src}),
+        .val_src(mux_src.valid),
+        .rdy_src(mux_src.ready),
+        .data_src({mux_src.card, mux_src.id_in, mux_src.len})
+    );
+
+    /* src: load two descriptors, a card one (len 0x180) followed by a host one (len 0x200) */
+    initial begin
+        mux_load_src = 1'b0;
+        mux_load_card_src = 1'b0;
+        mux_load_id_src = 0;
+        mux_load_len_src = 0;
+        #(2*CLK_PERIOD)
+        mux_load_src = 1'b1;
+        mux_load_card_src = 1'b1;
+        mux_load_id_src = 2;
+        mux_load_len_src = 28'h180;
+        #(CLK_PERIOD)
+        mux_load_id_src = 1'b1;
+        mux_load_card_src = 1'b0;
+        mux_load_len_src = 28'h200;
+        #(CLK_PERIOD)
+        mux_load_src = 1'b0;
+    end
+
+    /* Host */
+    initial begin
+        axis_drv_host.reset_s();
+        #(4*CLK_PERIOD)
+        @(posedge aclk);
+        axis_drv_host.recv(8);
+        done_0 = 1; // was 'done_0 = 0', which left the clock loop above running forever
+    end
+
+    /* Card */
+    initial begin
+        axis_drv_card.reset_s();
+        #(4*CLK_PERIOD)
+        @(posedge aclk);
+        axis_drv_card.recv(3);
+        done_1 = 1;
+    end
+
+    /* In 0 */
+    initial begin
+        axis_in_0.reset_m();
+        #(4*CLK_PERIOD)
+        @(posedge aclk);
+        axis_in_0.send(33, 3);
+        axis_in_0.send(100, 4);
+    end
+
+    /* In 1 */
+    initial begin
+        axis_in_1.reset_m(); // this driver only sends, so reset its master side like axis_in_0 (was reset_s)
+        #(4*CLK_PERIOD)
+        @(posedge aclk);
+        axis_in_1.send(44, 3);
+        axis_in_1.send(200, 4);
+    end
+
+endmodule
\ No newline at end of file
diff --git
a/hw/sim/tb/axis_mux_ddr_user_tb.sv b/hw/sim/tb/axis_mux_ddr_user_tb.sv
new file mode 100644
index 00000000..74534f68
--- /dev/null
+++ b/hw/sim/tb/axis_mux_ddr_user_tb.sv
@@ -0,0 +1,115 @@
+import lynxTypes::*;
+
+module axis_mux_ddr_user_tb; // testbench: two 512-bit input streams muxed onto one output under mux descriptors
+
+    localparam CLK_PERIOD = 5ns;
+
+    logic aclk = 1'b0;
+    logic aresetn = 1'b1;
+
+    logic done = 0; // set by the output receiver; stops the clock generator
+
+    // Sink
+    AXI4S #(.AXI4S_DATA_BITS(512)) axis_in_host (aclk);
+    AXI4S #(.AXI4S_DATA_BITS(512)) axis_in_card (aclk);
+    AXI4S #(.AXI4S_DATA_BITS(512)) axis_out (aclk);
+
+    // Memory subsystem
+    muxCardIntf mux_sink ();
+
+    // Drivers
+    axiSimTypes::AXI4Sdrv #(.AXIS_DATA_BITS(512)) axis_drv_host = new(axis_in_host, 1);
+    axiSimTypes::AXI4Sdrv #(.AXIS_DATA_BITS(512)) axis_drv_card = new(axis_in_card, 2);
+    axiSimTypes::AXI4Sdrv axis_drv_out = new(axis_out, 3);
+
+    // Clock gen: toggles until done is set
+    initial begin
+        while (!done) begin
+            aclk <= 1;
+            #(CLK_PERIOD/2);
+            aclk <= 0;
+            #(CLK_PERIOD/2);
+        end
+    end
+
+    // Reset gen: active-low reset held for one clock period
+    initial begin
+        aresetn = 0;
+        #CLK_PERIOD aresetn = 1;
+    end
+
+    // DUTs
+    axis_mux_ddr_sink inst_DUT_sink ( // NOTE(review): a *_user_tb that instantiates axis_mux_ddr_sink - confirm whether axis_mux_ddr_user was intended
+        .aclk(aclk),
+        .aresetn(aresetn),
+        .mux(mux_sink),
+        .axis_in_host(axis_in_host),
+        .axis_in_card(axis_in_card),
+        .axis_out(axis_out)
+    );
+
+    logic mux_load_sink;
+    logic mux_load_card_sink;
+    logic [N_REGIONS_BITS-1:0] mux_load_id_sink;
+    logic [LEN_BITS-1:0] mux_load_len_sink;
+
+    // Memory subsystem queues: buffers {card, id, len} descriptors between the stimulus below and the DUT mux port
+    queue #(
+        .QTYPE(logic[1+N_REGIONS_BITS+LEN_BITS-1:0])
+    ) inst_mux_que_sink (
+        .aclk(aclk),
+        .aresetn(aresetn),
+        .val_snk(mux_load_sink),
+        .rdy_snk(),
+        .data_snk({mux_load_card_sink, mux_load_id_sink, mux_load_len_sink}),
+        .val_src(mux_sink.valid),
+        .rdy_src(mux_sink.ready),
+        .data_src({mux_sink.card, mux_sink.id_in, mux_sink.len})
+    );
+
+    /* Sink: load two descriptors, a card one (len 0x180) followed by a host one (len 0x200) */
+    initial begin
+        mux_load_sink = 1'b0;
+        mux_load_card_sink = 1'b0;
+        mux_load_id_sink = 0;
+        mux_load_len_sink = 0;
+        #(2*CLK_PERIOD)
+        mux_load_sink = 1'b1;
+        mux_load_card_sink = 1'b1;
+        mux_load_id_sink = 2; // NOTE(review): target is N_REGIONS_BITS wide - confirm 2 is representable in the simulated configuration
+        mux_load_len_sink = 28'h180;
+        #(CLK_PERIOD)
+        mux_load_id_sink = 1'b1;
+        mux_load_card_sink = 1'b0;
+        mux_load_len_sink = 28'h200;
+        #(CLK_PERIOD)
+        mux_load_sink = 1'b0;
+    end
+
+    /* Card */
+    initial begin
+        axis_drv_card.reset_m();
+        #(4*CLK_PERIOD)
+        @(posedge aclk);
+        axis_drv_card.send(44, 6);
+    end
+
+    /* Host */
+    initial begin
+        axis_drv_host.reset_m();
+        #(4*CLK_PERIOD)
+        @(posedge aclk);
+        axis_drv_host.send(33, 8);
+    end
+
+    /* Out */
+    initial begin
+        axis_drv_out.reset_s();
+        #(4*CLK_PERIOD)
+        @(posedge aclk);
+        axis_drv_out.recv(6);
+        axis_drv_out.recv(8);
+        done = 1;
+    end
+
+endmodule
\ No newline at end of file
diff --git a/hw/sim/tb/axis_mux_tb.sv b/hw/sim/tb/axis_mux_tb.sv
new file mode 100644
index 00000000..2a54c9ed
--- /dev/null
+++ b/hw/sim/tb/axis_mux_tb.sv
@@ -0,0 +1,174 @@
+import lynxTypes::*;
+
+module axis_mux_tb; // testbench chaining axis_mux_src -> axis_decoupler -> axis_decoupler -> axis_mux_sink
+
+    localparam CLK_PERIOD = 5ns;
+
+    logic aclk = 1'b0;
+    logic aresetn = 1'b1;
+
+    logic done_sink = 0; // set when the sink-side receiver has finished
+    logic done_src = 0; // set when the source-side sender has finished
+
+    // Sink
+    AXI4S axis_sink_in [N_REGIONS] (aclk);
+    AXI4S axis_sink_out (aclk);
+
+    // Source
+    AXI4S axis_src_in (aclk);
+    AXI4S axis_src_out [N_REGIONS] (aclk);
+
+    // Decoupler
+    AXI4S axis_dcplr [N_REGIONS] (aclk);
+
+    logic [N_REGIONS-1:0] decouple = 0;
+
+    // Memory subsystem
+    muxIntf mux_sink ();
+    muxIntf mux_src ();
+
+    // Drivers
+    axiSimTypes::AXI4Sdrv axis_drv_sink_out = new(axis_sink_out, 1);
+    axiSimTypes::AXI4Sdrv axis_drv_src_in = new(axis_src_in, 2);
+
+    // Clock gen: toggles until both done flags are set
+    initial begin
+        while (!done_sink || !done_src) begin
+            aclk <= 1;
+            #(CLK_PERIOD/2);
+            aclk <= 0;
+            #(CLK_PERIOD/2);
+        end
+    end
+
+    // Reset gen: active-low reset held for one clock period
+    initial begin
+        aresetn = 0;
+        #CLK_PERIOD aresetn = 1;
+    end
+
+    // DUTs
+    axis_mux_sink inst_DUT_sink (
+        .aclk(aclk),
+        .aresetn(aresetn),
+        .mux(mux_sink),
+        .axis_in(axis_sink_in),
+        .axis_out(axis_sink_out)
+    );
+
+    axis_mux_src inst_DUT_src (
+        .aclk(aclk),
+        .aresetn(aresetn),
+        .mux(mux_src),
+        .axis_in(axis_src_in),
+        .axis_out(axis_src_out)
+    );
+
+    axis_decoupler inst_DUT_dcplr_sink ( // second decoupler stage: axis_dcplr -> axis_sink_in
+        .aclk(aclk),
+        .aresetn(aresetn),
+        .decouple(decouple),
+        .axis_in(axis_dcplr),
+        .axis_out(axis_sink_in)
+    );
+
+    axis_decoupler inst_DUT_dcplr_src ( // first decoupler stage: axis_src_out -> axis_dcplr
+        .aclk(aclk),
+        .aresetn(aresetn),
+        .decouple(decouple),
+        .axis_in(axis_src_out),
+        .axis_out(axis_dcplr)
+    );
+
+    logic mux_load_sink, mux_load_src;
+    logic [N_REGIONS_BITS-1:0] mux_load_id_sink, mux_load_id_src;
+    logic [LEN_BITS-1:0] mux_load_len_sink, mux_load_len_src;
+
+    // Memory subsystem queues: one {id, len} descriptor queue per mux DUT
+    queue #(
+        .QTYPE(logic[N_REGIONS_BITS+LEN_BITS-1:0])
+    ) inst_mux_que_sink (
+        .aclk(aclk),
+        .aresetn(aresetn),
+        .val_snk(mux_load_sink),
+        .rdy_snk(),
+        .data_snk({mux_load_id_sink, mux_load_len_sink}),
+        .val_src(mux_sink.valid),
+        .rdy_src(mux_sink.ready),
+        .data_src({mux_sink.id_in, mux_sink.len})
+    );
+
+    queue #(
+        .QTYPE(logic[N_REGIONS_BITS+LEN_BITS-1:0])
+    ) inst_mux_que_src (
+        .aclk(aclk),
+        .aresetn(aresetn),
+        .val_snk(mux_load_src),
+        .rdy_snk(),
+        .data_snk({mux_load_id_src, mux_load_len_src}),
+        .val_src(mux_src.valid),
+        .rdy_src(mux_src.ready),
+        .data_src({mux_src.id_in, mux_src.len})
+    );
+
+
+    /* Sink: three descriptors with ids 2, 1, 0 and lengths 0x180, 0x1C0, 0xC0 */
+    initial begin
+        mux_load_sink = 1'b0;
+        mux_load_id_sink = 0;
+        mux_load_len_sink = 0;
+        #(2*CLK_PERIOD)
+        mux_load_sink = 1'b1;
+        mux_load_id_sink = 2; // NOTE(review): ids 2 and 1 assume more regions than one - confirm N_REGIONS / N_REGIONS_BITS for this simulation
+        mux_load_len_sink = 28'h180;
+        #(CLK_PERIOD)
+        mux_load_id_sink = 1;
+        mux_load_len_sink = 28'h1C0;
+        #(CLK_PERIOD)
+        mux_load_id_sink = 0;
+        mux_load_len_sink = 28'hC0;
+        #(CLK_PERIOD)
+        mux_load_sink = 1'b0;
+    end
+
+    initial begin
+        axis_drv_sink_out.reset_s();
+        #(4*CLK_PERIOD)
+        @(posedge aclk);
+        axis_drv_sink_out.recv(6);
+        axis_drv_sink_out.recv(7);
+        axis_drv_sink_out.recv(3);
+        done_sink = 1;
+    end
+
+    /* Source: same three descriptors as the sink side */
+    initial begin
+        mux_load_src = 1'b0;
+        mux_load_id_src = 0;
+        mux_load_len_src = 0;
+        #(2*CLK_PERIOD)
+        mux_load_src = 1'b1;
+        mux_load_id_src = 2;
+        mux_load_len_src = 28'h180;
+        #(CLK_PERIOD)
+        mux_load_id_src = 1;
+        mux_load_len_src = 28'h1C0;
+        #(CLK_PERIOD)
+        mux_load_id_src = 0;
+        mux_load_len_src =
28'hC0;
+        #(CLK_PERIOD)
+        mux_load_src = 1'b0;
+    end
+
+    initial begin
+        axis_drv_src_in.reset_m();
+        #(4*CLK_PERIOD)
+        @(posedge aclk);
+        axis_drv_src_in.send(0, 64'hffffffffffffffff, 6); // NOTE(review): three-argument send; the ddr mux testbenches call send with two - confirm against AXI4Sdrv
+        axis_drv_src_in.send(0, 64'hffffffffffffffff, 7);
+        axis_drv_src_in.send(0, 64'hffffffffffffffff, 3);
+        done_src = 1;
+    end
+
+
+endmodule
\ No newline at end of file
diff --git a/hw/sim/tb/tb_cdma_unaglined.sv b/hw/sim/tb/tb_cdma_unaglined.sv
new file mode 100644
index 00000000..56a0143c
--- /dev/null
+++ b/hw/sim/tb/tb_cdma_unaglined.sv
@@ -0,0 +1,97 @@
+import lynxTypes::*;
+
+module tb_cdma_unaglined; // testbench for cdma_unaglined: issues one read DMA request and serves the AXI read side
+
+    localparam CLK_PERIOD = 5ns;
+
+    logic aclk = 1'b0;
+    logic aresetn = 1'b1;
+
+    logic done = 0; // stops the clock generator once the output receiver has finished
+
+    // Clock gen: toggles until done is set
+    initial begin
+        while (!done) begin
+            aclk <= 1;
+            #(CLK_PERIOD/2);
+            aclk <= 0;
+            #(CLK_PERIOD/2);
+        end
+    end
+
+    // Reset gen: active-low reset held for two clock periods
+    initial begin
+        aresetn = 0;
+        #(2*CLK_PERIOD) aresetn = 1;
+    end
+
+    // Signals
+    dmaIntf rdCDMA (aclk);
+    dmaIntf wrCDMA (aclk);
+
+    AXI4 axi_ddr_in (aclk);
+    AXI4S axis_ddr_in (aclk);
+    AXI4S axis_ddr_out (aclk);
+
+    // Drivers
+    lynxSimTypes::DMAdrv dma_drv_rd = new(rdCDMA, 0);
+    lynxSimTypes::DMAdrv dma_drv_wr = new(wrCDMA, 1);
+
+    axiSimTypes::AXI4drv axi_drv_ddr_in = new(axi_ddr_in, 2);
+    axiSimTypes::AXI4Sdrv axis_drv_ddr_in = new(axis_ddr_in, 3);
+    axiSimTypes::AXI4Sdrv axis_drv_ddr_out = new(axis_ddr_out, 4);
+
+    // DUT
+    cdma_unaglined inst_DUT (
+        .aclk(aclk),
+        .aresetn(aresetn),
+        .rdCDMA(rdCDMA),
+        .wrCDMA(wrCDMA),
+        .axi_ddr_in(axi_ddr_in),
+        .axis_ddr_in(axis_ddr_in),
+        .axis_ddr_out(axis_ddr_out)
+    );
+
+    // DRIVER -------------------------------------------------------------------------------
+
+    // DMA drive: a single read request (paddr 8, len 68, ctl set); the write channel is only reset
+    initial begin
+        dma_drv_rd.reset_m();
+        #(10*CLK_PERIOD)
+        @(posedge aclk);
+        dma_drv_rd.send_dma(8, 68, 1'b1);
+    end
+
+    initial begin
+        dma_drv_wr.reset_m();
+        #(10*CLK_PERIOD)
+        @(posedge aclk);
+    end
+
+    // AXI sink: accept one read address, then return ten read beats of random data
+    initial begin
+        axi_drv_ddr_in.reset_s();
+        #(50*CLK_PERIOD)
+        @(posedge aclk);
+        axi_drv_ddr_in.read_ar();
+        #(20*CLK_PERIOD)
+        @(posedge aclk);
+        for(int i = 0; i < 10; i++) begin
+            axi_drv_ddr_in.write_r($urandom_range(0, 1000), 0);
+        end
+    end
+
+    // AXIS
+    initial begin
+        axis_drv_ddr_in.reset_m();
+    end
+
+    initial begin
+        axis_drv_ddr_out.reset_s();
+        #(70*CLK_PERIOD)
+        @(posedge aclk);
+        axis_drv_ddr_out.recv(10);
+        done = 1; // added: nothing else sets done, so the clock loop above never terminated
+    end
+
+endmodule
\ No newline at end of file
diff --git a/hw/sim/tb/tb_design_user_logic.sv b/hw/sim/tb/tb_design_user_logic.sv
new file mode 100644
index 00000000..55269eee
--- /dev/null
+++ b/hw/sim/tb/tb_design_user_logic.sv
@@ -0,0 +1,64 @@
+`timescale 1ns / 1ps
+
+import lynxTypes::*;
+
+/**
+ * User logic
+ *
+ */
+module design_user_logic_0 (
+    // AXI4L CONTROL
+    // Slave control. Utilize this interface for any kind of CSR implementation.
+    AXI4L.s axi_ctrl,
+
+    // AXI4S HOST
+    AXI4S.m axis_host_src,
+    AXI4S.s axis_host_sink,
+
+    // AXI4S RDMA
+    AXI4S.m axis_rdma_src,
+    AXI4S.s axis_rdma_sink,
+
+    // FV
+    metaIntf.s fv_sink,
+    metaIntf.m fv_src,
+
+    // Requests
+    reqIntf.m rd_req_user,
+    reqIntf.m wr_req_user,
+
+    // RDMA
+    reqIntf.s rd_req_rdma,
+    reqIntf.s wr_req_rdma,
+
+    // Clock and reset
+    input wire aclk,
+    input wire[0:0] aresetn
+);
+
+/* -- Tie-off unused interfaces and signals ----------------------------- */
+always_comb axi_ctrl.tie_off_s();
+//always_comb rd_req_user.tie_off_m();
+//always_comb wr_req_user.tie_off_m();
+always_comb rd_req_rdma.tie_off_s();
+//always_comb wr_req_rdma.tie_off_s();
+//always_comb fv_sink.tie_off_s();
+//always_comb fv_src.tie_off_m();
+//always_comb axis_rdma_src.tie_off_m();
+//always_comb axis_rdma_sink.tie_off_s();
+//always_comb axis_host_src.tie_off_m();
+//always_comb axis_host_sink.tie_off_s();
+
+/* -- USER LOGIC -------------------------------------------------------- */
+
+// Base Read + Write
+always_comb begin
+    axis_host_src.tvalid = axis_host_sink.tvalid;
+    axis_host_src.tdata = ~axis_host_sink.tdata;
+    axis_host_src.tkeep = axis_host_sink.tkeep; +
axis_host_src.tlast = axis_host_sink.tlast; + axis_host_sink.tready = axis_host_src.tready; +end + +endmodule + diff --git a/hw/sim/tb/tb_user.sv b/hw/sim/tb/tb_user.sv new file mode 100644 index 00000000..a9f47d94 --- /dev/null +++ b/hw/sim/tb/tb_user.sv @@ -0,0 +1,124 @@ +import lynxTypes::*; + +module tb_user; + + localparam CLK_PERIOD = 5ns; + + logic aclk = 1'b0; + logic aresetn = 1'b1; + + logic done = 0; + + // Clock gen + initial begin + while (!done) begin + aclk <= 1; + #(CLK_PERIOD/2); + aclk <= 0; + #(CLK_PERIOD/2); + end + end + + // Reset gen + initial begin + aresetn = 0; + #CLK_PERIOD aresetn = 1; + end + + // Interfaces + AXI4L axi_ctrl (aclk); + AXI4S axis_host_src (aclk); + AXI4S axis_host_sink (aclk); + AXI4S axis_rdma_src (aclk); + AXI4S axis_rdma_sink (aclk); + reqIntf rd_req_user (aclk); + reqIntf wr_req_user (aclk); + reqIntf rd_req_rdma (aclk); + reqIntf wr_req_rdma (aclk); + metaIntf #(.DATA_BITS(256)) fv_sink (aclk); + metaIntf #(.DATA_BITS(256)) fv_src (aclk); + + // Drivers + axiSimTypes::AXI4Ldrv axi_drv_ctrl = new(axi_ctrl, 0); + axiSimTypes::AXI4Sdrv axis_drv_host_src = new(axis_host_src, 1); + axiSimTypes::AXI4Sdrv axis_drv_host_sink = new(axis_host_sink, 2); + axiSimTypes::AXI4Sdrv axis_drv_rdma_src = new(axis_rdma_src, 3); + axiSimTypes::AXI4Sdrv axis_drv_rdma_sink = new(axis_rdma_sink, 4); + lynxSimTypes::REQdrv req_drv_rd_user = new(rd_req_user, 5); + lynxSimTypes::REQdrv req_drv_wr_user = new(wr_req_user, 6); + lynxSimTypes::REQdrv req_drv_rd_rdma = new(rd_req_rdma, 7); + lynxSimTypes::REQdrv req_drv_wr_rdma = new(wr_req_rdma, 8); + lynxSimTypes::METAdrv #(.DB(256)) meta_drv_fv_sink = new(fv_sink, 9); + lynxSimTypes::METAdrv #(.DB(256)) meta_drv_fv_src = new(fv_src, 10); + + // DUT + design_user_logic_0 inst_DUT ( + .aclk(aclk), + .aresetn(aresetn), + .axi_ctrl(axi_ctrl), + .fv_sink(fv_sink), + .fv_src(fv_src), + .rd_req_user(rd_req_user), + .wr_req_user(wr_req_user), + .rd_req_rdma(rd_req_rdma), + .wr_req_rdma(wr_req_rdma), + 
.axis_host_sink(axis_host_sink), + .axis_host_src(axis_host_src), + .axis_rdma_sink(axis_rdma_sink), + .axis_rdma_src(axis_rdma_src) + ); + + // DRIVER ------------------------------------------------------------------------------- + + // Control + initial begin + axi_drv_ctrl.reset_m(); + end + + // USER requests + initial begin + req_drv_rd_user.reset_s(); + end + + initial begin + req_drv_wr_user.reset_s(); + end + + // RDMA requests + initial begin + req_drv_rd_rdma.reset_m(); + end + + initial begin + req_drv_wr_rdma.reset_m(); + end + + // FARVIEW requests + initial begin + meta_drv_fv_sink.reset_m(); + end + + initial begin + meta_drv_fv_src.reset_s(); + end + + // HOST data + initial begin + axis_drv_host_sink.reset_m(); + end + + initial begin + axis_drv_host_src.reset_s(); + end + + // RDMA data + initial begin + axis_drv_rdma_sink.reset_m(); + end + + initial begin + axis_drv_rdma_src.reset_s(); + end + + +endmodule \ No newline at end of file diff --git a/hw/sim/tb/tlb_arbiter_tb.sv b/hw/sim/tb/tlb_arbiter_tb.sv new file mode 100644 index 00000000..05933d16 --- /dev/null +++ b/hw/sim/tb/tlb_arbiter_tb.sv @@ -0,0 +1,81 @@ +import lynxTypes::*; + +module tlb_arbiter_tb; + + localparam CLK_PERIOD = 5ns; + + logic aclk = 1'b0; + logic aresetn = 1'b1; + + logic done = 0; + + // Requests + reqIntf req_in [N_REGIONS] (aclk); + reqIntf req_out (aclk); + + logic [N_REGIONS_BITS-1:0] id; + + // Drivers + lynxSimTypes::REQdrv req_drv_in_0 = new(req_in[0], 0); + lynxSimTypes::REQdrv req_drv_in_1 = new(req_in[1], 1); + lynxSimTypes::REQdrv req_drv_in_2 = new(req_in[2], 2); + lynxSimTypes::REQdrv req_drv_out = new(req_out, 3); + + // Clock gen + initial begin + while (!done) begin + aclk <= 1; + #(CLK_PERIOD/2); + aclk <= 0; + #(CLK_PERIOD/2); + end + end + + // Reset gen + initial begin + aresetn = 0; + #CLK_PERIOD aresetn = 1; + end + + // DUT + tlb_arbiter inst_DUT ( + .aclk(aclk), + .aresetn(aresetn), + .req_snk(req_in), + .req_src(req_out), + .id(id) + ); + + /* 
Init */ + initial begin + req_drv_in_0.reset_src_s(); + #(5*CLK_PERIOD) + @(posedge aclk); + req_drv_in_0.send(512, 64'hffff); + end + + initial begin + req_drv_in_1.reset_src_s(); + #(8*CLK_PERIOD) + @(posedge aclk); + req_drv_in_1.send(256, 64'heeee); + end + + initial begin + req_drv_in_2.reset_src_s(); + #(5*CLK_PERIOD) + @(posedge aclk); + req_drv_in_2.send(1024, 64'hdddd); + end + + initial begin + req_drv_out.reset_src_m(); + #(4*CLK_PERIOD) + @(posedge aclk); + req_drv_out.recv(); + req_drv_out.recv(); + req_drv_out.recv(); + done = 1; + end + +endmodule \ No newline at end of file diff --git a/hw/sim/tb/tlb_top.sv b/hw/sim/tb/tlb_top.sv new file mode 100644 index 00000000..e81ed9e2 --- /dev/null +++ b/hw/sim/tb/tlb_top.sv @@ -0,0 +1,157 @@ +import lynxTypes::*; +import lynxSimTypes::*; +import axiSimTypes::*; + +module tlb_top_tb; + + // ---------------------------------------------------------------- + // -- Clock and reset + // ---------------------------------------------------------------- + logic done = 0; + + localparam CLK_PERIOD = 5ns; + + logic aclk = 1'b0; + logic aresetn = 1'b1; + + // Clock gen + initial begin + while (!done) begin + aclk <= 1; + #(CLK_PERIOD/2); + aclk <= 0; + #(CLK_PERIOD/2); + end + end + + // Reset gen + initial begin + aresetn = 0; + #CLK_PERIOD aresetn = 1; + end + + // ---------------------------------------------------------------- + // -- DUT + // ---------------------------------------------------------------- + + // Control + AXI4L axi_ctrl_lTlb [N_REGIONS] (aclk); + AXI4L axi_ctrl_sTlb [N_REGIONS] (aclk); + AXI4L axi_ctrl_cnfg [N_REGIONS] (aclk); + + // Requests + reqIntf rd_req_user [N_REGIONS] (aclk); + reqIntf wr_req_user [N_REGIONS] (aclk); + + // DMAs + dmaIntf rdXDMA_host (aclk); + dmaIntf wrXDMA_host (aclk); + + // Drivers + axiSimTypes::AXI4Ldrv axil_drv_lTlb = new(axi_ctrl_lTlb[0], 0); + axiSimTypes::AXI4Ldrv axil_drv_sTlb = new(axi_ctrl_sTlb[0], 1); + axiSimTypes::AXI4Ldrv axil_drv_cnfg = 
new(axi_ctrl_cnfg[0], 2); + + lynxSimTypes::REQdrv req_drv_rd = new(rd_req_user[0], 3); + lynxSimTypes::REQdrv req_drv_wr = new(wr_req_user[0], 4); + + lynxSimTypes::DMAdrv dma_drv_rd_xdma_host = new(rdXDMA_host, 5); + lynxSimTypes::DMAdrv dma_drv_wr_xdma_host = new(wrXDMA_host, 6); + + logic [N_REGIONS-1:0] rxfer_host; + logic [N_REGIONS-1:0] wxfer_host; + + // DUT + tlb_top inst_DUT ( + .aclk(aclk), + .aresetn(aresetn), + .axi_ctrl_lTlb(axi_ctrl_lTlb), + .axi_ctrl_sTlb(axi_ctrl_sTlb), + .axi_ctrl_cnfg(axi_ctrl_cnfg), + .rd_req_user(rd_req_user), + .wr_req_user(wr_req_user), + .rdXDMA_host(rdXDMA_host), + .wrXDMA_host(wrXDMA_host), + .rxfer_host(rxfer_host), + .wxfer_host(wxfer_host), + .decouple(), + .pf_irq() + ); + + // ---------------------------------------------------------------- + // -- Sim + // ---------------------------------------------------------------- + + // TLB entries + initial begin + axil_drv_sTlb.reset_m(); + #(80*CLK_PERIOD) + @(posedge aclk); + // Write to stlb host + axil_drv_sTlb.write(64'h10, 64'h8000_0000_4000_0056); + #(2*CLK_PERIOD); + // Write to stlb card + axil_drv_sTlb.write(64'h2010, 64'hC000_0000_4000_0010); + end + + initial begin + axil_drv_lTlb.reset_m(); + end + + // Config + initial begin + axil_drv_cnfg.reset_m(); + #(5*CLK_PERIOD) + @(posedge aclk); + // Change DP + axil_drv_cnfg.write(64'h50, 64'h2); + #(90*CLK_PERIOD); + axil_drv_cnfg.write(64'h00, 64'h4); + end + + // User requests RD + initial begin + // Hit small read + req_drv_rd.reset_m(); + #(20*CLK_PERIOD) + @(posedge aclk); + req_drv_rd.send(64'h200, 64'h1002010, 1'b0, 1'b1, 1'b1); + // Hit small read sync + //#(10*CLK_PERIOD) + //@(posedge aclk); + //req_drv_rd.send(64'h200, 64'h1002010, 1'b1); + end + + // User requests wR + initial begin + req_drv_wr.reset_m(); + // Hit small sync write + //#(50*CLK_PERIOD) + //@(posedge aclk); + //req_drv_wr.send(64'h200, 64'h1002010, 1'b1); + end + + // DMAs HOST + initial begin + dma_drv_rd_xdma_host.reset_s(); + 
#(50*CLK_PERIOD) + @(posedge aclk); + dma_drv_rd_xdma_host.recv_dma(); + #(70*CLK_PERIOD); + @(posedge aclk); + dma_drv_rd_xdma_host.send_done(); + end + + initial begin + dma_drv_wr_xdma_host.reset_s(); + /* + #(50*CLK_PERIOD) + @(posedge aclk); + dma_drv_wr_xdma_sync.recv_dma(); + #(100*CLK_PERIOD); + @(posedge aclk); + dma_drv_wr_xdma_sync.send_done(); + */ + end + +endmodule \ No newline at end of file diff --git a/sw/base/CMakeLists.txt b/sw/base/CMakeLists.txt new file mode 100644 index 00000000..fb79bc2a --- /dev/null +++ b/sw/base/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.0) +project(base) + +# Includes +include_directories(include) + +# Sources +file(GLOB SOURCES src/*.cpp) + +# Exec +set(EXEC main) + +# Compilation +set (CMAKE_CXX_STANDARD 14) +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -mavx -march=native -O3") + +# Boost lib +find_package(Boost COMPONENTS program_options REQUIRED) + +# Targets +add_executable(${EXEC} ${SOURCES}) +target_link_libraries(${EXEC} ${Boost_LIBRARIES}) \ No newline at end of file diff --git a/sw/base/include/fBench.hpp b/sw/base/include/fBench.hpp new file mode 100644 index 00000000..da6a7744 --- /dev/null +++ b/sw/base/include/fBench.hpp @@ -0,0 +1,121 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tsc_x86.h" + +constexpr auto kCalibrate = true; +constexpr auto kCyclesRequired = 1e9; +constexpr auto kNumRunsDist = 1000; +constexpr auto kNumRunsDef = 100; + +using namespace std::chrono; + +/** + * Exec times [ns] + */ +class Bench { + std::vector times; + double avg_time = 0.0; + int num_runs = 0; + int num_runs_def = kNumRunsDef; + int size = 0; + + void sortBench() { std::sort(times.begin(), times.end()); } + +public: + Bench(int num_runs_def = kNumRunsDef) { this->num_runs_def = num_runs_def; } + + // Number of runs for the average + inline int getNumRuns() { return num_runs; } + + // Average run 
time + inline double getAvg() { return avg_time; } + + // Statistics over the sorted samples (0 when no samples were recorded); a percentile rank that rounds down to zero clamps to the smallest sample instead of wrapping the unsigned index + inline double getMin() { if(!times.empty()) return times[0]; else return 0; } + inline double getMax() { if(!times.empty()) return times[times.size()-1]; else return 0; } + inline double getP25() { if(times.empty()) return 0; const auto rank = times.size()/4; return times[rank ? rank-1 : 0]; } + inline double getP50() { if(times.empty()) return 0; const auto rank = times.size()/2; return times[rank ? rank-1 : 0]; } + inline double getP75() { if(times.empty()) return 0; const auto rank = (times.size()*3)/4; return times[rank ? rank-1 : 0]; } + inline double getP95() { if(times.empty()) return 0; const auto rank = (times.size()*95)/100; return times[rank ? rank-1 : 0]; } + inline double getP99() { if(times.empty()) return 0; const auto rank = (times.size()*99)/100; return times[rank ? rank-1 : 0]; } + + // Print results (restores the stream format flags afterwards) + void printOut() { + std::ios_base::fmtflags f(std::cout.flags()); + + std::cout << "Average time: " << getAvg() << " ns" << std::endl; + std::cout << "Max time: " << getMax() << " ns" << std::endl; + std::cout << "Min time: " << getMin() << " ns" << std::endl; + std::cout << "Median: " << getP50() << " ns" << std::endl; + std::cout << "25th: " << getP25() << " ns" << std::endl; + std::cout << "75th: " << getP75() << " ns" << std::endl; + std::cout << "95th: " << getP95() << " ns" << std::endl; + std::cout << "99th: " << getP99() << " ns" << std::endl; + + std::cout.flags( f ); + } + + /** + * Measure the function execution + */ + template + void runtime(Func const &func, Args... 
args) { + num_runs = 1; + times.clear(); + + // Warm-up / calibration: double num_runs until one batch takes at least kCyclesRequired cycles (stops once num_runs reaches 1 << 14) + if (kCalibrate) { + while (num_runs < (1 << 14)) { + const auto start = start_tsc(); + for (int i = 0; i < num_runs; ++i) { + func(args...); + } + const auto cycles = stop_tsc(start); + + if (cycles >= kCyclesRequired) + break; + + num_runs *= 2; + } + } else { + num_runs = num_runs_def; + } + + std::cout <<"N runs: " << num_runs << std::endl; + + // Average time + auto begin_time = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < num_runs; ++i) { + func(args...); + } + auto end_time = std::chrono::high_resolution_clock::now(); + + double time = std::chrono::duration_cast(end_time - begin_time).count(); + avg_time = time / num_runs; + + for (int i = 0; i < kNumRunsDist; ++i) { + begin_time = std::chrono::high_resolution_clock::now(); + func(args...); + end_time = std::chrono::high_resolution_clock::now(); + + time = std::chrono::duration_cast(end_time - begin_time).count(); + times.emplace_back(time); + } + + sortBench(); + //printOut(); + } + +}; diff --git a/sw/base/include/fDefs.hpp b/sw/base/include/fDefs.hpp new file mode 100644 index 00000000..c6ae8a1a --- /dev/null +++ b/sw/base/include/fDefs.hpp @@ -0,0 +1,33 @@ +#pragma once + +#include +#include +#include + +/* FLAGS */ +// TODO: SET ACCORDING TO THE BITSTREAM (Should enable the reading of these things through the driver) +#define EN_AVX +#define EN_DDR +//#define EN_RDMA + +/* Farview Op codes */ +enum class opCode : uint8_t { READ=0, WRITE=1, RPC=2 }; + +/* Verbosity */ +#define VERBOSE_DEBUG_1 +//#define VERBOSE_DEBUG_2 +//#define VERBOSE_DEBUG_3 + +/* ltoh: little-endian to host byte order */ +/* htol: host to little-endian byte order */ +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define ltohl(x) (x) +# define ltohs(x) (x) +# define htoll(x) (x) +# define htols(x) (x) +#elif __BYTE_ORDER == __BIG_ENDIAN +# define ltohl(x) __bswap_32(x) +# define ltohs(x) __bswap_16(x) +# define htoll(x) __bswap_32(x) +# define htols(x) __bswap_16(x) +#endif \ No newline at end of file 
diff --git a/sw/base/include/fDev.hpp b/sw/base/include/fDev.hpp new file mode 100644 index 00000000..edee3eb3 --- /dev/null +++ b/sw/base/include/fDev.hpp @@ -0,0 +1,223 @@ +#ifndef __FDEV_HPP__ +#define __FDEV_HPP__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fDefs.hpp" + +namespace fpga { + +/* Command FIFO depth */ +static const uint32_t cmd_fifo_depth = 64; +static const uint32_t cmd_fifo_thr = 10; + +/** + * Fpga device region + */ +class fDev { + + /* Fpga device */ + int32_t fd = 0; + + /* Used markers */ + uint32_t rd_cmd_cnt = 0; + uint32_t wr_cmd_cnt = 0; +#ifdef EN_RDMA + uint32_t rdma_cmd_cnt = 0; +#endif + + /* Mmapped regions */ +#ifdef EN_AVX + __m256i *cnfg_reg = 0; +#else + uint64_t *cnfg_reg = 0; +#endif + uint64_t *ctrl_reg = 0; + + /* Mapped large pages hash table */ + std::unordered_map mapped_large; + + /* Utility */ + bool mmapFpga(); + bool munmapFpga(); + + /* Send to controller */ + void postCmd(uint64_t offs_3, uint64_t offs_2, uint64_t offs_1, uint64_t offs_0); + + /* Check busy */ + bool checkBusyRead(); + bool checkBusyWrite(); + + /* Check ready */ + bool checkReadyRead(); + bool checkReadyWrite(); + + /* Memory */ + uint64_t* _getHostMem(uint32_t n_pages); + void _freeHostMem(uint64_t* vaddr, uint32_t n_pages); + void _userMap(uint64_t *vaddr, uint32_t len); + void _userUnmap( uint64_t *vaddr, uint32_t len); + + /* Data movement (_transfer takes separate dest ids for the source and destination side, matching its definition in fDev.cpp and the public transfer() wrapper) */ + void _read(uint64_t *vaddr, uint32_t len, uint8_t dest = 0, bool stream = true, bool clr_stat = true, bool poll = true); + void _write(uint64_t *vaddr, uint32_t len, uint8_t dest = 0, bool stream = true, bool clr_stat = true, bool poll = true); + void _transfer(uint64_t *vaddr_src, uint64_t* vaddr_dst, uint32_t len_src, uint32_t len_dst, uint8_t dest_src = 0, uint8_t dest_dst = 0, bool stream = true, bool clr_stat = true, bool poll = true); + + void _offload(uint64_t *vaddr, 
uint32_t len, bool poll = true); + void _sync(uint64_t *vaddr, uint32_t len, bool poll = true); + + +public: + + fDev() {} + ~fDev() {} + + /** + * Obtain and release FPGA regions + */ + + // Acquire an FPGA region with target ID + bool acquireRegion(uint32_t rNum); + // Release an acquired FPGA region + void releaseRegion(); + + /** + * Control bus + */ + + // Control status bus, AXI Lite + inline void setCSR(uint64_t val, uint32_t offs) { ctrl_reg[offs] = val; } + inline uint64_t getCSR(uint32_t offs) { return ctrl_reg[offs]; } + + /** + * Explicit buffer management + * @param n_pages - number of 2MB pages to be allocated + */ + + // Obtain host memory - pages 2M + template + _Vaddr* getHostMem(uint32_t n_pages) { + return (_Vaddr*) _getHostMem(n_pages); + } + + // Free host memory + template + void freeHostMem(_Vaddr* vaddr, uint32_t n_pages) { + _freeHostMem((uint64_t*)vaddr, n_pages); + } + + // FPGA user space range mapping + template + void userMap(uint64_t *vaddr, uint32_t len) { + _userMap((uint64_t*)vaddr, len); + } + + // FPGA user space range unmapping (auto on release) + template + void userUnmap(_Vaddr *vaddr, uint32_t len) { + _userUnmap((uint64_t*)vaddr, len); + } + + // Obtain PR memory - pages 2M + uint64_t* getPrMem(uint64_t n_pages); + // Free PR memory + void freePrMem(uint64_t* vaddr, uint64_t n_pages); + + /** + * Bulk transfers + * @param vaddr - data pointer + * @param len - transfer length + * @param poll - blocking vs non-blocking + */ + + template + void read(_Vaddr *vaddr, uint32_t len, uint8_t dest = 0, bool stream = true, bool clr_stat = true, bool poll = false) { + _read((uint64_t*)vaddr, len, dest, stream, clr_stat, poll); + } + + template + void write(_Vaddr *vaddr, uint32_t len, uint8_t dest = 0, bool stream = true, bool clr_stat = true, bool poll = false) { + _write((uint64_t*)vaddr, len, dest, stream, clr_stat, poll); + } + + template + void transfer(_Vaddr *vaddr_src, _Vaddr *vaddr_dst, uint32_t len_src, uint32_t len_dst, 
uint8_t dest_src = 0, uint8_t dest_dst = 0, bool stream = true, bool clr_stat = true, bool poll = false) { + _transfer((uint64_t*)vaddr_src, (uint64_t*)vaddr_dst, len_src, len_dst, dest_src, dest_dst, stream, clr_stat, poll); + } + +#ifdef EN_DDR + // Sync operations + template + void sync(uint64_t *vaddr, uint32_t len, bool poll = true) { + _sync((uint64_t*)vaddr, len, poll); + } + + template + void offload(uint64_t *vaddr, uint32_t len, bool poll = true) { + _offload((uint64_t*)vaddr, len, poll); + } +#endif + + /** + * Check for completion + */ + + // Returns the number of completed reads + uint32_t checkCompletedRead(); + // Returns the number of completed writes + uint32_t checkCompletedWrite(); + // Clear all status + void clearCompleted(); + + // Timers + void setTimerStopAt(uint64_t tmr_stop_at); + uint64_t getReadTimer(); + uint64_t getWriteTimer(); + + // Debug + void printDebugXDMA(); + + /** + * PR + */ + void reconfigure(uint64_t* vaddr, uint64_t len); + + /** + * Roce operations + */ + +#ifdef EN_RDMA + // ARP lookup + bool doArpLookup(); + // Write initial context + void writeContext(fQPair *pair); + // Write connection + void writeConnection(fQPair *pair, uint32_t port); + + // RDMA ops + bool postWrite(fQPair *pair, uint64_t l_offs, uint64_t r_offs, uint32_t size); + bool postRead(fQPair *pair, uint64_t l_offs, uint64_t r_offs, uint32_t size); + bool postFarview(fQPair *pair, uint64_t l_offs, uint64_t r_offs, uint32_t size, uint64_t params); +#endif +}; + +} /* namespace fpga */ + +#endif diff --git a/sw/base/include/tsc_x86.h b/sw/base/include/tsc_x86.h new file mode 100644 index 00000000..a0e30822 --- /dev/null +++ b/sw/base/include/tsc_x86.h @@ -0,0 +1,89 @@ +#pragma once + +// ================= GNU C and possibly other UNIX compilers ================= +#ifndef _WIN32 + +#if defined(__GNUC__) || defined(__linux__) +#define VOLATILE __volatile__ +#define ASM __asm__ +#else +/* if we're neither compiling with gcc or under linux, we can hope + * 
the following lines work, they probably won't */ +#define ASM asm +#define VOLATILE +#endif + +#define myInt64 unsigned long long +#define INT32 unsigned int + +// ================================== WIN32 ================================== +#else + +#define myInt64 signed __int64 +#define INT32 unsigned __int32 + +#endif + +/* This is the RDTSC timer. + * RDTSC is an instruction on several Intel and compatible CPUs that Reads the + * Time Stamp Counter. The Intel manuals contain more information. + */ + +#define COUNTER_LO(a) ((a).int32.lo) +#define COUNTER_HI(a) ((a).int32.hi) +#define COUNTER_VAL(a) ((a).int64) + +#define COUNTER(a) ((unsigned long long)COUNTER_VAL(a)) + +#define COUNTER_DIFF(a, b) (COUNTER(a) - COUNTER(b)) + +// ================= GNU C and possibly other UNIX compilers ================= +#ifndef _WIN32 + +typedef union { + myInt64 int64; + struct { + INT32 lo, hi; + } int32; +} tsc_counter; + +#define RDTSC(cpu_c) \ + ASM VOLATILE("rdtsc" : "=a"((cpu_c).int32.lo), "=d"((cpu_c).int32.hi)) +#define CPUID() ASM VOLATILE("cpuid" : : "a"(0) : "bx", "cx", "dx") + +// ================================== WIN32 ================================== +#else + +typedef union { + myInt64 int64; + struct { + INT32 lo, hi; + } int32; +} tsc_counter; + +#define RDTSC(cpu_c) \ + { __asm rdtsc __asm mov(cpu_c).int32.lo, eax __asm mov(cpu_c).int32.hi, edx } + +#define CPUID() \ + { __asm mov eax, 0 __asm cpuid } + +#endif + +// static void init_tsc() { +// ; // no need to initialize anything for x86 +// } + +static myInt64 start_tsc(void) { + tsc_counter start; + CPUID(); + RDTSC(start); + return COUNTER_VAL(start); +} + +static myInt64 stop_tsc(myInt64 start) { + tsc_counter end; + RDTSC(end); + CPUID(); + return COUNTER_VAL(end) - start; +} + diff --git a/sw/base/src/fDev.cpp b/sw/base/src/fDev.cpp new file mode 100644 index 00000000..32f3fd56 --- /dev/null +++ b/sw/base/src/fDev.cpp @@ -0,0 +1,924 @@ +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include + +#include "fDev.hpp" + +/* Sleep */ +#define POLL_SLEEP_NS 100 + +#define LARGE_PAGE_SIZE (2 * 1024 * 1024) +#define LARGE_PAGE_SHIFT 21UL +#define PAGE_SIZE (4 * 1024) +#define PAGE_SHIFT 12UL + +/* Clock */ +#define CLK_NS 4 + +/* IOCTL */ +#define IOCTL_ALLOC_HOST_USER_MEM _IOR('D', 1, unsigned long) +#define IOCTL_FREE_HOST_USER_MEM _IOR('D', 2, unsigned long) +#define IOCTL_ALLOC_HOST_PR_MEM _IOR('D', 3, unsigned long) +#define IOCTL_FREE_HOST_PR_MEM _IOR('D', 4, unsigned long) +#define IOCTL_MAP_USER _IOR('D', 5, unsigned long) +#define IOCTL_UNMAP_USER _IOR('D', 6, unsigned long) +#define IOCTL_RECONFIG_LOAD _IOR('D', 7, unsigned long) +#define IOCTL_ARP_LOOKUP _IOR('D', 8, unsigned long) +#define IOCTL_WRITE_CTX _IOR('D', 9, unsigned long) +#define IOCTL_WRITE_CONN _IOR('D', 10, unsigned long) +#define IOCTL_RDMA_STAT _IOR('D', 11, unsigned long) +#define IOCTL_READ_ENG_STATUS _IOR('D', 12, unsigned long) + +/* MMAP */ +#define MMAP_CTRL (0x0 << PAGE_SHIFT) +#define MMAP_CNFG (0x1 << PAGE_SHIFT) +#define MMAP_CNFG_AVX (0x2 << PAGE_SHIFT) +#define MMAP_BUFF (0x200 << PAGE_SHIFT) +#define MMAP_PR (0x400 << PAGE_SHIFT) + +/* Regions (mmap lengths; parenthesized so they expand safely inside larger expressions) */ +#define CTRL_REGION_SIZE (64 * 1024) +#define CNFG_REGION_SIZE (64 * 1024) +#define CNFG_AVX_REGION_SIZE (256 * 1024) + +#define N_RDMA_STAT_REGS 24 + +/* Config regs */ +#ifdef EN_AVX + // Base + #define CNFG_CTRL_REG 0 + #define CNFG_PF_REG 1 + #define CNFG_DATAPATH_REG_SET 2 + #define CNFG_DATAPATH_REG_CLR 3 + #define CNFG_TMR_STOP_REG 4 + #define CNFG_TMR_REG 5 + #define CNFG_STAT_REG 6 + // RDMA + #define CNFG_RDMA_POST_REG 10 + #define CNFG_RDMA_STAT_CMD_USED_REG 11 + #define CNFG_RDMA_QPN_REG 12 + + #define CTRL_START_RD 0x1 + #define CTRL_START_WR 0x2 + #define CTRL_SYNC_RD 0x4 + #define CTRL_SYNC_WR 0x8 + #define CTRL_STREAM_RD 0x10 + #define CTRL_STREAM_WR 0x20 + #define CTRL_CLR_STAT_RD 0x40 + #define CTRL_CLR_STAT_WR 0x80 + #define CTRL_CLR_IRQ_PENDING 0x100 + #define CTRL_DEST_RD 9 + 
#define CTRL_DEST_WR 13 +#else + // Base + #define CNFG_CTRL_REG 0 + #define CNFG_VADDR_RD_REG 1 + #define CNFG_LEN_RD_REG 2 + #define CNFG_VADDR_WR_REG 3 + #define CNFG_LEN_WR_REG 4 + #define VADDR_MISS_REG 5 + #define LEN_MISS_REG 6 + #define CNFG_DATAPATH_REG_SET 7 + #define CNFG_DATAPATH_REG_CLR 8 + #define CNFG_TMR_STOP_REG 9 + #define CNFG_TMR_RD_REG 10 + #define CNFG_TMR_WR_REG 11 + #define CNFG_STAT_CMD_USED_RD_REG 12 + #define CNFG_STAT_CMD_USED_WR_REG 13 + #define CNFG_STAT_DMA_RD_REG 14 + #define CNFG_STAT_DMA_WR_REG 15 + #define CNFG_STAT_SENT_RD_REG 16 + #define CNFG_STAT_SENT_WR_REG 17 + #define CNFG_STAT_PFAULTS_REG 18 + // RDMA + #define CNFG_RDMA_POST_REG_0 20 + #define CNFG_RDMA_POST_REG_1 21 + #define CNFG_RDMA_POST_REG_2 22 + #define CNFG_RDMA_POST_REG_3 23 + #define CNFG_RDMA_STAT_CMD_USED_REG 24 + #define CNFG_RDMA_QPN_REG 25 + + #define CTRL_START_RD 0x1 + #define CTRL_START_WR 0x2 + #define CTRL_SYNC_RD 0x4 + #define CTRL_SYNC_WR 0x8 + #define CTRL_CLR_STAT_RD 0x10 + #define CTRL_CLR_STAT_WR 0x20 + #define CTRL_CLR_IRQ_PENDING 0x40 + #define CTRL_SEND_RDMA_REQ 0x80 + #define CTRL_SEND_QP_CTX 0x100 + #define CTRL_SEND_QP_CONN 0x200 +#endif + +using namespace std::chrono; + +namespace fpga { + +// ------------------------------------------------------------------------------- +// -- Obtain regions +// ------------------------------------------------------------------------------- + +/** + * Obtain vFPGA char devices + * @param: rNum - region ID + */ +bool fDev::acquireRegion(uint32_t rNum) { + std::string region = "/dev/fpga" + std::to_string(rNum); + fd = open(region.c_str(), O_RDWR | O_SYNC); + if(fd == -1) { + std::cout << "ERR: Cannot acquire an FPGA region" << std::endl; + return false; + } + + if(!mmapFpga()) { + std::cout << "ERR: Cannot mmap an FPGA region" << std::endl; + return false; + } + + return true; +} + +/** + * Release the vFPGA handle + */ +void fDev::releaseRegion() { + close(fd); +} + +/** + * Memory map control + */ +bool 
fDev::mmapFpga() { +#ifdef EN_AVX + cnfg_reg = (__m256i*) mmap(NULL, CNFG_AVX_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CNFG_AVX); + if(cnfg_reg == MAP_FAILED) { + releaseRegion(); + return false; + } +#else + cnfg_reg = (uint64_t*) mmap(NULL, CNFG_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CNFG); + if(cnfg_reg == MAP_FAILED) { + releaseRegion(); + return false; + } +#endif + + ctrl_reg = (uint64_t*) mmap(NULL, CTRL_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CTRL); + if(ctrl_reg == MAP_FAILED) { + releaseRegion(); + return false; + } + + return true; +} + +/** + * Unmap + */ +bool fDev::munmapFpga() { +#ifdef EN_AVX + if(munmap(cnfg_reg, CNFG_AVX_REGION_SIZE) != 0) { + releaseRegion(); + return false; + } +#else + if(munmap(cnfg_reg, CNFG_REGION_SIZE) != 0) { + releaseRegion(); + return false; + } +#endif + + if(munmap(ctrl_reg, CTRL_REGION_SIZE) != 0) { + releaseRegion(); + return false; + } + + cnfg_reg = 0; + ctrl_reg = 0; + + return true; +} + +// ------------------------------------------------------------------------------- +// -- Memory management +// ------------------------------------------------------------------------------- + +/** + * Obtain huge pages on the host memory + * @param: n_pages - number of requested large pages + */ +uint64_t* fDev::_getHostMem(uint32_t n_pages) { + uint64_t *hMem, *hMemAligned; + uint64_t n_pg = n_pages; + + ioctl(fd, IOCTL_ALLOC_HOST_USER_MEM, &n_pg); + hMem = (uint64_t*)mmap(NULL, (n_pg + 1) * LARGE_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_BUFF); + // alignment + hMemAligned = (uint64_t*)( ((((uint64_t)hMem + LARGE_PAGE_SIZE - 1) >> LARGE_PAGE_SHIFT)) << LARGE_PAGE_SHIFT); + mapped_large[hMemAligned] = hMem; + return hMemAligned; +} + +/** + * Release huge pages on the host memory + * @param: vaddr - memory pointer + * @param: n_pages - number of obtained pages + */ +void fDev::_freeHostMem(uint64_t *vaddr, uint32_t n_pages) { + uint64_t* hMem; + uint64_t n_pg 
= n_pages; + + hMem = mapped_large[vaddr]; + munmap(hMem, (n_pg + 1) * LARGE_PAGE_SIZE); + ioctl(fd, IOCTL_FREE_HOST_USER_MEM, &vaddr); +} + +/** + * Obtain huge pages allocated for the PR bitstreams + * @param: n_pages - number of requested large pages + */ +uint64_t* fDev::getPrMem(uint64_t n_pages) { + uint64_t *hMem, *hMemAligned; + uint64_t n_pg = n_pages; + + ioctl(fd, IOCTL_ALLOC_HOST_PR_MEM, &n_pg); + hMem = (uint64_t*)mmap(NULL, (n_pg + 1) * LARGE_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_PR); + // alignment: round the mapping base up to the next large-page boundary (one extra page is mapped above); the raw base is remembered in mapped_large for the later munmap + hMemAligned = (uint64_t*)( ((((uint64_t)hMem + LARGE_PAGE_SIZE - 1) >> LARGE_PAGE_SHIFT)) << LARGE_PAGE_SHIFT); + mapped_large[hMemAligned] = hMem; + return hMemAligned; +} + +/** + * Release huge pages allocated for the PR bitstreams + * @param: vaddr - aligned memory pointer as returned by getPrMem (key into mapped_large) + * @param: n_pages - number of obtained pages + */ +void fDev::freePrMem(uint64_t *vaddr, uint64_t n_pages) { + uint64_t* hMem; + uint64_t n_pg = n_pages; + + hMem = mapped_large[vaddr]; + munmap(hMem, (n_pg + 1) * LARGE_PAGE_SIZE); + ioctl(fd, IOCTL_FREE_HOST_PR_MEM, &vaddr); +} + +/** + * Explicit TLB mapping + * @param: mem - memory pointer + * @param: len - length of the mapping + */ +void fDev::_userMap(uint64_t *mem, uint32_t len) { + uint64_t vdata [2]; + vdata[0] = (uint64_t)mem; + vdata[1] = len; + ioctl(fd, IOCTL_MAP_USER, &vdata); +} + +/** + * TLB unmap + * @param: mem - memory pointer + * @param: len - length of the mapping + */ +void fDev::_userUnmap(uint64_t *mem, uint32_t len) { + uint64_t vdata [2]; + vdata[0] = (uint64_t)mem; + vdata[1] = len; + ioctl(fd, IOCTL_UNMAP_USER, &vdata); +} + +// ------------------------------------------------------------------------------- +// -- PR +// ------------------------------------------------------------------------------- + +/** + * Reconfiguration ioctl call + * @param: vaddr - memory pointer of the PR stream + * @param: len - length of the stream + */ +void fDev::reconfigure(uint64_t *vaddr, uint64_t len) { + uint64_t vdata 
[2]; + vdata[0] = (uint64_t)vaddr; + vdata[1] = len; + ioctl(fd, IOCTL_RECONFIG_LOAD, &vdata); +} + +#ifdef EN_AVX + // ------------------------------------------------------------------------------- + // -- Bulk transfers + // ------------------------------------------------------------------------------- + + /** + * Read operation (read to FPGA user logic) + * @param: vaddr - memory pointer + * @param: len - length + * @param: stream - stream from host memory + * @param: poll - blocking/non-blocking + * @param: clr_stat - prior status clear + */ + void fDev::_read(uint64_t* vaddr, uint32_t len, uint8_t dest, bool stream, bool clr_stat, bool poll) { + // Check outstanding + while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + rd_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) & 0xffffffff; + if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + uint64_t len_cmd = len; + uint64_t ctrl_cmd = CTRL_START_RD | (clr_stat ? CTRL_CLR_STAT_RD : 0x0) | (stream ? 
CTRL_STREAM_RD : 0x0) | ((dest & 0xf) << CTRL_DEST_RD); + cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, 0, (uint64_t)vaddr, ctrl_cmd); + + rd_cmd_cnt++; + + if(poll) { + while(checkBusyRead()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + + /** + * Write operation (write from FPGA user logic) + * @param: vaddr - memory pointer + * @param: len - length + * @param: stream - stream from host memory + * @param: poll - blocking/non-blocking + * @param: clr_stat - prior status clear + */ + void fDev::_write(uint64_t* vaddr, uint32_t len, uint8_t dest, bool stream, bool clr_stat, bool poll) { + // Check outstanding + while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + wr_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) >> 32; + if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + uint64_t len_cmd = (uint64_t)len << 32; + uint64_t ctrl_cmd = CTRL_START_WR | (clr_stat ? CTRL_CLR_STAT_WR : 0x0) | (stream ? 
CTRL_STREAM_WR : 0x0) | ((dest & 0xf) << CTRL_DEST_WR); + cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, (uint64_t)vaddr, 0, ctrl_cmd); + + wr_cmd_cnt++; + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + + /** + * transfer (read + write) + * @param: vaddr_src, _dst - memory pointer + * @param: len_src, _dst - length + * @param: stream - stream from host memory + * @param: poll - blocking/non-blocking + * @param: clr_stat - prior status clear + */ + void fDev::_transfer(uint64_t* vaddr_src, uint64_t* vaddr_dst, uint32_t len_src, uint32_t len_dst, uint8_t dest_src, uint8_t dest_dst, bool stream, bool clr_stat, bool poll) { + // Check outstanding read + while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + rd_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) & 0xffffffff; + if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + // Check outstanding write + while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + wr_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) >> 32; + if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + uint64_t len_cmd = ((uint64_t)len_dst << 32) | len_src; + uint64_t ctrl_cmd = CTRL_START_WR | (clr_stat ? CTRL_CLR_STAT_WR : 0x0) | (stream ? CTRL_STREAM_WR : 0x0) | ((dest_src & 0xf) << CTRL_DEST_WR) | + CTRL_START_RD | (clr_stat ? CTRL_CLR_STAT_RD : 0x0) | (stream ? 
CTRL_STREAM_RD : 0x0) | ((dest_dst & 0xf) << CTRL_DEST_RD); + + cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, (uint64_t)vaddr_dst, (uint64_t)vaddr_src, ctrl_cmd); + + rd_cmd_cnt++; + wr_cmd_cnt++; + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + +#ifdef EN_DDR + + /** + * Offload to FPGA DDR (only with local FPGA memory) + * @param: vaddr - memory pointer + * @param: len - length + * @param: poll - blocking/non-blocking + */ + void fDev::_offload(uint64_t* vaddr, uint32_t len, bool poll) { + // Check outstanding + while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + rd_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) & 0xffffffff; + if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + uint64_t len_cmd = len; + uint64_t ctrl_cmd = CTRL_START_RD | CTRL_CLR_STAT_RD | CTRL_SYNC_RD; + cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, 0, (uint64_t)vaddr, ctrl_cmd); + + rd_cmd_cnt++; + + if(poll) { + while(checkBusyRead()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + + /** + * Sync with FPGA DDR (only with local FPGA memory) + * @param: vaddr - memory pointer + * @param: len - length + * @param: poll - blocking/non-blocking + */ + void fDev::_sync(uint64_t* vaddr, uint32_t len, bool poll) { + // Check outstanding + while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + wr_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) >> 32; + if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + wr_cmd_cnt++; + + uint64_t len_cmd = (uint64_t)len << 32; + uint64_t ctrl_cmd = CTRL_START_WR | CTRL_CLR_STAT_WR | CTRL_SYNC_WR; + cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, (uint64_t)vaddr, 0, ctrl_cmd); + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + +#endif + + // 
------------------------------------------------------------------------------- + // -- Polling + // ------------------------------------------------------------------------------- + + /** + * Check whether busy read + */ + bool fDev::checkBusyRead() { + return !(_mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 1) & 0xffffffff); + } + + /** + * Check whether busy write + */ + bool fDev::checkBusyWrite() { + return !(_mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 1) >> 32); + } + + /** + * Return read completed + */ + uint32_t fDev::checkCompletedRead() { + return _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 1) & 0xffffffff; + } + + /** + * Return write completed + */ + uint32_t fDev::checkCompletedWrite() { + return _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 1) >> 32; + } + + /** + * Clear status + */ + void fDev::clearCompleted() { + cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(0, 0, 0, CTRL_CLR_STAT_RD | CTRL_CLR_STAT_WR); + } + + // ------------------------------------------------------------------------------- + // -- Timers + // ------------------------------------------------------------------------------- + + /** + * Set timer stop at x number of completed transfers + * @param: tmr_stop_at - stop once completed reached + */ + void fDev::setTimerStopAt(uint64_t tmr_stop_at) { + cnfg_reg[CNFG_TMR_STOP_REG] = _mm256_set_epi64x(0, 0, 0, tmr_stop_at); + } + + /** + * Read timer + */ + uint64_t fDev::getReadTimer() { + return _mm256_extract_epi64(cnfg_reg[CNFG_TMR_REG], 0); + } + + /** + * Write timer + */ + uint64_t fDev::getWriteTimer() { + return _mm256_extract_epi64(cnfg_reg[CNFG_TMR_REG], 1); + } + + // ------------------------------------------------------------------------------- + // -- Debug XDMA + // ------------------------------------------------------------------------------- + + /** + * XDMA debug + */ + void fDev::printDebugXDMA() // TODO + { + std::cout << "-- XDMA STATISTICS ----------------------------" << std::endl; + std::cout << std::setw(35) 
<< "Read command FIFO used: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x0) << std::endl; + std::cout << std::setw(35) << "Write command FIFO used: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x1) << std::endl; + std::cout << std::setw(35) << "Reads completed: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x2) << std::endl; + std::cout << std::setw(35) << "Writes completed: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x3) << std::endl; + std::cout << std::setw(35) << "Read requests sent: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x4) << std::endl; + std::cout << std::setw(35) << "Write requests sent: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x5) << std::endl; + std::cout << std::setw(35) << "Page faults: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x6) << std::endl; + std::cout << "-----------------------------------------------" << std::endl; + } + +#else + + /** + * Read operation (read to FPGA user logic) + * @param: vaddr - memory pointer + * @param: len - length + * @param: poll - blocking/non-blocking + * @param: clr_stat - prior status clear + */ + void fDev::read(uint64_t* vaddr, uint32_t len, bool clr_stat, bool poll) { + // Check outstanding + while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + rd_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_RD_REG]; + if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr; + cnfg_reg[CNFG_LEN_RD_REG] = len; + + cnfg_reg[CNFG_CTRL_REG] = CTRL_START_RD | (clr_stat ? 
CTRL_CLR_STAT_RD : 0x0); + + rd_cmd_cnt++; + + if(poll) { + while(checkBusyRead()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + + /** + * Write operation (write from FPGA user logic) + * @param: vaddr - memory pointer + * @param: len - length + * @param: poll - blocking/non-blocking + * @param: clr_stat - prior status clear + */ + void fDev::write(uint64_t* vaddr, uint32_t len, bool clr_stat, bool poll) { + // Check outstanding + while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + wr_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_WR_REG]; + if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr; + cnfg_reg[CNFG_LEN_WR_REG] = len; + + cnfg_reg[CNFG_CTRL_REG] = CTRL_START_WR | (clr_stat ? CTRL_CLR_STAT_WR : 0x0); + + wr_cmd_cnt++; + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + + /** + * transfer (read + write) + * @param: vaddr_src, _dst - memory pointer + * @param: len_src, _dst - length + * @param: poll - blocking/non-blocking + * @param: clr_stat - prior status clear + */ + void fDev::transfer(uint64_t* vaddr_src, uint64_t* vaddr_dst, uint32_t len_src, uint32_t len_dst, bool clr_stat, bool poll) { + // Check outstanding + while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + rd_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_RD_REG]; + if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + // Check outstanding + while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + wr_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_WR_REG]; + if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr_src; + cnfg_reg[CNFG_LEN_RD_REG] = len_src; + cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr_dst; + cnfg_reg[CNFG_LEN_WR_REG] = len_dst; + + 
cnfg_reg[CNFG_CTRL_REG] = CTRL_START_RD | (clr_stat ? CTRL_CLR_STAT_RD : 0x0); + cnfg_reg[CNFG_CTRL_REG] = CTRL_START_WR | (clr_stat ? CTRL_CLR_STAT_WR : 0x0); + + rd_cmd_cnt++; + wr_cmd_cnt++; + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + +#ifdef EN_DDR + + /** + * Offload to FPGA DDR (only with local FPGA memory) + * @param: vaddr - memory pointer + * @param: len - length + * @param: poll - blocking/non-blocking + */ + void fDev::offload(uint64_t* vaddr, uint32_t len, bool poll) { + cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr; + cnfg_reg[CNFG_LEN_RD_REG] = len; + + cnfg_reg[CNFG_CTRL_REG] = CTRL_SYNC_RD | CTRL_START_RD | CTRL_CLR_STAT_RD; + + rd_cmd_cnt++; + + if(poll) { + while(checkBusyRead()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + + /** + * Sync with FPGA DDR (only with local FPGA memory) + * @param: vaddr - memory pointer + * @param: len - length + * @param: poll - blocking/non-blocking + */ + void fDev::sync(uint64_t* vaddr, uint32_t len, bool poll) { + cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr; + cnfg_reg[CNFG_LEN_WR_REG] = len; + + cnfg_reg[CNFG_CTRL_REG] = CTRL_SYNC_WR | CTRL_START_WR | CTRL_CLR_STAT_WR; + + wr_cmd_cnt++; + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + +#endif + + /** + * XDMA debug + */ + void fDev::printDebugXDMA() // TODO + { + std::cout << "-- XDMA STATISTICS ----------------------------" << std::endl; + std::cout << "-----------------------------------------------" << std::endl; + } + + // ------------------------------------------------------------------------------- + // -- Polling + // ------------------------------------------------------------------------------- + bool fDev::checkBusyRead() { + return !(cnfg_reg[CNFG_STAT_DMA_RD_REG]); + } + + bool fDev::checkBusyWrite() { + return !(cnfg_reg[CNFG_STAT_DMA_WR_REG]); + } + + uint32_t fDev::checkCompletedRead() { + return 
(cnfg_reg[CNFG_STAT_DMA_RD_REG]); + } + + uint32_t fDev::checkCompletedWrite() { + return (cnfg_reg[CNFG_STAT_DMA_WR_REG]); + } + + void fDev::clearCompleted() { + cnfg_reg[CNFG_CTRL_REG] = CTRL_CLR_STAT_RD | CTRL_CLR_STAT_WR; + } + + // ------------------------------------------------------------------------------- + // -- Timers + // ------------------------------------------------------------------------------- + + /** + * Set timer stop at x number of completed transfers + * @param: tmr_stop_at - stop once completed reached + */ + void fDev::setTimerStopAt(uint64_t tmr_stop_at) { + cnfg_reg[CNFG_TMR_STOP_REG] = tmr_stop_at; + } + + /** + * Read timer + */ + uint64_t fDev::getReadTimer() { + return cnfg_reg[CNFG_TMR_RD_REG]; + } + + /** + * Write timer + */ + uint64_t fDev::getWriteTimer() { + return cnfg_reg[CNFG_TMR_WR_REG]; + } + +#endif + +// ------------------------------------------------------------------------------- +// -- Network static +// ------------------------------------------------------------------------------- + +#ifdef EN_RDMA + +/** + * ARP lookup + */ +bool fDev::doArpLookup() { + ioctl(fd, IOCTL_ARP_LOOKUP, 0); + return true; +} + +/** + * Write QP context + * @param: pair - target queue pair + */ +void fDev::writeContext(fQPair *pair) { + uint64_t offs[3]; + offs[0] = (((uint64_t)pair->remote.psn & 0xffffff) << 31) | (((uint64_t)pair->local.qpn & 0xffffff) << 7) | (((uint64_t)pair->local.region & 0xf) << 3); + offs[1] = (((uint64_t)pair->remote.rkey & 0xffffff) << 24) | ((uint64_t)pair->local.psn & 0xffffff); + offs[2] = (uint64_t)pair->remote.vaddr; + ioctl(fd, IOCTL_WRITE_CTX, &offs); +} + +/** + * Write QP connection + * @param: pair - target queue pair + */ +void fDev::writeConnection(fQPair *pair, uint32_t port) { + uint64_t offs[3]; + offs[0] = (((uint64_t)port & 0xffff) << 40) | (((uint64_t)pair->remote.qpn & 0xffffff) << 16) | ((pair->local.qpn) & 0xffff); + offs[1] = ((htols((uint64_t)pair->remote.gidToUint(8)) & 0xffffffff) << 
32) | (htols((uint64_t)pair->remote.gidToUint(0)) & 0xffffffff); + offs[2] = ((htols((uint64_t)pair->remote.gidToUint(24)) & 0xffffffff) << 32) | (htols((uint64_t)pair->remote.gidToUint(16)) & 0xffffffff); + ioctl(fd, IOCTL_WRITE_CONN, &offs); +} + + // ------------------------------------------------------------------------------- + // -- Network + // ------------------------------------------------------------------------------- + +#ifdef EN_AVX + + /** + * RDMA write + * @param: l_addr - local virtual address + * @param: r_addr - remote virtual address + * @param: size - transfer size + */ + bool fDev::postWrite(fQPair *pair, uint64_t l_offs, uint64_t r_offs, uint32_t size) { + uint64_t l_addr = pair->local.vaddr + l_offs; + uint64_t r_addr = pair->remote.vaddr + r_offs; + + uint64_t offs_0 = (((uint64_t)pair->local.qpn & 0xffffff) << 5) | ((uint64_t)opCode::WRITE & 0x1f); + uint64_t offs_1 = (((uint64_t)r_addr & 0xffff) << 48) | ((uint64_t)l_addr & 0xffffffffffff); + uint64_t offs_2 = ((uint64_t)size << 32) | (((uint64_t)r_addr >> 16) & 0xffffffff); + uint64_t offs_3 = 0; + + postCmd(offs_3, offs_2, offs_1, offs_0); + + return 0; + } + + /** + * RDMA read + * @param: l_addr - local virtual address + * @param: r_addr - remote virtual address + * @param: size - transfer size + */ + bool fDev::postRead(fQPair *pair, uint64_t l_offs, uint64_t r_offs, uint32_t size) { + uint64_t l_addr = pair->local.vaddr + l_offs; + uint64_t r_addr = pair->remote.vaddr + r_offs; + + uint64_t offs_0 = (((uint64_t)pair->local.qpn & 0xffffff) << 5) | ((uint64_t)opCode::READ & 0x1f); + uint64_t offs_1 = (((uint64_t)r_addr & 0xffff) << 48) | ((uint64_t)l_addr & 0xffffffffffff); + uint64_t offs_2 = ((uint64_t)size << 32) | (((uint64_t)r_addr >> 16) & 0xffffffff); + uint64_t offs_3 = 0; + + postCmd(offs_3, offs_2, offs_1, offs_0); + + return 0; + } + + /** + * RDMA RPC + * @param: offs_3, _2, _1 - parameters + */ + bool fDev::postFarview(fQPair *pair, uint64_t l_offs, uint64_t r_offs, 
uint32_t size, uint64_t params) { + uint64_t l_addr = pair->local.vaddr + l_offs; + uint64_t r_addr = pair->remote.vaddr + r_offs; + + uint64_t offs_0 = (((uint64_t)pair->local.qpn & 0xffffff) << 5) | ((uint64_t)opCode::FV & 0x1f); + uint64_t offs_1 = (((uint64_t)r_addr & 0xffff) << 48) | ((uint64_t)l_addr & 0xffffffffffff); + uint64_t offs_2 = ((uint64_t)size << 32) | (((uint64_t)r_addr >> 16) & 0xffffffff); + uint64_t offs_3 = params; + + postCmd(offs_3, offs_2, offs_1, offs_0); + + return 0; + } + + /** + * Base post + */ + void fDev::postCmd(uint64_t offs_3, uint64_t offs_2, uint64_t offs_1, uint64_t offs_0) { + // Check outstanding + while (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + rdma_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_RDMA_STAT_CMD_USED_REG], 0) & 0xffffffff; + if (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + cnfg_reg[CNFG_RDMA_POST_REG] = _mm256_set_epi64x(offs_3, offs_2, offs_1, offs_0); + + rdma_cmd_cnt++; + } + +#else + + // ------------------------------------------------------------------------------- + // -- Network + // ------------------------------------------------------------------------------- + + /** + * RDMA write + * @param: l_addr - local virtual address + * @param: r_addr - remote virtual address + * @param: size - transfer size + */ + bool fDev::postWrite(rQPair *pair, uint64_t *l_addr, uint64_t *r_addr, uint32_t size) { + if(qpn_attached) + postCmd(opCode::WRITE, pair, l_addr, r_addr, size); + else + return 1; + + return 0; + } + + /** + * RDMA read + * @param: l_addr - local virtual address + * @param: r_addr - remote virtual address + * @param: size - transfer size + */ + bool fDev::postRead(rQPair *pair, uint64_t *l_addr, uint64_t *r_addr, uint32_t size) { + if(qpn_attached) + postCmd(opCode::READ, pair, l_addr, r_addr, size); + else + return 1; + + return 0; + } + + /** + * Base post + * TODO: Change to new config + */ + void fDev::postCmd(opCode 
op, rQPair *pair, uint64_t *l_addr, uint64_t *r_addr, uint32_t size) { + #ifdef VERBOSE_DEBUG + std::cout << "Post, queue pair l: " << pair->local.qpn << ", r: " << pair->remote.qpn << std::endl; + #endif + + // Check outstanding + while (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + rdma_cmd_cnt = cnfg_reg[CNFG_RDMA_STAT_CMD_USED_REG]; + if (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + cnfg_reg[CNFG_RDMA_POST_REG_0] = (((uint64_t)size << 27)) | (((uint64_t)pair->local.qpn & 0xffffff) << 3) | (((uint64_t)op & 0x3)); + cnfg_reg[CNFG_RDMA_POST_REG_1] = (uint64_t)l_addr; + cnfg_reg[CNFG_RDMA_POST_REG_2] = (uint64_t)r_addr; + cnfg_reg[CNFG_RDMA_POST_REG_3] = 0; + + rdma_cmd_cnt++; + } + +#endif +#endif + +} diff --git a/sw/base/src/main.cpp b/sw/base/src/main.cpp new file mode 100644 index 00000000..573cf3e8 --- /dev/null +++ b/sw/base/src/main.cpp @@ -0,0 +1,140 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fBench.hpp" +#include "fDev.hpp" + +using namespace std; +using namespace std::chrono; +using namespace fpga; + +/* Runtime */ +#define N_REGIONS 1 +#define N_PAGES 8 // 16 MB +#define N_REPS 1 +#define TR_SIZE (4 * 1024) + +/** + * Loopback example. + * This code is used to demonstrate data transfer abilities of the system. + * It can be used with a number of provided operators with minimal modifications + * (addmul, AES, chacha, hll, lpn ...) 
+ */ +int main(int argc, char *argv[]) +{ + // --------------------------------------------------------------- + // -- Initialization + // --------------------------------------------------------------- + + // Read arguments + boost::program_options::options_description programDescription("Options:"); + programDescription.add_options()("regions,n", boost::program_options::value(), "Number of FPGA regions") + ("host,h", boost::program_options::value(), "Explicit FPGA memory allocation") + ("stream,t", boost::program_options::value(), "Host or card") + ("pages,p", boost::program_options::value(), "Huge page allocation") + ("reps,r", boost::program_options::value(), "Number of repetitions") + ("size, s", boost::program_options::value(), "Transfer size"); + + boost::program_options::variables_map commandLineArgs; + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, programDescription), commandLineArgs); + boost::program_options::notify(commandLineArgs); + + uint32_t n_regions = N_REGIONS; + bool host = 0; + bool stream = 1; + uint32_t n_pages = N_PAGES; + uint32_t n_reps = N_REPS; + uint32_t size = TR_SIZE; + + if(commandLineArgs.count("regions") > 0) n_regions = commandLineArgs["regions"].as(); + if(commandLineArgs.count("host") > 0) host = commandLineArgs["host"].as(); + if(commandLineArgs.count("stream") > 0) stream = commandLineArgs["stream"].as(); + if(commandLineArgs.count("pages") > 0) n_pages = commandLineArgs["pages"].as(); + if(commandLineArgs.count("reps") > 0) n_reps = commandLineArgs["reps"].as(); + if(commandLineArgs.count("size") > 0) size = commandLineArgs["size"].as(); + + // FPGA handles + fDev *fdev = new fDev[N_REGIONS]; + + // Memory + uint64_t *hMem[N_REGIONS]; + + // Obtain regions + for (int i = 0; i < N_REGIONS; i++) { + if (!fdev[i].acquireRegion(i)) return EXIT_FAILURE; + fdev[i].clearCompleted(); // Clear records of previous transactions + } + + // Allocate buffers + for(int i = 0; i < N_REGIONS; i++) { + 
if(host) + hMem[i] = fdev[i].getHostMem(N_PAGES); + else + hMem[i] = (uint64_t*) memalign(64, TR_SIZE); + } + + // --------------------------------------------------------------- + // -- Runs + // --------------------------------------------------------------- + Bench bench(n_reps, size); + uint32_t n_runs = 0; + + // Throughput test + auto benchmark_thr = [&fdev, &hMem, &n_runs, n_reps, n_regions, size, stream]() { + bool k = false; + n_runs++; + + // Transfer the data + for(int i = 0; i < n_reps; i++) { + for(int j = 0; j < n_regions; j++) { + fdev[j].transfer(hMem[j], hMem[j], size, size, stream, false); + } + } + + while(!k) { + k = true; + for(int i = 0; i < n_regions; i++) + if(fdev[i].checkCompletedWrite() != n_reps * n_runs) k = false; + } + }; + bench.runtime(benchmark_thr); + std::cout << "Throughput: " << ((n_regions * 1000 * size)) / (bench.getAvg() / n_reps) << " MB/s" << std::endl; + + // Latency test + auto benchmark_lat = [&fdev, &hMem, n_reps, n_regions, size, stream]() { + // Transfer the data + for(int i = 0; i < n_reps; i++) { + for(int j = 0; j < n_regions; j++) { + fdev[j].transfer(hMem[j], hMem[j], size, size, stream, true); + while(fdev[j].checkCompletedWrite() != 1); + } + } + }; + bench.runtime(benchmark_lat); + std::cout << "Latency: " << bench.getAvg() / n_reps << " ns" << std::endl; + + // Free buffers + for(int i = 0; i < N_REGIONS; i++) { + if(host) + fdev[i].freeHostMem(hMem[i], N_PAGES); + else + free(hMem[i]); + } + + // Release regions + for (int i = 0; i < N_REGIONS; i++) { + fdev[i].releaseRegion(); + } + + return EXIT_SUCCESS; +} diff --git a/sw/gbm/CMakeLists.txt b/sw/gbm/CMakeLists.txt new file mode 100644 index 00000000..16894e2e --- /dev/null +++ b/sw/gbm/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.0) +project(gbm) + +# Includes +include_directories(include) + +# Sources +file(GLOB SOURCES src/*.cpp) + +# Exec +set(EXEC main) + +# Compilation +set (CMAKE_CXX_STANDARD 14) +set (CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} -pthread -mavx -march=native -O3") + +# Boost lib +find_package(Boost COMPONENTS program_options REQUIRED) + +# Targets +add_executable(${EXEC} ${SOURCES}) +target_link_libraries(${EXEC} ${Boost_LIBRARIES}) \ No newline at end of file diff --git a/sw/gbm/include/fDefs.hpp b/sw/gbm/include/fDefs.hpp new file mode 100644 index 00000000..c6ae8a1a --- /dev/null +++ b/sw/gbm/include/fDefs.hpp @@ -0,0 +1,33 @@ +#pragma once + +#include +#include +#include + +/* FLAGS */ +// TODO: SET ACCORDING TO THE BITSTREAM (Should enable the reading of these things through the driver) +#define EN_AVX +#define EN_DDR +//#define EN_RDMA + +/* Farview Op codes */ +enum class opCode : uint8_t { READ=0, WRITE=1, RPC=2 }; + +/* Verbosity */ +#define VERBOSE_DEBUG_1 +//#define VERBOSE_DEBUG_2 +//#define VERBOSE_DEBUG_3 + +/* ltoh: little to host */ +/* htol: little to host */ +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define ltohl(x) (x) +# define ltohs(x) (x) +# define htoll(x) (x) +# define htols(x) (x) +#elif __BYTE_ORDER == __BIG_ENDIAN +# define ltohl(x) __bswap_32(x) +# define ltohs(x) __bswap_16(x) +# define htoll(x) __bswap_32(x) +# define htols(x) __bswap_16(x) +#endif \ No newline at end of file diff --git a/sw/gbm/include/fDev.hpp b/sw/gbm/include/fDev.hpp new file mode 100644 index 00000000..414e2bdd --- /dev/null +++ b/sw/gbm/include/fDev.hpp @@ -0,0 +1,230 @@ +#ifndef __FDEV_HPP__ +#define __FDEV_HPP__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fDefs.hpp" + +namespace fpga { + +/* Command FIFO depth */ +static const uint32_t cmd_fifo_depth = 64; +static const uint32_t cmd_fifo_thr = 10; + +/** + * Fpga device region + */ +class fDev { + + /* Fpga device */ + int32_t fd = 0; + + /* Used markers */ + uint32_t rd_cmd_cnt = 0; + uint32_t wr_cmd_cnt = 0; +#ifdef EN_RDMA + uint32_t rdma_cmd_cnt = 0; +#endif + + /* 
Mmapped regions */ +#ifdef EN_AVX + __m256i *cnfg_reg = 0; +#else + uint64_t *cnfg_reg = 0; +#endif + uint64_t *ctrl_reg = 0; + + /* Mapped large pages hash table */ + std::unordered_map mapped_large; + + /* Utility */ + bool mmapFpga(); + bool munmapFpga(); + + /* Send to controller */ + void postCmd(uint64_t offs_3, uint64_t offs_2, uint64_t offs_1, uint64_t offs_0); + + /* Check busy */ + bool checkBusyRead(); + bool checkBusyWrite(); + + /* Check ready */ + bool checkReadyRead(); + bool checkReadyWrite(); + + /* Memory */ + uint64_t* _getHostMem(uint32_t n_pages); + void _freeHostMem(uint64_t* vaddr, uint32_t n_pages); + void _userMap(uint64_t *vaddr, uint32_t len); + void _userUnmap( uint64_t *vaddr, uint32_t len); + + /* Data movement */ + void _read(uint64_t *vaddr, uint32_t len, bool stream = true, bool poll = true, bool clr_stat = true); + void _write(uint64_t *vaddr, uint32_t len, bool stream = true, bool poll = true, bool clr_stat = true); + void _transfer(uint64_t *vaddr_src, uint64_t* vaddr_dst, uint32_t len_src, uint32_t len_dst, bool stream = true, bool poll = true, bool clr_stat = true); + + void _offload(uint64_t *vaddr, uint32_t len, bool poll = true); + void _sync(uint64_t *vaddr, uint32_t len, bool poll = true); + + +public: + + fDev() {} + ~fDev() {} + + /** + * Obtain and release FPGA regions + */ + + // Acquire an FPGA region with target ID + bool acquireRegion(uint32_t rNum); + // Release an acquired FPGA region + void releaseRegion(); + + /** + * Control bus + */ + + // Control status bus, AXI Lite + inline void setCSR(uint64_t val, uint32_t offs) { ctrl_reg[offs] = val; } + inline uint64_t getCSR(uint32_t offs) { return ctrl_reg[offs]; } + + /** + * Explicit buffer management + * @param n_pages - number of 2MB pages to be allocated + */ + + // Obtain host memory - pages 2M + template + _Vaddr* getHostMem(uint32_t n_pages) { + return (_Vaddr*) _getHostMem(n_pages); + } + + // Free host memory + template + void freeHostMem(_Vaddr* vaddr, 
uint32_t n_pages) { + _freeHostMem((uint64_t*)vaddr, n_pages); + } + + // FPGA user space range mapping + template + void userMap(uint64_t *vaddr, uint32_t len) { + _userMap((uint64_t*)vaddr, len); + } + + // FPGA user space range unmapping (auto on release) + template + void userUnmap(_Vaddr *vaddr, uint32_t len) { + _userUnmap((uint64_t*)vaddr, len); + } + + // Obtain PR memory - pages 2M + uint64_t* getPrMem(uint64_t n_pages); + // Free PR memory + void freePrMem(uint64_t* vaddr, uint64_t n_pages); + + /** + * Bulk transfers + * @param vaddr - data pointer + * @param len - transfer length + * @param poll - blocking vs non-blocking + */ + + template + void read(_Vaddr *vaddr, uint32_t len, bool stream = true, bool poll = true, bool clr_stat = true) { + _read((uint64_t*)vaddr, len, stream, poll, clr_stat); + } + + template + void write(_Vaddr *vaddr, uint32_t len, bool stream = true, bool poll = true, bool clr_stat = true) { + _write((uint64_t*)vaddr, len, stream, poll, clr_stat); + } + + template + void transfer(_Vaddr *vaddr_src, _Vaddr *vaddr_dst, uint32_t len_src, uint32_t len_dst, bool stream = true, bool poll = true, bool clr_stat = true) { + _transfer((uint64_t*)vaddr_src, (uint64_t*)vaddr_dst, len_src, len_dst, stream, poll, clr_stat); + } + +#ifdef EN_DDR + // Sync operations + template + void sync(uint64_t *vaddr, uint32_t len, bool poll = true) { + _sync((uint64_t*)vaddr, len, poll); + } + + template + void offload(uint64_t *vaddr, uint32_t len, bool poll = true) { + _offload((uint64_t*)vaddr, len, poll); + } +#endif + + /** + * Check for completion + */ + + // Returns the number of completed reads + uint32_t checkCompletedRead(); + // Returns the number of completed writes + uint32_t checkCompletedWrite(); + // Clear all status + void clearCompleted(); + + // Timers + void setTimerStopAt(uint64_t tmr_stop_at); + uint64_t getReadTimer(); + uint64_t getWriteTimer(); + + // Debug + void printDebugXDMA(); + + /** + * PR + */ + void reconfigure(uint64_t* 
vaddr, uint64_t len); + + /** + * Roce operations + */ + +#ifdef EN_RDMA + // ARP lookup + bool doArpLookup(); + // Write initial context + void writeContext(uint64_t r_vaddr, uint32_t r_key, uint32_t l_psn, uint32_t r_psn, uint32_t l_qpn, uint32_t l_region); + // Write connection + void writeConnection(uint32_t r_qpn, uint32_t l_qpn, uint32_t port); + // QPn + void writeQpn(uint32_t qpn); + uint32_t getQpn() { return qpn; } + bool getQpnAttached() { return qpn_attached; } + + // RDMA ops + bool postWrite(uint64_t *l_addr, uint64_t *r_addr, uint32_t size); + bool postRead(uint64_t *l_addr, uint64_t *r_addr, uint32_t size); + bool postRpc(uint64_t *l_addr, uint64_t *r_addr, uint32_t size, uint64_t params); + + // Debug + void printDebugRDMA(); +#endif +}; + +} /* namespace fpga */ + +#endif diff --git a/sw/gbm/src/fDev.cpp b/sw/gbm/src/fDev.cpp new file mode 100644 index 00000000..60b253dd --- /dev/null +++ b/sw/gbm/src/fDev.cpp @@ -0,0 +1,983 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fDev.hpp" + +/* Sleep */ +#define POLL_SLEEP_NS 100 + +#define LARGE_PAGE_SIZE 2 * 1024 * 1024 +#define LARGE_PAGE_SHIFT 21UL +#define PAGE_SIZE 4 * 1024 +#define PAGE_SHIFT 12UL + +/* Clock */ +#define CLK_NS 4 + +/* IOCTL */ +#define IOCTL_ALLOC_HOST_USER_MEM _IOR('D', 1, unsigned long) +#define IOCTL_FREE_HOST_USER_MEM _IOR('D', 2, unsigned long) +#define IOCTL_ALLOC_HOST_PR_MEM _IOR('D', 3, unsigned long) +#define IOCTL_FREE_HOST_PR_MEM _IOR('D', 4, unsigned long) +#define IOCTL_MAP_USER _IOR('D', 5, unsigned long) +#define IOCTL_UNMAP_USER _IOR('D', 6, unsigned long) +#define IOCTL_RECONFIG_LOAD _IOR('D', 7, unsigned long) +#define IOCTL_ARP_LOOKUP _IOR('D', 8, unsigned long) +#define IOCTL_WRITE_CTX _IOR('D', 9, unsigned long) +#define IOCTL_WRITE_CONN _IOR('D', 10, unsigned long) +#define IOCTL_RDMA_STAT _IOR('D', 11, unsigned long) +#define IOCTL_READ_ENG_STATUS _IOR('D', 12, unsigned long) + +/* MMAP */ 
+#define MMAP_CTRL (0x0 << PAGE_SHIFT) +#define MMAP_CNFG (0x1 << PAGE_SHIFT) +#define MMAP_CNFG_AVX (0x2 << PAGE_SHIFT) +#define MMAP_BUFF (0x200 << PAGE_SHIFT) +#define MMAP_PR (0x400 << PAGE_SHIFT) + +/* Regions */ +#define CTRL_REGION_SIZE 64 * 1024 +#define CNFG_REGION_SIZE 64 * 1024 +#define CNFG_AVX_REGION_SIZE 256 * 1024 + +#define N_RDMA_STAT_REGS 24 + +/* Config regs */ +#ifdef EN_AVX + // Base + #define CNFG_CTRL_REG 0 + #define CNFG_PF_REG 1 + #define CNFG_DATAPATH_REG_SET 2 + #define CNFG_DATAPATH_REG_CLR 3 + #define CNFG_TMR_STOP_REG 4 + #define CNFG_TMR_REG 5 + #define CNFG_STAT_REG 6 + // RDMA + #define CNFG_RDMA_POST_REG 10 + #define CNFG_RDMA_STAT_CMD_USED_REG 11 + #define CNFG_RDMA_QPN_REG 12 + + #define CTRL_START_RD 0x1 + #define CTRL_START_WR 0x2 + #define CTRL_SYNC_RD 0x4 + #define CTRL_SYNC_WR 0x8 + #define CTRL_STREAM_RD 0x10 + #define CTRL_STREAM_WR 0x20 + #define CTRL_CLR_STAT_RD 0x40 + #define CTRL_CLR_STAT_WR 0x80 + #define CTRL_CLR_IRQ_PENDING 0x100 +#else + // Base + #define CNFG_CTRL_REG 0 + #define CNFG_VADDR_RD_REG 1 + #define CNFG_LEN_RD_REG 2 + #define CNFG_VADDR_WR_REG 3 + #define CNFG_LEN_WR_REG 4 + #define VADDR_MISS_REG 5 + #define LEN_MISS_REG 6 + #define CNFG_DATAPATH_REG_SET 7 + #define CNFG_DATAPATH_REG_CLR 8 + #define CNFG_TMR_STOP_REG 9 + #define CNFG_TMR_RD_REG 10 + #define CNFG_TMR_WR_REG 11 + #define CNFG_STAT_CMD_USED_RD_REG 12 + #define CNFG_STAT_CMD_USED_WR_REG 13 + #define CNFG_STAT_DMA_RD_REG 14 + #define CNFG_STAT_DMA_WR_REG 15 + #define CNFG_STAT_SENT_RD_REG 16 + #define CNFG_STAT_SENT_WR_REG 17 + #define CNFG_STAT_PFAULTS_REG 18 + // RDMA + #define CNFG_RDMA_POST_REG_0 20 + #define CNFG_RDMA_POST_REG_1 21 + #define CNFG_RDMA_POST_REG_2 22 + #define CNFG_RDMA_POST_REG_3 23 + #define CNFG_RDMA_STAT_CMD_USED_REG 24 + #define CNFG_RDMA_QPN_REG 25 + + #define CTRL_START_RD 0x1 + #define CTRL_START_WR 0x2 + #define CTRL_SYNC_RD 0x4 + #define CTRL_SYNC_WR 0x8 + #define CTRL_CLR_STAT_RD 0x10 + #define 
CTRL_CLR_STAT_WR 0x20 + #define CTRL_CLR_IRQ_PENDING 0x40 + #define CTRL_SEND_RDMA_REQ 0x80 + #define CTRL_SEND_QP_CTX 0x100 + #define CTRL_SEND_QP_CONN 0x200 +#endif + +using namespace std::chrono; + +namespace fpga { + +// ------------------------------------------------------------------------------- +// -- Obtain regions +// ------------------------------------------------------------------------------- + +/** + * Obtain vFPGA char devices + * @param: rNum - region ID + */ +bool fDev::acquireRegion(uint32_t rNum) { + std::string region = "/dev/fpga" + std::to_string(rNum); + fd = open(region.c_str(), O_RDWR | O_SYNC); + if(fd == -1) { + std::cout << "ERR: Cannot acquire an FPGA region" << std::endl; + return false; + } + + if(!mmapFpga()) { + std::cout << "ERR: Cannot mmap an FPGA region" << std::endl; + return false; + } + + return true; +} + +/** + * Release the vFPGA handle + */ +void fDev::releaseRegion() { + close(fd); +} + +/** + * Memory map control + */ +bool fDev::mmapFpga() { +#ifdef EN_AVX + cnfg_reg = (__m256i*) mmap(NULL, CNFG_AVX_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CNFG_AVX); + if(cnfg_reg == MAP_FAILED) { + releaseRegion(); + return false; + } +#else + cnfg_reg = (uint64_t*) mmap(NULL, CNFG_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CNFG); + if(cnfg_reg == MAP_FAILED) { + releaseRegion(); + return false; + } +#endif + + ctrl_reg = (uint64_t*) mmap(NULL, CTRL_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CTRL); + if(ctrl_reg == MAP_FAILED) { + releaseRegion(); + return false; + } + + return true; +} + +/** + * Unmap + */ +bool fDev::munmapFpga() { +#ifdef EN_AVX + if(munmap(cnfg_reg, CNFG_AVX_REGION_SIZE) != 0) { + releaseRegion(); + return false; + } +#else + if(munmap(cnfg_reg, CNFG_REGION_SIZE) != 0) { + releaseRegion(); + return false; + } +#endif + + if(munmap(ctrl_reg, CTRL_REGION_SIZE) != 0) { + releaseRegion(); + return false; + } + + cnfg_reg = 0; + ctrl_reg = 0; + + return true; +} + +// 
-------------------------------------------------------------------------------
+// -- Memory management
+// -------------------------------------------------------------------------------
+
+/**
+ * Obtain huge pages on the host memory
+ * @param: n_pages - number of requested large pages
+ * @return: pointer rounded up to a LARGE_PAGE_SIZE boundary, or nullptr when
+ *          the driver allocation or the mapping fails
+ */
+uint64_t* fDev::_getHostMem(uint32_t n_pages) {
+    uint64_t n_pg = n_pages;
+
+    // Ask the driver for the pages first; without them there is nothing to map
+    if (ioctl(fd, IOCTL_ALLOC_HOST_USER_MEM, &n_pg) < 0)
+        return nullptr;
+
+    // One extra page is mapped so the rounded-up pointer still has n_pg pages behind it
+    void *raw = mmap(NULL, (n_pg + 1) * LARGE_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_BUFF);
+    if (raw == MAP_FAILED)
+        return nullptr;
+
+    uint64_t *hMem = (uint64_t*)raw;
+    // alignment
+    uint64_t *hMemAligned = (uint64_t*)( ((((uint64_t)hMem + LARGE_PAGE_SIZE - 1) >> LARGE_PAGE_SHIFT)) << LARGE_PAGE_SHIFT);
+    // Remember the raw mapping, munmap() needs it when the aligned pointer is released
+    mapped_large[hMemAligned] = hMem;
+    return hMemAligned;
+}
+
+/**
+ * Release huge pages on the host memory
+ * @param: vaddr - memory pointer (as returned by _getHostMem)
+ * @param: n_pages - number of obtained pages
+ */
+void fDev::_freeHostMem(uint64_t *vaddr, uint32_t n_pages) {
+    uint64_t n_pg = n_pages;
+
+    // Look up without operator[] so an unknown pointer does not create an empty entry
+    auto it = mapped_large.find(vaddr);
+    if (it == mapped_large.end())
+        return;
+
+    munmap(it->second, (n_pg + 1) * LARGE_PAGE_SIZE);
+    ioctl(fd, IOCTL_FREE_HOST_USER_MEM, &vaddr);
+    // Drop the bookkeeping entry so a stale mapping is never unmapped twice
+    mapped_large.erase(it);
+}
+
+/**
+ * Obtain huge pages allocated for the PR bitstreams
+ * @param: n_pages - number of requested large pages
+ * @return: pointer rounded up to a LARGE_PAGE_SIZE boundary, or nullptr when
+ *          the driver allocation or the mapping fails
+ */
+uint64_t* fDev::getPrMem(uint64_t n_pages) {
+    uint64_t n_pg = n_pages;
+
+    if (ioctl(fd, IOCTL_ALLOC_HOST_PR_MEM, &n_pg) < 0)
+        return nullptr;
+
+    // One extra page is mapped so the rounded-up pointer still has n_pg pages behind it
+    void *raw = mmap(NULL, (n_pg + 1) * LARGE_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_PR);
+    if (raw == MAP_FAILED)
+        return nullptr;
+
+    uint64_t *hMem = (uint64_t*)raw;
+    // alignment
+    uint64_t *hMemAligned = (uint64_t*)( ((((uint64_t)hMem + LARGE_PAGE_SIZE - 1) >> LARGE_PAGE_SHIFT)) << LARGE_PAGE_SHIFT);
+    mapped_large[hMemAligned] = hMem;
+    return hMemAligned;
+}
+
+/**
+ * Release huge pages allocated for the PR bitstreams
+ * @param: vaddr - memory pointer (as returned by getPrMem)
+ * @param: n_pages - number of obtained pages
+ */
+void fDev::freePrMem(uint64_t *vaddr, uint64_t n_pages) {
+    uint64_t n_pg = n_pages;
+
+    // Look up without operator[] so an unknown pointer does not create an empty entry
+    auto it = mapped_large.find(vaddr);
+    if (it == mapped_large.end())
+        return;
+
+    munmap(it->second, (n_pg + 1) * LARGE_PAGE_SIZE);
+    ioctl(fd, IOCTL_FREE_HOST_PR_MEM, &vaddr);
+    mapped_large.erase(it);
+}
+
+/**
+ * Explicit TLB mapping
+ * @param: mem - memory pointer
+ * @param: len - length of the mapping
+ */
+void fDev::_userMap(uint64_t *mem, uint32_t len) {
+    // The driver receives the pair {address, length}
+    uint64_t vdata [2];
+    vdata[0] = (uint64_t)mem;
+    vdata[1] = len;
+    ioctl(fd, IOCTL_MAP_USER, &vdata);
+}
+
+/**
+ * TLB unmap
+ * @param: mem - memory pointer
+ * @param: len - length of the mapping
+ */
+void fDev::_userUnmap(uint64_t *mem, uint32_t len) {
+    // The driver receives the pair {address, length}
+    uint64_t vdata [2];
+    vdata[0] = (uint64_t)mem;
+    vdata[1] = len;
+    ioctl(fd, IOCTL_UNMAP_USER, &vdata);
+}
+
+// -------------------------------------------------------------------------------
+// -- PR
+// -------------------------------------------------------------------------------
+
+/**
+ * Reconfiguration ioctl call
+ * @param: vaddr - memory pointer of the PR stream
+ * @param: len - length of the stream
+ */
+void fDev::reconfigure(uint64_t *vaddr, uint64_t len) {
+    // The driver receives the pair {address, length}
+    uint64_t vdata [2];
+    vdata[0] = (uint64_t)vaddr;
+    vdata[1] = len;
+    ioctl(fd, IOCTL_RECONFIG_LOAD, &vdata);
+}
+
+#ifdef EN_AVX
+    // -------------------------------------------------------------------------------
+    // -- Bulk transfers
+    // -------------------------------------------------------------------------------
+
+    /**
+     * Read operation (read to FPGA user logic)
+     * @param: vaddr - memory pointer
+     * @param: len - length
+     * @param: stream - stream from host memory
+     * @param: poll - blocking/non-blocking
+     * @param: clr_stat - prior status clear
+     */
+    void fDev::_read(uint64_t* vaddr, uint32_t len, bool stream, bool poll, bool clr_stat) {
+        // Check outstanding: refresh the cached count from the low 32 bits of
+        // the status register and back off while it stays above the threshold
+        while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            rd_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) & 0xffffffff;
+            if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+
+        // Read length goes in the low half of the length word
+        uint64_t len_cmd = len;
+        uint64_t ctrl_cmd = CTRL_START_RD | (clr_stat ? CTRL_CLR_STAT_RD : 0x0) | (stream ? CTRL_STREAM_RD : 0x0);
+        // Single 256-bit store: {length, write vaddr (unused), read vaddr, control}
+        cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, 0, (uint64_t)vaddr, ctrl_cmd);
+
+        rd_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyRead()) nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+    }
+
+    /**
+     * Write operation (write from FPGA user logic)
+     * @param: vaddr - memory pointer
+     * @param: len - length
+     * @param: stream - stream to host memory
+     * @param: poll - blocking/non-blocking
+     * @param: clr_stat - prior status clear
+     */
+    void fDev::_write(uint64_t* vaddr, uint32_t len, bool stream, bool poll, bool clr_stat) {
+        // Check outstanding: the write count sits in the high 32 bits of the status word
+        while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            wr_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) >> 32;
+            if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+
+        // Write length goes in the high half of the length word
+        uint64_t len_cmd = (uint64_t)len << 32;
+        uint64_t ctrl_cmd = CTRL_START_WR | (clr_stat ? CTRL_CLR_STAT_WR : 0x0) | (stream ? CTRL_STREAM_WR : 0x0);
+        // Single 256-bit store: {length, write vaddr, read vaddr (unused), control}
+        cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, (uint64_t)vaddr, 0, ctrl_cmd);
+
+        wr_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+    }
+
+    /**
+     * transfer (read + write)
+     * @param: vaddr_src, _dst - memory pointer
+     * @param: len_src, _dst - length
+     * @param: stream - stream from host memory
+     * @param: poll - blocking/non-blocking (only the write side is polled)
+     * @param: clr_stat - prior status clear
+     */
+    void fDev::_transfer(uint64_t* vaddr_src, uint64_t* vaddr_dst, uint32_t len_src, uint32_t len_dst, bool stream, bool poll, bool clr_stat) {
+        // Check outstanding read
+        while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            rd_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) & 0xffffffff;
+            if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+        // Check outstanding write
+        while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            wr_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) >> 32;
+            if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+
+        // Both lengths share one word: destination in the high half, source in the low half
+        uint64_t len_cmd = ((uint64_t)len_dst << 32) | len_src;
+        uint64_t ctrl_cmd = CTRL_START_WR | (clr_stat ? CTRL_CLR_STAT_WR : 0x0) | (stream ? CTRL_STREAM_WR : 0x0) |
+                            CTRL_START_RD | (clr_stat ? CTRL_CLR_STAT_RD : 0x0) | (stream ? CTRL_STREAM_RD : 0x0);
+
+        cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, (uint64_t)vaddr_dst, (uint64_t)vaddr_src, ctrl_cmd);
+
+        rd_cmd_cnt++;
+        wr_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+    }
+
+#ifdef EN_DDR
+
+    /**
+     * Offload to FPGA DDR (only with local FPGA memory)
+     * @param: vaddr - memory pointer
+     * @param: len - length
+     * @param: poll - blocking/non-blocking
+     */
+    void fDev::_offload(uint64_t* vaddr, uint32_t len, bool poll) {
+        // Check outstanding
+        while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            rd_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) & 0xffffffff;
+            if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+
+        uint64_t len_cmd = len;
+        // Always clears the read status and sets the sync flag
+        uint64_t ctrl_cmd = CTRL_START_RD | CTRL_CLR_STAT_RD | CTRL_SYNC_RD;
+        cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, 0, (uint64_t)vaddr, ctrl_cmd);
+
+        rd_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyRead()) nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+    }
+
+    /**
+     * Sync with FPGA DDR (only with local FPGA memory)
+     * @param: vaddr - memory pointer
+     * @param: len - length
+     * @param: poll - blocking/non-blocking
+     */
+    void fDev::_sync(uint64_t* vaddr, uint32_t len, bool poll) {
+        // Check outstanding
+        while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            wr_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) >> 32;
+            if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+
+        uint64_t len_cmd = (uint64_t)len << 32;
+        // Always clears the write status and sets the sync flag
+        uint64_t ctrl_cmd = CTRL_START_WR | CTRL_CLR_STAT_WR | CTRL_SYNC_WR;
+        cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, (uint64_t)vaddr, 0, ctrl_cmd);
+
+        // Count the command once it has been issued, like the other transfer calls
+        wr_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+    }
+
+#endif
+
+    // 
-------------------------------------------------------------------------------
+    // -- Polling
+    // -------------------------------------------------------------------------------
+
+    /**
+     * Check whether busy read
+     * @return: true while the read-completed counter is still zero
+     */
+    bool fDev::checkBusyRead() {
+        return !(_mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 1) & 0xffffffff);
+    }
+
+    /**
+     * Check whether busy write
+     * @return: true while the write-completed counter is still zero
+     */
+    bool fDev::checkBusyWrite() {
+        return !(_mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 1) >> 32);
+    }
+
+    /**
+     * Return read completed (low 32 bits of status word 1)
+     */
+    uint32_t fDev::checkCompletedRead() {
+        return _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 1) & 0xffffffff;
+    }
+
+    /**
+     * Return write completed (high 32 bits of status word 1)
+     */
+    uint32_t fDev::checkCompletedWrite() {
+        return _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 1) >> 32;
+    }
+
+    /**
+     * Clear status (both read and write)
+     */
+    void fDev::clearCompleted() {
+        cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(0, 0, 0, CTRL_CLR_STAT_RD | CTRL_CLR_STAT_WR);
+    }
+
+    // -------------------------------------------------------------------------------
+    // -- Timers
+    // -------------------------------------------------------------------------------
+
+    /**
+     * Set timer stop at x number of completed transfers
+     * @param: tmr_stop_at - stop once completed reached
+     */
+    void fDev::setTimerStopAt(uint64_t tmr_stop_at) {
+        cnfg_reg[CNFG_TMR_STOP_REG] = _mm256_set_epi64x(0, 0, 0, tmr_stop_at);
+    }
+
+    /**
+     * Read timer (64-bit word 0 of the timer register)
+     */
+    uint64_t fDev::getReadTimer() {
+        return _mm256_extract_epi64(cnfg_reg[CNFG_TMR_REG], 0);
+    }
+
+    /**
+     * Write timer (64-bit word 1 of the timer register)
+     */
+    uint64_t fDev::getWriteTimer() {
+        return _mm256_extract_epi64(cnfg_reg[CNFG_TMR_REG], 1);
+    }
+
+    // -------------------------------------------------------------------------------
+    // -- Debug XDMA
+    // -------------------------------------------------------------------------------
+
+    /**
+     * XDMA debug: prints the seven 32-bit lanes of the status register
+     */
+    void fDev::printDebugXDMA() // TODO
+    {
+        std::cout << "-- XDMA STATISTICS ----------------------------" << std::endl;
+        std::cout << std::setw(35) << "Read command FIFO used: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x0) << std::endl;
+        std::cout << std::setw(35) << "Write command FIFO used: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x1) << std::endl;
+        std::cout << std::setw(35) << "Reads completed: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x2) << std::endl;
+        std::cout << std::setw(35) << "Writes completed: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x3) << std::endl;
+        std::cout << std::setw(35) << "Read requests sent: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x4) << std::endl;
+        std::cout << std::setw(35) << "Write requests sent: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x5) << std::endl;
+        std::cout << std::setw(35) << "Page faults: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x6) << std::endl;
+        std::cout << "-----------------------------------------------" << std::endl;
+    }
+
+#else
+
+    /**
+     * Read operation (read to FPGA user logic)
+     * @param: vaddr - memory pointer
+     * @param: len - length
+     * @param: poll - blocking/non-blocking
+     * @param: clr_stat - prior status clear
+     */
+    void fDev::read(uint64_t* vaddr, uint32_t len, bool poll, bool clr_stat) {
+        // Check outstanding
+        while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            rd_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_RD_REG];
+            if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+
+        // Address and length are written first, the control write follows them
+        cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr;
+        cnfg_reg[CNFG_LEN_RD_REG] = len;
+
+        cnfg_reg[CNFG_CTRL_REG] = CTRL_START_RD | (clr_stat ? CTRL_CLR_STAT_RD : 0x0);
+
+        rd_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyRead()) nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+    }
+
+    /**
+     * Write operation (write from FPGA user logic)
+     * @param: vaddr - memory pointer
+     * @param: len - length
+     * @param: poll - blocking/non-blocking
+     * @param: clr_stat - prior status clear
+     */
+    void fDev::write(uint64_t* vaddr, uint32_t len, bool poll, bool clr_stat) {
+        // Check outstanding
+        while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            wr_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_WR_REG];
+            if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+
+        cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr;
+        cnfg_reg[CNFG_LEN_WR_REG] = len;
+
+        cnfg_reg[CNFG_CTRL_REG] = CTRL_START_WR | (clr_stat ? CTRL_CLR_STAT_WR : 0x0);
+
+        wr_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+    }
+
+    /**
+     * transfer (read + write)
+     * @param: vaddr_src, _dst - memory pointer
+     * @param: len_src, _dst - length
+     * @param: poll - blocking/non-blocking (only the write side is polled)
+     * @param: clr_stat - prior status clear
+     */
+    void fDev::transfer(uint64_t* vaddr_src, uint64_t* vaddr_dst, uint32_t len_src, uint32_t len_dst, bool poll, bool clr_stat) {
+        // Check outstanding read
+        while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            rd_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_RD_REG];
+            if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+
+        // Check outstanding write
+        while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            wr_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_WR_REG];
+            if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+
+        cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr_src;
+        cnfg_reg[CNFG_LEN_RD_REG] = len_src;
+        cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr_dst;
+        cnfg_reg[CNFG_LEN_WR_REG] = len_dst;
+
+        // Two separate control writes: the read is started first, then the write
+        cnfg_reg[CNFG_CTRL_REG] = CTRL_START_RD | (clr_stat ? CTRL_CLR_STAT_RD : 0x0);
+        cnfg_reg[CNFG_CTRL_REG] = CTRL_START_WR | (clr_stat ? CTRL_CLR_STAT_WR : 0x0);
+
+        rd_cmd_cnt++;
+        wr_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+    }
+
+#ifdef EN_DDR
+
+    /**
+     * Offload to FPGA DDR (only with local FPGA memory)
+     * @param: vaddr - memory pointer
+     * @param: len - length
+     * @param: poll - blocking/non-blocking
+     */
+    void fDev::offload(uint64_t* vaddr, uint32_t len, bool poll) {
+        // Check outstanding: this call also counts against the read command
+        // FIFO, so it applies the same back-pressure as read()
+        while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            rd_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_RD_REG];
+            if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+
+        cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr;
+        cnfg_reg[CNFG_LEN_RD_REG] = len;
+
+        cnfg_reg[CNFG_CTRL_REG] = CTRL_SYNC_RD | CTRL_START_RD | CTRL_CLR_STAT_RD;
+
+        rd_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyRead()) nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+    }
+
+    /**
+     * Sync with FPGA DDR (only with local FPGA memory)
+     * @param: vaddr - memory pointer
+     * @param: len - length
+     * @param: poll - blocking/non-blocking
+     */
+    void fDev::sync(uint64_t* vaddr, uint32_t len, bool poll) {
+        // Check outstanding: this call also counts against the write command
+        // FIFO, so it applies the same back-pressure as write()
+        while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            wr_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_WR_REG];
+            if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+
+        cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr;
+        cnfg_reg[CNFG_LEN_WR_REG] = len;
+
+        cnfg_reg[CNFG_CTRL_REG] = CTRL_SYNC_WR | CTRL_START_WR | CTRL_CLR_STAT_WR;
+
+        wr_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+    }
+
+#endif
+
+    /**
+     * XDMA debug (no counters are printed in the non-AVX build)
+     */
+    void fDev::printDebugXDMA() // TODO
+    {
+        std::cout << "-- XDMA STATISTICS ----------------------------" << std::endl;
+        std::cout << "-----------------------------------------------" << std::endl;
+    }
+
+    // -------------------------------------------------------------------------------
+    // -- Polling
+    // -------------------------------------------------------------------------------
+
+    /**
+     * Check whether busy read
+     * @return: true while the read-completed register is still zero
+     */
+    bool fDev::checkBusyRead() {
+        return !(cnfg_reg[CNFG_STAT_DMA_RD_REG]);
+    }
+
+    /**
+     * Check whether busy write
+     * @return: true while the write-completed register is still zero
+     */
+    bool fDev::checkBusyWrite() {
+        return !(cnfg_reg[CNFG_STAT_DMA_WR_REG]);
+    }
+
+    /**
+     * Return read completed
+     */
+    uint32_t fDev::checkCompletedRead() {
+        return (cnfg_reg[CNFG_STAT_DMA_RD_REG]);
+    }
+
+    /**
+     * Return write completed
+     */
+    uint32_t fDev::checkCompletedWrite() {
+        return (cnfg_reg[CNFG_STAT_DMA_WR_REG]);
+    }
+
+    /**
+     * Clear status (both read and write)
+     */
+    void fDev::clearCompleted() {
+        cnfg_reg[CNFG_CTRL_REG] = CTRL_CLR_STAT_RD | CTRL_CLR_STAT_WR;
+    }
+
+    // -------------------------------------------------------------------------------
+    // -- Timers
+    // -------------------------------------------------------------------------------
+
+    /**
+     * Set timer stop at x number of completed transfers
+     * @param: tmr_stop_at - stop once completed reached
+     */
+    void fDev::setTimerStopAt(uint64_t tmr_stop_at) {
+        cnfg_reg[CNFG_TMR_STOP_REG] = tmr_stop_at;
+    }
+
+    /**
+     * Read timer
+     */
+    uint64_t fDev::getReadTimer() {
+        return cnfg_reg[CNFG_TMR_RD_REG];
+    }
+
+    /**
+     * Write timer
+     */
+    uint64_t fDev::getWriteTimer() {
+        return cnfg_reg[CNFG_TMR_WR_REG];
+    }
+
+#endif
+
+// -------------------------------------------------------------------------------
+// -- Network static
+// -------------------------------------------------------------------------------
+
+#ifdef EN_RDMA
+
+/**
+ * ARP lookup
+ * NOTE(review): the ioctl result is ignored and true is always returned -
+ * confirm whether callers expect a real success indication.
+ */
+bool fDev::doArpLookup() {
+    ioctl(fd, IOCTL_ARP_LOOKUP, 0);
+    return true;
+}
+
+/**
+ * Write QP context
+ * @param: r_vaddr - remote virtual address (passed through unchanged)
+ * @param: r_key - remote key (low 24 bits used)
+ * @param: l_psn - local PSN (low 24 bits used)
+ * @param: r_psn - remote PSN (low 24 bits used)
+ * @param: l_qpn - local QP number (low 24 bits used)
+ * @param: l_region - local region (low 4 bits used)
+ */
+void fDev::writeContext(uint64_t r_vaddr, uint32_t r_key, uint32_t l_psn, uint32_t r_psn, uint32_t l_qpn, uint32_t l_region) {
+    // Three words handed to the driver: packed psn/qpn/region, packed key/psn, address
+    uint64_t offs[3];
+    offs[0] = (((uint64_t)r_psn & 0xffffff) << 31) | (((uint64_t)l_qpn & 0xffffff) << 7) | (((uint64_t)l_region & 0xf) << 3);
+    offs[1] = (((uint64_t)r_key & 0xffffff) << 24) | ((uint64_t)l_psn & 0xffffff);
+    offs[2] = (uint64_t)r_vaddr;
+    ioctl(fd, IOCTL_WRITE_CTX, &offs);
+}
+
+/**
+ * Write QP connection
+ * @param: r_qpn - remote QP number (low 24 bits used)
+ * @param: l_qpn - local QP number (low 16 bits used)
+ * @param: port - port (low 16 bits used)
+ *
+ * NOTE(review): the body reads `pair->remote` although no `pair` is declared
+ * in this function's signature; unless `pair` is a class member this cannot
+ * compile with EN_RDMA defined. Left untouched because the intended source of
+ * the remote GID is not visible here.
+ */
+void fDev::writeConnection(uint32_t r_qpn, uint32_t l_qpn, uint32_t port) {
+    uint64_t offs[3];
+    offs[0] = (((uint64_t)port & 0xffff) << 40) | (((uint64_t)r_qpn & 0xffffff) << 16) | ((l_qpn) & 0xffff);
+    // The remote GID is split into four 32-bit pieces, two per word
+    offs[1] = ((htols((uint64_t)pair->remote.gidToUint(8)) & 0xffffffff) << 32) | (htols((uint64_t)pair->remote.gidToUint(0)) & 0xffffffff);
+    offs[2] = ((htols((uint64_t)pair->remote.gidToUint(24)) & 0xffffffff) << 32) | (htols((uint64_t)pair->remote.gidToUint(16)) & 0xffffffff);
+    ioctl(fd, IOCTL_WRITE_CONN, &offs);
+}
+
+/**
+ * RDMA debug
+ * TODO: Pretty expensive in hw, if needed enable it there first.
+ * NOTE(review): entries 22 and 23 are both labelled "Axis stream down";
+ * one of the two labels is probably wrong - confirm against the hardware.
+ * NOTE(review): indices up to 23 are printed, so N_RDMA_STAT_REGS is
+ * presumably at least 24 - confirm.
+ */
+void fDev::printDebugRDMA()
+{
+    uint64_t offs[N_RDMA_STAT_REGS];
+    ioctl(fd, IOCTL_RDMA_STAT, &offs);
+    std::cout << "-- RDMA STATISTICS ----------------------------" << std::endl;
+    std::cout << std::setw(35) << "Roce crc pkg drop: \t" << offs[0] << std::endl;
+    std::cout << std::setw(35) << "Roce psn pkg drop: \t" << offs[1] << std::endl;
+    std::cout << std::setw(35) << "Rx word counter: \t" << offs[2] << std::endl;
+    std::cout << std::setw(35) << "Rx pkg counter: \t" << offs[3] << std::endl;
+    std::cout << std::setw(35) << "Tx word counter: \t" << offs[4] << std::endl;
+    std::cout << std::setw(35) << "Tx pkg counter: \t" << offs[5] << std::endl;
+    std::cout << std::setw(35) << "Arp rx pkg counter: \t" << offs[6] << std::endl;
+    std::cout << std::setw(35) << "Arp tx pkg counter: \t" << offs[7] << std::endl;
+    std::cout << std::setw(35) << "Arp request pkg counter: \t" << offs[8] << std::endl;
+    std::cout << std::setw(35) << "Arp reply pkg counter: \t" << offs[9] << std::endl;
+    std::cout << std::setw(35) << "Icmp rx pkg counter: \t" << offs[10] << std::endl;
+    std::cout << std::setw(35) << "Icmp tx pkg counter: \t" << offs[11] << std::endl;
+    std::cout << std::setw(35) << "Tcp rx pkg counter: \t" << offs[12] << std::endl;
+    std::cout << std::setw(35) << "Tcp tx pkg counter: \t" << offs[13] << std::endl;
+    std::cout << std::setw(35) << "Roce rx pkg counter: \t" << offs[14] << std::endl;
+    std::cout << std::setw(35) << "Roce tx pkg counter: \t" << offs[15] << std::endl;
+    std::cout << std::setw(35) << "Roce data rx word counter: \t" << offs[16] << std::endl;
+    std::cout << std::setw(35) << "Roce data rx pkg counter: \t" << offs[17] << std::endl;
+    std::cout << std::setw(35) << "Roce data tx role word counter: \t" << offs[18] << std::endl;
+    std::cout << std::setw(35) << "Roce data tx role pkg counter: \t" << offs[19] << std::endl;
+    std::cout << std::setw(35) << "Roce data tx host word counter: \t" << offs[20] << std::endl;
+    std::cout << std::setw(35) << "Roce data tx host pkg counter: \t" << offs[21] << std::endl;
+    std::cout << std::setw(35) << "Axis stream down: \t" << offs[22] << std::endl;
+    std::cout << std::setw(35) << "Axis stream down: \t" << offs[23] << std::endl;
+    std::cout << "-----------------------------------------------" << std::endl;
+}
+    // -------------------------------------------------------------------------------
+    // -- Network
+    // -------------------------------------------------------------------------------
+
+#ifdef EN_AVX
+
+    /**
+     * QPN load
+     * @param: qpn - qp number
+     */
+    void fDev::writeQpn(uint32_t qpn) {
+        cnfg_reg[CNFG_RDMA_QPN_REG] = _mm256_set_epi64x(0, 0, 0, qpn);
+        this->qpn = qpn;
+        // The post* calls refuse to run until a QPN has been written
+        qpn_attached = true;
+    }
+
+    /**
+     * RDMA write
+     * @param: l_addr - local virtual address
+     * @param: r_addr - remote virtual address
+     * @param: size - transfer size
+     * @return: 0 (false) when the command was posted, 1 (true) when no QPN is attached
+     */
+    bool fDev::postWrite(uint64_t *l_addr, uint64_t *r_addr, uint32_t size) {
+        if (qpn_attached) {
+            uint64_t offs_0 = (uint64_t)opCode::WRITE & 0x1f;
+            // Low 48 bits: local address; top 16 bits: low 16 bits of the remote address
+            uint64_t offs_1 = (((uint64_t)r_addr & 0xffff) << 48) | ((uint64_t)l_addr & 0xffffffffffff);
+            // Low 32 bits: remote address bits 16..47; high 32 bits: size
+            uint64_t offs_2 = ((uint64_t)size << 32) | (((uint64_t)r_addr >> 16) & 0xffffffff);
+            uint64_t offs_3 = 0;
+
+            postCmd(offs_3, offs_2, offs_1, offs_0);
+        }
+        else {
+            return 1;
+        }
+
+        return 0;
+    }
+
+    /**
+     * RDMA read
+     * @param: l_addr - local virtual address
+     * @param: r_addr - remote virtual address
+     * @param: size - transfer size
+     * @return: 0 (false) when the command was posted, 1 (true) when no QPN is attached
+     */
+    bool fDev::postRead(uint64_t *l_addr, uint64_t *r_addr, uint32_t size) {
+        if (qpn_attached) {
+            uint64_t offs_0 = (uint64_t)opCode::READ & 0x1f;
+            uint64_t offs_1 = (((uint64_t)r_addr & 0xffff) << 48) | ((uint64_t)l_addr & 0xffffffffffff);
+            uint64_t offs_2 = ((uint64_t)size << 32) | (((uint64_t)r_addr >> 16) & 0xffffffff);
+            uint64_t offs_3 = 0;
+
+            postCmd(offs_3, offs_2, offs_1, offs_0);
+        }
+        else {
+            return 1;
+        }
+
+        return 0;
+    }
+
+    /**
+     * RDMA RPC
+     * @param: l_addr - local virtual address
+     * @param: r_addr - remote virtual address
+     * @param: size - transfer size
+     * @param: params - value placed unchanged in the fourth command word
+     * @return: 0 (false) when the command was posted, 1 (true) when no QPN is attached
+     */
+    bool fDev::postRpc(uint64_t *l_addr, uint64_t *r_addr, uint32_t size, uint64_t params) {
+        if (qpn_attached) {
+            uint64_t offs_0 = (uint64_t)opCode::RPC & 0x1f;
+            uint64_t offs_1 = (((uint64_t)r_addr & 0xffff) << 48) | ((uint64_t)l_addr & 0xffffffffffff);
+            uint64_t offs_2 = ((uint64_t)size << 32) | (((uint64_t)r_addr >> 16) & 0xffffffff);
+            uint64_t offs_3 = params;
+
+            postCmd(offs_3, offs_2, offs_1, offs_0);
+        }
+        else {
+            return 1;
+        }
+
+        return 0;
+    }
+
+    /**
+     * Base post: waits for room in the RDMA command FIFO, then issues the
+     * four command words with a single 256-bit store
+     */
+    void fDev::postCmd(uint64_t offs_3, uint64_t offs_2, uint64_t offs_1, uint64_t offs_0) {
+        // Check outstanding
+        while (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            rdma_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_RDMA_STAT_CMD_USED_REG], 0) & 0xffffffff;
+            if (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+
+        cnfg_reg[CNFG_RDMA_POST_REG] = _mm256_set_epi64x(offs_3, offs_2, offs_1, offs_0);
+
+        rdma_cmd_cnt++;
+    }
+
+#else
+
+    // -------------------------------------------------------------------------------
+    // -- Network
+    // -------------------------------------------------------------------------------
+
+    /**
+     * QPN load
+     * @param: qpn - qp number
+     */
+    void fDev::writeQpn(uint32_t qpn) {
+        cnfg_reg[CNFG_RDMA_QPN_REG] = qpn;
+        this->qpn = qpn;
+        // The post* calls refuse to run until a QPN has been written
+        qpn_attached = true;
+    }
+
+    /**
+     * RDMA write
+     * @param: pair - queue pair (its local qpn is placed in the command)
+     * @param: l_addr - local virtual address
+     * @param: r_addr - remote virtual address
+     * @param: size - transfer size
+     * @return: 0 (false) when the command was posted, 1 (true) when no QPN is attached
+     */
+    bool fDev::postWrite(rQPair *pair, uint64_t *l_addr, uint64_t *r_addr, uint32_t size) {
+        if(qpn_attached)
+            postCmd(opCode::WRITE, pair, l_addr, r_addr, size);
+        else
+            return 1;
+
+        return 0;
+    }
+
+    /**
+     * RDMA read
+     * @param: pair - queue pair (its local qpn is placed in the command)
+     * @param: l_addr - local virtual address
+     * @param: r_addr - remote virtual address
+     * @param: size - transfer size
+     * @return: 0 (false) when the command was posted, 1 (true) when no QPN is attached
+     */
+    bool fDev::postRead(rQPair *pair, uint64_t *l_addr, uint64_t *r_addr, uint32_t size) {
+        if(qpn_attached)
+            postCmd(opCode::READ, pair, l_addr, r_addr, size);
+        else
+            return 1;
+
+        return 0;
+    }
+
+    /**
+     * Base post: waits for room in the RDMA command FIFO, then writes the
+     * four post registers
+     * TODO: Change to new config
+     */
+    void fDev::postCmd(opCode op, rQPair *pair, uint64_t *l_addr, uint64_t *r_addr, uint32_t size) {
+    #ifdef VERBOSE_DEBUG
+        std::cout << "Post, queue pair l: " << pair->local.qpn << ", r: " << pair->remote.qpn << std::endl;
+    #endif
+
+        // Check outstanding
+        while (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            rdma_cmd_cnt = cnfg_reg[CNFG_RDMA_STAT_CMD_USED_REG];
+            if (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep((const struct timespec[]){{0, 100L}}, NULL);
+        }
+
+        // Word 0 packs size (from bit 27), local qpn (24 bits from bit 3) and opcode (2 bits)
+        cnfg_reg[CNFG_RDMA_POST_REG_0] = (((uint64_t)size << 27)) | (((uint64_t)pair->local.qpn & 0xffffff) << 3) | (((uint64_t)op & 0x3));
+        cnfg_reg[CNFG_RDMA_POST_REG_1] = (uint64_t)l_addr;
+        cnfg_reg[CNFG_RDMA_POST_REG_2] = (uint64_t)r_addr;
+        cnfg_reg[CNFG_RDMA_POST_REG_3] = 0;
+
+        rdma_cmd_cnt++;
+    }
+
+#endif
+#endif
+
+}
diff --git a/sw/gbm/src/main.cpp b/sw/gbm/src/main.cpp
new file mode 100644
index 00000000..529edc3e
--- /dev/null
+++ b/sw/gbm/src/main.cpp
@@ -0,0 +1,283 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "fDev.hpp"
+
+using namespace std;
+
+void initData(float* data, uint NUM_FEATURES, uint numtuples);
+void initTrees(uint* trees, int numtrees, int numnodes, int depth);
+
+/* Runtime */
+#define TARGET_FPGA_REGION 0
+
+/**
+ * Decision-tree (GBM) inference example: builds synthetic trees and tuples in
+ * huge-page host memory, configures the operator through its CSRs, streams
+ * trees and data to the FPGA region and reads the results back.
+ */
+int main()
+{
+    fDev fd;
+    uint64_t *tMem, *dMem, *oMem;
+    uint64_t n_trees_pages, n_data_pages, n_result_pages;
+    clock_t begin, end;
+
+    //////////////////////////////////////////////////////////////////////////////////////////
+    // Parameters
+    int NUM_TUPLES = 4 * 1024; //atoi(argv[1]);// atoi(argv[1]);
+    int NUM_TREES = 109; //atoi(argv[2]);// atoi(argv[1]);
+    int DEPTH = 5; //atoi(argv[3]);// atoi(argv[1]);
+    int NUM_FEATURES = 25;// atoi(argv[3]);
+    int PAGE_SIZE = 2*1024*1024;
+
+    // Sizes in bytes: one 4-byte result per tuple, 4 bytes per feature value
+    int result_size = 4*NUM_TUPLES;
+    int data_size = NUM_FEATURES*NUM_TUPLES*4;
+
+    // Page counts are rounded up
+    n_data_pages = data_size/PAGE_SIZE + ((data_size%PAGE_SIZE > 0)? 1 : 0);
+    n_result_pages = result_size/PAGE_SIZE + ((result_size%PAGE_SIZE > 0)? 1 : 0);
+
+    // Number of 64-byte output lines, rounded up
+    uint outputNumCLs = result_size/64 + ((result_size%64 > 0)? 1 : 0);
+
+    unsigned char puTrees = NUM_TREES/28 + ((NUM_TREES%28 == 0)? 0 : 1);
+
+    int numnodes = pow(2, DEPTH) - 1;
+    // Words per tree: 2 per inner node, 10 per last-level node, 1 for the size header,
+    // padded up to a multiple of 16 words
+    int tree_size = 2*(pow(2,DEPTH-1) - 1) + 10*pow(2,DEPTH-1) + 1;
+    tree_size = tree_size + ( ((tree_size%16) > 0)? 16 - (tree_size%16) : 0);
+
+    int trees_size = tree_size*NUM_TREES*4; // atoi(argv[1]);
+
+    n_trees_pages = trees_size/PAGE_SIZE + ((trees_size%PAGE_SIZE > 0)? 1 : 0);
+
+    short lastOutLineMask = ((NUM_TUPLES%16) > 0)? 0xFFFF << (NUM_TUPLES%16) : 0x0000;
+
+    //////////////////////////////////////////////////////////////////////////////////////////
+    // Acquire a region
+    if(fd.acquireRegion(TARGET_FPGA_REGION))
+        cout << "Acquired an FPGA region" << endl;
+    else
+        return EXIT_FAILURE;
+
+    // Allocate Trees Memory
+    tMem = fd.getHostMem(n_trees_pages);
+    dMem = fd.getHostMem(n_data_pages);
+    oMem = fd.getHostMem(n_result_pages);
+
+    cout << "Trees memory mapped at: " << tMem << endl;
+    cout << "Data memory mapped at: " << dMem << endl;
+    cout << "Result memory mapped at: " << oMem << endl;
+    //////////////////////////////////////////////////////////////////////////////////////////
+    // initialize trees/data
+    initData(((float*)(dMem)), NUM_FEATURES, NUM_TUPLES);
+
+    initTrees(((uint*)(tMem)), NUM_TREES, numnodes, DEPTH);
+
+    //////////////////////////////////////////////////////////////////////////////////////////
+    // Set parameters
+    fd.setCSR(NUM_FEATURES, 1);
+    fd.setCSR(DEPTH, 2);
+    fd.setCSR(puTrees, 3);
+    fd.setCSR(outputNumCLs, 4);
+    fd.setCSR(lastOutLineMask, 5);
+    fd.setCSR(0x1, 0); // ap_start
+
+    // Push trees to the FPGA
+    fd.readFrom(tMem, trees_size); // Blocking: returns when all trees streamed to the FPGA
+
+    begin = clock();
+    // Stream data into the FPGA
+    fd.readFrom(dMem, data_size, false); // Non blocking: initiate transfer, then start read results
+
+    // read results from the FPGA as they come.
+    fd.writeTo(oMem, result_size); // Blocking: returns when all results are read from the FPGA
+
+    end = clock();
+    cout << dec << "H -> H: " << (end - begin) / (double) CLOCKS_PER_SEC << "s" << endl << endl;
+
+    // Release memory (page counts must match the ones used for allocation)
+    fd.freeHostMem(tMem, n_trees_pages);
+    fd.freeHostMem(dMem, n_data_pages);
+    fd.freeHostMem(oMem, n_result_pages);
+
+    // Release region
+    fd.releaseRegion();
+
+    return EXIT_SUCCESS;
+}
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/**
+ * Storage position of node `nd`, derived from the position of an earlier node:
+ * an odd id is placed right after indexes[nd/2]; an even id is placed after
+ * indexes[nd/2 - 1] plus the pnodes[nd/2 - 1] nodes that follow it.
+ * Must not be called with nd == 0 (the even branch would index -1).
+ */
+uint find_node_index(uint nd, uint* indexes, uint* pnodes)
+{
+    int parent = nd/2;
+
+    if (nd%2 != 0) return indexes[parent] + 1;
+    else return indexes[parent-1] + pnodes[parent-1] + 1;
+
+    return 0;
+}
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/**
+ * Level of node `nd` in a tree with `depth` levels: the largest level L below
+ * `depth` whose first node id, 2^L - 1, is not greater than nd.
+ */
+uint find_depth(uint nd, uint depth)
+{
+    uint currLevel = depth -1;
+
+    // Walk up from the deepest level; level 0 always matches, so the loop terminates
+    while(1)
+    {
+        int nd1 = pow(2, currLevel) - 1;
+
+        if (nd >= nd1) return currLevel;
+        else currLevel = currLevel - 1;
+    }
+    return 0;
+}
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/**
+ * Fill `trees` with `numtrees` synthetic trees of `numnodes` nodes each.
+ * Every tree is written as one word holding its length followed by its node
+ * words, and the next tree starts at the next multiple of 16 words. Inner
+ * nodes take 2 words, last-level nodes take 10. Even-numbered trees use a
+ * split value, odd-numbered trees a small split set.
+ * The caller must provide a `trees` buffer large enough for all trees.
+ */
+void initTrees(uint* trees, int numtrees, int numnodes, int depth)
+{
+    uint node_t;
+    uint curr_tree_offset = 0;
+
+    // nodes_ids / nodes_size are indexed by storage position,
+    // indexes / pnodes are indexed by node id
+    uint* nodes_ids = new uint[numnodes];
+    uint* nodes_size = new uint[numnodes];
+    uint* indexes = new uint[numnodes];
+    uint* pnodes = new uint[numnodes];
+
+    // set root
+    nodes_ids[0] = 0;
+    nodes_size[0] = 2;
+    indexes[0] = 0;
+    pnodes[0] = numnodes/2;
+
+    // Node ids from here on belong to the last level
+    int lastLevelNodeID = numnodes/2;
+
+    for (int i = 1; i < numnodes; ++i)
+    {
+        int idx = find_node_index(i, indexes, pnodes);
+        nodes_ids[idx] = i;
+        nodes_size[idx] = (i < lastLevelNodeID)? 2 : 10;
+        indexes[i] = idx;
+
+        uint ndLevel = find_depth(i, depth);
+        pnodes[i] = pow(2, (depth-1-ndLevel) ) - 1;
+    }
+
+    for(int i = 0; i < numnodes; i++) printf("(%u, %u)", nodes_ids[i], nodes_size[i]);
+    printf("\n");
+
+    delete [] pnodes;
+    delete [] indexes;
+
+    // Scratch buffer for one serialized tree, sized from the per-node word
+    // counts so that deep trees cannot overflow it
+    uint tree_words = 0;
+    for (int i = 0; i < numnodes; ++i) tree_words += nodes_size[i];
+    uint* tree_i = new uint[tree_words];
+
+    //
+    for (int i = 0; i < numtrees; ++i)
+    {
+        // initialize tree i
+        uint node_off = 0;
+        for (int j = 0; j < numnodes; ++j)
+        {
+            // initialize node j;
+            // Node type
+            node_t = 0;
+
+            uint op_type = (nodes_ids[j] >= lastLevelNodeID)? 3 : (i%2 == 0)? 0 : 2;
+            uint left_child = (nodes_ids[j] >= lastLevelNodeID)? 0 : 1;
+            uint right_child = (nodes_ids[j] >= lastLevelNodeID)? 0 : 1;
+            uint findex = nodes_ids[j]%25;
+            uint split_dir = 2;
+
+            uint rc_id = nodes_ids[j]*2 + 2;
+
+            // Words stored between this node and its right child
+            uint rc_off = 0;
+            for (int f = j+1; f < numnodes; ++f)
+            {
+                if (nodes_ids[f] != rc_id) rc_off += nodes_size[f];
+                else break;
+            }
+
+            uint rchild_offset = (nodes_ids[j] >= lastLevelNodeID)? 0 : rc_off;
+
+            // Header word: op type [1:0], left child [2], right child [3],
+            // feature index [11:4], split direction [15:12], right child offset [31:16]
+            node_t = (node_t & 0xFFFFFFFC) | ((op_type << 0) & 0x00000003);
+            node_t = (node_t & 0xFFFFFFFB) | ((left_child << 2) & 0x00000004);
+            node_t = (node_t & 0xFFFFFFF7) | ((right_child << 3) & 0x00000008);
+            node_t = (node_t & 0xFFFFF00F) | ((findex << 4) & 0x00000FF0);
+            node_t = (node_t & 0xFFFF0FFF) | ((split_dir << 12) & 0x0000F000);
+            node_t = (node_t & 0x0000FFFF) | ((rchild_offset << 16) & 0xFFFF0000);
+
+            tree_i[node_off] = node_t;
+            node_off += 1;
+
+            // split value/set
+            float sval = 1.5;
+            if (nodes_ids[j] < lastLevelNodeID)
+            {
+                if (i%2 == 0) // split value
+                {
+                    memcpy( &(tree_i[node_off]), &sval, 4);
+                }
+                else // small split Set
+                {
+                    tree_i[node_off] = 0xAAAAAAAA;
+                }
+                node_off += 1;
+            }
+            else
+            {
+                // large split set offset and length
+                tree_i[node_off] = 0x00000006;
+                node_off += 1;
+                //
+                float val = 2.5;
+                memcpy( &(tree_i[node_off]), &val, 4);
+                val = 0.5;
+                memcpy( &(tree_i[node_off+1]), &val, 4);
+
+                for (int s = 0; s < 6; ++s) // large split set words
+                {
+                    tree_i[node_off + 2 + s] = 0x55555555;
+                }
+
+                node_off += 8;
+            }
+
+        }
+        // trees: length word first, then the node words
+        trees[curr_tree_offset] = node_off;
+        for (uint32_t t = 0; t < node_off; ++t)
+        {
+            trees[curr_tree_offset+1+t] = tree_i[t];
+        }
+
+        curr_tree_offset += node_off + 1;
+
+        printf("Tree Size = %u\n", curr_tree_offset);
+
+        // The next tree starts at the next multiple of 16 words
+        curr_tree_offset = curr_tree_offset + (((curr_tree_offset%16) != 0)? 16 - (curr_tree_offset%16) : 0);
+    }
+
+    //
+    delete [] tree_i;
+    delete [] nodes_ids;
+    delete [] nodes_size;
+
+    printf("initialization done!\n"); fflush(stdout);
+}
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/**
+ * Fill `data` with numtuples rows of NUM_FEATURES floats; the element for
+ * tuple i and feature j is (i+1)/(j+i+1).
+ */
+void initData(float* data, uint NUM_FEATURES, uint numtuples)
+{
+    // Unsigned indices match the unsigned bounds
+    for (uint i = 0; i < numtuples; ++i)
+    {
+        for (uint j = 0; j < NUM_FEATURES; ++j)
+        {
+            data[i*NUM_FEATURES + j] = ((float)(i+1))/((float)(j+i+1));
+        }
+    }
+}
diff --git a/sw/gbm/verify/dtengine.cpp b/sw/gbm/verify/dtengine.cpp
new file mode 100644
index 00000000..ed1024f6
--- /dev/null
+++ b/sw/gbm/verify/dtengine.cpp
@@ -0,0 +1,349 @@
+
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+using namespace std;
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/**
+ * Storage position of node `nd`, derived from the position of an earlier node:
+ * an odd id is placed right after indexes[nd/2]; an even id is placed after
+ * indexes[nd/2 - 1] plus the pnodes[nd/2 - 1] nodes that follow it.
+ * Must not be called with nd == 0 (the even branch would index -1).
+ */
+uint find_node_index(uint nd, uint* indexes, uint* pnodes)
+{
+    int parent = nd/2;
+
+    if (nd%2 != 0) return indexes[parent] + 1;
+    else return indexes[parent-1] + pnodes[parent-1] + 1;
+
+    return 0;
+}
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// +uint find_depth(uint nd, uint depth) +{ + uint currLevel = depth -1; + + while(1) + { + int nd1 = pow(2, currLevel) - 1; + + if (nd >= nd1) return currLevel; + else currLevel = currLevel - 1; + } + return 0; +} +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void initTrees(uint* trees, int numtrees, int numnodes, int depth) +{ + uint node_t; + uint curr_tree_offset = 0; + + uint* nodes_ids = new uint[numnodes]; + uint* nodes_size = new uint[numnodes]; + uint* indexes = new uint[numnodes]; + uint* pnodes = new uint[numnodes]; + + // set root + nodes_ids[0] = 0; + nodes_size[0] = 2; + indexes[0] = 0; + pnodes[0] = numnodes/2; + + int lastLevelNodeID = numnodes/2; + + for (int i = 1; i < numnodes; ++i) + { + int idx = find_node_index(i, indexes, pnodes); + nodes_ids[idx] = i; + nodes_size[idx] = (i < lastLevelNodeID)? 2 : 10; + indexes[i] = idx; + + uint ndLevel = find_depth(i, depth); + pnodes[i] = pow(2, (depth-1-ndLevel) ) - 1; + } + + for(int i = 0; i < numnodes; i++) printf("(%d, %d)", nodes_ids[i], nodes_size[i]); + printf("\n"); + + delete [] pnodes; + delete [] indexes; + + // + for (int i = 0; i < numtrees; ++i) + { + // initialize tree i + uint tree_i[4096]; + uint node_off = 0; + for (int j = 0; j < numnodes; ++j) + { + // initialize node j; + // Node type + node_t = 0; + + uint op_type = (nodes_ids[j] >= lastLevelNodeID)? 
3 : (i%2 == 0)? 0 : 2; + uint left_child = (nodes_ids[j] >= lastLevelNodeID)? 0 : 1; + uint right_child = (nodes_ids[j] >= lastLevelNodeID)? 0 : 1; + uint findex = nodes_ids[j]%25; + uint split_dir = 2; + + uint rc_id = nodes_ids[j]*2 + 2; + + uint rc_off = 0; + for (int f = j+1; f < numnodes; ++f) + { + if (nodes_ids[f] != rc_id) rc_off += nodes_size[f]; + else break; + } + + uint rchild_offset = (nodes_ids[j] >= lastLevelNodeID)? 0 : rc_off; + + node_t = (node_t & 0xFFFFFFFC) | ((op_type << 0) & 0x00000003); + node_t = (node_t & 0xFFFFFFFB) | ((left_child << 2) & 0x00000004); + node_t = (node_t & 0xFFFFFFF7) | ((right_child << 3) & 0x00000008); + node_t = (node_t & 0xFFFFF00F) | ((findex << 4) & 0x00000FF0); + node_t = (node_t & 0xFFFF0FFF) | ((split_dir << 12) & 0x0000F000); + node_t = (node_t & 0x0000FFFF) | ((rchild_offset << 16) & 0xFFFF0000); + + tree_i[node_off] = node_t; + node_off += 1; + + // split value/set + float sval = 1.5; + if (nodes_ids[j] < lastLevelNodeID) + { + if (i%2 == 0) // split value + { + memcpy( &(tree_i[node_off]), &sval, 4); + } + else // small split Set + { + tree_i[node_off] = 0xAAAAAAAA; + } + node_off += 1; + } + else + { + // large split set offset and length + tree_i[node_off] = 0x00000006; + node_off += 1; + // + float val = 2.5; + memcpy( &(tree_i[node_off]), &val, 4); + val = 0.5; + memcpy( &(tree_i[node_off+1]), &val, 4); + + for (int s = 0; s < 6; ++s) // large split set words + { + tree_i[node_off + 2 + s] = 0x55555555; + } + + node_off += 8; + } + + } + // trees + trees[curr_tree_offset] = node_off; + for (uint t = 0; t < node_off; ++t) + { + trees[curr_tree_offset+1+t] = tree_i[t]; + } + + curr_tree_offset += node_off + 1; + + printf("Tree Size = %d\n", curr_tree_offset); + + curr_tree_offset = curr_tree_offset + (((curr_tree_offset%16) != 0)? 
16 - (curr_tree_offset%16) : 0); + } + + // + delete [] nodes_ids; + delete [] nodes_size; + + printf("initialization done!\n"); fflush(stdout); +} +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void initData(float* data, uint NUM_FEATURES, uint numtuples) +{ + for (int i = 0; i < numtuples; ++i) + { + for (int j = 0; j < NUM_FEATURES; ++j) + { + data[i*NUM_FEATURES + j] = ((float)(i+1))/((float)(j+i+1)); + } + } +} +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +void dtinfer_sw(uint* trees, void* data, void* res, uint numtuples, uint numtrees, uint num_features) +{ + for(int d = 0; d < numtuples; ++d) + { + //printf("process tuple: %d\n", d); + float tuple_res = 0.0; + uint curr_tree_offset = 0; + for (int t = 0; t < numtrees; ++t) + { + // + bool tree_end = false; + uint curr_node_offset = 0; + uint next_node_offset; + + curr_tree_offset += 1; + + // + while( !tree_end ) + { + // + uint findex = (trees[curr_tree_offset + curr_node_offset] >> 4) & 0x000000FF; + uint optype = trees[curr_tree_offset + curr_node_offset] & 0x00000003; + uint split_dir = (trees[curr_tree_offset + curr_node_offset] >> 12) & 0x0000000F; + bool has_rchild = ((trees[curr_tree_offset + 
/**
 * Software reference for decision-tree ensemble inference.
 *
 * For every tuple, each tree in `trees` is walked from its root until a
 * missing child is reached; the float stored at that position is added to the
 * tuple's score, and the summed score is written to `res`.
 *
 * Tree layout as read here: one length word, then the node words; the next
 * tree starts at the following multiple of 16 words. Node word fields:
 *   [1:0] op type, [2] flag read as "has right child", [3] flag read as
 *   "has left child", [11:4] feature index, [31:16] right-child offset.
 * NOTE(review): initTrees writes bit 2 as the left-child flag and bit 3 as the
 * right-child flag, i.e. the opposite naming. The generated trees always set
 * both bits identically, so the mismatch is not observable with them -
 * confirm the intended assignment against the hardware format.
 *
 * Op types: 0 compares the feature as a float against the split value,
 * 2 tests the feature (as an integer) against a 32-bit split set,
 * 3 tests it against a split set described by a start/length word.
 * Any other op type always takes the left branch.
 *
 * @param trees        packed trees
 * @param data         numtuples * num_features 32-bit feature words
 * @param res          output, numtuples floats
 * @param numtuples    number of tuples to score
 * @param numtrees     number of trees in `trees`
 * @param num_features feature words per tuple
 */
void dtinfer_sw(uint* trees, void* data, void* res, uint numtuples, uint numtrees, uint num_features)
{
    // Features are loaded with memcpy so the same 32-bit word can be viewed as
    // float or as unsigned without aliasing the buffer through two pointer types.
    const unsigned char* feature_bytes = static_cast<const unsigned char*>(data);
    float* results = static_cast<float*>(res);

    for (uint d = 0; d < numtuples; ++d)
    {
        float tuple_res = 0.0f;
        uint  curr_tree_offset = 0;

        for (uint t = 0; t < numtrees; ++t)
        {
            curr_tree_offset += 1;  // step over the tree's length word
            const uint* tree = trees + curr_tree_offset;

            uint node     = 0;
            bool tree_end = false;

            while (!tree_end)
            {
                const uint word = tree[node];

                const uint optype        = word & 0x00000003;
                const bool has_rchild    = ((word >> 2) & 0x00000001) == 1;
                const bool has_lchild    = ((word >> 3) & 0x00000001) == 1;
                const uint findex        = (word >> 4) & 0x000000FF;
                const uint rchild_offset = (word >> 16) & 0x0000FFFF;

                // One extra word is skipped for every missing child.
                const uint missing      = (has_lchild ? 0u : 1u) + (has_rchild ? 0u : 1u);
                const uint left_offset  = node + 2 + missing;
                const uint right_offset = left_offset + rchild_offset;

                // size_t keeps the tuple offset from wrapping at 32 bits.
                const unsigned char* feature =
                    feature_bytes + (static_cast<size_t>(d) * num_features + findex) * 4;

                bool go_right = false;

                if (optype == 0)
                {
                    float feature_f;
                    float split_val;
                    memcpy(&feature_f, feature, 4);
                    memcpy(&split_val, &tree[node + 1], 4);

                    go_right = feature_f < split_val;
                }
                else if (optype == 2)
                {
                    uint feature_i;
                    memcpy(&feature_i, feature, 4);
                    const uint split_set = tree[node + 1];

                    // Only values 0..31 can be members of a 32-bit set.
                    go_right = (feature_i <= 31) && (((split_set >> feature_i) & 0x00000001) != 0);
                }
                else if (optype == 3)
                {
                    uint feature_i;
                    memcpy(&feature_i, feature, 4);

                    const uint splitset_start = tree[node + 1] & 0x0000FFFF;
                    const uint splitset_leng  = (tree[node + 1] >> 16) & 0x0000FFFF;
                    const uint split_set      = tree[node + 2 + missing];

                    const bool in_range = (feature_i >= splitset_start)
                                       && (feature_i < splitset_start + splitset_leng);

                    // The expression is kept as in the original model; the extra
                    // feature_i <= 31 test only excludes shift counts that would be
                    // undefined. NOTE(review): subtracting the start from the set word
                    // rather than from the shift count looks unusual - confirm.
                    go_right = in_range && (feature_i <= 31)
                            && ((((split_set - splitset_start) >> feature_i) & 0x00000001) != 0);
                }

                const bool child_present = go_right ? has_rchild : has_lchild;
                if (!child_present)
                {
                    // Result slot: the second one only when going right while the
                    // "has left child" flag is set.
                    const uint res_offset = (go_right && has_lchild) ? 1u : 0u;

                    float tree_res;
                    memcpy(&tree_res, &tree[node + 2 + res_offset], 4);
                    tuple_res += tree_res;
                    tree_end = true;
                }

                node = go_right ? right_offset : left_offset;
            }

            // Advance past this tree and round up to the next 16-word boundary.
            curr_tree_offset += trees[curr_tree_offset - 1];
            curr_tree_offset = (curr_tree_offset + 15u) / 16u * 16u;
        }

        results[d] = tuple_res;
    }
}
16 - (tree_size%16) : 0); + + int trees_size = tree_size*NUM_TREES*4; // atoi(argv[1]); + + ////////////////////////////////////////////////////////////////////////////////////////// + // Allocate memory + uint* trees = reinterpret_cast(malloc( trees_size )); + float* data = reinterpret_cast( malloc(data_size )); + void* res = malloc( result_size ); + + ////////////////////////////////////////////////////////////////////////////////////////// + // initialize trees/data + initData(data, NUM_FEATURES, NUM_TUPLES); + + initTrees(trees, NUM_TREES, numnodes, DEPTH); + + ////////////////////////////////////////////////////////////////////////////////////////// + // run software + dtinfer_sw(trees, data, res, NUM_TUPLES, 109, NUM_FEATURES); + + ////////////////////////////////////////////////////////////////////////////////////////// + // Printout results + printf("Obtained Results:\n"); + for (int i = 1; i <= NUM_TUPLES; ++i) + { + printf(" %.5f ", (reinterpret_cast(res))[i-1] ); + if ( (i%16) == 0 ) printf("\n"); + + } + + return 1; +} diff --git a/sw/gbm/verify/infer b/sw/gbm/verify/infer new file mode 100755 index 0000000000000000000000000000000000000000..1f9798fae711a5886034fb218d4e44ad9742cab5 GIT binary patch literal 17568 zcmeHPe{@vUoxg7;fq<9^mR9+p%(}7#EMo#fgIUW2^6=pjO(AJh6o(-*A%pqV%nU?U zG5AAV}w5u+M>M{Wi6D{kBR(ss7yF1IF-3g&JO4YPdvY+q$ zF?lnYMB6=Q|H?gQ-uHfg-tYb1AMf6qck{5md80$qm`V=z6-HQ1k(ZOild;raa#YgF z*(~N|i`gPJ1C&DiyqrT&i4@!-%bP4N zCDMNdWp&vt&7qxZZoQ>Bu%bEC8tq)sS-xh)np-{LHqT04Z?aFacidISOHy%k10$d3 z;YWRd_{V22PL^#r9eSbl%~=by_1k9s?zIafuWV%MrIm0RSZvw#2UXV~bV7OylxrqX zg-v%JdN*kK=r4hu&(5An=w~OPQxx*qIX4M?XcGEj(B1gurV0?;xkTW9k6*rWe=-UE z!Aa=PPeT6%=yTZu)>XO#IgAtj!~^{!90@esd^7clHO%MR)za4L3rGAN5ucCwHq}=9 z0>O^pu2476=zSiJgUvsc^ zSEPygwzc|O@C^8C_K*iHvhX|i__oNRHZ=Rg;b54xx9uf~1|iYd*c=TvvG!;r3|HZ0 z!~MR7ru%)3{!lYf8=Cwbti2=D8fj#xR$DZ}TKvt;Z4HnnE3#rO{!lBsWAmo<8+iW5!KknIt@7NO5xNzj5<|>^@!-JNa7erm^A1r2SHozEL6*jZ z8^7p+&@7q>KS7?7200Co_@(%DzVZB3G}2~*NVLeZ@1bd&Y#s?Ak^HlO#G^DX6uO#b 
zI=7S;_8l4fqqpqfax@l&M;y(xJI%?;7IChV{b45ZvPdE`w~emmE}}2B(e3klsg0hQ z2OwW=qf;3wd2RGeUj);z(Gj>zs=lFWUDRe_SIDQ-9 zvk0%@_)5ZO6Yk~skH6?&^Nsrq^PCYoKT=b>DVeCjo6AV{rxp@O7R~pv z^j;|bw$Zh?im_vlA#ioZ3YtHR*l4j~{&LU4V`PfLHFx_7VvMCAe1O~Eeu|>h=u!B; z5WmiNaoz$FB^SE_q;f-nWd);ZO~$|gf?6zSp6x7(Oq-dkgO6L)gbCShBja^Z4fHk zPf>B`SZ)+)M$?v_j-@qIAGP#R1g-O`M%R0Z4-D7x#PEE>JZZdf0ZtgRPAc-r4_x&L zgI5IJcpF_y2MkkBec&q5Q|XI;7#kxBu);pPI6iRQI{>`@1J{=lyhfMT>K8i-QPl8^ zSj(t3{Own{J6nxME*|ru)y!AwPmGP3FV@ntw}zhGoAC@7R>6{`C=g|(%M!_WGl&!o zD^5a)&T+yRODs0l4(|PuK@l;N>8s$7Va5}X$uGHTxG+6~grvg1U*-w}Q!2boLQ>%e zLSb-Hg$Gf>Gd6`zq3{m5ZE??%+;bpQYLZWoK**xt2+z%v{TCK%gKvzYQLH$rn$hXLsJ?u7|-VGh&ReKOvsPh)b<^l5`SwKFE7J@n)iihKl`)S4F0 ziIDH#BCss4MW6;PG$H8_Z;l+^GRzpT=9v>snE&`-OS^xMdU0WG`0xpeW%_oIjMys_ z0Tyw|UP0adfM8Hu1w%A&GJQW7$#`eROUhNmX|%bzM=|Xu;}1$+GTxPuqMYhJALC__ zj33V8Wz{SJ7R^tn8TH1sC&LRZULv2&qBD7IhUchGLGKOw;o+-Q= zZ=)iMYhjdi0c+3Y2*5yTe)6zBd_!IBXSuWXU7^DYvgK=-HSxc)hj%?82hC0F~;m5g(!3tPS9 zyLl*{0*LWmtzE)fX*QN=mt1q>0h0L)9WIPfMT10i-W!Z=ZqVwbGK~In4nEnDG1DXdjrxq!pCSt4^%8$*U%^nzsnzm=3BzjZIeA?5@$IlKE!FC!vsx zAA}+`Y3vos6+Zk$tCuoDlU4){Ga)^$LyB<)ZFo?4R}|wh(uO|9ENJn!?)W++UJpY& z02B?=k&O2MQehU)Rq$|xZ?}4zd5Og3YY@!MnZ?Yw5n@{+cBUi7Eoj3picpK9g);X- z5HLZ0Gc!Ss{Z_dcOS!OYnkQs{9H+g-vXkbZ?A{tT5#tT34Sn=`)FSo@&0Au!q#Vtn zU-^(T1~LrF5n~A$Cmun;>AyvpSfEF)cVlIeOfLeX$H0;*V5}1B6|C9+hh!c(P3xO` z*p~7^vl|yJ!*QIrr5gC z*h!5RW|v&cPg>QNLC{_kq*p%XGy*<)}M=l;kQCM(wj!~sv zq~*I|_BH{S-2ptS%s4IKtIY0u0qhH~Q7pjpgTm2+6>$=F#m?dFAzYcO_aya0#7^pc zLaMKV+j7Oe52-Wy0dSroPE0?fb?raET=84Lu<8z3`cv5zjdS;Cx!a#ESVEo4)DKTB zh*`?ipP5>wKRpNi!qksUEruB_o^bUh9@iiJG{GV%_}J8f{pk`@f39o)V=OY0FIk5R zkJD0(SG;Tgqd8^nF7 z27CE4l$nU_e|hoV1r`Y>%>JKUFbBo%XXY`nl;tlba^@!CjMEigfZVtw`?Ilvo2R_yRB{c7u`q@tvPSYCulGwh7VJ z5N}y^XDxkD8?4Yz@A;yopRUl)b}Y8^w{F(YQbV7fDo|bf<1B)`)(gWxcC2R z40~Bqe`+X6uOt1CSm%$wOERI;bOiX2cn{1Kxd|@;@bx}z=ss@hVAgtzWkZp=$JaGpKd|?lKKE0pd|IP^8qY9 zfu)kE4_f+JOzr}C3ys`evkp^;ak77OhS?ANZ~8ARK&(^imJP-e4`q!4Z2+xlRi8fo z{hKy@53AM--37DzM+?l8=0N}GVtUb;XQpPK>@MhqGt5NhZOSa;3tK*iX(Ny8KjGVb 
z*K&Pe!dAbJw)%Z(?Kk7wqJP99II}I{Tm5&@v(Y%x%Ea>mY2@W!j~k&=DP;s_27@?f zz;A9{-=~=BA%A+(-?W)p%}sp+#}4&6m2~wSPNU}Gh)6@nje0Q4qT0i zG1?DJ>0OI)L*6<43lPs zo_JUXM5*vm9tMqyK8$&PTjInKS}2ip`sc$mcwXkCBRX4bB|iWy25ZCr#ARN^@l_RL zm0z#?TBSKwSyx%RxqHc#xo#Ymb*~_}b(8tgCiCsh%jhVn$msurGkrHq81Y|6+-1K| z=WCnI3!BZ4s$gMk-n&NZlxD0Qj$WVx%zL(1-dnl7^1e#nsh;ukxJ#!{OR^`AIO&|3 z($Xb?FS(Zlma*E7V9>oav@hts9r$b(Y7Ip~{^rm=eJH$~0Ds2r-WkD3dNAPL z5)4P1BjMYa+r7lIy3y_C#xjqQbvQ4=&+P>XP^Fu^+Cpc=y+CP~edNN}*b%@^RId}T z6>%Q~{13npz-Dy362$eMkH*Hz0sj-Q9LjZQlh_KQPTEpVOnR2 zws6kOq8@E#3DFnfH;OWslYmV5e0``~b+m1pm8P{BLtUm2&{UTGaVHI^9+JM$G9v zSHxvVjmmirzlB%_(@G&D=XfYn&RLKj0)Golipq|&x{w4&{~~@zu_XT)HGPKvx`X&s zhdGF06Apxe$g}muCEsA^yyxb)E(Msay;W_7W*qqw}XxU{l( z>H1T4lkjZZ@%jb1Wtc`rIc`Z@f7Dg1eZ%k@h4WEu9NWd z5-Q+S8GJrQNu41SNqu!*qrNS?mP_Hgz!_eiQ}m@0SALgDIr?CZk}Cg-JhnMf=;d@A zCGle%zQYB>cIAzmo7H31<{}xv4Km_!knc zkubNMJ2q^%&Aqg4XS6jEb+7iU_LQzDTN&kO*+XS(Jf*iT6J+EvDyGI^Hhm83lI@aBznXdTw1<-wXYPGu%e$Bw*V*(EwnL8hZ2H{HeT8iL zHLU!4ubss6PWHLXJYi?zYh|Y9hip1VvuB5$#Pd$}h0Og7JJZRoW$OFoY&u4#9RJz$ z`K&!Jef&H3YW8f)x5ox({Wc##`V31sud zfh6+Tr!!}|X`t@Ga1@XK!cU`FHmAKlIFjQ+y;s7>Tw@zw%Vh;NLf$R&c2GA7J;d$XB}M+PC!u>eed752&Lr|r zPeOll68f(up%=>fR?YTu2l?L|kh@mWUzGip|E&V_rJ%dHWcH&9^trfMpyrtZ$=*7N zd<*C$dD?3q==s{qBKwZ{nR^VX^e4G|G21Bpy_$5Sos(0BWk_}_E@libgV@0~4No{MHb8ag83NVKuhLsw?*+_1&BdDGTf z+yolG1mugf_!{UM5M99uwE1>5x9#*d`vQ@+jc^s{eK<-6B}D@-{?=u(s<$=gTrMpKU5r`)dcdb4gb ziTh8vCUP!A<*>J{rGU_%fWtMrL!l6^YL%6u#R9ks)*flvz57!xhTV;OR)J8fFB%Ra zm~@A1l6ztSf5b2DWYI;h94~yhZl;=bTiI$}ersDGh}H@OJ9pRY-bzgu2)Fr~{H+1H zktXhVQ8SSLz;}kj(tPf1vGKBQ<+Te$LamL#4qte0&F-2#TT6Yrsh0ePniQ37;@ehE z#;f3YO(V$!sS@(G*|=4@RmQKGi7RW_4CVn}{PSZj%-_7&>hvf8&wHkVa-f`5)5?G=^2+JC5f z!EW&ENxk0`tqd8vzS^fmq`{k%poF>lRiNW@23p4Or1mp?Ir@c?ulBJLWp^P%dugSg zaCm{yzMBI^-$hWO{WxWnK2~HreGN~%5;FQ~e^f6WRr_@zEo`Vf!Ta!|y|OA_?W4XW z9llWtEB}?8g7J4ff_03n5Q%aROKq_(>eNTA6hF1o;qhy^0Heg zl>I>JtM*gt$Q@F@Gb1T7x&HqsM_=tf)cyQ>bM$k|Kar!a_P^!V3&p1>xVDsA{;S|& zm&f}iiK+86bzeVMJ{Nx*Ixd^O+E0(JAZz3OO<)u8{|19xq_M%1+IM>dnp<8j{ws)4 
z-&OkRyh;5(1@(UuRR31`svq=0*IvHb$B*7Jb@|H9dpY`PvKXPA)2bcT6cGSMtvtLPxY7L$}HCJBD*R>5gC%Dq~KiSRoR>&AFo%4 w41J4bPdO;JqsES1?wBn9sGLV`0F# +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tsc_x86.h" + +constexpr auto kCalibrate = false; +constexpr auto kCyclesRequired = 1e9; +constexpr auto kNumRunsDist = 1000; +constexpr auto kNumRunsDef = 100; + +using namespace std::chrono; + +/** + * Exec times [ns] + */ +class Bench { + std::vector times; + double avg_time = 0.0; + int num_runs = 0; + int num_runs_def = 0; + + void sortBench() { std::sort(times.begin(), times.end()); } + +public: + Bench(int num_runs = kNumRunsDef) { this->num_runs_def = num_runs; } + + // Number of runs for the average + inline int getNumRuns() { return num_runs; } + inline void setNumRuns(uint32_t n_runs) { num_runs = n_runs; } + + // Average run time + inline double getAvg() { return avg_time; } + + // Statistics + inline double getMin() { if(!times.empty()) return times[0]; else return 0; } + inline double getMax() { if(!times.empty()) return times[times.size()-1]; else return 0; } + inline double getP25() { if(!times.empty()) return times[(times.size()/4)-1]; else return 0; } + inline double getP50() { if(!times.empty()) return times[(times.size()/2)-1]; else return 0; } + inline double getP75() { if(!times.empty()) return times[((times.size()*3)/4)-1]; else return 0; } + inline double getP95() { if(!times.empty()) return times[((times.size()*95)/100)-1]; else return 0; } + inline double getP99() { if(!times.empty()) return times[((times.size()*99)/100)-1]; else return 0; } + + // Print results + void printOut() { + std::ios_base::fmtflags f(std::cout.flags()); + + std::cout << "Average time: " << getAvg() << " ns" << std::endl; + std::cout << "Max time: " << getMax() << " ns" << std::endl; + std::cout << "Min time: " << getMin() << " ns" << std::endl; + std::cout << "Median: " << getP50() << " ns" << std::endl; + std::cout << "25th: 
" << getP25() << " ns" << std::endl; + std::cout << "75th: " << getP75() << " ns" << std::endl; + std::cout << "95th: " << getP95() << " ns" << std::endl; + std::cout << "99th: " << getP99() << " ns" << std::endl; + + std::cout.flags( f ); + } + + /** + * Measure the function execution + */ + template + void runtime(Func const &func, Args... args) { + times.clear(); + + // Warm-up + if (kCalibrate) { + num_runs = 1; + while (num_runs < (1 << 14)) { + const auto start = start_tsc(); + for (int i = 0; i < num_runs; ++i) { + func(args...); + } + const auto cycles = stop_tsc(start); + + if (cycles >= kCyclesRequired) + break; + + num_runs *= 2; + } + } else { + num_runs = num_runs_def; + } + + std::cout <<"N runs: " << num_runs << std::endl; + + // Average time + auto begin_time = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < num_runs; ++i) { + func(args...); + } + auto end_time = std::chrono::high_resolution_clock::now(); + + double time = std::chrono::duration_cast(end_time - begin_time).count(); + avg_time = time / num_runs; + /* + for (int i = 0; i < kNumRunsDist; ++i) { + begin_time = std::chrono::high_resolution_clock::now(); + func(args...); + end_time = std::chrono::high_resolution_clock::now(); + + time = std::chrono::duration_cast(end_time - begin_time).count(); + times.emplace_back(time); + } + */ + //sortBench(); + //printOut(); + } + +}; diff --git a/sw/rdma/include/fDefs.hpp b/sw/rdma/include/fDefs.hpp new file mode 100644 index 00000000..15dee5a8 --- /dev/null +++ b/sw/rdma/include/fDefs.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include + +/* FLAGS */ +// SET ACCORDING TO THE BITSTREAM +#define EN_AVX +#define EN_DDR +#define EN_RDMA + +/* Sleep */ +#define POLL_SLEEP_NS 100 + +/* Large pages */ +#define LARGE_PAGE_SIZE (2 * 1024 * 1024) +#define LARGE_PAGE_SHIFT 21UL +#define PAGE_SIZE 4 * 1024 +#define PAGE_SHIFT 12UL + +/* Clock */ +#define CLK_NS 4 + +/* Command FIFO depth */ +static const uint32_t cmd_fifo_depth = 
namespace fpga {

/* Command FIFO depth */
static const uint32_t cmd_fifo_depth = 64;
static const uint32_t cmd_fifo_thr = 10;

/**
 * Fpga device region
 *
 * User-space handle for one FPGA region. The public templates accept any
 * pointer type and forward to the private uint64_t* implementations.
 */
class fDev {

    /* Fpga device */
    // NOTE(review): the default of 0 is a valid descriptor number - confirm
    // nothing closes it before acquireRegion() has run.
    int32_t fd = 0;

    /* Used markers */
    uint32_t rd_cmd_cnt = 0;
    uint32_t wr_cmd_cnt = 0;
#ifdef EN_RDMA
    uint32_t rdma_cmd_cnt = 0;
#endif

    /* Mmapped regions */
#ifdef EN_AVX
    __m256i *cnfg_reg = nullptr;
#else
    uint64_t *cnfg_reg = nullptr;
#endif
    uint64_t *ctrl_reg = nullptr;

    /* Mapped large pages hash table: aligned pointer handed out -> pointer returned by mmap */
    std::unordered_map<uint64_t*, uint64_t*> mapped_large;

    /* Utility */
    bool mmapFpga();
    bool munmapFpga();

    /* Send to controller */
    void postCmd(uint64_t offs_3, uint64_t offs_2, uint64_t offs_1, uint64_t offs_0);

    /* Check busy */
    bool checkBusyRead();
    bool checkBusyWrite();

    /* Check ready */
    bool checkReadyRead();
    bool checkReadyWrite();

    /* Memory */
    uint64_t* _getHostMem(uint32_t n_pages);
    void _freeHostMem(uint64_t* vaddr, uint32_t n_pages);
    void _userMap(uint64_t *vaddr, uint32_t len);
    void _userUnmap(uint64_t *vaddr, uint32_t len);

    /* Data movement (note: these default to poll = true, the public wrappers to poll = false) */
    void _read(uint64_t *vaddr, uint32_t len, bool stream = true, bool clr_stat = true, bool poll = true);
    void _write(uint64_t *vaddr, uint32_t len, bool stream = true, bool clr_stat = true, bool poll = true);
    void _transfer(uint64_t *vaddr_src, uint64_t* vaddr_dst, uint32_t len_src, uint32_t len_dst, bool stream = true, bool clr_stat = true, bool poll = true);

    void _offload(uint64_t *vaddr, uint32_t len, bool poll = true);
    void _sync(uint64_t *vaddr, uint32_t len, bool poll = true);


public:

    fDev() {}
    ~fDev() {}

    /**
     * Obtain and release FPGA regions
     */

    // Acquire an FPGA region with target ID
    bool acquireRegion(uint32_t rNum);
    // Release an acquired FPGA region
    void releaseRegion();

    /**
     * Control bus
     */

    // Control status bus, AXI Lite
    inline void setCSR(uint64_t val, uint32_t offs) { ctrl_reg[offs] = val; }
    inline uint64_t getCSR(uint32_t offs) { return ctrl_reg[offs]; }

    /**
     * Explicit buffer management
     * @param n_pages - number of 2MB pages to be allocated
     */

    // Obtain host memory - pages 2M
    template <typename _Vaddr>
    _Vaddr* getHostMem(uint32_t n_pages) {
        return (_Vaddr*) _getHostMem(n_pages);
    }

    // Free host memory
    template <typename _Vaddr>
    void freeHostMem(_Vaddr* vaddr, uint32_t n_pages) {
        _freeHostMem((uint64_t*)vaddr, n_pages);
    }

    // FPGA user space range mapping.
    // Takes _Vaddr* (was uint64_t*) so the template argument can be deduced,
    // in line with userUnmap(); explicit userMap<uint64_t>(p, len) still works.
    template <typename _Vaddr>
    void userMap(_Vaddr *vaddr, uint32_t len) {
        _userMap((uint64_t*)vaddr, len);
    }

    // FPGA user space range unmapping (auto on release)
    template <typename _Vaddr>
    void userUnmap(_Vaddr *vaddr, uint32_t len) {
        _userUnmap((uint64_t*)vaddr, len);
    }

    // Obtain PR memory - pages 2M
    uint64_t* getPrMem(uint64_t n_pages);
    // Free PR memory
    void freePrMem(uint64_t* vaddr, uint64_t n_pages);

    /**
     * Bulk transfers
     * @param vaddr - data pointer
     * @param len - transfer length
     * @param poll - blocking vs non-blocking
     */

    template <typename _Vaddr>
    void read(_Vaddr *vaddr, uint32_t len, bool stream = true, bool clr_stat = true, bool poll = false) {
        _read((uint64_t*)vaddr, len, stream, clr_stat, poll);
    }

    template <typename _Vaddr>
    void write(_Vaddr *vaddr, uint32_t len, bool stream = true, bool clr_stat = true, bool poll = false) {
        _write((uint64_t*)vaddr, len, stream, clr_stat, poll);
    }

    template <typename _Vaddr>
    void transfer(_Vaddr *vaddr_src, _Vaddr *vaddr_dst, uint32_t len_src, uint32_t len_dst, bool stream = true, bool clr_stat = true, bool poll = false) {
        _transfer((uint64_t*)vaddr_src, (uint64_t*)vaddr_dst, len_src, len_dst, stream, clr_stat, poll);
    }

#ifdef EN_DDR
    // Sync operations (pointer type deducible, see userMap)
    template <typename _Vaddr>
    void sync(_Vaddr *vaddr, uint32_t len, bool poll = true) {
        _sync((uint64_t*)vaddr, len, poll);
    }

    template <typename _Vaddr>
    void offload(_Vaddr *vaddr, uint32_t len, bool poll = true) {
        _offload((uint64_t*)vaddr, len, poll);
    }
#endif

    /**
     * Check for completion
     */

    // Returns the number of completed reads
    uint32_t checkCompletedRead();
    // Returns the number of completed writes
    uint32_t checkCompletedWrite();
    // Clear all status
    void clearCompleted();

    // Timers
    void setTimerStopAt(uint64_t tmr_stop_at);
    uint64_t getReadTimer();
    uint64_t getWriteTimer();

    // Debug
    void printDebugXDMA();

    /**
     * PR
     */
    void reconfigure(uint64_t* vaddr, uint64_t len);

    /**
     * Roce operations
     */

#ifdef EN_RDMA
    // ARP lookup
    bool doArpLookup();
    // Write initial context
    void writeContext(fQPair *pair);
    // Write connection
    void writeConnection(fQPair *pair, uint32_t port);

    // RDMA ops
    bool postWrite(fQPair *pair, uint64_t l_offs, uint64_t r_offs, uint32_t size);
    bool postRead(fQPair *pair, uint64_t l_offs, uint64_t r_offs, uint32_t size);
    bool postFarview(fQPair *pair, uint64_t l_offs, uint64_t r_offs, uint32_t size, uint64_t params);
    bool postFarviewBase(fQPair *pair, uint64_t params_0, uint64_t params_1, uint64_t params_2);
#endif
};

} /* namespace fpga */
namespace fpga {

#define MSG_LEN 82

/**
 * One side of a queue pair: queue numbers, the buffer it exposes, the region
 * it belongs to and its global id.
 *
 * All numeric members start at 0 and gid is zero-filled, so a freshly
 * constructed object never carries indeterminate values.
 */
class fQ {
public:
    // Queue
    uint32_t qpn = 0;
    uint32_t psn = 0;
    uint32_t rkey = 0;

    // Buffer
    uint64_t vaddr = 0;
    uint32_t size = 0;

    // Node
    uint32_t region = 0;

    // Global ID
    char gid[33];

    //
    fQ() { memset(gid, 0, sizeof(gid)); }

    std::string encode();
    void decode (char *buf, size_t len);

    uint32_t gidToUint(int idx);
    void uintToGid(int idx, uint32_t ip_addr);

    void print(const char *name);

    // Value of MSG_LEN
    static uint32_t getLength() { return MSG_LEN; }
};

/** Local and remote side of one connection. */
struct fQPair {
    fQ local;
    fQ remote;
};

} /* namespace fpga */
n_regions, const char *mstr_ip_addr); + ~fView(); + + void closeConnections(); + + /** + * Window management + */ + + uint64_t* allocWindow(uint32_t node_id, uint32_t qpair_id, uint64_t n_pages); + void freeWindow(uint32_t node_id, uint32_t qpair_id); + + /** + * RDMA operations base + */ + + void writeRemote(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t size); + void readRemote(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t size); + void farviewRemote(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t size, uint64_t params); + void farviewRemoteBase(uint32_t node_id, uint32_t qpair_id, uint64_t params_0, uint64_t params_1, uint64_t params_2); + + /** + * RDMA install operator + */ + //void installOperator(); + + /** + * Added + */ + void farviewStride(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t dwidth, uint32_t stride, uint32_t num_elem); + void farviewRegexConfigLoad(uint32_t node_id, uint32_t qpair_id, unsigned char* config_bytes); + void farviewRegexRead(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t size); + + // Poll + uint32_t pollRemoteWrite(uint32_t node_id, uint32_t qpair_id); + uint32_t pollLocalRead(uint32_t node_id, uint32_t qpair_id); + + // Sync + int32_t waitOnCloseRemote(uint32_t node_id); + int32_t waitOnReplyRemote(uint32_t node_id); + int32_t replyRemote(uint32_t node_id, uint32_t ack); + int32_t syncRemote(uint32_t node_id); + +}; + +} diff --git a/sw/rdma/include/tsc_x86.h b/sw/rdma/include/tsc_x86.h new file mode 100644 index 00000000..a0e30822 --- /dev/null +++ b/sw/rdma/include/tsc_x86.h @@ -0,0 +1,89 @@ +#pragma once + +// ================= GNU C and possibly other UNIX compilers ================= +#ifndef _WIN32 + +#if defined(__GNUC__) || defined(__linux__) +#define VOLATILE __volatile__ +#define ASM __asm__ +#else +/* if we're neither compiling with gcc 
or under linux, we can hope + * the following lines work, they probably won't */ +#define ASM asm +#define VOLATILE +#endif + +#define myInt64 unsigned long long +#define INT32 unsigned int + +// ================================== WIN32 ================================== +#else + +#define myInt64 signed __int64 +#define INT32 unsigned __int32 + +#endif + +/* This is the RDTSC timer. + * RDTSC is an instruction on several Intel and compatible CPUs that Reads the + * Time Stamp Counter. The Intel manuals contain more information. + */ + +#define COUNTER_LO(a) ((a).int32.lo) +#define COUNTER_HI(a) ((a).int32.hi) +#define COUNTER_VAL(a) ((a).int64) + +#define COUNTER(a) ((unsigned long long)COUNTER_VAL(a)) + +#define COUNTER_DIFF(a, b) (COUNTER(a) - COUNTER(b)) + +// ================= GNU C and possibly other UNIX compilers ================= +#ifndef _WIN32 + +typedef union { + myInt64 int64; + struct { + INT32 lo, hi; + } int32; +} tsc_counter; + +#define RDTSC(cpu_c) \ + ASM VOLATILE("rdtsc" : "=a"((cpu_c).int32.lo), "=d"((cpu_c).int32.hi)) +#define CPUID() ASM VOLATILE("cpuid" : : "a"(0) : "bx", "cx", "dx") + +// ================================== WIN32 ================================== +#else + +typedef union { + myInt64 int64; + struct { + INT32 lo, hi; + } int32; +} tsc_counter; + +#define RDTSC(cpu_c) \ + { __asm rdtsc __asm mov(cpu_c).int32.lo, eax __asm mov(cpu_c).int32.hi, edx } + +#define CPUID() \ + { __asm mov eax, 0 __asm cpuid } + +#endif + +// static void init_tsc() { +// ; // no need to initialize anything for x86 +// } + +static myInt64 start_tsc(void) { + tsc_counter start; + CPUID(); + RDTSC(start); + return COUNTER_VAL(start); +} + +static myInt64 stop_tsc(myInt64 start) { + tsc_counter end; + RDTSC(end); + CPUID(); + return COUNTER_VAL(end) - start; +} + diff --git a/sw/rdma/src/fDev.cpp b/sw/rdma/src/fDev.cpp new file mode 100644 index 00000000..8968f24a --- /dev/null +++ b/sw/rdma/src/fDev.cpp @@ -0,0 +1,932 @@ +#include +#include +#include 
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "fDev.hpp"
+
+/* Sleep */
+#define POLL_SLEEP_NS 100
+
+/* Clock */
+#define CLK_NS 4
+
+/* IOCTL */
+#define IOCTL_ALLOC_HOST_USER_MEM _IOR('D', 1, unsigned long)
+#define IOCTL_FREE_HOST_USER_MEM _IOR('D', 2, unsigned long)
+#define IOCTL_ALLOC_HOST_PR_MEM _IOR('D', 3, unsigned long)
+#define IOCTL_FREE_HOST_PR_MEM _IOR('D', 4, unsigned long)
+#define IOCTL_MAP_USER _IOR('D', 5, unsigned long)
+#define IOCTL_UNMAP_USER _IOR('D', 6, unsigned long)
+#define IOCTL_RECONFIG_LOAD _IOR('D', 7, unsigned long)
+#define IOCTL_ARP_LOOKUP _IOR('D', 8, unsigned long)
+#define IOCTL_WRITE_CTX _IOR('D', 9, unsigned long)
+#define IOCTL_WRITE_CONN _IOR('D', 10, unsigned long)
+#define IOCTL_RDMA_STAT _IOR('D', 11, unsigned long)
+#define IOCTL_READ_ENG_STATUS _IOR('D', 12, unsigned long)
+
+/* MMAP */
+#define MMAP_CTRL (0x0 << PAGE_SHIFT)
+#define MMAP_CNFG (0x1 << PAGE_SHIFT)
+#define MMAP_CNFG_AVX (0x2 << PAGE_SHIFT)
+#define MMAP_BUFF (0x200 << PAGE_SHIFT)
+#define MMAP_PR (0x400 << PAGE_SHIFT)
+
+/* Regions (parenthesized so the macros stay correct inside larger expressions) */
+#define CTRL_REGION_SIZE (64 * 1024)
+#define CNFG_REGION_SIZE (64 * 1024)
+#define CNFG_AVX_REGION_SIZE (256 * 1024)
+
+#define N_RDMA_STAT_REGS 24
+
+/* Config regs */
+#ifdef EN_AVX
+    // Base
+    #define CNFG_CTRL_REG 0
+    #define CNFG_PF_REG 1
+    #define CNFG_DATAPATH_REG_SET 2
+    #define CNFG_DATAPATH_REG_CLR 3
+    #define CNFG_TMR_STOP_REG 4
+    #define CNFG_TMR_REG 5
+    #define CNFG_STAT_REG 6
+    // RDMA
+    #define CNFG_RDMA_POST_REG 10
+    #define CNFG_RDMA_STAT_CMD_USED_REG 11
+    #define CNFG_RDMA_QPN_REG 12
+
+    #define CTRL_START_RD 0x1
+    #define CTRL_START_WR 0x2
+    #define CTRL_SYNC_RD 0x4
+    #define CTRL_SYNC_WR 0x8
+    #define CTRL_STREAM_RD 0x10
+    #define CTRL_STREAM_WR 0x20
+    #define CTRL_CLR_STAT_RD 0x40
+    #define CTRL_CLR_STAT_WR 0x80
+    #define CTRL_CLR_IRQ_PENDING 0x100
+#else
+    // Base
+    #define CNFG_CTRL_REG 0
+    #define CNFG_VADDR_RD_REG 1
+    #define CNFG_LEN_RD_REG 2
+    #define CNFG_VADDR_WR_REG 3
+    #define CNFG_LEN_WR_REG 4
+    #define VADDR_MISS_REG 5
+    #define LEN_MISS_REG 6
+    #define CNFG_DATAPATH_REG_SET 7
+    #define CNFG_DATAPATH_REG_CLR 8
+    #define CNFG_TMR_STOP_REG 9
+    #define CNFG_TMR_RD_REG 10
+    #define CNFG_TMR_WR_REG 11
+    #define CNFG_STAT_CMD_USED_RD_REG 12
+    #define CNFG_STAT_CMD_USED_WR_REG 13
+    #define CNFG_STAT_DMA_RD_REG 14
+    #define CNFG_STAT_DMA_WR_REG 15
+    #define CNFG_STAT_SENT_RD_REG 16
+    #define CNFG_STAT_SENT_WR_REG 17
+    #define CNFG_STAT_PFAULTS_REG 18
+    // RDMA
+    #define CNFG_RDMA_POST_REG_0 20
+    #define CNFG_RDMA_POST_REG_1 21
+    #define CNFG_RDMA_POST_REG_2 22
+    #define CNFG_RDMA_POST_REG_3 23
+    #define CNFG_RDMA_STAT_CMD_USED_REG 24
+    #define CNFG_RDMA_QPN_REG 25
+
+    #define CTRL_START_RD 0x1
+    #define CTRL_START_WR 0x2
+    #define CTRL_SYNC_RD 0x4
+    #define CTRL_SYNC_WR 0x8
+    #define CTRL_CLR_STAT_RD 0x10
+    #define CTRL_CLR_STAT_WR 0x20
+    #define CTRL_CLR_IRQ_PENDING 0x40
+    #define CTRL_SEND_RDMA_REQ 0x80
+    #define CTRL_SEND_QP_CTX 0x100
+    #define CTRL_SEND_QP_CONN 0x200
+#endif
+
+using namespace std::chrono;
+
+namespace fpga {
+
+// -------------------------------------------------------------------------------
+// -- Obtain regions
+// -------------------------------------------------------------------------------
+
+/**
+ * Obtain vFPGA char devices
+ * Opens /dev/fpga<rNum> and maps its register regions.
+ * @param: rNum - region ID
+ * @return: true on success, false if the device could not be opened or mapped
+ */
+bool fDev::acquireRegion(uint32_t rNum) {
+    const std::string region = "/dev/fpga" + std::to_string(rNum);
+
+    fd = open(region.c_str(), O_RDWR | O_SYNC);
+    if(fd == -1) {
+        std::cout << "ERR: Cannot acquire an FPGA region" << std::endl;
+        return false;
+    }
+
+    // mmapFpga() releases the descriptor itself when it fails
+    if(!mmapFpga()) {
+        std::cout << "ERR: Cannot mmap an FPGA region" << std::endl;
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * Release the vFPGA handle
+ */
+void fDev::releaseRegion() {
+    close(fd);
+}
+
+/**
+ * Memory map control
+ * Maps the config region (AVX or plain, depending on EN_AVX) and the control region.
+ * On any failure every mapping created so far is removed, the device is released
+ * and false is returned.
+ */
+bool fDev::mmapFpga() {
+#ifdef EN_AVX
+    cnfg_reg = (__m256i*) mmap(NULL, CNFG_AVX_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CNFG_AVX);
+#else
+    cnfg_reg = (uint64_t*) mmap(NULL, CNFG_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CNFG);
+#endif
+    if(cnfg_reg == MAP_FAILED) {
+        cnfg_reg = 0;
+        releaseRegion();
+        return false;
+    }
+
+    ctrl_reg = (uint64_t*) mmap(NULL, CTRL_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CTRL);
+    if(ctrl_reg == MAP_FAILED) {
+        ctrl_reg = 0;
+        // Do not leak the config mapping established above
+#ifdef EN_AVX
+        munmap(cnfg_reg, CNFG_AVX_REGION_SIZE);
+#else
+        munmap(cnfg_reg, CNFG_REGION_SIZE);
+#endif
+        cnfg_reg = 0;
+        releaseRegion();
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * Unmap
+ * Removes the config and control mappings. If an unmap fails the device is
+ * released and false is returned.
+ */
+bool fDev::munmapFpga() {
+#ifdef EN_AVX
+    const size_t cnfg_size = CNFG_AVX_REGION_SIZE;
+#else
+    const size_t cnfg_size = CNFG_REGION_SIZE;
+#endif
+
+    if(munmap(cnfg_reg, cnfg_size) != 0) {
+        releaseRegion();
+        return false;
+    }
+
+    if(munmap(ctrl_reg, CTRL_REGION_SIZE) != 0) {
+        releaseRegion();
+        return false;
+    }
+
+    cnfg_reg = 0;
+    ctrl_reg = 0;
+
+    return true;
+}
+
+// -------------------------------------------------------------------------------
+// -- Memory management
+// -------------------------------------------------------------------------------
+
+/**
+ * Obtain huge pages on the host memory
+ * One extra page is mapped so the returned pointer can be rounded up to a
+ * LARGE_PAGE_SIZE boundary; the raw mapping is remembered in mapped_large.
+ * @param: n_pages - number of requested large pages
+ * @return: aligned buffer, or a null pointer if the allocation or mapping failed
+ */
+uint64_t* fDev::_getHostMem(uint32_t n_pages) {
+    uint64_t n_pg = n_pages;
+
+    if(ioctl(fd, IOCTL_ALLOC_HOST_USER_MEM, &n_pg) < 0) {
+        std::cout << "ERR: Cannot allocate host user memory" << std::endl;
+        return nullptr;
+    }
+
+    void *raw = mmap(NULL, (n_pg + 1) * LARGE_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_BUFF);
+    if(raw == MAP_FAILED) {
+        std::cout << "ERR: Cannot mmap host user memory" << std::endl;
+        return nullptr;
+    }
+
+    // alignment
+    uint64_t *hMem = (uint64_t*)raw;
+    uint64_t *hMemAligned = (uint64_t*)( ((((uint64_t)hMem + LARGE_PAGE_SIZE - 1) >> LARGE_PAGE_SHIFT)) << LARGE_PAGE_SHIFT);
+    mapped_large[hMemAligned] = hMem;
+    return hMemAligned;
+}
+
+/**
+ * Release huge pages on the host memory
+ * Pointers that were not handed out by _getHostMem()/getPrMem() are ignored.
+ * @param: vaddr - memory pointer
+ * @param: n_pages - number of obtained pages
+ */
+void fDev::_freeHostMem(uint64_t *vaddr, uint32_t n_pages) {
+    uint64_t n_pg = n_pages;
+
+    // find() instead of operator[]: the latter would insert a null entry
+    // for an unknown pointer and hand NULL to munmap().
+    auto it = mapped_large.find(vaddr);
+    if(it == mapped_large.end())
+        return;
+
+    munmap(it->second, (n_pg + 1) * LARGE_PAGE_SIZE);
+    mapped_large.erase(it);
+    ioctl(fd, IOCTL_FREE_HOST_USER_MEM, &vaddr);
+}
+
+/**
+ * Obtain huge pages allocated for the PR bitstreams
+ * @param: n_pages - number of requested large pages
+ * @return: aligned buffer, or a null pointer if the allocation or mapping failed
+ */
+uint64_t* fDev::getPrMem(uint64_t n_pages) {
+    uint64_t n_pg = n_pages;
+
+    if(ioctl(fd, IOCTL_ALLOC_HOST_PR_MEM, &n_pg) < 0) {
+        std::cout << "ERR: Cannot allocate host PR memory" << std::endl;
+        return nullptr;
+    }
+
+    void *raw = mmap(NULL, (n_pg + 1) * LARGE_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_PR);
+    if(raw == MAP_FAILED) {
+        std::cout << "ERR: Cannot mmap host PR memory" << std::endl;
+        return nullptr;
+    }
+
+    // alignment
+    uint64_t *hMem = (uint64_t*)raw;
+    uint64_t *hMemAligned = (uint64_t*)( ((((uint64_t)hMem + LARGE_PAGE_SIZE - 1) >> LARGE_PAGE_SHIFT)) << LARGE_PAGE_SHIFT);
+    mapped_large[hMemAligned] = hMem;
+    return hMemAligned;
+}
+
+/**
+ * Release huge pages allocated for the PR bitstreams
+ * Pointers that were not handed out by getPrMem()/_getHostMem() are ignored.
+ * @param: vaddr - memory pointer
+ * @param: n_pages - number of obtained pages
+ */
+void fDev::freePrMem(uint64_t *vaddr, uint64_t n_pages) {
+    uint64_t n_pg = n_pages;
+
+    auto it = mapped_large.find(vaddr);
+    if(it == mapped_large.end())
+        return;
+
+    munmap(it->second, (n_pg + 1) * LARGE_PAGE_SIZE);
+    mapped_large.erase(it);
+    ioctl(fd, IOCTL_FREE_HOST_PR_MEM, &vaddr);
+}
+
+/**
+ * Explicit TLB mapping
+ * @param: mem - memory pointer
+ * @param: len - length of the mapping
+ */
+void fDev::_userMap(uint64_t *mem, uint32_t len) {
+    uint64_t vdata[2] = { (uint64_t)mem, len };
+    ioctl(fd, IOCTL_MAP_USER, &vdata);
+}
+
+/**
+ * TLB unmap
+ * @param: mem - memory pointer
+ * @param: len - length of the mapping
+ */
+void fDev::_userUnmap(uint64_t *mem, uint32_t len) {
+    uint64_t vdata[2] = { (uint64_t)mem, len };
+    ioctl(fd, IOCTL_UNMAP_USER, &vdata);
+}
+
+// -------------------------------------------------------------------------------
+// -- PR
+// -------------------------------------------------------------------------------
+
+/**
+ * Reconfiguration ioctl call
+ * @param: vaddr - memory pointer of the PR stream
+ * @param: len - length of the stream
+ */
+void fDev::reconfigure(uint64_t *vaddr, uint64_t len) {
+    uint64_t vdata[2] = { (uint64_t)vaddr, len };
+    ioctl(fd, IOCTL_RECONFIG_LOAD, &vdata);
+}
+
+#ifdef EN_AVX
+    //
-------------------------------------------------------------------------------
+    // -- Bulk transfers
+    // -------------------------------------------------------------------------------
+
+    /**
+     * Read operation (read to FPGA user logic)
+     * Waits until the read command FIFO has room, then issues one read command.
+     * @param: vaddr - memory pointer
+     * @param: len - length
+     * @param: stream - stream from host memory
+     * @param: clr_stat - prior status clear
+     * @param: poll - blocking/non-blocking
+     */
+    void fDev::_read(uint64_t* vaddr, uint32_t len, bool stream, bool clr_stat, bool poll) {
+        // Named timespec: the compound-literal array form is not ISO C++
+        const struct timespec poll_ts = {0, POLL_SLEEP_NS};
+
+        // Check outstanding
+        while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            rd_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) & 0xffffffff;
+            if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep(&poll_ts, NULL);
+        }
+
+        const uint64_t len_cmd = len;
+        const uint64_t ctrl_cmd = CTRL_START_RD | (clr_stat ? CTRL_CLR_STAT_RD : 0x0) | (stream ? CTRL_STREAM_RD : 0x0);
+        cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, 0, (uint64_t)vaddr, ctrl_cmd);
+
+        rd_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyRead()) nanosleep(&poll_ts, NULL);
+        }
+    }
+
+    /**
+     * Write operation (write from FPGA user logic)
+     * Waits until the write command FIFO has room, then issues one write command.
+     * @param: vaddr - memory pointer
+     * @param: len - length
+     * @param: stream - stream to host memory
+     * @param: clr_stat - prior status clear
+     * @param: poll - blocking/non-blocking
+     */
+    void fDev::_write(uint64_t* vaddr, uint32_t len, bool stream, bool clr_stat, bool poll) {
+        const struct timespec poll_ts = {0, POLL_SLEEP_NS};
+
+        // Check outstanding (cast before shifting: the intrinsic returns a signed value)
+        while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            wr_cmd_cnt = (uint64_t)_mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) >> 32;
+            if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep(&poll_ts, NULL);
+        }
+
+        const uint64_t len_cmd = (uint64_t)len << 32;
+        const uint64_t ctrl_cmd = CTRL_START_WR | (clr_stat ? CTRL_CLR_STAT_WR : 0x0) | (stream ? CTRL_STREAM_WR : 0x0);
+        cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, (uint64_t)vaddr, 0, ctrl_cmd);
+
+        wr_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyWrite()) nanosleep(&poll_ts, NULL);
+        }
+    }
+
+    /**
+     * transfer (read + write)
+     * Issues a read and a write with a single command; when polling, only the
+     * write completion is awaited.
+     * @param: vaddr_src, _dst - memory pointer
+     * @param: len_src, _dst - length
+     * @param: stream - stream from/to host memory
+     * @param: clr_stat - prior status clear
+     * @param: poll - blocking/non-blocking
+     */
+    void fDev::_transfer(uint64_t* vaddr_src, uint64_t* vaddr_dst, uint32_t len_src, uint32_t len_dst, bool stream, bool clr_stat, bool poll) {
+        const struct timespec poll_ts = {0, POLL_SLEEP_NS};
+
+        // Check outstanding read
+        while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            rd_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) & 0xffffffff;
+            if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep(&poll_ts, NULL);
+        }
+        // Check outstanding write
+        while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            wr_cmd_cnt = (uint64_t)_mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) >> 32;
+            if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep(&poll_ts, NULL);
+        }
+
+        const uint64_t len_cmd = ((uint64_t)len_dst << 32) | len_src;
+        const uint64_t ctrl_cmd = CTRL_START_WR | (clr_stat ? CTRL_CLR_STAT_WR : 0x0) | (stream ? CTRL_STREAM_WR : 0x0) |
+                                  CTRL_START_RD | (clr_stat ? CTRL_CLR_STAT_RD : 0x0) | (stream ? CTRL_STREAM_RD : 0x0);
+
+        cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, (uint64_t)vaddr_dst, (uint64_t)vaddr_src, ctrl_cmd);
+
+        rd_cmd_cnt++;
+        wr_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyWrite()) nanosleep(&poll_ts, NULL);
+        }
+    }
+
+#ifdef EN_DDR
+
+    /**
+     * Offload to FPGA DDR (only with local FPGA memory)
+     * @param: vaddr - memory pointer
+     * @param: len - length
+     * @param: poll - blocking/non-blocking
+     */
+    void fDev::_offload(uint64_t* vaddr, uint32_t len, bool poll) {
+        const struct timespec poll_ts = {0, POLL_SLEEP_NS};
+
+        // Check outstanding
+        while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            rd_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) & 0xffffffff;
+            if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep(&poll_ts, NULL);
+        }
+
+        const uint64_t len_cmd = len;
+        const uint64_t ctrl_cmd = CTRL_START_RD | CTRL_CLR_STAT_RD | CTRL_SYNC_RD;
+        cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, 0, (uint64_t)vaddr, ctrl_cmd);
+
+        rd_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyRead()) nanosleep(&poll_ts, NULL);
+        }
+    }
+
+    /**
+     * Sync with FPGA DDR (only with local FPGA memory)
+     * @param: vaddr - memory pointer
+     * @param: len - length
+     * @param: poll - blocking/non-blocking
+     */
+    void fDev::_sync(uint64_t* vaddr, uint32_t len, bool poll) {
+        const struct timespec poll_ts = {0, POLL_SLEEP_NS};
+
+        // Check outstanding
+        while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            wr_cmd_cnt = (uint64_t)_mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) >> 32;
+            if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep(&poll_ts, NULL);
+        }
+
+        const uint64_t len_cmd = (uint64_t)len << 32;
+        const uint64_t ctrl_cmd = CTRL_START_WR | CTRL_CLR_STAT_WR | CTRL_SYNC_WR;
+        cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, (uint64_t)vaddr, 0, ctrl_cmd);
+
+        wr_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyWrite()) nanosleep(&poll_ts, NULL);
+        }
+    }
+
+#endif
+
+    //
-------------------------------------------------------------------------------
+    // -- Polling
+    // -------------------------------------------------------------------------------
+
+    /**
+     * Check whether busy read
+     * @return: true while the completed-reads field of the status register is zero
+     */
+    bool fDev::checkBusyRead() {
+        return !(_mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 1) & 0xffffffff);
+    }
+
+    /**
+     * Check whether busy write
+     * @return: true while the completed-writes field of the status register is zero
+     */
+    bool fDev::checkBusyWrite() {
+        return !((uint64_t)_mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 1) >> 32);
+    }
+
+    /**
+     * Return read completed
+     */
+    uint32_t fDev::checkCompletedRead() {
+        return _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 1) & 0xffffffff;
+    }
+
+    /**
+     * Return write completed
+     */
+    uint32_t fDev::checkCompletedWrite() {
+        return (uint64_t)_mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 1) >> 32;
+    }
+
+    /**
+     * Clear status
+     */
+    void fDev::clearCompleted() {
+        cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(0, 0, 0, CTRL_CLR_STAT_RD | CTRL_CLR_STAT_WR);
+    }
+
+    // -------------------------------------------------------------------------------
+    // -- Timers
+    // -------------------------------------------------------------------------------
+
+    /**
+     * Set timer stop at x number of completed transfers
+     * @param: tmr_stop_at - stop once completed reached
+     */
+    void fDev::setTimerStopAt(uint64_t tmr_stop_at) {
+        cnfg_reg[CNFG_TMR_STOP_REG] = _mm256_set_epi64x(0, 0, 0, tmr_stop_at);
+    }
+
+    /**
+     * Read timer
+     */
+    uint64_t fDev::getReadTimer() {
+        return _mm256_extract_epi64(cnfg_reg[CNFG_TMR_REG], 0);
+    }
+
+    /**
+     * Write timer
+     */
+    uint64_t fDev::getWriteTimer() {
+        return _mm256_extract_epi64(cnfg_reg[CNFG_TMR_REG], 1);
+    }
+
+    // -------------------------------------------------------------------------------
+    // -- Debug XDMA
+    // -------------------------------------------------------------------------------
+
+    /**
+     * XDMA debug
+     * Prints the 32-bit fields of the status register. The register is read
+     * once so that all printed fields belong to the same snapshot.
+     */
+    void fDev::printDebugXDMA() // TODO
+    {
+        const __m256i stat = cnfg_reg[CNFG_STAT_REG];
+
+        std::cout << "-- XDMA STATISTICS ----------------------------" << std::endl;
+        std::cout << std::setw(35) << "Read command FIFO used: \t" << _mm256_extract_epi32(stat, 0x0) << std::endl;
+        std::cout << std::setw(35) << "Write command FIFO used: \t" << _mm256_extract_epi32(stat, 0x1) << std::endl;
+        std::cout << std::setw(35) << "Reads completed: \t" << _mm256_extract_epi32(stat, 0x2) << std::endl;
+        std::cout << std::setw(35) << "Writes completed: \t" << _mm256_extract_epi32(stat, 0x3) << std::endl;
+        std::cout << std::setw(35) << "Read requests sent: \t" << _mm256_extract_epi32(stat, 0x4) << std::endl;
+        std::cout << std::setw(35) << "Write requests sent: \t" << _mm256_extract_epi32(stat, 0x5) << std::endl;
+        std::cout << std::setw(35) << "Page faults: \t" << _mm256_extract_epi32(stat, 0x6) << std::endl;
+        std::cout << "-----------------------------------------------" << std::endl;
+    }
+
+#else
+
+    /**
+     * Read operation (read to FPGA user logic)
+     * Waits until the read command FIFO has room, then issues one read command.
+     * @param: vaddr - memory pointer
+     * @param: len - length
+     * @param: clr_stat - prior status clear
+     * @param: poll - blocking/non-blocking
+     */
+    void fDev::read(uint64_t* vaddr, uint32_t len, bool clr_stat, bool poll) {
+        // Named timespec: the compound-literal array form is not ISO C++
+        const struct timespec poll_ts = {0, POLL_SLEEP_NS};
+
+        // Check outstanding
+        while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            rd_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_RD_REG];
+            if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep(&poll_ts, NULL);
+        }
+
+        cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr;
+        cnfg_reg[CNFG_LEN_RD_REG] = len;
+
+        cnfg_reg[CNFG_CTRL_REG] = CTRL_START_RD | (clr_stat ? CTRL_CLR_STAT_RD : 0x0);
+
+        rd_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyRead()) nanosleep(&poll_ts, NULL);
+        }
+    }
+
+    /**
+     * Write operation (write from FPGA user logic)
+     * Waits until the write command FIFO has room, then issues one write command.
+     * @param: vaddr - memory pointer
+     * @param: len - length
+     * @param: clr_stat - prior status clear
+     * @param: poll - blocking/non-blocking
+     */
+    void fDev::write(uint64_t* vaddr, uint32_t len, bool clr_stat, bool poll) {
+        const struct timespec poll_ts = {0, POLL_SLEEP_NS};
+
+        // Check outstanding
+        while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            wr_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_WR_REG];
+            if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep(&poll_ts, NULL);
+        }
+
+        cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr;
+        cnfg_reg[CNFG_LEN_WR_REG] = len;
+
+        cnfg_reg[CNFG_CTRL_REG] = CTRL_START_WR | (clr_stat ? CTRL_CLR_STAT_WR : 0x0);
+
+        wr_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyWrite()) nanosleep(&poll_ts, NULL);
+        }
+    }
+
+    /**
+     * transfer (read + write)
+     * Programs both address/length pairs, then starts the read and the write
+     * with two separate control writes; when polling, only the write
+     * completion is awaited.
+     * @param: vaddr_src, _dst - memory pointer
+     * @param: len_src, _dst - length
+     * @param: clr_stat - prior status clear
+     * @param: poll - blocking/non-blocking
+     */
+    void fDev::transfer(uint64_t* vaddr_src, uint64_t* vaddr_dst, uint32_t len_src, uint32_t len_dst, bool clr_stat, bool poll) {
+        const struct timespec poll_ts = {0, POLL_SLEEP_NS};
+
+        // Check outstanding read
+        while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            rd_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_RD_REG];
+            if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep(&poll_ts, NULL);
+        }
+
+        // Check outstanding write
+        while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            wr_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_WR_REG];
+            if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep(&poll_ts, NULL);
+        }
+
+        cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr_src;
+        cnfg_reg[CNFG_LEN_RD_REG] = len_src;
+        cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr_dst;
+        cnfg_reg[CNFG_LEN_WR_REG] = len_dst;
+
+        cnfg_reg[CNFG_CTRL_REG] = CTRL_START_RD | (clr_stat ? CTRL_CLR_STAT_RD : 0x0);
+        cnfg_reg[CNFG_CTRL_REG] = CTRL_START_WR | (clr_stat ? CTRL_CLR_STAT_WR : 0x0);
+
+        rd_cmd_cnt++;
+        wr_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyWrite()) nanosleep(&poll_ts, NULL);
+        }
+    }
+
+#ifdef EN_DDR
+
+    /**
+     * Offload to FPGA DDR (only with local FPGA memory)
+     * Like read(), waits for room in the read command FIFO before issuing.
+     * @param: vaddr - memory pointer
+     * @param: len - length
+     * @param: poll - blocking/non-blocking
+     */
+    void fDev::offload(uint64_t* vaddr, uint32_t len, bool poll) {
+        const struct timespec poll_ts = {0, POLL_SLEEP_NS};
+
+        // Check outstanding
+        while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            rd_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_RD_REG];
+            if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep(&poll_ts, NULL);
+        }
+
+        cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr;
+        cnfg_reg[CNFG_LEN_RD_REG] = len;
+
+        cnfg_reg[CNFG_CTRL_REG] = CTRL_SYNC_RD | CTRL_START_RD | CTRL_CLR_STAT_RD;
+
+        rd_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyRead()) nanosleep(&poll_ts, NULL);
+        }
+    }
+
+    /**
+     * Sync with FPGA DDR (only with local FPGA memory)
+     * Like write(), waits for room in the write command FIFO before issuing.
+     * @param: vaddr - memory pointer
+     * @param: len - length
+     * @param: poll - blocking/non-blocking
+     */
+    void fDev::sync(uint64_t* vaddr, uint32_t len, bool poll) {
+        const struct timespec poll_ts = {0, POLL_SLEEP_NS};
+
+        // Check outstanding
+        while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            wr_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_WR_REG];
+            if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep(&poll_ts, NULL);
+        }
+
+        cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr;
+        cnfg_reg[CNFG_LEN_WR_REG] = len;
+
+        cnfg_reg[CNFG_CTRL_REG] = CTRL_SYNC_WR | CTRL_START_WR | CTRL_CLR_STAT_WR;
+
+        wr_cmd_cnt++;
+
+        if(poll) {
+            while(checkBusyWrite()) nanosleep(&poll_ts, NULL);
+        }
+    }
+
+#endif
+
+    /**
+     * XDMA debug
+     */
+    void fDev::printDebugXDMA() // TODO
+    {
+        std::cout << "-- XDMA STATISTICS ----------------------------" << std::endl;
+        std::cout << "-----------------------------------------------" << std::endl;
+    }
+
+    // -------------------------------------------------------------------------------
+    // -- Polling
+    // -------------------------------------------------------------------------------
+
+    /**
+     * Check whether busy read (true while the read status register is zero)
+     */
+    bool fDev::checkBusyRead() {
+        return !(cnfg_reg[CNFG_STAT_DMA_RD_REG]);
+    }
+
+    /**
+     * Check whether busy write (true while the write status register is zero)
+     */
+    bool fDev::checkBusyWrite() {
+        return !(cnfg_reg[CNFG_STAT_DMA_WR_REG]);
+    }
+
+    /**
+     * Return read completed
+     */
+    uint32_t fDev::checkCompletedRead() {
+        return (cnfg_reg[CNFG_STAT_DMA_RD_REG]);
+    }
+
+    /**
+     * Return write completed
+     */
+    uint32_t fDev::checkCompletedWrite() {
+        return (cnfg_reg[CNFG_STAT_DMA_WR_REG]);
+    }
+
+    /**
+     * Clear status
+     */
+    void fDev::clearCompleted() {
+        cnfg_reg[CNFG_CTRL_REG] = CTRL_CLR_STAT_RD | CTRL_CLR_STAT_WR;
+    }
+
+    // -------------------------------------------------------------------------------
+    // -- Timers
+    // -------------------------------------------------------------------------------
+
+    /**
+     * Set timer stop at x number of completed transfers
+     * @param: tmr_stop_at - stop once completed reached
+     */
+    void fDev::setTimerStopAt(uint64_t tmr_stop_at) {
+        cnfg_reg[CNFG_TMR_STOP_REG] = tmr_stop_at;
+    }
+
+    /**
+     * Read timer
+     */
+    uint64_t fDev::getReadTimer() {
+        return cnfg_reg[CNFG_TMR_RD_REG];
+    }
+
+    /**
+     * Write timer
+     */
+    uint64_t fDev::getWriteTimer() {
+        return cnfg_reg[CNFG_TMR_WR_REG];
+    }
+
+#endif
+
+// -------------------------------------------------------------------------------
+// -- Network static
+// -------------------------------------------------------------------------------
+
+#ifdef EN_RDMA
+
+/**
+ * ARP lookup
+ * @return: true if the ioctl was accepted by the driver, false otherwise
+ */
+bool fDev::doArpLookup() {
+    return ioctl(fd, IOCTL_ARP_LOOKUP, 0) != -1;
+}
+
+/**
+ * Write QP context
+ * Packs PSNs, local QPN/region, remote key and remote address into three
+ * 64-bit words and hands them to the driver.
+ * @param: pair - target queue pair
+ */
+void fDev::writeContext(fQPair *pair) {
+    uint64_t offs[3];
+    offs[0] = (((uint64_t)pair->remote.psn & 0xffffff) << 31) | (((uint64_t)pair->local.qpn & 0xffffff) << 7) | (((uint64_t)pair->local.region & 0xf) << 3);
+    offs[1] = (((uint64_t)pair->remote.rkey & 0xffffff) << 24) | ((uint64_t)pair->local.psn & 0xffffff);
+    offs[2] = (uint64_t)pair->remote.vaddr;
+    ioctl(fd, IOCTL_WRITE_CTX, &offs);
+}
+
+/**
+ * Write QP connection
+ * Packs port, remote/local QPN and the four 32-bit words of the remote GID
+ * into three 64-bit words and hands them to the driver.
+ * @param: pair - target queue pair
+ * @param: port - port number written into the connection entry
+ */
+void fDev::writeConnection(fQPair *pair, uint32_t port) {
+    uint64_t offs[3];
+    offs[0] = (((uint64_t)port & 0xffff) << 40) | (((uint64_t)pair->remote.qpn & 0xffffff) << 16) | ((pair->local.qpn) & 0xffff);
+    offs[1] = ((htols((uint64_t)pair->remote.gidToUint(8)) & 0xffffffff) << 32) | (htols((uint64_t)pair->remote.gidToUint(0)) & 0xffffffff);
+    offs[2] = ((htols((uint64_t)pair->remote.gidToUint(24)) & 0xffffffff) << 32) | (htols((uint64_t)pair->remote.gidToUint(16)) & 0xffffffff);
+    ioctl(fd, IOCTL_WRITE_CONN, &offs);
+}
+
+    // -------------------------------------------------------------------------------
+    // -- Network
+    // -------------------------------------------------------------------------------
+
+#ifdef EN_AVX
+
+    /**
+     * RDMA write
+     * @param: pair - queue pair supplying QPN and base addresses
+     * @param: l_offs - offset added to the local base virtual address
+     * @param: r_offs - offset added to the remote base virtual address
+     * @param: size - transfer size
+     * @return: always false (0)
+     */
+    bool fDev::postWrite(fQPair *pair, uint64_t l_offs, uint64_t r_offs, uint32_t size) {
+        const uint64_t l_addr = pair->local.vaddr + l_offs;
+        const uint64_t r_addr = pair->remote.vaddr + r_offs;
+
+        const uint64_t offs_0 = (((uint64_t)pair->local.qpn & 0xffffff) << 5) | ((uint64_t)opCode::WRITE & 0x1f);
+        const uint64_t offs_1 = (((uint64_t)r_addr & 0xffff) << 48) | ((uint64_t)l_addr & 0xffffffffffff);
+        const uint64_t offs_2 = ((uint64_t)size << 32) | (((uint64_t)r_addr >> 16) & 0xffffffff);
+        const uint64_t offs_3 = 0;
+
+        postCmd(offs_3, offs_2, offs_1, offs_0);
+
+        return false;
+    }
+
+    /**
+     * RDMA read
+     * @param: pair - queue pair supplying QPN and base addresses
+     * @param: l_offs - offset added to the local base virtual address
+     * @param: r_offs - offset added to the remote base virtual address
+     * @param: size - transfer size
+     * @return: always false (0)
+     */
+    bool fDev::postRead(fQPair *pair, uint64_t l_offs, uint64_t r_offs, uint32_t size) {
+        const uint64_t l_addr = pair->local.vaddr + l_offs;
+        const uint64_t r_addr = pair->remote.vaddr + r_offs;
+
+        const uint64_t offs_0 = (((uint64_t)pair->local.qpn & 0xffffff) << 5) | ((uint64_t)opCode::READ & 0x1f);
+        const uint64_t offs_1 = (((uint64_t)r_addr & 0xffff) << 48) | ((uint64_t)l_addr & 0xffffffffffff);
+        const uint64_t offs_2 = ((uint64_t)size << 32) | (((uint64_t)r_addr >> 16) & 0xffffffff);
+        const uint64_t offs_3 = 0;
+
+        postCmd(offs_3, offs_2, offs_1, offs_0);
+
+        return false;
+    }
+
+    /**
+     * RDMA RPC
+     * Same command layout as postWrite()/postRead(), with a caller supplied
+     * fourth word.
+     * @param: pair - queue pair supplying QPN and base addresses
+     * @param: l_offs, r_offs - offsets added to the local/remote base addresses
+     * @param: size - transfer size
+     * @param: params - value placed in the fourth command word
+     * @return: always false (0)
+     */
+    bool fDev::postFarview(fQPair *pair, uint64_t l_offs, uint64_t r_offs, uint32_t size, uint64_t params) {
+        const uint64_t l_addr = pair->local.vaddr + l_offs;
+        const uint64_t r_addr = pair->remote.vaddr + r_offs;
+
+        const uint64_t offs_0 = (((uint64_t)pair->local.qpn & 0xffffff) << 5) | ((uint64_t)opCode::FV & 0x1f);
+        const uint64_t offs_1 = (((uint64_t)r_addr & 0xffff) << 48) | ((uint64_t)l_addr & 0xffffffffffff);
+        const uint64_t offs_2 = ((uint64_t)size << 32) | (((uint64_t)r_addr >> 16) & 0xffffffff);
+        const uint64_t offs_3 = params;
+
+        postCmd(offs_3, offs_2, offs_1, offs_0);
+
+        return false;
+    }
+
+    /**
+     * RDMA RPC
+     * @param: pair - queue pair supplying the QPN
+     * @param: params_0, _1, _2 - values placed in command words 1, 2 and 3
+     * @return: always false (0)
+     */
+    bool fDev::postFarviewBase(fQPair *pair, uint64_t params_0, uint64_t params_1, uint64_t params_2) {
+        const uint64_t offs_0 = (((uint64_t)pair->local.qpn & 0xffffff) << 5) | ((uint64_t)opCode::FV & 0x1f);
+        const uint64_t offs_1 = params_0;
+        const uint64_t offs_2 = params_1;
+        const uint64_t offs_3 = params_2;
+
+        postCmd(offs_3, offs_2, offs_1, offs_0);
+
+        return false;
+    }
+
+    /**
+     * Base post
+     * Waits for room in the RDMA command FIFO, then writes the four command
+     * words with a single 256-bit store.
+     */
+    void fDev::postCmd(uint64_t offs_3, uint64_t offs_2, uint64_t offs_1, uint64_t offs_0) {
+        const struct timespec poll_ts = {0, POLL_SLEEP_NS};
+
+        // Check outstanding
+        while (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            rdma_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_RDMA_STAT_CMD_USED_REG], 0) & 0xffffffff;
+            if (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep(&poll_ts, NULL);
+        }
+
+        cnfg_reg[CNFG_RDMA_POST_REG] = _mm256_set_epi64x(offs_3, offs_2, offs_1, offs_0);
+
+        rdma_cmd_cnt++;
+    }
+
+#else
+
+    // -------------------------------------------------------------------------------
+    // -- Network
+    // -------------------------------------------------------------------------------
+
+    /**
+     * RDMA write
+     * @param: pair - target queue pair
+     * @param: l_addr - local virtual address
+     * @param: r_addr - remote virtual address
+     * @param: size - transfer size
+     * @return: false (0) if the command was posted, true (1) if no QPN is attached
+     */
+    bool fDev::postWrite(rQPair *pair, uint64_t *l_addr, uint64_t *r_addr, uint32_t size) {
+        if(!qpn_attached)
+            return true;
+
+        postCmd(opCode::WRITE, pair, l_addr, r_addr, size);
+        return false;
+    }
+
+    /**
+     *
RDMA read
+     * @param: pair - target queue pair
+     * @param: l_addr - local virtual address
+     * @param: r_addr - remote virtual address
+     * @param: size - transfer size
+     * @return: false (0) if the command was posted, true (1) if no QPN is attached
+     */
+    bool fDev::postRead(rQPair *pair, uint64_t *l_addr, uint64_t *r_addr, uint32_t size) {
+        if(!qpn_attached)
+            return true;
+
+        postCmd(opCode::READ, pair, l_addr, r_addr, size);
+        return false;
+    }
+
+    /**
+     * Base post
+     * Waits for room in the RDMA command FIFO, then writes the four post registers.
+     * TODO: Change to new config
+     */
+    void fDev::postCmd(opCode op, rQPair *pair, uint64_t *l_addr, uint64_t *r_addr, uint32_t size) {
+    #ifdef VERBOSE_DEBUG
+        std::cout << "Post, queue pair l: " << pair->local.qpn << ", r: " << pair->remote.qpn << std::endl;
+    #endif
+
+        // Named timespec: the compound-literal array form is not ISO C++
+        const struct timespec poll_ts = {0, POLL_SLEEP_NS};
+
+        // Check outstanding
+        while (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
+            rdma_cmd_cnt = cnfg_reg[CNFG_RDMA_STAT_CMD_USED_REG];
+            if (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr))
+                nanosleep(&poll_ts, NULL);
+        }
+
+        cnfg_reg[CNFG_RDMA_POST_REG_0] = (((uint64_t)size << 27)) | (((uint64_t)pair->local.qpn & 0xffffff) << 3) | (((uint64_t)op & 0x3));
+        cnfg_reg[CNFG_RDMA_POST_REG_1] = (uint64_t)l_addr;
+        cnfg_reg[CNFG_RDMA_POST_REG_2] = (uint64_t)r_addr;
+        cnfg_reg[CNFG_RDMA_POST_REG_3] = 0;
+
+        rdma_cmd_cnt++;
+    }
+
+#endif
+#endif
+
+}
diff --git a/sw/rdma/src/fQ.cpp b/sw/rdma/src/fQ.cpp
new file mode 100644
index 00000000..b0feb5f5
--- /dev/null
+++ b/sw/rdma/src/fQ.cpp
@@ -0,0 +1,72 @@
+#include "fQ.hpp"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace fpga {
+
+/**
+ * Parse eight hex characters of the GID string into a 32-bit value
+ * @param: idx - character offset into gid, valid range 0..24
+ * @return: parsed value passed through ntohl(), 0 for an invalid index
+ */
+uint32_t fQ::gidToUint(int idx) {
+    if(idx < 0 || idx > 24) {
+        std::cerr << "Invalid index for gitToUint" << std::endl;
+        return 0;
+    }
+
+    char tmp[9];
+    memset(tmp, 0, sizeof(tmp));
+    memcpy(tmp, gid+idx, 8);
+
+    uint32_t v32 = 0;
+    sscanf(tmp, "%x", &v32);
+    return ntohl(v32);
+}
+
+/**
+ * Write a 32-bit value as eight zero-padded hex characters into the GID string
+ * @param: idx - character offset into gid, valid range 0..24
+ * @param: ip_addr - value to encode
+ */
+void fQ::uintToGid(int idx, uint32_t ip_addr) {
+    // Same bound as gidToUint(): eight characters starting at idx must fit
+    if(idx < 0 || idx > 24) {
+        std::cerr << "Invalid index for uintToGid" << std::endl;
+        return;
+    }
+
+    std::ostringstream gidStream;
+    gidStream << std::setfill('0') << std::setw(8) << std::hex << ip_addr;
+    memcpy(gid+idx, gidStream.str().c_str(), 8);
+}
+
+/**
+ * Print the queue description to stdout
+ * @param: name - label printed in front of the fields
+ */
+void fQ::print(const char *name) {
+    printf("%s: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s, REG 0x%04x, RKEY %#08x, VADDR %016lx, SIZE %08x\n",
+           name, 0, qpn, psn, gid, region, rkey, vaddr, size);
+}
+
+/**
+ * Serialize the queue into the space separated hex text format read by decode()
+ * Field order: lid, qpn, psn, region, rkey, vaddr, gid.
+ */
+std::string fQ::encode() {
+    std::uint32_t lid = 0;
+    std::ostringstream msgStream;
+    msgStream << std::setfill('0') << std::hex;
+    msgStream << std::setw(4) << lid << " ";
+    msgStream << std::setw(6) << qpn << " ";
+    msgStream << std::setw(6) << (psn & 0xFFFFFF) << " ";
+    msgStream << std::setw(4) << (region & 0xf) << " ";
+    msgStream << std::setw(8) << rkey << " ";
+    msgStream << std::setw(16) << vaddr << " ";
+    msgStream << gid;
+
+    return msgStream.str();
+}
+
+/**
+ * Parse a message produced by encode()
+ * The separator positions in buf are overwritten with spaces before parsing.
+ * @param: buf - received message (modified in place)
+ * @param: len - number of valid bytes in buf, at least 60
+ */
+void fQ::decode(char* buf, size_t len) {
+    if (len < 60) {
+        std::cerr << "ERR: unexpected length " << len << " in decode ib connection\n";
+        return;
+    }
+    buf[4] = ' ';
+    buf[11] = ' ';
+    buf[18] = ' ';
+    buf[23] = ' ';
+    buf[32] = ' ';
+    buf[49] = ' ';
+
+    std::uint32_t lid = 0;
+    std::string recvMsg(buf, len);
+    std::istringstream recvStream(recvMsg);
+    recvStream >> std::hex >> lid >> qpn >> psn >> region;
+    recvStream >> std::hex >> rkey >> vaddr;
+    // The message comes from the network: bound the extraction so an oversized
+    // GID field cannot overrun the buffer. Assumes gid holds 32 characters plus
+    // the terminator, as implied by the 0..24 offset range above — TODO confirm
+    // against fQ.hpp.
+    recvStream >> std::setw(33) >> gid;
+}
+
+}
\ No newline at end of file
diff --git a/sw/rdma/src/fView.cpp b/sw/rdma/src/fView.cpp
new file mode 100644
index 00000000..956f5dbf
--- /dev/null
+++ b/sw/rdma/src/fView.cpp
@@ -0,0 +1,677 @@
+ #include "fView.hpp"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace fpga;
+
+namespace comm {
+
+/**
+ * Constructor
+ * @param: fdev - array of fDev objects. Has to coincide with the number of regions.
+ * @param: node_id - current node ID
+ * @param: n_nodes - number of total nodes in the system
+ * @param: n_qpairs - qpair organization, ex: {1, 3} => 2 node system, master node, remote node 1 shares 3 qpairs
+ * @param: n_regions - number of vFPGA regions
+ * @param: mstr_ip_addr - master node IP address
+ *
+ * Sets up the local queues, exchanges them with the peers over TCP (node 0
+ * acts as server, every other node as client) and loads the resulting
+ * contexts and connections into the FPGA regions.
+ */
+fView::fView(fDev *fdev, uint32_t node_id, uint32_t n_nodes, uint32_t *n_qpairs, uint32_t n_regions, const char *mstr_ip_addr) {
+    // Set port
+    port = 18515; // ?
+    ib_port = 0;
+    this->mstr_ip_addr = mstr_ip_addr;
+
+    // FPGA device
+    this->fdev = fdev;
+
+    // Nodes
+    this->node_id = node_id;
+    this->n_nodes = n_nodes;
+    this->n_regions = n_regions;
+
+    for (uint32_t i = 0; i < n_nodes; i++) {
+        std::vector<fQPair> v(n_qpairs[i], fQPair());
+        pairs.push_back(v);
+    }
+
+    // Connections (-1 marks "no socket" so that teardown never closes an
+    // uninitialized descriptor, e.g. after a failed exchange)
+    this->connections = new int[n_nodes];
+    for (uint32_t i = 0; i < n_nodes; i++)
+        connections[i] = -1;
+
+    // Initialize local queues
+    initializeLocalQueues();
+
+    // Queue exchange
+    int ret = 1;
+    if (node_id == 0) {
+        ret = masterExchangeQueues();
+    } else {
+        // Give the master a moment to start listening
+        std::this_thread::sleep_for(std::chrono::seconds(1));
+        ret = clientExchangeQueues();
+    }
+    if (ret)
+        std::cout << "Exchange failed" << std::endl;
+    else
+        std::cout << "Exchange successfull" << std::endl;
+
+    // Load QPn
+
+
+    // Load context and connections
+    for (uint32_t i = 0; i < n_nodes; i++) {
+        if (i == node_id) continue;
+
+        for (size_t j = 0; j < pairs[i].size(); j++) {
+            int pair_reg = pairs[i][j].local.region;
+            fdev[pair_reg].writeContext(&pairs[i][j]);
+            fdev[pair_reg].writeConnection(&pairs[i][j], port);
+        }
+    }
+
+    // ARP lookup
+    fdev[0].doArpLookup();
+}
+
+/**
+ * Destructor
+ * Closes any socket that is still open and frees the connection table.
+ */
+fView::~fView() {
+    closeConnections();
+
+    delete[] connections;
+}
+
+/**
+ * Close the sockets to all remote nodes
+ * Safe to call more than once: closed entries are marked with -1.
+ */
+void fView::closeConnections() {
+    for (uint32_t i = 0; i < n_nodes; i++) {
+        if (i == node_id) continue;
+        if (connections[i] < 0) continue;
+
+        close(connections[i]);
+        connections[i] = -1;
+    }
+}
+
+static unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
+
+/**
+ * Initialization of the
local queues (no buffers allocated at this point)
+ * Every local queue gets the node GID, a running QPN starting at 0x3, a
+ * random 24-bit PSN and a region assigned round-robin within its node.
+ */
+void fView::initializeLocalQueues() {
+    std::default_random_engine rand_gen(seed);
+    std::uniform_int_distribution<int> distr(0, std::numeric_limits<int>::max());
+
+    uint32_t ip_addr = base_ip_addr + node_id;
+
+    int qpn_idx = 0;
+    for (auto &node_pairs : pairs) {
+        int reg_idx = 0;
+        for (auto &qp : node_pairs) {
+            qp.local.uintToGid(0, ip_addr);
+            qp.local.uintToGid(8, ip_addr);
+            qp.local.uintToGid(16, ip_addr);
+            qp.local.uintToGid(24, ip_addr);
+            qp.local.qpn = 0x3 + qpn_idx++;
+            qp.local.psn = distr(rand_gen) & 0xFFFFFF;
+            qp.local.region = reg_idx++ % n_regions;
+            qp.local.rkey = 0;
+            qp.local.vaddr = 0; //TODO remove
+            qp.local.size = 0;
+        }
+    }
+}
+
+/**
+ * Exchange initial qpairs (server side)
+ * Accepts one connection per remote node, reads its node id followed by its
+ * local queues, then sends back the matching local queues.
+ * @return: 0 on success, 1 on any failure
+ */
+int fView::masterExchangeQueues() {
+    char recv_buf[100];
+    int32_t recv_node_id;
+    ssize_t n;
+    int sockfd = -1, connfd;
+    struct sockaddr_in server;
+    memset(recv_buf, 0, sizeof(recv_buf));
+    memset(&server, 0, sizeof(server));
+
+    std::cout << "Server exchange started ..." << std::endl;
+
+    sockfd = ::socket(AF_INET, SOCK_STREAM, 0);
+    if (sockfd == -1) {
+        std::cerr << "Could not create socket" << std::endl;
+        return 1;
+    }
+
+    server.sin_family = AF_INET;
+    server.sin_addr.s_addr = INADDR_ANY;
+    server.sin_port = htons( port);
+
+    if (::bind(sockfd, (struct sockaddr*)&server, sizeof(server)) < 0) {
+        std::cerr << "Could not bind socket" << std::endl;
+        ::close(sockfd);
+        return 1;
+    }
+
+    if (::listen(sockfd, n_nodes) < 0) {
+        std::cerr << "Could not listen to port " << port << std::endl;
+        ::close(sockfd);
+        return 1;
+    }
+
+    // Messages are received into recv_buf and printed as C strings
+    const size_t msg_len = fQ::getLength();
+    if (msg_len >= sizeof(recv_buf)) {
+        std::cerr << "Qpair message does not fit the receive buffer" << std::endl;
+        ::close(sockfd);
+        return 1;
+    }
+
+    // Receive queues
+    for (uint32_t i = 1; i < n_nodes; i++) {
+        // Accept the connection for each node
+        connfd = ::accept(sockfd, NULL, 0);
+        if (connfd < 0) {
+            std::cerr << "Accept() failed" << std::endl;
+            ::close(sockfd);
+            return 1;
+        }
+
+        // Read node id
+        n = ::read(connfd, &recv_node_id, sizeof(int32_t));
+        if (n != (ssize_t)sizeof(int32_t)) {
+            std::cerr << "Could not read initial node ID message, bytes read: " << n << std::endl;
+            ::close(connfd);
+            ::close(sockfd);
+            return 1;
+        }
+
+        // The id comes from the peer and indexes pairs/connections below
+        if (recv_node_id <= 0 || (uint32_t)recv_node_id >= n_nodes) {
+            std::cerr << "Received invalid node ID: " << recv_node_id << std::endl;
+            ::close(connfd);
+            ::close(sockfd);
+            return 1;
+        }
+        std::cout << "Qpair exchange nodeid " << recv_node_id << " ... " << std::endl;
+
+        for (size_t j = 0; j < pairs[recv_node_id].size(); j++) {
+            // Read remote qpair
+            n = ::read(connfd, recv_buf, msg_len);
+            if (n != (ssize_t)msg_len) {
+                std::cerr << "Could not read message, bytes read: " << n << std::endl;
+                std::cout << "Received msg: " << recv_buf << std::endl;
+                ::close(connfd);
+                ::close(sockfd);
+                return 1;
+            }
+
+            pairs[recv_node_id][j].remote.decode(recv_buf, msg_len);
+            std::cout << "Qpair nodeid " << recv_node_id << "[" << j << "]" << std::endl;
+            pairs[recv_node_id][j].local.print("Local ");
+            pairs[recv_node_id][j].remote.print("Remote");
+        }
+
+        connections[recv_node_id] = connfd;
+    }
+
+    std::cout << "Received all remote qpairs" << std::endl;
+
+    // Send queues
+    for (uint32_t i = 1; i < n_nodes; i++) {
+        for (size_t j = 0; j < pairs[i].size(); j++) {
+            const std::string msg_string = pairs[i][j].local.encode();
+            const size_t out_len = msg_string.length();
+
+            // Write message
+            if (::write(connections[i], msg_string.c_str(), out_len) != (ssize_t)out_len) {
+                std::cerr << "Could not send local qpair" << std::endl;
+                ::close(connections[i]);
+                connections[i] = -1; // already closed, keep teardown from closing it again
+                ::close(sockfd);
+                return 1;
+            }
+        }
+    }
+
+    std::cout << "Sent all local qpairs" << std::endl;
+
+    ::close(sockfd);
+    return 0;
+}
+
+/**
+ * Exchange initial qpairs (client side)
+ * Connects to the master, sends the local node id and local queues, then
+ * receives the master's queues. The socket is kept in connections[0].
+ * @return: 0 on success, 1 on any failure
+ */
+int fView::clientExchangeQueues() {
+    struct addrinfo *res = NULL, *t;
+    struct addrinfo hints = {};
+    hints.ai_family = AF_INET;
+    hints.ai_socktype = SOCK_STREAM;
+
+    char* service = NULL;
+    char recv_buf[100];
+    ssize_t n = 0;
+    int sockfd = -1;
+    memset(recv_buf, 0, sizeof(recv_buf));
+
+    std::cout << "Client exchange" << std::endl;
+
+    if (asprintf(&service, "%d", port) < 0) {
+        std::cerr << "Service failed" << std::endl;
+        return 1;
+    }
+
+    // getaddrinfo() reports failure with any non-zero value
+    const int gai_ret = getaddrinfo(mstr_ip_addr, service, &hints, &res);
+    free(service);
+    if (gai_ret != 0) {
+        std::cerr << "[ERROR] getaddrinfo";
+        return 1;
+    }
+
+    for (t = res; t; t = t->ai_next) {
+        sockfd = ::socket(t->ai_family, t->ai_socktype, t->ai_protocol);
+        if (sockfd >= 0) {
+            if (!::connect(sockfd, t->ai_addr, t->ai_addrlen)) {
+                break;
+            }
+            ::close(sockfd);
+            sockfd = -1;
+        }
+    }
+
+    // The address list is not needed past this point on any path
+    if (res)
+        freeaddrinfo(res);
+
+    if (sockfd < 0) {
+        std::cerr << "Could not connect to master: " << mstr_ip_addr << ":" << port << std::endl;
+        return 1;
+    }
+
+    // Send local node ID
+    if (::write(sockfd, &node_id, sizeof(int32_t)) != (ssize_t)sizeof(int32_t)) {
+        std::cerr << "Could not send local node id" << std::endl;
+        ::close(sockfd);
+        return 1;
+    }
+
+    // Send local queues
+    for (size_t i = 0; i < pairs[0].size(); i++) {
+        const std::string msg_string = pairs[0][i].local.encode();
+        const size_t out_len = msg_string.length();
+
+        if (::write(sockfd, msg_string.c_str(), out_len) != (ssize_t)out_len) {
+            std::cerr << "Could not send local address" << std::endl;
+            ::close(sockfd);
+            return 1;
+        }
+    }
+
+    std::cout << "Sent all local qpairs" << std::endl;
+
+    // Messages are received into recv_buf and printed as C strings
+    const size_t msg_len = fQ::getLength();
+    if (msg_len >= sizeof(recv_buf)) {
+        std::cerr << "Qpair message does not fit the receive buffer" << std::endl;
+        ::close(sockfd);
+        return 1;
+    }
+
+    // Receive remote queues
+    for (size_t i = 0; i < pairs[0].size(); i++) {
+        n = ::read(sockfd, recv_buf, msg_len);
+        if (n != (ssize_t)msg_len) {
+            std::cout << "n: " << n << ", instread of " << msg_len << std::endl;
+            std::cout << "Received msg: " << recv_buf << std::endl;
+            std::cerr << "Could not read remote address" << std::endl;
+            ::close(sockfd);
+            return 1;
+        }
+
+        pairs[0][i].remote.decode(recv_buf, msg_len);
+        std::cout << "Qpair nodeid " << 0 << "[" << i << "]" << std::endl;
+        pairs[0][i].local.print("Local ");
+        pairs[0][i].remote.print("Remote");
+    }
+
+    std::cout << "Received all remote qpairs" << std::endl;
+
+    //keep connection around
+    connections[0] = sockfd;
+
+    return 0;
+}
+
+/**
+ * Exchange windows with target node
+ * Uses the client side when the target is node 0 and the master side otherwise.
+ * @param: node_id - target node
+ * @param: qpair_id - queue pair on that node
+ */
+int fView::exchangeWindow(int32_t node_id, int32_t qpair_id) {
+    if(node_id == 0)
+        return clientExchangeWindow(node_id, qpair_id);
+
+    return masterExchangeWindow(node_id, qpair_id);
+}
+
+/**
+ * Master exchange window
+ */
+int fView::masterExchangeWindow(int32_t node_id, int32_t qpair_id) {
+    int n;
+    uint64_t vaddr;
+    uint32_t size;
+
+    // Receive
+    // vaddr
+    n =
::read(connections[node_id], &vaddr, sizeof(uint64_t)); + if (n != sizeof(uint64_t)) { + std::cerr << "Could not read window, read bytes " << n << std::endl; + ::close(connections[node_id]); + return 1; + } + // size + n = ::read(connections[node_id], &size, sizeof(uint32_t)); + if (n != sizeof(uint32_t)) { + std::cerr << "Could not read window, read bytes " << n << std::endl; + ::close(connections[node_id]); + return 1; + } + + pairs[node_id][qpair_id].remote.vaddr = vaddr; + pairs[node_id][qpair_id].remote.size = size; + + std::cout << "Qpair nodeid " << node_id << "[" << qpair_id << "]" << std::endl; + pairs[node_id][qpair_id].local.print("Local "); + pairs[node_id][qpair_id].remote.print("Remote"); + + // Send + // vaddr + if ((n = ::write(connections[node_id], &pairs[node_id][qpair_id].local.vaddr, sizeof(uint64_t))) != sizeof(uint64_t)) { + std::cerr << "Could not send" << std::endl; + ::close(connections[node_id]); + return 1; + } + // size + if ((n = ::write(connections[node_id], &pairs[node_id][qpair_id].local.size, sizeof(uint32_t))) != sizeof(uint32_t)) { + std::cerr << "Could not send" << std::endl; + ::close(connections[node_id]); + return 1; + } + + return 0; +} + +/** + * Client exhchange window + */ +int fView::clientExchangeWindow(int32_t node_id, int32_t qpair_id) { + int n; + uint64_t vaddr; + uint32_t size; + + // Send + // vaddr + if ((n = ::write(connections[node_id], &pairs[node_id][qpair_id].local.vaddr, sizeof(uint64_t))) != sizeof(uint64_t)) { + std::cerr << "Could not send" << std::endl; + ::close(connections[node_id]); + return 1; + } + // size + if ((n = ::write(connections[node_id], &pairs[node_id][qpair_id].local.size, sizeof(uint32_t))) != sizeof(uint32_t)) { + std::cerr << "Could not send" << std::endl; + ::close(connections[node_id]); + return 1; + } + + // Receive + // vaddr + n = ::read(connections[node_id], &vaddr, sizeof(uint64_t)); + if (n != sizeof(uint64_t)) { + std::cerr << "Could not read window, read bytes " << n << 
std::endl; + ::close(connections[node_id]); + return 1; + } + // size + n = ::read(connections[node_id], &size, sizeof(uint32_t)); + if (n != sizeof(uint32_t)) { + std::cerr << "Could not read window, read bytes " << n << std::endl; + ::close(connections[node_id]); + return 1; + } + + pairs[node_id][qpair_id].remote.vaddr = vaddr; + pairs[node_id][qpair_id].remote.size = size; + + std::cout << "Qpair nodeid " << node_id << "[" << qpair_id << "]" << std::endl; + pairs[node_id][qpair_id].local.print("Local "); + pairs[node_id][qpair_id].remote.print("Remote"); + + return 0; +} + +/* --------------------------------------------------------------------------------------- +/* -- Public +/* --------------------------------------------------------------------------------------- + +/** + * Allocate a window for the specific qpair + * @param: node_id - target node id + * @param: qpair_id - target qpair id + * @param: n_pages - number of large pages (2MB each) + */ +uint64_t* fView::allocWindow(uint32_t node_id, uint32_t qpair_id, uint64_t n_pages) { + int32_t region = pairs[node_id][qpair_id].local.region; + uint64_t *vaddr = fdev[region].getHostMem(n_pages); + + pairs[node_id][qpair_id].local.vaddr = (uint64_t)vaddr; + pairs[node_id][qpair_id].local.size = n_pages * LARGE_PAGE_SIZE; + + exchangeWindow(node_id, qpair_id); + + return vaddr; +} + +/** + * Free window for the specific qpair + * @param: node_id - target node id + * @param: qpair_id - target qpair id + */ +void fView::freeWindow(uint32_t node_id, uint32_t qpair_id) { + int32_t region = pairs[node_id][qpair_id].local.region; + uint64_t *vaddr = (uint64_t*)pairs[node_id][qpair_id].local.vaddr; + uint64_t n_pages = (uint64_t)(pairs[node_id][qpair_id].local.size / LARGE_PAGE_SIZE); + + fdev[region].freeHostMem(vaddr, n_pages); +} + +/** + * Write RDMA operation + * @param: node_id - target node id + * @param: qpair_id - target qpair id + * @param: src_offs - offset in the source qpair buffer + * @param: dst_offs - 
offset in teh destination qpair buffer + * @param: size - transfer size + */ +void fView::writeRemote(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t size) { + fQPair *l_qp = &pairs[node_id][qpair_id]; + int32_t l_reg = l_qp->local.region; + + if(node_id == this->node_id) { + uint64_t *l_addr = (uint64_t*)(l_qp->local.vaddr + src_offs); + uint64_t *r_addr = (uint64_t*)(l_qp->remote.vaddr + dst_offs); + + memcpy(r_addr, l_addr, size); + } else { + fdev[l_reg].postWrite(l_qp, src_offs, dst_offs, size); + } +} + +/** + * Read RDMA operation + * @param: node_id - target node id + * @param: qpair_id - target qpair id + * @param: src_offs - offset in the source qpair buffer + * @param: dst_offs - offset in teh destination qpair buffer + * @param: size - transfer size + */ +void fView::readRemote(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t size) { + fQPair *l_qp = &pairs[node_id][qpair_id]; + uint32_t l_reg = l_qp->local.region; + + if(node_id == this->node_id) { + uint64_t *l_addr = (uint64_t*)(l_qp->local.vaddr + src_offs); + uint64_t *r_addr = (uint64_t*)(l_qp->remote.vaddr + dst_offs); + + memcpy(r_addr, l_addr, size); + } else { + fdev[l_reg].postRead(l_qp, src_offs, dst_offs, size); + } +} + +/** + * RPC RDMA operation + * @param: node_id - target node id + * @param: qpair_id - target qpair id + * @param: src_offs - offset in the source qpair buffer + * @param: dst_offs - offset in teh destination qpair buffer + * @param: size - transfer size + * @param: params - arbitrary parameters (depends on the implemented operation) + */ +void fView::farviewRemote(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t size, uint64_t params) { + fQPair *l_qp = &pairs[node_id][qpair_id]; + uint32_t l_reg = l_qp->local.region; + + if(node_id == this->node_id) { + uint64_t *l_addr = (uint64_t*)(l_qp->local.vaddr + src_offs); + uint64_t *r_addr = (uint64_t*)(l_qp->remote.vaddr + 
dst_offs); + + memcpy(r_addr, l_addr, size); + } else { + fdev[l_reg].postFarview(l_qp, src_offs, dst_offs, size, params); + } +} + +/** + * Write RDMA polling function + * @param: node_id - target node id + * @param: qpair_id - target qpair id + */ +uint32_t fView::pollRemoteWrite(uint32_t node_id, uint32_t qpair_id) { + fQPair *l_qp = &pairs[node_id][qpair_id]; + int32_t l_reg = l_qp->local.region; + + return fdev[l_reg].checkCompletedWrite(); +} + +/** + * Read RDMA polling function + * @param: node_id - target node id + * @param: qpair_id - target qpair id + */ +uint32_t fView::pollLocalRead(uint32_t node_id, uint32_t qpair_id) { + fQPair *l_qp = &pairs[node_id][qpair_id]; + int32_t l_reg = l_qp->local.region; + + return fdev[l_reg].checkCompletedRead(); +} + +/** + * Sync with remote + * @param: node_id - target node id + */ +int32_t fView::waitOnReplyRemote(uint32_t node_id) { + int n; + uint32_t ack; + + // Receive ACK + n = ::read(connections[node_id], &ack, sizeof(uint32_t)); + if (n != sizeof(uint32_t)) { + std::cerr << "Could not read ACK, read bytes " << n << std::endl; + ::close(connections[node_id]); + return 1; + } + + return 0; +} + +/** + * Wait on close remote + * @param: node_id - target node id + */ +int32_t fView::waitOnCloseRemote(uint32_t node_id) { + int n; + uint32_t ack; + + // Hacky + n = ::read(connections[node_id], &ack, sizeof(uint32_t)); + if (n == 0) { + std::cerr << "Connection closed" << std::endl; + ::close(connections[node_id]); + return 0; + } + + return 1; +} + + +/** + * Sync with remote + * @param: node_id - target node id + * @param: ack - acknowledge message + */ +int32_t fView::replyRemote(uint32_t node_id, uint32_t ack) { + int n; + + if ((n = ::write(connections[node_id], &ack, sizeof(uint32_t))) != sizeof(uint32_t)) { + std::cerr << "Could not send ACK" << std::endl; + ::close(connections[node_id]); + return 1; + } + + return 0; +} + +/** + * Sync with remote + * @param: node_id - target node id + */ +int32_t 
fView::syncRemote(uint32_t node_id) { + if(this->node_id == 0) { + replyRemote(node_id, 0); + waitOnReplyRemote(node_id); + } else { + waitOnReplyRemote(node_id); + replyRemote(node_id, 0); + } + + return 0; +} + +// Base control +void fView::farviewRemoteBase(uint32_t node_id, uint32_t qpair_id, uint64_t params_0, uint64_t params_1, uint64_t params_2) { + fQPair *l_qp = &pairs[node_id][qpair_id]; + uint32_t l_reg = l_qp->local.region; + + fdev[l_reg].postFarviewBase(l_qp, params_0, params_1, params_2); +} + +// Stride +void fView::farviewStride(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t dwidth, uint32_t stride, uint32_t num_elem) { + uint32_t n_bytes = (1 << dwidth) * num_elem; + uint64_t tmp = ((uint64_t)n_bytes << 32) | stride; + farviewRemote(node_id, qpair_id, src_offs, dst_offs, dwidth, tmp); +} + +// Load the configuration in 2 transactions +void fView::farviewRegexConfigLoad(uint32_t node_id, uint32_t qpair_id, unsigned char* config_bytes) { + uint64_t* params_0 = (uint64_t*)config_bytes; + uint64_t* params_1 = (uint64_t*)config_bytes + 1; + uint64_t* params_2 = (uint64_t*)config_bytes+ 2; + + farviewRemoteBase(node_id, qpair_id, *params_0, *params_1, *params_2); + + params_0 += 3; + params_1 += 3; + params_2 += 3; + + farviewRemoteBase(node_id, qpair_id, *params_0, *params_1, *params_2); +} + +// Regex read +void fView::farviewRegexRead(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t size) { + farviewRemote(node_id, qpair_id, src_offs, dst_offs, size, ~0); +} + + +} diff --git a/sw/rdma/src/main.cpp b/sw/rdma/src/main.cpp new file mode 100644 index 00000000..4c9ea979 --- /dev/null +++ b/sw/rdma/src/main.cpp @@ -0,0 +1,192 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "fDev.hpp" +#include "fView.hpp" +#include "fBench.hpp" + +using namespace std; +using namespace std::chrono; 
+using namespace fpga; +using namespace comm; + +/* Runtime */ +#define N_NODES 2 +#define N_PAGES 2 +#define N_ID_MASTER 0 +#define N_REGIONS 3 +#define N_REPS 1 +#define TR_SIZE 64 + +static const unsigned char key[] = { + 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f +}; + +int main(int argc, char *argv[]) +{ + // --------------------------------------------------------------- + // -- Initialization + // --------------------------------------------------------------- + const char* masterAddr = "10.1.212.121"; + + // Read arguments + boost::program_options::options_description programDescription("Options:"); + programDescription.add_options()("nnodes,n", boost::program_options::value(), "Number of system nodes") + ("npages,p", boost::program_options::value(), "Buffer size in 2MB pages") + ("nodeid,i", boost::program_options::value(), "Node ID") + ("nregions,g", boost::program_options::value(), "Number of FPGA regions") + ("reps,r", boost::program_options::value(), "Number of repetitions") + ("size,s", boost::program_options::value(), "Transfer size"); + + boost::program_options::variables_map commandLineArgs; + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, programDescription), commandLineArgs); + boost::program_options::notify(commandLineArgs); + + // Stat + uint32_t n_nodes = N_NODES; + uint64_t n_pages = N_PAGES; + uint32_t node_id = N_ID_MASTER; + uint32_t n_regions = N_REGIONS; + // Runs + uint32_t n_reps = N_REPS; + uint32_t size = TR_SIZE; + + if(commandLineArgs.count("nnodes") > 0) n_nodes = commandLineArgs["nnodes"].as(); + if(commandLineArgs.count("npages") > 0) n_pages = commandLineArgs["npages"].as(); + if(commandLineArgs.count("nodeid") > 0) node_id = commandLineArgs["nodeid"].as(); + if(commandLineArgs.count("nregions") > 0) n_regions = 
commandLineArgs["nregions"].as(); + if(commandLineArgs.count("reps") > 0) n_reps = commandLineArgs["reps"].as(); + if(commandLineArgs.count("size") > 0) size = commandLineArgs["size"].as(); + + // FPGA handles + fDev *fdev = new fDev[n_regions]; + + // Buffers + uint64_t *hMem[N_REGIONS]; + + uint32_t qpairs[n_nodes]; + for(int i = 0; i < n_nodes; i++) + qpairs[i] = n_regions; + + // 2 nodes example + uint32_t l_id = node_id; + uint32_t r_id = (node_id + 1) % n_nodes; + + // Obtain regions + for (int i = 0; i < n_regions; i++) { + if (!fdev[i].acquireRegion(i)) return EXIT_FAILURE; + fdev[i].clearCompleted(); + } + + // Farview + fView *fview = new fView(fdev, l_id, n_nodes, qpairs, n_regions, masterAddr); + + // Allocate buffers + for(int i = 0; i < n_regions; i++) + hMem[i] = fview->allocWindow(r_id, i, n_pages); + + AES_set_encrypt_key(key, 128, &enc_key); + AES_set_decrypt_key(key, 128, &dec_key); + + // Sync up + fview->syncRemote(r_id); + + // Latency measurements ---------------------------------------------------------------------------------- + if(!l_id) { + // Sender + + // --------------------------------------------------------------- + // -- Runs + // --------------------------------------------------------------- + Bench bench(1); + uint32_t n_runs = 0; + + auto benchmark_thr = [&fview, &fdev, &hMem, &n_runs, r_id, n_reps, n_regions, size]() { + bool k = false; + n_runs++; + + for(int i = 0; i < n_reps; i++) { + for(int j = 0; j < n_regions; j++) { + fview->readRemote(r_id, j, 0, 0, size); + } + } + + while(!k) { + k = true; + for(int j = 0; j < n_regions; j++) { + if(fview->pollRemoteWrite(r_id, j) != n_reps * n_runs) k = false; + } + } + }; + bench.runtime(benchmark_thr); + std::cout << "Throughput: " << ((n_regions * 1000 * size)) / (bench.getAvg() / n_reps) << " MB/s" << std::endl; + + for(int i = 0; i < n_regions; i++) + fdev[i].clearCompleted(); + n_runs = 0; + + auto benchmark_lat = [&fview, &fdev, &hMem, &n_runs, r_id, n_reps, n_regions, 
size]() { + n_runs++; + + for(int i = 0; i < n_reps; i++) { + for(int j = 0; j < n_regions; j++) { + fview->readRemote(r_id, j, 0, 0, size); + while(fview->pollRemoteWrite(r_id, j) != (i+1) + ((n_runs-1) * n_reps)) ; + } + } + }; + bench.runtime(benchmark_lat); + std::cout << "Latency: " << bench.getAvg() / n_reps << " ns" << std::endl; + + // Done + fview->replyRemote(r_id, 1); + fview->waitOnCloseRemote(r_id); + } else { + // Receiver + uint32_t n_runs = 0; + + for(int i = 0; i < n_reps; i++) { + n_runs++; + + while(fview->pollRemoteWrite(r_id, j) != (i+1) + ((n_runs-1) * n_reps)) ; + + AES_ecb_encrypt(enc_out, dec_out, &dec_key, AES_DECRYPT); + } + + // Done + fview->waitOnReplyRemote(r_id); + fview->closeConnections(); + } + + // Free buffers + for(int i = 0; i < n_regions; i++) + fview->freeWindow(r_id, i); + + // Print status + for (int i = 0; i < n_regions; i++) { + fdev[i].printDebugXDMA(); + } + + // Release regions + for (int i = 0; i < n_regions; i++) { + fdev[i].releaseRegion(); + } + + return EXIT_SUCCESS; +} diff --git a/sw/scheduling/CMakeLists.txt b/sw/scheduling/CMakeLists.txt new file mode 100644 index 00000000..942efe85 --- /dev/null +++ b/sw/scheduling/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.0) +project(scheduling) + +# Includes +include_directories(include) + +# Sources +file(GLOB SOURCES src/*.cpp) + +# Exec +set(EXEC main) + +# Compilation +set (CMAKE_CXX_STANDARD 14) +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -mavx -march=native -O3") + +# Boost lib +#find_package(Boost COMPONENTS program_options REQUIRED) + +# Targets +add_executable(${EXEC} ${SOURCES}) +#target_link_libraries(${EXEC} ${Boost_LIBRARIES}) \ No newline at end of file diff --git a/sw/scheduling/include/classical_kmeans.h b/sw/scheduling/include/classical_kmeans.h new file mode 100644 index 00000000..4f46c05e --- /dev/null +++ b/sw/scheduling/include/classical_kmeans.h @@ -0,0 +1,24 @@ + + +// void initial_centroids(int numClusters, int 
numCoords, int numObjs, float* cluster, float* objects); +// void compute_low_precision_kmeans(int precision, float* float_objects, float* clusters_ref, int* member_ref, int numObjs, int numClusters, int numCoords, int iter, bool user_specify_precision, float threshold); +// void low_precision_kmeans(int numObjs, int numClusters, int numCoords, int* member_ref, float* newClusterSize, float* newClusters, float* objects, float* clusters_ref, int max_loop, int precision, float* delta, float threshold, int* total_loop); +// void compute_reference_kmeans(float* objects, float* clusters_ref, int* member_ref, int numObjs, int numClusters, int numCoords, int iter, float threshold); +// float get_change_center_thres (float* features, int nfeatures, int npoints); + +#ifndef CLASSIC_KMEANS_H +#define CLASSIC_KMEANS_H + +void compute_kmeans(uint32_t* objects, uint32_t* clusters_ref, int numObjs, int numClusters, int numCoords, int max_iter, bool user_specify_precision); +void run_kmeans(int numObjs, int numClusters, int numCoords, int* member_ref, uint32_t* newClusterSize, uint64_t* newClusters, uint32_t* objects, uint32_t* clusters_ref, int max_loop, int* total_loop); +void printCentroids(uint32_t*centroid, uint32_t numClusters, uint32_t numCoords, uint32_t number_of_iteration); +void normalization_scale(int nfeatures, int npoints, float* features, uint32_t* scaled_unsigned_features ,float* dr_a_min, float* dr_a_max); +void initial_centroids(int numClusters, int numCoords, int numObjs, uint32_t* cluster, uint32_t* objects); +void convert_precision(int precision, int nfeatures, int npoints, uint32_t* features, uint32_t* low_precision_feature); + +float get_sse(int numObjs, int numClusters, int numCoords, float * objects, float * clusters_ref); +void descale_normalization (int nfeatures, int npoints, uint32_t* low_precision_feature, float* denomalized_features, float* dr_a_min, float* dr_a_max); +void normalization(int nfeatures, int npoints, float* features, float* 
normalized_features); +void scale(int nfeatures, int npoints, float* features, uint32_t* scaled_unsigned_features,float* dr_a_min, float* dr_a_max); + +#endif \ No newline at end of file diff --git a/sw/scheduling/include/fApp.hpp b/sw/scheduling/include/fApp.hpp new file mode 100644 index 00000000..b4fc2c8f --- /dev/null +++ b/sw/scheduling/include/fApp.hpp @@ -0,0 +1,28 @@ +#ifndef __FAPP_HPP__ +#define __FAPP_HPP__ + +#include +#include + +#include "fDev.hpp" +#include "fJob.hpp" +#include "fDefs.hpp" + +using namespace std; + +class fApp : public fJob { +private: + + +public: + fApp(uint32_t id, uint32_t priority) + : fJob(id, priority, OPER_APP) {} + + void run() { + cout << "User function" << endl; + } + +}; + + +#endif \ No newline at end of file diff --git a/sw/scheduling/include/fArbiter.hpp b/sw/scheduling/include/fArbiter.hpp new file mode 100644 index 00000000..b3e46f87 --- /dev/null +++ b/sw/scheduling/include/fArbiter.hpp @@ -0,0 +1,80 @@ +#ifndef __FARBITER_HPP__ +#define __FARBITER_HPP__ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fDefs.hpp" +#include "fDev.hpp" +#include "fJob.hpp" +#include "fScheduler.hpp" + +using namespace std; + +class fArbiter { +private: + bool run; + mutex mtx; + condition_variable cv; + + thread arbiterThread; + + vector schedulers; + queue request_queue; + + void processRequests(); + +public: + fArbiter() {} + + ~fArbiter() { + run = false; +#ifdef VERBOSE_DEBUG + cout << "Arbiter: destructor called" << endl; +#endif + + arbiterThread.join(); + + for (auto& it : schedulers) { + delete it; + } + } + + // Add a created scheduler + void addScheduler(fScheduler *fscheduler) { + schedulers.push_back(fscheduler); + } + + // Start arbitration + void start() { + unique_lock lck(mtx); +#ifdef VERBOSE_DEBUG + cout << "Arbiter: initial lock" << endl; +#endif + + arbiterThread = thread(&fArbiter::processRequests, this); +#ifdef VERBOSE_DEBUG + cout << "Arbiter: thread started" << 
endl; +#endif + + cv.wait(lck); + } + + // Getters + bool isRunning() { + return run; + } + + void requestJob(fJob* fjob) { + lock_guard lck2(mtx); + request_queue.push(fjob); + } +}; + +#endif \ No newline at end of file diff --git a/sw/scheduling/include/fDefs.hpp b/sw/scheduling/include/fDefs.hpp new file mode 100644 index 00000000..85c980d1 --- /dev/null +++ b/sw/scheduling/include/fDefs.hpp @@ -0,0 +1,28 @@ +#ifndef __FDEFS_HPP__ +#define __FDEFS_HPP__ + +#define VERBOSE_DEBUG +//#define REQUEST_RANDOM +//#define REQUEST_SCHEDULING + +#define N_JOBS 50 + +#define OPER_0 0 +#define OPER_1 1 +#define OPER_2 2 +#define OPER_3 3 + +#define OPER_HLL 4 +#define OPER_KMEANS 5 + +#define OPER_APP 10 + +#include "fOp0.hpp" +#include "fOp1.hpp" +#include "fOp2.hpp" +#include "fOp3.hpp" + +#include "fHll.hpp" +//#include "fKmeans.hpp" + +#endif \ No newline at end of file diff --git a/sw/scheduling/include/fDev.hpp b/sw/scheduling/include/fDev.hpp new file mode 100644 index 00000000..9bccad4f --- /dev/null +++ b/sw/scheduling/include/fDev.hpp @@ -0,0 +1,279 @@ +#ifndef __FDEV_HPP__ +#define __FDEV_HPP__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define N_BSTREAM_PAGES 20 +#define PR_WIDTH 2 * 1024 * 1024 + +using namespace std; + +class fBitStream; + +/** + * Fpga device region + */ +class fDev { + + /* Fpga device */ + int32_t fd = 0; + bool regionAcquired = false; + std::unordered_map bitstreams; + + /* Mmapped regions */ + uint64_t *cnfg_reg = 0; + uint64_t *ctrl_reg = 0; + __m256i *data_reg = 0; + + /* Mapped large pages hash table */ + std::unordered_map mapped_large; + + /* Utility */ + bool mmapFpga(); + bool munmapFpga(); + +protected: + + +public: + + fDev() {} + ~fDev() {} + + /** + * Obtain and release FPGA regions + */ + + // Acquire an FPGA region with target ID + bool acquireRegion(uint32_t rNum); + // Release an acquired 
FPGA region + bool releaseRegion(); + // Check whether region has been acquired + bool isRegionAcquired(); + + /** + * Control bus + */ + + // Control status bus, AXI Lite + inline void setCSR(uint64_t val, uint32_t offs) { ctrl_reg[offs] = val; } + inline uint64_t getCSR(uint32_t offs) { return ctrl_reg[offs]; } + + /** + * Data bus (SIMD) + * TODO: Vector construction + */ + + // Data AVX bus + inline void setData(__m256i val, uint32_t offs) { data_reg[offs] = val; } + inline __m256i getData(uint32_t offs) { return data_reg[offs]; } + + /** + * Explicit buffer management + * @param n_pages - number of 2MB pages to be allocated + */ + + // Obtain host memory - pages 2M + uint64_t* getHostMem(uint64_t &n_pages); + // Obtain card memory - pages 2M + uint64_t* getCardMem(uint64_t &n_pages, int channel); // No striding, explicit channel management + // Obtain card memory - pages 4K - striding (hw needs to be compiled with stride enabled) + uint64_t* getCardMem(uint64_t &n_pages); // Striding, TODO: Extend striding support for 2MB pages (now at 4K) + // Free host memory + void freeHostMem(uint64_t* vaddr, uint64_t &n_pages); + // Free card memory + void freeCardMem(uint64_t* vaddr, uint64_t &n_pages, int channel); + // Free card memory (striding) + void freeCardMem(uint64_t* vaddr, uint64_t &n_pages); + + // FPGA user space range mapping + void userMap(uint64_t *vaddr, uint64_t len); + // FPGA user space range unmapping (done auto on release) + void userUnmap(uint64_t *vaddr, uint64_t len); + + + /** + * Bulk transfers + * @param vaddr - data pointer + * @param len - transfer length + * @param poll - blocking vs non-blocking + */ + + // Reads data from the pointer into the FPGA region (can be both host and card) + void readFrom(uint64_t *vaddr, uint32_t len, bool poll = true); + // Writes data from the FPGA region to the pointer + void writeTo(uint64_t *vaddr, uint32_t len, bool poll = true); + // Transfer data (read + write) + void transferData(uint64_t *vaddr, 
uint32_t len, bool poll = true); + void transferData(uint64_t *vaddr_src, uint64_t* vaddr_dst, uint32_t len, bool poll = true); + void transferData(uint64_t *vaddr, uint32_t len_src, uint32_t len_dst, bool poll = true); + void transferData(uint64_t *vaddr_src, uint64_t* vaddr_dst, uint32_t len_src, uint32_t len_dst, bool poll = true); + + /** + * Check for completion + */ + + // Check whether read engine is busy + bool checkBusyRead(); + // Check whether write engine is busy + bool checkBusyWrite(); + // Returns the number of completed reads + uint32_t checkCompletedRead(); + // Returns the number of completed writes + uint32_t checkCompletedWrite(); + // Clear all status + void clearCompleted(bool rd, bool wr); + + /** + * Check whether engines are ready to accept transfers + */ + + // Check whether read request queue is full + bool checkReadyRead(); + // Check whether write request queue is full + bool checkReadyWrite(); + + /** + * Partial reconfiguration + */ + + // Only function needed for PR, bitstream needs to be in binary format (.bin) + uint32_t reconfigure(uint32_t op_id); + // Add a bitstream + void addBitstream(std::string name, uint32_t op_id); + // Remove a bitstream + void removeBitstream(uint32_t op_id); + + /** + * Performance tests + */ + void setTimerStop(uint64_t tmr_stop); + uint64_t getTimerStop(); + uint64_t getReadTimer(); + uint64_t getWriteTimer(); + double getThroughputRd(uint32_t len); + double getThroughputWr(uint32_t len); + uint64_t getTimeRdNS(); + uint64_t getTimeWrNS(); +}; + +/** + * Bitstream object + */ +class fBitStream { +private: + string name; + uint32_t op_id; + + uint64_t fsz; + uint64_t fsz_m; + uint64_t fsz_r; + uint64_t n_pages = N_BSTREAM_PAGES; + uint64_t pr_batch = PR_WIDTH; + + bool opened; + + fDev* fdev; + + uint32_t* src; + +public: + fBitStream(string name, uint32_t op_id, fDev* fdev) { + this->name = name; + this->op_id = op_id; + this->fdev = fdev; + opened = false; + } + + ~fBitStream() { + closeBitStream(); 
+ } + + uint64_t getFsz() { + return fsz; + } + + uint64_t getFszM() { + return fsz_m; + } + + uint64_t getFszR() { + return fsz_r; + } + + uint64_t getBatchSize() { + return pr_batch; + } + + uint32_t* getSrc() { + return src; + } + + bool isOpened() { + return opened; + } + + uint8_t readByte(ifstream& fb) { + char temp; + fb.read(&temp, 1); + return (uint8_t)temp; + } + + + bool openBitStream() { + ifstream f_bit(name, ios::ate | ios::binary); + if(!f_bit) { + cout << "Bitstream could not be opened" << endl; + return false; + } + + fsz = f_bit.tellg(); + f_bit.seekg(0); + + fsz_m = fsz / pr_batch; + fsz_r = fsz % pr_batch; + + cout << "Full: " << fsz_m << ", partial: " << fsz_r << endl; + + src = (uint32_t*) fdev->getHostMem(n_pages); + + for(uint i = 0; i < fsz/4; i++) { + src[i] = 0; + src[i] |= readByte(f_bit) << 24; + src[i] |= readByte(f_bit) << 16; + src[i] |= readByte(f_bit) << 8; + src[i] |= readByte(f_bit); + } + + cout << "Bitstream loaded, OP_ID: " << op_id << endl; + + f_bit.close(); + + return true; + } + + void closeBitStream() { + fdev->freeHostMem((uint64_t*)src, n_pages); + + cout << "Bitstream removed, OP_ID: " << op_id << endl; + } +}; + +#endif \ No newline at end of file diff --git a/sw/scheduling/include/fHll.hpp b/sw/scheduling/include/fHll.hpp new file mode 100644 index 00000000..cffd5e7f --- /dev/null +++ b/sw/scheduling/include/fHll.hpp @@ -0,0 +1,66 @@ +#ifndef __FHLL_HPP__ +#define __FHLL_HPP__ + +#include +#include +#include +#include +#include + +#include "fDev.hpp" +#include "fJob.hpp" +#include "fDefs.hpp" + +using namespace std; + +static const struct timespec SLEEP_NS {.tv_sec = 0, .tv_nsec = 1000}; + +/** + * Hyperloglog + */ +class fHll : public fJob { + +public: + fHll(uint32_t id, uint32_t priority) : fJob(id, priority, OPER_HLL) { } + + void run() { + uint64_t n_pages = 2; + uint32_t len = 4 * 1024; + + // Gen + uint32_t* mem = (uint32_t*)fdev->getHostMem(n_pages); + for(uint32_t i = 0; i < len/4; i++) { + //mem[i] = 
// fJob only stores a pointer to the device, so a declaration suffices here
class fDev;

enum jobState {idle, running, done};

struct jobCmpr;

/**
 * FPGA job
 *
 * Abstract unit of work identified by an id, a priority and the operator
 * it needs. A job runs only after an fDev has been attached; start()
 * moves it through running to done around the call to run().
 */
class fJob {
private:
    uint32_t id;
    uint32_t op_id;
    uint32_t priority;

    jobState state;

protected:
    fDev *fdev;

public:
    fJob(uint32_t id, uint32_t priority, uint32_t op_id) {
        this->id = id;
        this->priority = priority;
        this->op_id = op_id;
        state = idle;
        fdev = 0;
    }

    // Jobs are handled through fJob*; destroying a derived job through
    // the base pointer requires a virtual destructor.
    virtual ~fJob() = default;

    friend struct jobCmpr;

    // Getters
    uint32_t getId() const {
        return id;
    }

    uint32_t getOperator() const {
        return op_id;
    }

    uint32_t getPriority() const {
        return priority;
    }

    jobState getState() const {
        return state;
    }

    // Check whether the job has been completed
    bool isDone() const {
        return state == done;
    }

    // Attach a job to the FPGA device
    void attachJob(fDev* fdev) {
        this->fdev = fdev;
    }

    // Start the job; does nothing while no device is attached
    void start() {
        if(fdev) {
            state = running;
            run();
            state = done;
        }
    }

    // Run: the actual work, supplied by the derived job
    virtual void run() = 0;
};

/**
 * Ordering for job containers: a job compares "less" when its priority
 * is lower, or when priorities match and its operator id is higher.
 */
struct jobCmpr {
    bool operator()(const fJob* fjob1, const fJob* fjob2) const {
        if(fjob1->priority != fjob2->priority)
            return fjob1->priority < fjob2->priority;
        return fjob1->op_id > fjob2->op_id;
    }
};
malloc(data_dimension*data_set_size*sizeof(float))); + memset(addr_data, 0 , data_dimension*data_set_size*sizeof(float)); + //uint32_t* addr_data_unsigned = reinterpret_cast( malloc(data_dimension*data_set_size*sizeof(uint32_t))); + uint64_t n_pages = 20; + uint32_t* addr_data_unsigned = (uint32_t*)fdev->getHostMem(n_pages); + uint32_t* result_center = (uint32_t*)fdev->getHostMem(n_pages); + + + uint32_t* addr_center = NULL; + int status =posix_memalign((void**)&addr_center, 64, num_cl_centroid*16*sizeof(uint32_t) ); + memset(addr_center, 0 , num_cl_centroid*16*sizeof(uint32_t)); + + uint32_t* sw_center = NULL; + status=posix_memalign((void**)&sw_center, 64, data_dimension*number_of_clusters*sizeof(uint32_t) ); + memset(sw_center, 0 , data_dimension*number_of_clusters*sizeof(uint32_t)); + + uint32_t* hw_center = NULL; + status=posix_memalign((void**)&hw_center, 64, data_dimension*number_of_clusters*sizeof(uint32_t) ); + memset(hw_center, 0 , data_dimension*number_of_clusters*sizeof(uint32_t)); + + //uint32_t* result_center = NULL; + //status=posix_memalign((void**)&result_center, 64, sizeof(uint32_t)*(number_of_iterations* num_cl_centroid*16) ); + + float* dr_a_min = (float *)malloc(data_dimension*sizeof(float)); //to store the minimum value of features..... + float* dr_a_max = (float *)malloc(data_dimension*sizeof(float)); //to store the miaximum value of features..... 
+ + float* nomalized_center = NULL; + status=posix_memalign((void**)&nomalized_center, 64, data_dimension*number_of_clusters*sizeof(float) ); + + float* data_normalized = reinterpret_cast( malloc(data_dimension*data_set_size*sizeof(float))); + + // Generate data + srand(time(NULL)); + data_gen(addr_data,data_set_size, data_dimension); + + normalization(data_dimension, data_set_size, addr_data, data_normalized); + normalization_scale(data_dimension, data_set_size, addr_data, addr_data_unsigned, dr_a_min, dr_a_max); + + + initial_centroids(number_of_clusters, data_dimension, data_set_size, addr_center, addr_data_unsigned); + printCentroids(addr_center, number_of_clusters, data_dimension, 1); + + memcpy(hw_center, addr_center, data_dimension*number_of_clusters*sizeof(uint32_t)); + memcpy(sw_center, addr_center, data_dimension*number_of_clusters*sizeof(uint32_t)); + + // Run SW + std::cout << "** SW ***********************************************" << std::endl; + compute_kmeans(addr_data_unsigned, sw_center, data_set_size, number_of_clusters, data_dimension, number_of_iterations, true); + descale_normalization (data_dimension, number_of_clusters, sw_center, nomalized_center, dr_a_min, dr_a_max); + for (int n = 0; n < number_of_clusters; ++n) + { + for (int m = 0; m < data_dimension; ++m) + { + printf("%f ", nomalized_center[n*data_dimension+m]); + } + printf("\n"); + } + float sse = get_sse(data_set_size, number_of_clusters, data_dimension, data_normalized, nomalized_center); + printf("final sse:%f\n", sse); + + // Run HW + std::cout << "** HW ***********************************************" << std::endl; + // Load params + fdev->setCSR(data_set_size, 2); + std::cout << "Data set size: " << fdev->getCSR(2) << std::endl; + fdev->setCSR(number_of_clusters, 3); + std::cout << "Number of clusters: " << fdev->getCSR(3) << std::endl; + fdev->setCSR(data_dimension, 4); + std::cout << "Data dimension: " << fdev->getCSR(4) << std::endl; + fdev->setCSR(0x1, 0); + + for(int i = 
0; i < number_of_iterations; i++) { + auto start_time = std::chrono::high_resolution_clock::now(); + //std::cout << "CENTROIDS: " << sizeof(uint32_t)*16*num_cl_centroid << std::endl; + fdev->setCSR(0x0, 5); + //std::cout << fdev->getCSR(5) << std::endl; + fdev->readFrom((uint64_t*)hw_center, sizeof(uint32_t)*16*num_cl_centroid); + //std::cout << "Centroids read" << std::endl; + //std::cout << "DATA SRC: " << sizeof(uint32_t)*16*num_cl_tuple << std::endl; + //std::cout << "DATA DST: " << sizeof(uint32_t)*(number_of_iterations* num_cl_centroid*16) << std::endl; + fdev->setCSR(0x1, 5); + //std::cout << fdev->getCSR(5) << std::endl; + fdev->transferData((uint64_t*)addr_data_unsigned, (uint64_t*)result_center, + sizeof(uint32_t)*16*num_cl_tuple, sizeof(uint32_t)*(number_of_iterations* num_cl_centroid*16), true); + auto end_time = std::chrono::high_resolution_clock::now(); + //std::cout << "Completed iteration " << i << std::endl; + + double durationUs = std::chrono::duration_cast(end_time-start_time).count(); + std::cout << "duration[us]**:" << durationUs << std::endl; + double dataSizeGB = (double)((double)num_cl_tuple*(double)number_of_iterations*16.0*sizeof(uint32_t))/1000.0/1000.0/1000.0; + double thruput = dataSizeGB/(durationUs/1000.0/1000.0); + std::cout<<"Datasize[GB]:"<mResults+16*num_cl_centroid*j; + descale_normalization (data_dimension, number_of_cluster, center_result, nomalized_center, dr_a_min, dr_a_max); + printf("normalized center:\n"); + for (int n = 0; n < number_of_cluster; ++n) + { + for (int m = 0; m < data_dimension; ++m) + { + printf("%f ", nomalized_center[n*data_dimension+m]); + } + printf("\n"); + } + float loss = get_sse(data_set_size, number_of_cluster, data_dimension, data_normalized, nomalized_center); + printf("iteration %d: sse:%f\n", j, loss); + } + */ + + fdev->freeHostMem((uint64_t*)addr_data_unsigned, n_pages); + fdev->freeHostMem((uint64_t*)result_center, n_pages); + + // Free memory + free(addr_center); + free(addr_data); + 
//free(addr_data_unsigned); + free(sw_center); + free(hw_center); + //free(result_center); + + + } +}; + + +#endif \ No newline at end of file diff --git a/sw/scheduling/include/fOp0.hpp b/sw/scheduling/include/fOp0.hpp new file mode 100644 index 00000000..898b092b --- /dev/null +++ b/sw/scheduling/include/fOp0.hpp @@ -0,0 +1,73 @@ +#ifndef __FOP0_HPP__ +#define __FOP0_HPP__ + +#include +#include +#include + +#include "fDev.hpp" +#include "fJob.hpp" +#include "fDefs.hpp" + +using namespace std; + +/** + * FPGA AND bitwise job + */ +class fOp0 : public fJob { +private: + uint64_t* src; + uint64_t* dst; + uint32_t len; + +public: + fOp0(uint64_t* src, uint64_t* dst, uint32_t len, uint32_t id, uint32_t priority) + : fJob(id, priority, OPER_0) { + this->src = src; + this->dst = dst; + this->len = len; + } + + fOp0(uint64_t* mem, uint32_t len, uint32_t id, uint32_t priority) + : fJob(id, priority, OPER_0) { + this->src = mem; + this->dst = mem; + this->len = len; + } + + void run() { + fillData(src, len); + fdev->transferData(src, dst, len, len/8, true); + //checkData(dst, len/8); + //printData(dst, len/8); + } + + void fillData(uint64_t *mem, uint32_t len) { + for(uint32_t i = 0; i < len/8; i++) { + if(i%2) { + mem[i] = 0x55555555aaaaaaaa; + } + else { + mem[i] = 0x5555555555555555; + } + } + } + + void checkData(uint64_t *mem, uint32_t len) { + bool k = false; + for(uint32_t i = 0; i < len/8; i++) { + if(mem[i] != 0x5555555500000000) + k = true; + } + if(k) cout << "Error AND" << endl; + } + + void printData(uint64_t *mem, uint32_t len) { + for(uint32_t i = 0; i < len/8; i++) + cout << hex << mem[i] << endl; + } +}; + + + +#endif \ No newline at end of file diff --git a/sw/scheduling/include/fOp1.hpp b/sw/scheduling/include/fOp1.hpp new file mode 100644 index 00000000..cdabd7bb --- /dev/null +++ b/sw/scheduling/include/fOp1.hpp @@ -0,0 +1,73 @@ +#ifndef __FOP1_HPP__ +#define __FOP1_HPP__ + +#include +#include +#include + +#include "fDev.hpp" +#include "fJob.hpp" 
+#include "fDefs.hpp" + +using namespace std; + +/** + * FPGA OR bitwise job + */ +class fOp1 : public fJob { +private: + uint64_t* src; + uint64_t* dst; + uint32_t len; + +public: + fOp1(uint64_t* src, uint64_t* dst, uint32_t len, uint32_t id, uint32_t priority) + : fJob(id, priority, OPER_1) { + this->src = src; + this->dst = dst; + this->len = len; + } + + fOp1(uint64_t* mem, uint32_t len, uint32_t id, uint32_t priority) + : fJob(id, priority, OPER_1) { + this->src = mem; + this->dst = mem; + this->len = len; + } + + void run() { + fillData(src, len); + fdev->transferData(src, dst, len, len/8, true); + //checkData(dst, len/8); + //printData(dst, len/8); + } + + void fillData(uint64_t *mem, uint32_t len) { + for(uint32_t i = 0; i < len/8; i++) { + if(i%2) { + mem[i] = 0x55555555aaaaaaaa; + } + else { + mem[i] = 0x5555555555555555; + } + } + } + + void checkData(uint64_t *mem, uint32_t len) { + bool k = false; + for(uint32_t i = 0; i < len/8; i++) { + if(mem[i] != 0x55555555ffffffff) + k = true; + } + if(k) cout << "Error OR" << endl; + } + + void printData(uint64_t *mem, uint32_t len) { + for(uint32_t i = 0; i < len/8; i++) + cout << hex << mem[i] << endl; + } +}; + + + +#endif \ No newline at end of file diff --git a/sw/scheduling/include/fOp2.hpp b/sw/scheduling/include/fOp2.hpp new file mode 100644 index 00000000..d2c916a6 --- /dev/null +++ b/sw/scheduling/include/fOp2.hpp @@ -0,0 +1,79 @@ +#ifndef __FOP2_HPP__ +#define __FOP2_HPP__ + +#include +#include +#include + +#include "fDev.hpp" +#include "fJob.hpp" +#include "fDefs.hpp" + +using namespace std; + +/** + * FPGA excl. 
OR bitwise job + */ +class fOp2 : public fJob { +private: + uint64_t* src; + uint64_t* dst; + uint32_t len; + +public: + fOp2(uint64_t* src, uint64_t* dst, uint32_t len, uint32_t id, uint32_t priority) + : fJob(id, priority, OPER_2) { + this->src = src; + this->dst = dst; + this->len = len; + } + + fOp2(uint64_t* mem, uint32_t len, uint32_t id, uint32_t priority) + : fJob(id, priority, OPER_2) { + this->src = mem; + this->dst = mem; + this->len = len; + } + + void run() { + fillData(src, len); + fdev->transferData(src, dst, len, len/8, true); + //checkData(dst, len/8); + //printData(dst, len/8); + } + + void fillData(uint64_t *mem, uint32_t len) { + for(uint32_t i = 0; i < len/8; i++) { + if(i%8 == 1) { + mem[i] = 0x0000000000000002; + } + else if(i%8 == 0) { + mem[i] = 0x0000000000000005; + } + else { + mem[i] = 0x0000000000000000; + } + } + } + + void checkData(uint64_t *mem, uint32_t len) { + bool k = false; + for(uint32_t i = 0; i < len/8; i++) { + if(mem[i] != 0x0000000000000007) { + k = true; + cout << mem[i] << endl; + } + + } + if(k) cout << "Error excl. OR" << endl; + } + + void printData(uint64_t *mem, uint32_t len) { + for(uint32_t i = 0; i < len/8; i++) + cout << hex << mem[i] << endl; + } +}; + + + +#endif \ No newline at end of file diff --git a/sw/scheduling/include/fOp3.hpp b/sw/scheduling/include/fOp3.hpp new file mode 100644 index 00000000..a62d228a --- /dev/null +++ b/sw/scheduling/include/fOp3.hpp @@ -0,0 +1,78 @@ +#ifndef __FOP3_HPP__ +#define __FOP3_HPP__ + +#include +#include +#include + +#include "fDev.hpp" +#include "fJob.hpp" +#include "fDefs.hpp" + +using namespace std; + +/** + * FPGA excl. 
NOR bitwise job + */ +class fOp3 : public fJob { +private: + uint64_t* src; + uint64_t* dst; + uint32_t len; + +public: + fOp3(uint64_t* src, uint64_t* dst, uint32_t len, uint32_t id, uint32_t priority) + : fJob(id, priority, OPER_3) { + this->src = src; + this->dst = dst; + this->len = len; + } + + fOp3(uint64_t* mem, uint32_t len, uint32_t id, uint32_t priority) + : fJob(id, priority, OPER_3) { + this->src = mem; + this->dst = mem; + this->len = len; + } + + void run() { + fillData(src, len); + fdev->transferData(src, dst, len, len/8, true); + checkData(dst, len/8); + //printData(dst, len/8); + } + + void fillData(uint64_t *mem, uint32_t len) { + for(uint32_t i = 0; i < len/8; i++) { + if(i%8 == 1) { + mem[i] = 0x0000000000000002; + } + else if(i%8 == 0) { + mem[i] = 0x0000000000000005; + } + else { + mem[i] = 0x0000000000000000; + } + } + } + + void checkData(uint64_t *mem, uint32_t len) { + bool k = false; + for(uint32_t i = 0; i < len/8; i++) { + if(mem[i] != 0xfffffffffffffff8) { + k = true; + cout << hex << mem[i] << dec << endl; + } + } + if(k) cout << "Error excl. 
NOR" << endl; + } + + void printData(uint64_t *mem, uint32_t len) { + for(uint32_t i = 0; i < len/8; i++) + cout << hex << mem[i] << endl; + } +}; + + + +#endif \ No newline at end of file diff --git a/sw/scheduling/include/fScheduler.hpp b/sw/scheduling/include/fScheduler.hpp new file mode 100644 index 00000000..bc52f19a --- /dev/null +++ b/sw/scheduling/include/fScheduler.hpp @@ -0,0 +1,102 @@ +#ifndef __FSCHEDULER_HPP__ +#define __FSCHEDULER_HPP__ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fDefs.hpp" +#include "fDev.hpp" +#include "fJob.hpp" + +using namespace std; + +static const struct timespec PAUSE {.tv_sec = 0, .tv_nsec = 1000}; +static const struct timespec MSPAUSE {.tv_sec = 0, .tv_nsec = 1000000}; + +class fScheduler : public fDev { +private: + uint32_t region_id; + + bool run; + mutex mtx; + condition_variable cv; + + thread schedulerThread; + +#ifdef REQUEST_SCHEDULING + priority_queue, jobCmpr> fque; +#else + queue fque; +#endif + + fDev *fdev; + + void processRequests(); + +public: + fScheduler(uint32_t region_id) { + this->region_id = region_id; + + unique_lock lck(mtx); +#ifdef VERBOSE_DEBUG + cout << "Scheduler: initial lock" << endl; +#endif + + schedulerThread = thread(&fScheduler::processRequests, this); +#ifdef VERBOSE_DEBUG + cout << "Scheduler: thread started" << endl; +#endif + + cv.wait(lck); +#ifdef VERBOSE_DEBUG + cout << "Scheduler: constructor finished" << endl; +#endif + } + + ~fScheduler() { + run = false; +#ifdef VERBOSE_DEBUG + cout << "Scheduler: destructor called" << endl; +#endif + + schedulerThread.join(); + } + + // Getters + bool isRunning() { + return run; + } + + uint32_t getSize() { + return fque.size(); + } + + uint32_t getRegionId() { + return region_id; + } + + // Obtain a region + bool obtainRegion() { + if(acquireRegion(region_id)) + return true; + else + return false; + } + + // Request a job + void requestJob(fJob* fjob) { + if(isRegionAcquired()) { + 
fjob->attachJob(this); + lock_guard lck2(mtx); + fque.push(fjob); + } + } +}; + +#endif \ No newline at end of file diff --git a/sw/scheduling/include/kmeans.hpp b/sw/scheduling/include/kmeans.hpp new file mode 100644 index 00000000..1bfbb6e4 --- /dev/null +++ b/sw/scheduling/include/kmeans.hpp @@ -0,0 +1,196 @@ +#include +#include +#include +#include +#include + +template +class Kmeans { + +public: + Kmeans(T* points, uint32_t size, uint32_t dimensions, uint32_t k); + ~Kmeans(); + + void run(uint32_t iter); + T2 getSSE(); + double getRuntime(); + void printCentroids(); + +private: + void assignment(); + void update(); + void initCentroids(); + T2 euclideanDist(T* p1, T* p2); + + T* mPoints; + T* mCentroids; + T* mAccu; + uint32_t* mAssigned; + uint32_t mSize; + uint32_t mClusters; + uint32_t mDimensions; + double mDurationUs; +}; + +template +Kmeans::Kmeans(T* points, uint32_t size, uint32_t dimensions, uint32_t k) +{ + mPoints = points; + mSize = size; + mClusters = k; + mDimensions = dimensions; + + mCentroids = new T[k*mDimensions]; + mAccu = new T[k*mDimensions]; + mAssigned = new uint32_t[k]; +} + +template +Kmeans::~Kmeans() +{ + delete[] mCentroids; + delete[] mAccu; + delete[] mAssigned; +} + +template +void Kmeans::run(uint32_t iterations) +{ + initCentroids(); + auto start_time = std::chrono::high_resolution_clock::now(); + for (uint32_t it = 0; it < iterations; ++it) { + memset(mAccu, 0.0, mClusters*mDimensions*sizeof(T)); + memset(mAssigned, 0, mClusters*sizeof(uint32_t)); + assignment(); + update(); + } + auto end_time = std::chrono::high_resolution_clock::now(); + mDurationUs = std::chrono::duration_cast(end_time-start_time).count(); + +} + +template +T2 Kmeans::getSSE() +{ + T2 sse = 0.0; + + for(uint32_t p = 0; p < (mSize*mDimensions); p += mDimensions) { + T2 minDist = 0.0; + for (uint32_t c = 0; c < (mClusters*mDimensions); c += mDimensions) { + T dist = euclideanDist(&mPoints[p], &mCentroids[c]); + if (c == 0 || dist <= minDist) { + minDist = 
dist; + } + } + + sse += minDist; + int ind = p/mDimensions; + //printf("[%d]sse:%d\n",ind, sse); + } + + return sse; +} + +template +double Kmeans::getRuntime() +{ + return mDurationUs; +} + +template +void Kmeans::printCentroids() +{ + std::cout << "Centroids:" << std::endl; + for (uint32_t c = 0; c < mClusters; ++c) { + std::cout << "centroid[" << c << "]: "; + for (uint32_t d = 0; d < mDimensions; ++d) { + std::cout << " " << mCentroids[c*mDimensions+d]; + } + std::cout << std::endl; + } +} + +template +void Kmeans::assignment() +{ + for(uint32_t p = 0; p < (mSize*mDimensions); p += mDimensions) { + T2 minDist = 0.0; + uint32_t clusterIdx = 0; + for (uint32_t c = 0; c < mClusters; ++c) { + T2 dist = euclideanDist(&mPoints[p], &mCentroids[c*mDimensions]); + if (c == 0 || dist <= minDist) { + minDist = dist; + clusterIdx = c; + } + } + int ind = p/mDimensions; + //printf("[%d]assign:%d\n",ind, clusterIdx); + //printf("[%d]mindist:%d\n",ind, minDist); + //Accumulate + for (uint32_t d = 0; d < mDimensions; ++d) { + mAccu[clusterIdx*mDimensions + d] += mPoints[p + d]; + } + mAssigned[clusterIdx]++; + } +/* printf("accumulated counters:\n"); + for(int i =0; i< mClusters;i++) + { + printf("%u ", mAssigned[i]); + } + printf("\n"); + + printf("accumulated results:\n"); + for(int i =0; i< mClusters;i++) + { + for(int j=0; j +void Kmeans::update() +{ +// printf("updated center:\n"); + for (uint32_t c = 0; c < mClusters; ++c) { + for (uint32_t d = 0; d < mDimensions; ++d) { + if (mAssigned[c] != 0) { + mCentroids[c*mDimensions+d] = mAccu[c*mDimensions+d] / mAssigned[c]; +// printf("%d ", mCentroids[c*mDimensions+d]); + } + } +// printf("\n"); + } + +} + +template +void Kmeans::initCentroids() + +{ + int indx_array[8] = {0, 70, 149, 35, 105, 17, 50, 85}; + + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, mSize); + for (uint32_t c = 0; c < mClusters; ++c) { + // int idx = distribution(generator); + int idx = indx_array[c]; + for 
(uint32_t d = 0; d < mDimensions; ++d) { + mCentroids[c*mDimensions+d] = mPoints[idx*mDimensions+d]; + } + } +} + +template +T2 Kmeans::euclideanDist(T* p1, T* p2) +{ + T2 dist = 0.0; + for (uint32_t d = 0; d < mDimensions; ++d) { + T diff = (p1[d] > p2[d]) ? (p1[d] - p2[d]) : (p2[d] - p1[d]); + dist += (diff * diff); + //dist += ((p1[d] - p2[d]) * (p1[d] - p2[d])); + } + return dist; +} diff --git a/sw/scheduling/include/mlweaving.h b/sw/scheduling/include/mlweaving.h new file mode 100644 index 00000000..b7d39209 --- /dev/null +++ b/sw/scheduling/include/mlweaving.h @@ -0,0 +1,219 @@ +// Copyright 2018 Zeke Wang, ETH, Zurich +// Author : Zeke Wang (zeke.wang [at] inf.ethz.ch) +//Licensed under the Apache License, Version 2.0 (the "License"); +//you may not use this file except in compliance with the License. +//You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +//Unless required by applicable law or agreed to in writing, software +//distributed under the License is distributed on an "AS IS" BASIS, +//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//See the License for the specific language governing permissions and +//limitations under the License. 
+ +#ifndef MLWEAVING_H +#define MLWEAVING_H + + +// This file is mainly about how to compress the dataset into MLWeaving layout and +// to get data (char or short) out of MLWeaving layout, + +#include "string.h" +#include +#include + + +#define BITS_OF_CL 512 +#define NUM_PIPE 32 + + +uint32_t compute_num_cl_tuples(uint32_t numSamples, uint32_t numFeatures) +{ + uint32_t NUM_BLOCK = ceil((float)numSamples/(float)NUM_PIPE) * numFeatures; + uint32_t NUM_BLOCK_COLUMN = BITS_OF_CL/NUM_PIPE; + uint32_t num_cl_tuples = ceil((float)NUM_BLOCK / (float)NUM_BLOCK_COLUMN) * 32; + printf("num_cl_tuples:%d\n", num_cl_tuples); + return num_cl_tuples; +} + +void convert_float_to_fix(float* float_src, uint32_t* fix_src, uint32_t numSamples, uint32_t numFeatures) +{ + uint32_t scale = 1; + for (int i = 0; i < numSamples; ++i) + { + for (int j = 0; j < numFeatures; ++j) + { + fix_src[i*numFeatures+j] = (uint32_t) (float_src[i*numFeatures+j]*scale); + // printf("%d ", fix_src[i*numFeatures+j] ); + } + // printf("\n"); + } +} + + +//This function performs weaving on the input data array: src. +//Input : src (dense, unsigned int) +//Output: dest (in MLWeaving) +void mlweaving_on_sample(uint32_t *dest, uint32_t *src, uint32_t numSamples, uint32_t numFeatures) +{ + printf("start MLWeaving\n"); + uint32_t address_index = 0; + uint32_t sample_idx = 0; + uint32_t feature_idx = 0; + uint32_t NUM_BLOCK = ceil((float)numSamples/(float)NUM_PIPE) * numFeatures; + uint32_t NUM_BLOCK_COLUMN = BITS_OF_CL/NUM_PIPE; + uint32_t NUM_BLOCK_ROW = ceil( (float)NUM_BLOCK / (float)NUM_BLOCK_COLUMN ); + printf("NUM_BLOCK:%d, NUM_BLOCK_COLUMN:%d, NUM_BLOCK_ROW:%d\n", NUM_BLOCK, NUM_BLOCK_COLUMN, NUM_BLOCK_ROW); + + ///Do the bitWeaving to the training data... 
+ + //each block row contains #BITS_OF_CL/NUM_PIPE blocks, which is 512 bits + for (uint32_t i = 0; i < NUM_BLOCK; i+=NUM_BLOCK_COLUMN) + { + uint32_t blocks_in_cl = ( (i+NUM_BLOCK_COLUMN)>5] = result_buffer[m>>5] | ((tmp_buffer[m] >>31)<<(m&31)); + tmp_buffer[m] = tmp_buffer[m] << 1; + } + //2.2: store the bit-level result back to the memory... + dest[address_index++] = result_buffer[0]; + dest[address_index++] = result_buffer[1]; + dest[address_index++] = result_buffer[2]; + dest[address_index++] = result_buffer[3]; + dest[address_index++] = result_buffer[4]; + dest[address_index++] = result_buffer[5]; + dest[address_index++] = result_buffer[6]; + dest[address_index++] = result_buffer[7]; + dest[address_index++] = result_buffer[8]; + dest[address_index++] = result_buffer[9]; + dest[address_index++] = result_buffer[10]; + dest[address_index++] = result_buffer[11]; + dest[address_index++] = result_buffer[12]; + dest[address_index++] = result_buffer[13]; + dest[address_index++] = result_buffer[14]; + dest[address_index++] = result_buffer[15]; + } + } + printf("finished ml weaving\n"); +} + +// This function retrives one single sample feature from the mlweaving layout with address: src. 
+// dest: destination +// src : address of mlweaving array +void retrieve_from_mlweaving(uint32_t* dest, uint32_t *mlweaving_src, uint32_t sample_idx, uint32_t feature_idx, uint32_t numFeatures, uint32_t numSamples) +{ + // printf("retrieve from MLWeaving\n"); + uint32_t NUM_BLOCK = ceil((float)numSamples/(float)NUM_PIPE) * numFeatures; + uint32_t NUM_BLOCK_COLUMN = BITS_OF_CL/NUM_PIPE; + uint32_t NUM_BLOCK_ROW = ceil( (float)NUM_BLOCK / (float)NUM_BLOCK_COLUMN ); + + uint32_t block_idx = (sample_idx/NUM_PIPE)*numFeatures + feature_idx; + uint32_t cl_offset = (block_idx/NUM_BLOCK_COLUMN)*32; + uint32_t bit_offset_cl = (block_idx % NUM_BLOCK_COLUMN)*NUM_PIPE + sample_idx % NUM_PIPE; + uint32_t int_offset_cl = bit_offset_cl/32; + uint32_t addr_offset = cl_offset*16 + int_offset_cl; + uint32_t bit_offset_int = bit_offset_cl % 32; + // printf("block_idx:%d, cl_offset:%d, bit_offset_cl:%d, int_offset_cl:%d,bit_offset_int:%d\n", block_idx, cl_offset, bit_offset_cl,int_offset_cl,bit_offset_int); + + uint32_t result=0; + // printf("MSB:"); + for (int i = 0; i < 32; ++i) + { + uint32_t bit = ((mlweaving_src[addr_offset+16*i] & (1<> bit_offset_int); + // printf("%d", bit); + result |= ((mlweaving_src[addr_offset+16*i] & (1<> bit_offset_int) << (31-i); + } + // printf(" retrieve result:%d\n", result); + dest[sample_idx*numFeatures+feature_idx] = result; +} + +void print_weaving (uint32_t num_cl_tuples, uint32_t* weaving_dest) +{ + printf("print weaving\n"); + for (int i = 0; i < num_cl_tuples; ++i) + { + for (int j = 0; j < 16; ++j) + { + printf("%u ", weaving_dest[i*16+j]); + } + printf("\n"); + } +} + +void compare_results_ml_weaving(uint32_t* fix_src, uint32_t* retrieve_dest, uint32_t numFeatures, uint32_t numSamples ) +{ + uint32_t num_error = 0; + for (int i = 0; i < numSamples; ++i) + { + for (int j = 0; j < numFeatures; ++j) + { + if (fix_src[i*numFeatures+j] != retrieve_dest[i*numFeatures+j]) + { + printf("sample:%d, feature:%d, fix_src:%d, retrieve_dest:%d\n", 
i,j,fix_src[i*numFeatures+j],retrieve_dest[i*numFeatures+j]); + num_error++; + } + } + } + if (num_error==0) + { + printf("All comparisons correct\n"); + } +} + +void test_ml_weaving(float* float_src ,uint32_t numSamples, uint32_t numFeatures) +{ + printf("start test_ml_weaving\n"); + uint32_t num_cl_tuples = compute_num_cl_tuples( numSamples, numFeatures); + uint32_t* weaving_dest = NULL; + int status =posix_memalign((void**)&weaving_dest, 64, num_cl_tuples*sizeof(uint32_t)*16); + memset(weaving_dest,0,num_cl_tuples*sizeof(uint32_t)*16); + + uint32_t* fix_src = NULL; + status=posix_memalign((void**)&fix_src, 64, sizeof(uint32_t)*numFeatures*numSamples); + convert_float_to_fix(float_src, fix_src, numSamples, numFeatures); + + mlweaving_on_sample(weaving_dest, fix_src, numSamples, numFeatures); + // print_weaving(num_cl_tuples, weaving_dest); + + uint32_t* retrieve_dest = NULL; + status=posix_memalign((void**)&retrieve_dest, 64, sizeof(uint32_t)*numFeatures*numSamples); + + printf("start retrieve_dest\n"); + for (int i = 0; i < numSamples ; ++i) + { + for (int j = 0; j < numFeatures; ++j) + { + retrieve_from_mlweaving(retrieve_dest, weaving_dest, i, j, numFeatures, numSamples); + // printf("%d ", retrieve_dest[i*numFeatures+j] ); + } + // printf("\n"); + } + compare_results_ml_weaving(fix_src, retrieve_dest, numFeatures, numSamples); +} + +#endif diff --git a/sw/scheduling/include/utils.hpp b/sw/scheduling/include/utils.hpp new file mode 100644 index 00000000..941fa6e3 --- /dev/null +++ b/sw/scheduling/include/utils.hpp @@ -0,0 +1,35 @@ +#ifndef UTILS_HPP +#define UTILS_HPP +#include +#include +#include +#include +#include +#include +#include + +#define MAX_LINE_LENGTH 2049 + +void readFloatData(char* filename, float* points, uint32_t size, uint32_t dimensions); +void readFixData(char* filename, uint32_t* points, uint32_t size, uint32_t dimensions, uint32_t fixpoint); + +template +void printPoints(T* points, uint32_t size, uint32_t dimensions) +{ + T* ptr = points; 
+ for (uint32_t i = 0; i < size; ++i) { + std::cout << "point[" << i << "]:"; + for (uint32_t d = 0; d < dimensions; ++d) { + std::cout << " " << *ptr; + ptr++; + } + std::cout << std::endl; + } +} + +void read_input(const char *filename, int nclusters, int nfeatures, int npoints, float* features); +void data_gen(float* data, uint64_t data_set_size, uint32_t data_dim ); +void read_file(float *array, int N, int D, const char *filename, bool isBinary); + + +#endif diff --git a/sw/scheduling/src/classical_kmeans.cpp b/sw/scheduling/src/classical_kmeans.cpp new file mode 100644 index 00000000..a5effe2a --- /dev/null +++ b/sw/scheduling/src/classical_kmeans.cpp @@ -0,0 +1,489 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.hpp" +#include "classical_kmeans.h" + +using namespace std; +#define SCALE_FACTOR 16 + + +void compute_kmeans( uint32_t* objects, uint32_t* clusters_ref, int numObjs, int numClusters, int numCoords, int max_iter, bool user_specify_precision){ + + int* member_ref = NULL; + int status = posix_memalign((void**)&member_ref, 64, sizeof(int)*numObjs); + /* initialize membership[] */ + for (int i=0; i::max(); + int index = 0; + + // printf("%d:", i); + for (int j=0; j 0){ + clusters_ref[i*numCoords+j] = (uint32_t)(newClusters[i*numCoords+j] / newClusterSize[i]) ; + } + else { + clusters_ref[i*numCoords+j] = 0; + } + + newClusters[i*numCoords+j] = 0; + } + printf("%u ",newClusterSize[i]); + newClusterSize[i] = 0; + } + printf("\n"); + printCentroids(clusters_ref, numClusters, numCoords,1); + printf ("# iteration:%u, loss_low_prec is:@ %lu\n", *total_loop, loss); + + + (*total_loop)++; + + + } while ((*total_loop) < max_loop); +} + + +void printCentroids(uint32_t*centroid, uint32_t numClusters, uint32_t numCoords, uint32_t number_of_iteration) +{ + std::cout << "Centroids:" << std::endl; + for (int i = 0; i < number_of_iteration; ++i) + { + // printf("iteration:%u\n", 
i); + for (uint32_t c = 0; c < numClusters; ++c) { + std::cout << "centroid[" << c << "]: "; + for (uint32_t d = 0; d < numCoords; ++d) { + std::cout << " " << centroid[c*numCoords+d]; + } + std::cout << std::endl; + } + } + +} + + +void normalization_scale(int nfeatures, int npoints, float* features, uint32_t* scaled_unsigned_features ,float* dr_a_min, float* dr_a_max) +{ + double scale = SCALE_FACTOR; + printf("\nStart normalization 0 - 1 and scale to (2^%lf-1):\n", scale); + + for (int j = 0; j < nfeatures; ++j) + { + float amin = numeric_limits::max(); + float amax = numeric_limits::min(); + + for (int i = 0; i < npoints; ++i) + { + float a_here = features[i*nfeatures+j]; + if (a_here > amax) + amax = a_here; + if (a_here < amin) + amin = a_here; + } + dr_a_min[j] = amin; //set to the global variable for pm + dr_a_max[j] = amax; + printf("column: %d, min:%f, max:%f\n", j, amin, amax); + float arange = amax - amin; + if (arange > 0) + { + for (int i = 0; i < npoints; ++i) + { + float tmp = ((features[i*nfeatures+j]-amin)/arange); + scaled_unsigned_features[i*nfeatures+j] = (uint32_t) (tmp * ((pow(2.0,scale))-1)); + } + } + } + + + printf("normalization and scale finished\n"); + + // for (int i = 0; i < npoints; ++i) + // { + // for (int j = 0; j < nfeatures; ++j) + // { + // printf("%u ", scaled_unsigned_features[i*nfeatures+j]); + // } + // printf("\n"); + // } +} + +void scale(int nfeatures, int npoints, float* features, uint32_t* scaled_unsigned_features,float* dr_a_min, float* dr_a_max) +{ + double scale = SCALE_FACTOR; + printf("\nStart scale to %lf:\n", scale); + + for (int j = 0; j < nfeatures; ++j) + { + float amin = numeric_limits::max(); + float amax = numeric_limits::min(); + + for (int i = 0; i < npoints; ++i) + { + float a_here = features[i*nfeatures+j]; + if (a_here > amax) + amax = a_here; + if (a_here < amin) + amin = a_here; + } + dr_a_min[j] = amin; //set to the global variable for pm + dr_a_max[j] = amax; + printf("column: %d, min:%f, 
max:%f\n", j, amin, amax); + for (int i = 0; i < npoints; ++i) + { + scaled_unsigned_features[i*nfeatures+j] = (uint32_t) ((features[i*nfeatures+j] - amin) * scale); + } + + } + + + printf("scale finished\n"); + + for (int i = 0; i < npoints; ++i) + { + for (int j = 0; j < nfeatures; ++j) + { + printf("%u ", scaled_unsigned_features[i*nfeatures+j]); + } + printf("\n"); + } +} + + +void normalization(int nfeatures, int npoints, float* features, float* normalized_features) +{ + + printf("\nStart normalization 0 - 1:\n"); + + for (int j = 0; j < nfeatures; ++j) + { + float amin = numeric_limits::max(); + float amax = numeric_limits::min(); + + for (int i = 0; i < npoints; ++i) + { + float a_here = features[i*nfeatures+j]; + if (a_here > amax) + amax = a_here; + if (a_here < amin) + amin = a_here; + } + //printf("column: %d, min:%f, max:%f\n", j, amin, amax); + float arange = amax - amin; + if (arange > 0) + { + for (int i = 0; i < npoints; ++i) + { + float tmp = ((features[i*nfeatures+j]-amin)/arange); + normalized_features[i*nfeatures+j] = tmp; + } + } + } + + // for (int i = 0; i < npoints; ++i) + // { + // for (int j = 0; j < nfeatures; ++j) + // { + // printf("%f ", normalized_features[i*nfeatures+j]); + // } + // printf("\n"); + // } + + printf("normalization finished\n"); +} + +void convert_precision(int precision, int nfeatures, int npoints, uint32_t* features, uint32_t* low_precision_feature) +{ + //printf("precision:%d\n", precision); + uint32_t tmp; + for (int i = 0; i < npoints; ++i) + { + for (int j = 0; j < nfeatures; ++j) + { + tmp = ((uint32_t)features[i*nfeatures+j]) >> (SCALE_FACTOR-precision); + // tmp = (uint32_t)features[i*nfeatures+j]; + low_precision_feature[i*nfeatures+j] = (uint32_t)tmp; + } + } +} + +void initial_centroids(int numClusters, int numCoords, int numObjs, uint32_t* cluster, uint32_t* objects) +{ + srand(1); + /* randomly pick cluster centers */ + printf("randomly select cluster centers\n"); + for (int i=0; i 0){ +// 
clusters_ref[i*numCoords+j] = newClusters[i*numCoords+j] / newClusterSize[i]; +// } +// newClusters[i*numCoords+j] = 0.0f; +// center_change_norm = center_change_norm + (clusters_ref[i*numCoords+j]-old_center[i*numCoords+j])*(clusters_ref[i*numCoords+j]-old_center[i*numCoords+j]); + +// } +// newClusterSize[i] = 0; +// } + + +// delta /= numObjs; +// sse_change = abs(loss - previous_sse); +// previous_sse = loss; + +// loss = loss / ((1< change_center_thres && loop++ < 500); +// uint32_t relative_data_movement = numObjs*numCoords*(loop+1)*32; +// printf("relative_data_movement:*# %d\n", relative_data_movement); +// //Clean temp arrays +// free (newClusterSize); +// free (newClusters); +// } + diff --git a/sw/scheduling/src/fArbiter.cpp b/sw/scheduling/src/fArbiter.cpp new file mode 100644 index 00000000..35145fb2 --- /dev/null +++ b/sw/scheduling/src/fArbiter.cpp @@ -0,0 +1,41 @@ +#include "fArbiter.hpp" + +void fArbiter::processRequests() { + unique_lock lck(mtx); + run = true; + fJob* currJob; + cv.notify_one(); + lck.unlock(); + + while(run || !request_queue.empty()) { + lck.lock(); + if(!request_queue.empty()) { + if(request_queue.front() !=nullptr) { + // Remove next job from the queue + currJob = request_queue.front(); + request_queue.pop(); + + int32_t min = INT32_MAX; + uint32_t min_id = 0; + for (auto& it : schedulers) { + if(it->getSize() < min) { + min = it->getSize(); + min_id = it->getRegionId(); + } + } + + for (auto& it : schedulers) { + if(it->getRegionId() == min_id) + it->requestJob(currJob); + } + } + else { + request_queue.pop(); + } + } + + lck.unlock(); + + nanosleep(&PAUSE, NULL); + } +} \ No newline at end of file diff --git a/sw/scheduling/src/fDev.cpp b/sw/scheduling/src/fDev.cpp new file mode 100644 index 00000000..c1dab4c0 --- /dev/null +++ b/sw/scheduling/src/fDev.cpp @@ -0,0 +1,449 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fDev.hpp" + +/* Performance test */ +#define 
PERF_RUN + +/* AVX */ +#define AVX_WIDTH 8 +#define PR_WIDTH 2 * 1024 * 1024 + +/* Sleep */ +#define POLL_SLEEP_NS 100 + +#define LARGE_PAGE_SIZE 2 * 1024 * 1024 +#define LARGE_PAGE_SHIFT 21UL +#define PAGE_SIZE 4 * 1024 +#define PAGE_SHIFT 12UL + +/* Clock */ +#define CLK_NS 4 + +/* MMAP */ +#define MMAP_CTRL (0x0 << PAGE_SHIFT) +#define MMAP_CNFG (0x1 << PAGE_SHIFT) +#define MMAP_DATA (0x2 << PAGE_SHIFT) +#define MMAP_PREC (0x3 << PAGE_SHIFT) +#define MMAP_BUFF (0x200 << PAGE_SHIFT) +#define MMAP_CARD (0x400 << PAGE_SHIFT) +#define MMAP_CHAN_0 (0x600 << PAGE_SHIFT) +#define MMAP_CHAN_1 (0x800 << PAGE_SHIFT) + +/* IOCTL */ +#define IOCTL_ALLOC_HOST_MEM _IOR('D', 1, unsigned long) +#define IOCTL_FREE_HOST_MEM _IOR('D', 2, unsigned long) +#define IOCTL_ALLOC_CARD_MEM_STRIDE _IOR('D', 3, unsigned long) +#define IOCTL_FREE_CARD_MEM_STRIDE _IOR('D', 4, unsigned long) +#define IOCTL_READ_ENG_STATUS _IOR('D', 5, unsigned long) +#define IOCTL_UNMAP_USER _IOR('D', 6, unsigned long) +#define IOCTL_RECONFIG_LOCK _IOR('D', 7, unsigned long) +#define IOCTL_RECONFIG_UNLOCK _IOR('D', 8, unsigned long) +#define IOCTL_ALLOC_CARD_MEM_CHAN_0 _IOR('D', 9, unsigned long) +#define IOCTL_ALLOC_CARD_MEM_CHAN_1 _IOR('D', 10, unsigned long) +#define IOCTL_FREE_CARD_MEM_CHAN_0 _IOR('D', 11, unsigned long) +#define IOCTL_FREE_CARD_MEM_CHAN_1 _IOR('D', 12, unsigned long) +#define IOCTL_MAP_USER _IOR('D', 13, unsigned long) + +/* Regions */ +#define CTRL_REGION_SIZE 64 * 1024 +#define CNFG_REGION_SIZE 64 * 1024 +#define DATA_REGION_SIZE 1 * 1024 * 1024 +#define PREC_REGION_SIZE 32 * 1024 + +/* Config regs */ +#define CNFG_CTRL_REG 0x0 +#define CNFG_STATUS_REG 0x1 +#define CNFG_STATUS_DMA_RD_REG 0x2 +#define CNFG_STATUS_DMA_WR_REG 0x3 +#define CNFG_VADDR_RD_REG 0x4 +#define CNFG_LEN_RD_REG 0x5 +#define CNFG_VADDR_WR_REG 0x6 +#define CNFG_LEN_WR_REG 0x7 +#define CNFG_VADDR_MISS_REG 0x8 +#define CNFG_LEN_MISS_REG 0x9 +#define CNFG_DCPL_REG 0xA +#define CNFG_DP_REG 0xB +#define CNFG_TMR_STOP_REG 
0xC +#define CNFG_TMR_RD_REG 0xD +#define CNFG_TMR_WR_REG 0xE + +#define CNFG_CTRL_START_RD 0x1 +#define CNFG_CTRL_START_WR 0x2 +#define CNFG_CTRL_START_TMR_RD 0x8 +#define CNFG_CTRL_START_TMR_WR 0x10 +#define CNFG_CTRL_CLR_STAT_RD 0x20 +#define CNFG_CTRL_CLR_STAT_WR 0x40 +#define CNFG_CTRL_START_CYC (CNFG_CTRL_START_RD | CNFG_CTRL_START_WR) +#define CNFG_CTRL_START_TMR_CYC (CNFG_CTRL_START_TMR_RD | CNFG_CTRL_START_TMR_WR) +#define CNFG_CTRL_CLR_STAT_CYC (CNFG_CTRL_CLR_STAT_RD | CNFG_CTRL_CLR_STAT_WR) +#define CNFG_STATUS_READY_RD 0x1 +#define CNFG_STATUS_READY_WR 0x2 + +using namespace std::chrono; + +/* -- Obtain regions ---------------------------------------------------------------------------------- */ + +bool fDev::acquireRegion(uint32_t rNum) { + std::string region = "/dev/fpga" + std::to_string(rNum); + fd = open(region.c_str(), O_RDWR | O_SYNC); + if(fd == -1) { + std::cout << "ERR: Cannot acquire an FPGA region" << std::endl; + return false; + } + + if(!mmapFpga()) { + std::cout << "ERR: Cannot mmap an FPGA region" << std::endl; + return false; + } + + regionAcquired = true; + return true; +} + +bool fDev::releaseRegion() { + close(fd); + + regionAcquired = false; + return true; +} + +bool fDev::isRegionAcquired() { + return regionAcquired; +} + +bool fDev::mmapFpga() { + cnfg_reg = (uint64_t*) mmap(NULL, CNFG_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CNFG); + if(cnfg_reg == MAP_FAILED) { + releaseRegion(); + return false; + } + + ctrl_reg = (uint64_t*) mmap(NULL, CTRL_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CTRL); + if(ctrl_reg == MAP_FAILED) { + releaseRegion(); + return false; + } + + data_reg = (__m256i*) mmap(NULL, DATA_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_DATA); + if(data_reg == MAP_FAILED) { + releaseRegion(); + return false; + } + + return true; +} + +bool fDev::munmapFpga() { + if(munmap(cnfg_reg, CNFG_REGION_SIZE) != 0) { + releaseRegion(); + return false; + } + + if(munmap(ctrl_reg, 
CTRL_REGION_SIZE) != 0) { + releaseRegion(); + return false; + } + + if(munmap(data_reg, DATA_REGION_SIZE) != 0) { + releaseRegion(); + return false; + } + + cnfg_reg = 0; + ctrl_reg = 0; + data_reg = 0; + + return true; +} + +/* -- Memory management ------------------------------------------------------------------------------- */ +uint64_t* fDev::getHostMem(uint64_t &n_pages) { + uint64_t *hMem, *hMemAligned; + + ioctl(fd, IOCTL_ALLOC_HOST_MEM, &n_pages); + hMem = (uint64_t*)mmap(NULL, (n_pages + 1) * LARGE_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_BUFF); + // alignment + hMemAligned = (uint64_t*)( ((((uint64_t)hMem + LARGE_PAGE_SIZE - 1) >> LARGE_PAGE_SHIFT)) << LARGE_PAGE_SHIFT); + mapped_large[hMemAligned] = hMem; + return hMemAligned; +} + +void fDev::freeHostMem(uint64_t *vaddr, uint64_t &n_pages) { + uint64_t* hMem; + hMem = mapped_large[vaddr]; + munmap(hMem, (n_pages + 1) * LARGE_PAGE_SIZE); + ioctl(fd, IOCTL_FREE_HOST_MEM, &vaddr); +} + +uint64_t* fDev::getCardMem(uint64_t &n_pages, int channel) { + uint64_t *cMem, *cMemAligned; + if(channel == 0) { + ioctl(fd, IOCTL_ALLOC_CARD_MEM_CHAN_0, &n_pages); + cMem = (uint64_t*)mmap(NULL, (n_pages + 1) * LARGE_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CHAN_0); + } else if(channel == 1) { + ioctl(fd, IOCTL_ALLOC_CARD_MEM_CHAN_1, &n_pages); + cMem = (uint64_t*)mmap(NULL, (n_pages + 1) * LARGE_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CHAN_1); + } + // alignment + cMemAligned = (uint64_t*)( ((((uint64_t)cMem + LARGE_PAGE_SIZE - 1) >> LARGE_PAGE_SHIFT)) << LARGE_PAGE_SHIFT); + mapped_large[cMemAligned] = cMem; + return cMemAligned; +} + +void fDev::freeCardMem(uint64_t *vaddr, uint64_t &n_pages, int channel) { + uint64_t* cMem; + cMem = mapped_large[vaddr]; + munmap(cMem, (n_pages + 1) * LARGE_PAGE_SIZE); + if(channel == 0) + ioctl(fd, IOCTL_FREE_CARD_MEM_CHAN_0, &vaddr); + else if(channel == 1) + ioctl(fd, IOCTL_FREE_CARD_MEM_CHAN_1, &vaddr); +} + +uint64_t* 
fDev::getCardMem(uint64_t &n_pages) { + uint64_t *cMem; + + ioctl(fd, IOCTL_ALLOC_CARD_MEM_STRIDE, &n_pages); + cMem = (uint64_t*)mmap(NULL, (2*n_pages) * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CARD); + // alignment + return cMem; +} + +void fDev::freeCardMem(uint64_t *vaddr, uint64_t &n_pages) { + munmap(vaddr, (2*n_pages) * PAGE_SIZE); + ioctl(fd, IOCTL_FREE_CARD_MEM_STRIDE, &vaddr); +} + +void fDev::userMap(uint64_t *mem, uint64_t len) { + uint64_t vdata [2]; + vdata[0] = (uint64_t)mem; + vdata[1] = len; + ioctl(fd, IOCTL_MAP_USER, &vdata); +} + +void fDev::userUnmap(uint64_t *mem, uint64_t len) { + uint64_t vdata [2]; + vdata[0] = (uint64_t)mem; + vdata[1] = len; + ioctl(fd, IOCTL_UNMAP_USER, &vdata); +} + +/* -- Bulk transfers ---------------------------------------------------------------------------------- */ + +void fDev::readFrom(uint64_t* vaddr, uint32_t len, bool poll) { + cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr; + cnfg_reg[CNFG_LEN_RD_REG] = len; +#ifndef PERF_RUN + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_RD; +#else + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_RD | CNFG_CTRL_START_TMR_RD | CNFG_CTRL_CLR_STAT_RD; +#endif + + if(poll) { + while(checkBusyRead()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } +} + +void fDev::writeTo(uint64_t* vaddr, uint32_t len, bool poll) { + cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr; + cnfg_reg[CNFG_LEN_WR_REG] = len; +#ifndef PERF_RUN + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_WR; +#else + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_WR | CNFG_CTRL_START_TMR_WR | CNFG_CTRL_CLR_STAT_WR; +#endif + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } +} + +void fDev::transferData(uint64_t* vaddr, uint32_t len, bool poll) { + cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr; + cnfg_reg[CNFG_LEN_RD_REG] = len; + cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr; + cnfg_reg[CNFG_LEN_WR_REG] = len; +#ifndef PERF_RUN + cnfg_reg[CNFG_CTRL_REG] = 
CNFG_CTRL_START_WR | CNFG_CTRL_START_RD; +#else + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_WR | CNFG_CTRL_START_RD | + CNFG_CTRL_START_TMR_WR | CNFG_CTRL_START_TMR_RD | + CNFG_CTRL_CLR_STAT_WR | CNFG_CTRL_CLR_STAT_RD; +#endif + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } +} + +void fDev::transferData(uint64_t* vaddr_src, uint64_t* vaddr_dst, uint32_t len, bool poll) { + cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr_src; + cnfg_reg[CNFG_LEN_RD_REG] = len; + cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr_dst; + cnfg_reg[CNFG_LEN_WR_REG] = len; +#ifndef PERF_RUN + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_RD; + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_WR; +#else + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_RD | CNFG_CTRL_START_TMR_RD | CNFG_CTRL_CLR_STAT_RD; + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_WR | CNFG_CTRL_START_TMR_WR | CNFG_CTRL_CLR_STAT_WR; +#endif + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } +} + +void fDev::transferData(uint64_t* vaddr, uint32_t len_src, uint32_t len_dst, bool poll) { + cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr; + cnfg_reg[CNFG_LEN_RD_REG] = len_src; + cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr; + cnfg_reg[CNFG_LEN_WR_REG] = len_dst; +#ifndef PERF_RUN + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_RD; + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_WR; +#else + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_RD | CNFG_CTRL_START_TMR_RD | CNFG_CTRL_CLR_STAT_RD; + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_WR | CNFG_CTRL_START_TMR_WR | CNFG_CTRL_CLR_STAT_WR; +#endif + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } +} + +void fDev::transferData(uint64_t* vaddr_src, uint64_t* vaddr_dst, uint32_t len_src, uint32_t len_dst, bool poll) { + cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr_src; + cnfg_reg[CNFG_LEN_RD_REG] = len_src; + cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr_dst; + 
cnfg_reg[CNFG_LEN_WR_REG] = len_dst; +#ifndef PERF_RUN + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_RD; + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_WR; +#else + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_RD | CNFG_CTRL_START_TMR_RD | CNFG_CTRL_CLR_STAT_RD; + cnfg_reg[CNFG_CTRL_REG] = CNFG_CTRL_START_WR | CNFG_CTRL_START_TMR_WR | CNFG_CTRL_CLR_STAT_WR; +#endif + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } +} + +/* -- Polling ---------------------------------------------------------------------------------------- */ + +bool fDev::checkBusyRead() { + return !(cnfg_reg[CNFG_STATUS_DMA_RD_REG]); +} + +bool fDev::checkBusyWrite() { + return !(cnfg_reg[CNFG_STATUS_DMA_WR_REG]); +} + +uint32_t fDev::checkCompletedRead() { + return (cnfg_reg[CNFG_STATUS_DMA_RD_REG]); +} + +uint32_t fDev::checkCompletedWrite() { + return (cnfg_reg[CNFG_STATUS_DMA_WR_REG]); +} + +bool fDev::checkReadyRead() { + return cnfg_reg[CNFG_STATUS_REG] & CNFG_STATUS_READY_RD; +} + +bool fDev::checkReadyWrite() { + return cnfg_reg[CNFG_STATUS_REG] & CNFG_STATUS_READY_WR; +} + +void fDev::clearCompleted(bool rd, bool wr) { + cnfg_reg[CNFG_CTRL_REG] = (rd & CNFG_CTRL_CLR_STAT_RD) | (wr & CNFG_CTRL_CLR_STAT_WR); +} + +/* -- Partial reconfiguration ------------------------------------------------------------------------- */ +void fDev::addBitstream(std::string name, uint32_t op_id) { + fBitStream* bstream = new fBitStream(name, op_id, this); + if(bstream->openBitStream()) + bitstreams.insert({op_id, bstream}); +} + +void fDev::removeBitstream(uint32_t op_id) { + bitstreams[op_id]->closeBitStream(); + bitstreams.erase(op_id); +} + + +uint8_t readByte(std::ifstream& fb) { + char temp; + fb.read(&temp, 1); + return (uint8_t)temp; +} + +uint32_t fDev::reconfigure(uint32_t op_id) { + fBitStream *bstream = bitstreams[op_id]; + + // Obtain the lock and decouple the design + ioctl(fd, IOCTL_RECONFIG_LOCK, 0); + + high_resolution_clock::time_point begin = 
high_resolution_clock::now(); + + for(uint i = 0; i < bstream->getFszM(); i++) { + // Send the data + readFrom((uint64_t*)(bstream->getSrc() + i*bstream->getBatchSize()/4), bstream->getBatchSize()); + } + // Last batch + if(bstream->getFszR() > 0) + readFrom((uint64_t*)(bstream->getSrc() + bstream->getFszM()*bstream->getBatchSize()/4), bstream->getFszR()); + + high_resolution_clock::time_point end = high_resolution_clock::now(); + auto duration = duration_cast(end - begin).count(); + std::cout << std::dec << "PR completed in: " << duration << " us" << std::endl; + + // Free the lock and couple the design + ioctl(fd, IOCTL_RECONFIG_UNLOCK, 0); + + return 0; +} + +/* -- Timers ------------------------------------------------------------------------------------------ */ +void fDev::setTimerStop(uint64_t tmr_stop) { + cnfg_reg[CNFG_TMR_STOP_REG] = tmr_stop; +} + +uint64_t fDev::getTimerStop() { + return cnfg_reg[CNFG_TMR_STOP_REG]; +} + + +uint64_t fDev::getReadTimer() { + return cnfg_reg[CNFG_TMR_RD_REG]; +} + +uint64_t fDev::getWriteTimer() { + return cnfg_reg[CNFG_TMR_WR_REG]; +} + +double fDev::getThroughputRd(uint32_t size) { + return (double)((size / (1024.0 * 1024.0)) / (cnfg_reg[CNFG_TMR_RD_REG] * CLK_NS ) * 1000000000); +} + +double fDev::getThroughputWr(uint32_t size) { + return (double)((size / (1024.0 * 1024.0)) / (cnfg_reg[CNFG_TMR_WR_REG] * CLK_NS ) * 1000000000); +} + +uint64_t fDev::getTimeRdNS() { + return cnfg_reg[CNFG_TMR_RD_REG] * CLK_NS; +} + +uint64_t fDev::getTimeWrNS() { + return cnfg_reg[CNFG_TMR_WR_REG] * CLK_NS; +} diff --git a/sw/scheduling/src/fScheduler.cpp b/sw/scheduling/src/fScheduler.cpp new file mode 100644 index 00000000..b4b7e133 --- /dev/null +++ b/sw/scheduling/src/fScheduler.cpp @@ -0,0 +1,59 @@ +#include "fScheduler.hpp" + +void fScheduler::processRequests() { + unique_lock lck(mtx); + run = true; + fJob* currJob; + uint32_t curr_op_id = -1; + cv.notify_one(); + lck.unlock(); + + while(run || !fque.empty()) { + lck.lock(); + 
if(!fque.empty()) { +#ifdef REQUEST_SCHEDULING + if(fque.top() != nullptr) { +#else + if(fque.front() !=nullptr) { +#endif + // Check whether PR is needed +#ifdef REQUEST_SCHEDULING + if(curr_op_id != fque.top()->getOperator()) { +#else + if(curr_op_id != fque.front()->getOperator()) { +#endif + // PR execution +#ifdef REQUEST_SCHEDULING + this->reconfigure(fque.top()->getOperator()); +#else + this->reconfigure(fque.front()->getOperator()); +#endif + } + + // Remove next job from the queue +#ifdef REQUEST_SCHEDULING + currJob = fque.top(); +#else + currJob = fque.front(); +#endif + +#ifdef VERBOSE_DEBUG + cout << "Process Requests: current region ID: " << region_id << ", current thread ID: " << currJob->getId() << ", current operation ID: " << currJob->getOperator() << ", current priority: " << currJob->getPriority() << endl; +#endif + fque.pop(); + + // Run the job + curr_op_id = currJob->getOperator(); + currJob->start(); + } + else { + fque.pop(); + curr_op_id = -1; + } + } + + lck.unlock(); + + nanosleep(&PAUSE, NULL); + } +} \ No newline at end of file diff --git a/sw/scheduling/src/main.cpp b/sw/scheduling/src/main.cpp new file mode 100644 index 00000000..3cd29367 --- /dev/null +++ b/sw/scheduling/src/main.cpp @@ -0,0 +1,183 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "fScheduler.hpp" +#include "fArbiter.hpp" +#include "fDefs.hpp" + +using namespace std; +using namespace std::chrono; + +#define TARGET_FPGA_REGION_0 0 +#define TARGET_FPGA_REGION_1 1 +#define TARGET_FPGA_REGION_2 2 +#define TRANSFER_SIZE 512 +#define N_HOST_PG 2 +#define N_OPER 4 + +template +auto select_random(const S &s, size_t n) { + auto it = std::begin(s); + // 'advance' the iterator n times + std::advance(it,n); + return it; +} + +struct jobObj { + int oper = 0; + int sent = 0; + + jobObj(int oper) { this->oper = oper; } +}; + +int main() +{ + vector fscheduler; + + // Acquire an FPGA region 0 + fscheduler.push_back(new 
fScheduler(TARGET_FPGA_REGION_0)); + if(fscheduler[0]->obtainRegion()) + cout << "Acquired an FPGA region " << TARGET_FPGA_REGION_0 << endl; + else + return EXIT_FAILURE; + + // Add bitstreams + fscheduler[0]->addBitstream("bitstreams/part_bstream_c0_0.bin", OPER_0); + fscheduler[0]->addBitstream("bitstreams/part_bstream_c1_0.bin", OPER_1); + fscheduler[0]->addBitstream("bitstreams/part_bstream_c2_0.bin", OPER_2); + fscheduler[0]->addBitstream("bitstreams/part_bstream_c3_0.bin", OPER_3); + + // Acquire an FPGA region 1 + fscheduler.push_back(new fScheduler(TARGET_FPGA_REGION_1)); + if(fscheduler[1]->obtainRegion()) + cout << "Acquired an FPGA region " << TARGET_FPGA_REGION_1 << endl; + else + return EXIT_FAILURE; + + // Add bitstreams + fscheduler[1]->addBitstream("bitstreams/part_bstream_c0_1.bin", OPER_0); + fscheduler[1]->addBitstream("bitstreams/part_bstream_c1_1.bin", OPER_1); + fscheduler[1]->addBitstream("bitstreams/part_bstream_c2_1.bin", OPER_2); + fscheduler[1]->addBitstream("bitstreams/part_bstream_c3_1.bin", OPER_3); + + // Acquire an FPGA region 2 + fscheduler.push_back(new fScheduler(TARGET_FPGA_REGION_2)); + if(fscheduler[2]->obtainRegion()) + cout << "Acquired an FPGA region " << TARGET_FPGA_REGION_2 << endl; + else + return EXIT_FAILURE; + + // Add bitstreams + fscheduler[2]->addBitstream("bitstreams/part_bstream_c0_2.bin", OPER_0); + fscheduler[2]->addBitstream("bitstreams/part_bstream_c1_2.bin", OPER_1); + fscheduler[2]->addBitstream("bitstreams/part_bstream_c2_2.bin", OPER_2); + fscheduler[2]->addBitstream("bitstreams/part_bstream_c3_2.bin", OPER_3); + + // Add arbiter + fArbiter farbiter; + + farbiter.addScheduler(fscheduler[0]); + farbiter.addScheduler(fscheduler[1]); + farbiter.addScheduler(fscheduler[2]); + + // Start arbitration + farbiter.start(); + + uint64_t* uMem = (uint64_t*) malloc(TRANSFER_SIZE); + + // Create jobs + vector jobs [N_OPER]; + for(int i = 0; i < N_JOBS; i++) { + jobs[0].push_back(new fOp0(uMem, TRANSFER_SIZE, i, 1)); + 
jobs[1].push_back(new fOp1(uMem, TRANSFER_SIZE, i, 1)); + jobs[2].push_back(new fOp2(uMem, TRANSFER_SIZE, i, 1)); + jobs[3].push_back(new fOp3(uMem, TRANSFER_SIZE, i, 1)); + } + +#ifdef VERBOSE_DEBUG + cout << "All jobs created" << endl; +#endif + + srand(time(0)); + int tmp[N_OPER]; + set s; + for(int i = 0; i < N_OPER; i++) { + tmp[i] = 0; + s.insert(i); + } + + // Start the measurements + high_resolution_clock::time_point begin = high_resolution_clock::now(); + +#ifdef REQUEST_RANDOM + for(int i = 0; i < N_OPER * N_JOBS; i++) { + auto r = rand() % s.size(); + auto n = *select_random(s, r); + + farbiter.requestJob(jobs[n][tmp[n]]); + tmp[n]++; + +//#ifdef VERBOSE_DEBUG +// cout << "OPER: " << n << ", OCC: " << tmp[n] << endl; +//#endif + + if(tmp[n] == N_JOBS) + s.erase(n); + } +#else + // Schedule jobs + for(int i = 0; i < N_JOBS; i++) { + farbiter.requestJob(jobs[0][i]); + farbiter.requestJob(jobs[1][i]); + farbiter.requestJob(jobs[2][i]); + farbiter.requestJob(jobs[3][i]); + } +#endif + +#ifdef VERBOSE_DEBUG + cout << "All jobs scheduled" << endl; +#endif + + bool k = false; + while(!k) { + k = true; + for(int i = 0; i < N_JOBS; i++) { + if(!jobs[0][i]->isDone()) + k = false; + if(!jobs[1][i]->isDone()) + k = false; + if(!jobs[2][i]->isDone()) + k = false; + if(!jobs[3][i]->isDone()) + k = false; + } + nanosleep(&PAUSE, NULL); + } + + high_resolution_clock::time_point end = high_resolution_clock::now(); + auto duration = duration_cast(end - begin).count(); + std::cout << std::dec << "All jobs completed in: " << duration << " us" << std::endl; + + + fscheduler[0]->removeBitstream(OPER_0); + fscheduler[0]->removeBitstream(OPER_1); + fscheduler[0]->removeBitstream(OPER_2); + fscheduler[0]->removeBitstream(OPER_3); + + fscheduler[1]->removeBitstream(OPER_0); + fscheduler[1]->removeBitstream(OPER_1); + fscheduler[1]->removeBitstream(OPER_2); + fscheduler[1]->removeBitstream(OPER_3); + + fscheduler[2]->removeBitstream(OPER_0); + 
fscheduler[2]->removeBitstream(OPER_1); + fscheduler[2]->removeBitstream(OPER_2); + fscheduler[2]->removeBitstream(OPER_3); + + return 0; +} \ No newline at end of file diff --git a/sw/scheduling/src/utils.cpp b/sw/scheduling/src/utils.cpp new file mode 100644 index 00000000..aa1e25b2 --- /dev/null +++ b/sw/scheduling/src/utils.cpp @@ -0,0 +1,164 @@ +#include "utils.hpp" +#include +#include +#include + +using namespace std; + +void readFloatData(char* filename, float* points, uint32_t size, uint32_t dimensions) +{ + std::ifstream inputFile(filename); + if (!inputFile) { + std::cerr << "Coult no open file: " << filename << std::endl; + return; + } + + std::string line; + uint32_t idx = 0; + while(getline(inputFile, line)) { + std::stringstream ss(line); + std::string value; + bool isFirst = true; + while(getline(ss, value, ',')) { + std::stringstream vs(value); + float v = 0.0; + vs >> v; + if (isFirst) { + isFirst = false; + continue; + } + points[idx] = v; + idx++; + } + } + + inputFile.close(); +} + +void readFixData(char* filename, uint32_t* points, uint32_t size, uint32_t dimensions, uint32_t fixpoint) +{ + std::ifstream inputFile(filename); + if (!inputFile) { + std::cerr << "Coult no open file: " << filename << std::endl; + return; + } + + std::string line; + uint32_t idx = 0; + while(getline(inputFile, line)) { + std::stringstream ss(line); + std::string value; + bool isFirst = true; + while(getline(ss, value, ',')) { + std::stringstream vs(value); + float v = 0.0; + vs >> v; + if (isFirst) { + isFirst = false; + continue; + } + points[idx] = (uint32_t) (v*fixpoint); + idx++; + } + } + + inputFile.close(); +} + +void read_input(const char *filename, int nclusters, int nfeatures, int npoints, float* features) +{ + printf("Read input data from file\n"); + float temp; + int i; + + FILE *fp = fopen(filename, "r"); + if (fp==NULL) + { + printf("cannot find file\n"); + } + + for(i = 0; i < npoints * nfeatures; i++){ + fscanf(fp, "%f", &temp); + features[i] = temp; 
+ } + + printf("\nI/O completed\n"); + // printf("\nNumber of objects: %u\n", npoints); + // printf("Number of features: %u\n", nfeatures); + printf("\nFinish file reading\n"); + +} + + +void data_gen(float* data, uint64_t data_set_size, uint32_t data_dim ) +{ + srand(0); + uint64_t maximum = pow(2,8); + printf("generated_data:\n"); + for(int data_cnt=0; data_cnt< data_set_size; data_cnt++) + { + // printf("[%d]:", data_cnt); + for (int i = 0; i < data_dim; ++i) + { + data[data_cnt*data_dim+i]= rand()%maximum; + // printf("%d, ",(int)data[data_cnt*data_dim+i]); + } + // printf("\n"); + } + +} + + +void read_file(float *array, int N, int D, const char *filename, bool isBinary){ + FILE *fp; + int counts = 0; + int i=0,j=0; + char line[MAX_LINE_LENGTH]; + char *token=NULL; + char space[2] = " "; + + fp = fopen(filename,"r"); + + if ( fp == NULL ){ + fprintf(stderr, "File '%s' does not exists!", filename); + exit(1); + } + + if ( isBinary ){ + // read binary file, everything at once + counts = fread(array, sizeof(float) * N * D, 1, fp); + + if ( counts == 0 ) { + fprintf(stderr, "Binary file '%s' could not be read. Wrong format.", filename); + exit(1); + } + }else{ + // processing a text file + // format: there are D float values each line. Each value is separated by a space character. 
+ // notice MAX_LINE_LENGTH = 2049 + i = 0; + while ( fgets ( line, MAX_LINE_LENGTH, fp ) != NULL && + i < N ) { + + + if ( line[0] != '%'){ // ignore '%' comment char + token = strtok(line, space); + j=0; + + + while ( token != NULL && + j < D ){ + + array[i*D + j] = atof(token); // 0.0 if no valid conversion + token = strtok(NULL, space); + j++; + } + i++; + } + } + } + + fclose(fp); +} + + diff --git a/sw/stride/CMakeLists.txt b/sw/stride/CMakeLists.txt new file mode 100644 index 00000000..93665b25 --- /dev/null +++ b/sw/stride/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.0) +project(stride) + +# Includes +include_directories(include) + +# Sources +file(GLOB SOURCES src/*.cpp) + +# Exec +set(EXEC main) + +# Compilation +set (CMAKE_CXX_STANDARD 14) +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -mavx -march=native -O3") + +# Boost lib +find_package(Boost COMPONENTS program_options REQUIRED) + +# Targets +add_executable(${EXEC} ${SOURCES}) +target_link_libraries(${EXEC} ${Boost_LIBRARIES}) \ No newline at end of file diff --git a/sw/stride/include/fBench.hpp b/sw/stride/include/fBench.hpp new file mode 100644 index 00000000..6904a065 --- /dev/null +++ b/sw/stride/include/fBench.hpp @@ -0,0 +1,121 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tsc_x86.h" + +constexpr auto kCalibrate = false; +constexpr auto kCyclesRequired = 1e9; +constexpr auto kNumRunsDist = 1000; +constexpr auto kNumRunsDef = 100; + +using namespace std::chrono; + +/** + * Exec times [ns] + */ +class Bench { + std::vector times; + double avg_time = 0.0; + int num_runs = 0; + int num_runs_def = 0; + + void sortBench() { std::sort(times.begin(), times.end()); } + +public: + Bench(int num_runs = kNumRunsDef) { this->num_runs_def = num_runs; } + + // Number of runs for the average + inline int getNumRuns() { return num_runs; } + inline void setNumRuns(uint32_t n_runs) { num_runs = 
n_runs; } + + // Average run time + inline double getAvg() { return avg_time; } + + // Statistics + inline double getMin() { if(!times.empty()) return times[0]; else return 0; } + inline double getMax() { if(!times.empty()) return times[times.size()-1]; else return 0; } + inline double getP25() { if(!times.empty()) return times[(times.size()/4)-1]; else return 0; } + inline double getP50() { if(!times.empty()) return times[(times.size()/2)-1]; else return 0; } + inline double getP75() { if(!times.empty()) return times[((times.size()*3)/4)-1]; else return 0; } + inline double getP95() { if(!times.empty()) return times[((times.size()*95)/100)-1]; else return 0; } + inline double getP99() { if(!times.empty()) return times[((times.size()*99)/100)-1]; else return 0; } + + // Print results + void printOut() { + std::ios_base::fmtflags f(std::cout.flags()); + + std::cout << "Average time: " << getAvg() << " ns" << std::endl; + std::cout << "Max time: " << getMax() << " ns" << std::endl; + std::cout << "Min time: " << getMin() << " ns" << std::endl; + std::cout << "Median: " << getP50() << " ns" << std::endl; + std::cout << "25th: " << getP25() << " ns" << std::endl; + std::cout << "75th: " << getP75() << " ns" << std::endl; + std::cout << "95th: " << getP95() << " ns" << std::endl; + std::cout << "99th: " << getP99() << " ns" << std::endl; + + std::cout.flags( f ); + } + + /** + * Measure the function execution + */ + template + void runtime(Func const &func, Args... 
args) { + times.clear(); + + // Warm-up + if (kCalibrate) { + num_runs = 1; + while (num_runs < (1 << 14)) { + const auto start = start_tsc(); + for (int i = 0; i < num_runs; ++i) { + func(args...); + } + const auto cycles = stop_tsc(start); + + if (cycles >= kCyclesRequired) + break; + + num_runs *= 2; + } + } else { + num_runs = num_runs_def; + } + + std::cout <<"N runs: " << num_runs << std::endl; + + // Average time + auto begin_time = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < num_runs; ++i) { + func(args...); + } + auto end_time = std::chrono::high_resolution_clock::now(); + + double time = std::chrono::duration_cast(end_time - begin_time).count(); + avg_time = time / num_runs; + /* + for (int i = 0; i < kNumRunsDist; ++i) { + begin_time = std::chrono::high_resolution_clock::now(); + func(args...); + end_time = std::chrono::high_resolution_clock::now(); + + time = std::chrono::duration_cast(end_time - begin_time).count(); + times.emplace_back(time); + } + */ + //sortBench(); + //printOut(); + } + +}; diff --git a/sw/stride/include/fDefs.hpp b/sw/stride/include/fDefs.hpp new file mode 100644 index 00000000..15dee5a8 --- /dev/null +++ b/sw/stride/include/fDefs.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include + +/* FLAGS */ +// SET ACCORDING TO THE BITSTREAM +#define EN_AVX +#define EN_DDR +#define EN_RDMA + +/* Sleep */ +#define POLL_SLEEP_NS 100 + +/* Large pages */ +#define LARGE_PAGE_SIZE (2 * 1024 * 1024) +#define LARGE_PAGE_SHIFT 21UL +#define PAGE_SIZE 4 * 1024 +#define PAGE_SHIFT 12UL + +/* Clock */ +#define CLK_NS 4 + +/* Command FIFO depth */ +static const uint32_t cmd_fifo_depth = 64; +static const uint32_t cmd_fifo_thr = 10; + +/* Farview Op codes */ +enum class opCode : uint8_t { READ=0, WRITE=1, FV=2 }; + +/* Verbosity */ +#define VERBOSE_DEBUG_1 +//#define VERBOSE_DEBUG_2 +//#define VERBOSE_DEBUG_3 + +/* ltoh: little to host */ +/* htol: little to host */ +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define 
ltohl(x) (x) +# define ltohs(x) (x) +# define htoll(x) (x) +# define htols(x) (x) +#elif __BYTE_ORDER == __BIG_ENDIAN +# define ltohl(x) __bswap_32(x) +# define ltohs(x) __bswap_16(x) +# define htoll(x) __bswap_32(x) +# define htols(x) __bswap_16(x) +#endif \ No newline at end of file diff --git a/sw/stride/include/fDev.hpp b/sw/stride/include/fDev.hpp new file mode 100644 index 00000000..6826cc63 --- /dev/null +++ b/sw/stride/include/fDev.hpp @@ -0,0 +1,225 @@ +#ifndef __FDEV_HPP__ +#define __FDEV_HPP__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fDefs.hpp" +#include "fQ.hpp" + +namespace fpga { + +/* Command FIFO depth */ +static const uint32_t cmd_fifo_depth = 64; +static const uint32_t cmd_fifo_thr = 10; + +/** + * Fpga device region + */ +class fDev { + + /* Fpga device */ + int32_t fd = 0; + + /* Used markers */ + uint32_t rd_cmd_cnt = 0; + uint32_t wr_cmd_cnt = 0; +#ifdef EN_RDMA + uint32_t rdma_cmd_cnt = 0; +#endif + + /* Mmapped regions */ +#ifdef EN_AVX + __m256i *cnfg_reg = 0; +#else + uint64_t *cnfg_reg = 0; +#endif + uint64_t *ctrl_reg = 0; + + /* Mapped large pages hash table */ + std::unordered_map mapped_large; + + /* Utility */ + bool mmapFpga(); + bool munmapFpga(); + + /* Send to controller */ + void postCmd(uint64_t offs_3, uint64_t offs_2, uint64_t offs_1, uint64_t offs_0); + + /* Check busy */ + bool checkBusyRead(); + bool checkBusyWrite(); + + /* Check ready */ + bool checkReadyRead(); + bool checkReadyWrite(); + + /* Memory */ + uint64_t* _getHostMem(uint32_t n_pages); + void _freeHostMem(uint64_t* vaddr, uint32_t n_pages); + void _userMap(uint64_t *vaddr, uint32_t len); + void _userUnmap( uint64_t *vaddr, uint32_t len); + + /* Data movement */ + void _read(uint64_t *vaddr, uint32_t len, bool stream = true, bool clr_stat = true, bool poll = true); + void _write(uint64_t *vaddr, uint32_t len, 
bool stream = true, bool clr_stat = true, bool poll = true); + void _transfer(uint64_t *vaddr_src, uint64_t* vaddr_dst, uint32_t len_src, uint32_t len_dst, bool stream = true, bool clr_stat = true, bool poll = true); + + void _offload(uint64_t *vaddr, uint32_t len, bool poll = true); + void _sync(uint64_t *vaddr, uint32_t len, bool poll = true); + + +public: + + fDev() {} + ~fDev() {} + + /** + * Obtain and release FPGA regions + */ + + // Acquire an FPGA region with target ID + bool acquireRegion(uint32_t rNum); + // Release an acquired FPGA region + void releaseRegion(); + + /** + * Control bus + */ + + // Control status bus, AXI Lite + inline void setCSR(uint64_t val, uint32_t offs) { ctrl_reg[offs] = val; } + inline uint64_t getCSR(uint32_t offs) { return ctrl_reg[offs]; } + + /** + * Explicit buffer management + * @param n_pages - number of 2MB pages to be allocated + */ + + // Obtain host memory - pages 2M + template + _Vaddr* getHostMem(uint32_t n_pages) { + return (_Vaddr*) _getHostMem(n_pages); + } + + // Free host memory + template + void freeHostMem(_Vaddr* vaddr, uint32_t n_pages) { + _freeHostMem((uint64_t*)vaddr, n_pages); + } + + // FPGA user space range mapping + template + void userMap(uint64_t *vaddr, uint32_t len) { + _userMap((uint64_t*)vaddr, len); + } + + // FPGA user space range unmapping (auto on release) + template + void userUnmap(_Vaddr *vaddr, uint32_t len) { + _userUnmap((uint64_t*)vaddr, len); + } + + // Obtain PR memory - pages 2M + uint64_t* getPrMem(uint64_t n_pages); + // Free PR memory + void freePrMem(uint64_t* vaddr, uint64_t n_pages); + + /** + * Bulk transfers + * @param vaddr - data pointer + * @param len - transfer length + * @param poll - blocking vs non-blocking + */ + + template + void read(_Vaddr *vaddr, uint32_t len, bool stream = true, bool clr_stat = true, bool poll = false) { + _read((uint64_t*)vaddr, len, stream, clr_stat, poll); + } + + template + void write(_Vaddr *vaddr, uint32_t len, bool stream = true, bool 
clr_stat = true, bool poll = false) { + _write((uint64_t*)vaddr, len, stream, clr_stat, poll); + } + + template + void transfer(_Vaddr *vaddr_src, _Vaddr *vaddr_dst, uint32_t len_src, uint32_t len_dst, bool stream = true, bool clr_stat = true, bool poll = false) { + _transfer((uint64_t*)vaddr_src, (uint64_t*)vaddr_dst, len_src, len_dst, stream, clr_stat, poll); + } + +#ifdef EN_DDR + // Sync operations + template + void sync(uint64_t *vaddr, uint32_t len, bool poll = true) { + _sync((uint64_t*)vaddr, len, poll); + } + + template + void offload(uint64_t *vaddr, uint32_t len, bool poll = true) { + _offload((uint64_t*)vaddr, len, poll); + } +#endif + + /** + * Check for completion + */ + + // Returns the number of completed reads + uint32_t checkCompletedRead(); + // Returns the number of completed writes + uint32_t checkCompletedWrite(); + // Clear all status + void clearCompleted(); + + // Timers + void setTimerStopAt(uint64_t tmr_stop_at); + uint64_t getReadTimer(); + uint64_t getWriteTimer(); + + // Debug + void printDebugXDMA(); + + /** + * PR + */ + void reconfigure(uint64_t* vaddr, uint64_t len); + + /** + * Roce operations + */ + +#ifdef EN_RDMA + // ARP lookup + bool doArpLookup(); + // Write initial context + void writeContext(fQPair *pair); + // Write connection + void writeConnection(fQPair *pair, uint32_t port); + + // RDMA ops + bool postWrite(fQPair *pair, uint64_t l_offs, uint64_t r_offs, uint32_t size); + bool postRead(fQPair *pair, uint64_t l_offs, uint64_t r_offs, uint32_t size); + bool postFarview(fQPair *pair, uint64_t l_offs, uint64_t r_offs, uint32_t size, uint64_t params); + bool postFarviewBase(fQPair *pair, uint64_t params_0, uint64_t params_1, uint64_t params_2); +#endif +}; + +} /* namespace fpga */ + +#endif diff --git a/sw/stride/include/fQ.hpp b/sw/stride/include/fQ.hpp new file mode 100644 index 00000000..1349b5b6 --- /dev/null +++ b/sw/stride/include/fQ.hpp @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include + +namespace 
fpga { + +#define MSG_LEN 82 + +class fQ { +public: + // Queue + uint32_t qpn; + uint32_t psn; + uint32_t rkey; + + // Buffer + uint64_t vaddr; + uint32_t size; + + // Node + uint32_t region; + + // Global ID + char gid[33]; + + // + fQ() { memset(gid, 0, 33); } + + std::string encode(); + void decode (char *buf, size_t len); + + uint32_t gidToUint(int idx); + void uintToGid(int idx, uint32_t ip_addr); + + void print(const char *name); + static uint32_t getLength() { return MSG_LEN; } +}; + +struct fQPair { + fQ local; + fQ remote; +}; + +} /* namespace fpga */ \ No newline at end of file diff --git a/sw/stride/include/fView.hpp b/sw/stride/include/fView.hpp new file mode 100644 index 00000000..013d5cbb --- /dev/null +++ b/sw/stride/include/fView.hpp @@ -0,0 +1,97 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "fQ.hpp" +#include "fDev.hpp" + +using namespace fpga; + +namespace comm { + +/** + * Roce communicator + */ +class fView { + + /* FPGA device */ + fDev *fdev; + int32_t n_regions; + + /* Nodes */ + int32_t node_id; + int32_t n_nodes; + + /* Connections */ + const char *mstr_ip_addr; + int *connections; + uint16_t port; + uint16_t ib_port; + + /* Static */ + static const uint32_t base_ip_addr = 0x0B01D4D1; + + /* Queue pairs */ + std::vector> pairs; + + void initializeLocalQueues(); + + int masterExchangeQueues(); + int clientExchangeQueues(); + + int exchangeWindow(int32_t node_id, int32_t qpair_id); + int masterExchangeWindow(int32_t node_id, int32_t qpair_id); + int clientExchangeWindow(int32_t node_id, int32_t qpair_id); + +public: + + fView(fDev *fdev, uint32_t node_id, uint32_t n_nodes, uint32_t *n_qpairs, uint32_t n_regions, const char *mstr_ip_addr); + ~fView(); + + void closeConnections(); + + /** + * Window management + */ + + uint64_t* allocWindow(uint32_t node_id, uint32_t qpair_id, uint64_t n_pages); + void freeWindow(uint32_t node_id, uint32_t qpair_id); + + /** + * RDMA operations base + */ + + void 
writeRemote(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t size); + void readRemote(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t size); + void farviewRemote(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t size, uint64_t params); + void farviewRemoteBase(uint32_t node_id, uint32_t qpair_id, uint64_t params_0, uint64_t params_1, uint64_t params_2); + + /** + * RDMA install operator + */ + //void installOperator(); + + /** + * Added + */ + void farviewStride(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t dwidth, uint32_t stride, uint32_t num_elem); + void farviewRegexConfigLoad(uint32_t node_id, uint32_t qpair_id, unsigned char* config_bytes); + void farviewRegexRead(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t size); + + // Poll + uint32_t pollRemoteWrite(uint32_t node_id, uint32_t qpair_id); + uint32_t pollLocalRead(uint32_t node_id, uint32_t qpair_id); + + // Sync + int32_t waitOnCloseRemote(uint32_t node_id); + int32_t waitOnReplyRemote(uint32_t node_id); + int32_t replyRemote(uint32_t node_id, uint32_t ack); + int32_t syncRemote(uint32_t node_id); + +}; + +} diff --git a/sw/stride/include/tsc_x86.h b/sw/stride/include/tsc_x86.h new file mode 100644 index 00000000..a0e30822 --- /dev/null +++ b/sw/stride/include/tsc_x86.h @@ -0,0 +1,89 @@ +#pragma once + +// ================= GNU C and possibly other UNIX compilers ================= +#ifndef _WIN32 + +#if defined(__GNUC__) || defined(__linux__) +#define VOLATILE __volatile__ +#define ASM __asm__ +#else +/* if we're neither compiling with gcc or under linux, we can hope + * the following lines work, they probably won't */ +#define ASM asm +#define VOLATILE +#endif + +#define myInt64 unsigned long long +#define INT32 unsigned int + +// ================================== WIN32 ================================== +#else + +#define 
myInt64 signed __int64 +#define INT32 unsigned __int32 + +#endif + +/* This is the RDTSC timer. + * RDTSC is an instruction on several Intel and compatible CPUs that Reads the + * Time Stamp Counter. The Intel manuals contain more information. + */ + +#define COUNTER_LO(a) ((a).int32.lo) +#define COUNTER_HI(a) ((a).int32.hi) +#define COUNTER_VAL(a) ((a).int64) + +#define COUNTER(a) ((unsigned long long)COUNTER_VAL(a)) + +#define COUNTER_DIFF(a, b) (COUNTER(a) - COUNTER(b)) + +// ================= GNU C and possibly other UNIX compilers ================= +#ifndef _WIN32 + +typedef union { + myInt64 int64; + struct { + INT32 lo, hi; + } int32; +} tsc_counter; + +#define RDTSC(cpu_c) \ + ASM VOLATILE("rdtsc" : "=a"((cpu_c).int32.lo), "=d"((cpu_c).int32.hi)) +#define CPUID() ASM VOLATILE("cpuid" : : "a"(0) : "bx", "cx", "dx") + +// ================================== WIN32 ================================== +#else + +typedef union { + myInt64 int64; + struct { + INT32 lo, hi; + } int32; +} tsc_counter; + +#define RDTSC(cpu_c) \ + { __asm rdtsc __asm mov(cpu_c).int32.lo, eax __asm mov(cpu_c).int32.hi, edx } + +#define CPUID() \ + { __asm mov eax, 0 __asm cpuid } + +#endif + +// static void init_tsc() { +// ; // no need to initialize anything for x86 +// } + +static myInt64 start_tsc(void) { + tsc_counter start; + CPUID(); + RDTSC(start); + return COUNTER_VAL(start); +} + +static myInt64 stop_tsc(myInt64 start) { + tsc_counter end; + RDTSC(end); + CPUID(); + return COUNTER_VAL(end) - start; +} + diff --git a/sw/stride/src/fDev.cpp b/sw/stride/src/fDev.cpp new file mode 100644 index 00000000..8968f24a --- /dev/null +++ b/sw/stride/src/fDev.cpp @@ -0,0 +1,932 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fDev.hpp" + +/* Sleep */ +#define POLL_SLEEP_NS 100 + +/* Clock */ +#define CLK_NS 4 + +/* IOCTL */ +#define IOCTL_ALLOC_HOST_USER_MEM _IOR('D', 1, unsigned long) +#define IOCTL_FREE_HOST_USER_MEM 
_IOR('D', 2, unsigned long) +#define IOCTL_ALLOC_HOST_PR_MEM _IOR('D', 3, unsigned long) +#define IOCTL_FREE_HOST_PR_MEM _IOR('D', 4, unsigned long) +#define IOCTL_MAP_USER _IOR('D', 5, unsigned long) +#define IOCTL_UNMAP_USER _IOR('D', 6, unsigned long) +#define IOCTL_RECONFIG_LOAD _IOR('D', 7, unsigned long) +#define IOCTL_ARP_LOOKUP _IOR('D', 8, unsigned long) +#define IOCTL_WRITE_CTX _IOR('D', 9, unsigned long) +#define IOCTL_WRITE_CONN _IOR('D', 10, unsigned long) +#define IOCTL_RDMA_STAT _IOR('D', 11, unsigned long) +#define IOCTL_READ_ENG_STATUS _IOR('D', 12, unsigned long) + +/* MMAP */ +#define MMAP_CTRL (0x0 << PAGE_SHIFT) +#define MMAP_CNFG (0x1 << PAGE_SHIFT) +#define MMAP_CNFG_AVX (0x2 << PAGE_SHIFT) +#define MMAP_BUFF (0x200 << PAGE_SHIFT) +#define MMAP_PR (0x400 << PAGE_SHIFT) + +/* Regions */ +#define CTRL_REGION_SIZE 64 * 1024 +#define CNFG_REGION_SIZE 64 * 1024 +#define CNFG_AVX_REGION_SIZE 256 * 1024 + +#define N_RDMA_STAT_REGS 24 + +/* Config regs */ +#ifdef EN_AVX + // Base + #define CNFG_CTRL_REG 0 + #define CNFG_PF_REG 1 + #define CNFG_DATAPATH_REG_SET 2 + #define CNFG_DATAPATH_REG_CLR 3 + #define CNFG_TMR_STOP_REG 4 + #define CNFG_TMR_REG 5 + #define CNFG_STAT_REG 6 + // RDMA + #define CNFG_RDMA_POST_REG 10 + #define CNFG_RDMA_STAT_CMD_USED_REG 11 + #define CNFG_RDMA_QPN_REG 12 + + #define CTRL_START_RD 0x1 + #define CTRL_START_WR 0x2 + #define CTRL_SYNC_RD 0x4 + #define CTRL_SYNC_WR 0x8 + #define CTRL_STREAM_RD 0x10 + #define CTRL_STREAM_WR 0x20 + #define CTRL_CLR_STAT_RD 0x40 + #define CTRL_CLR_STAT_WR 0x80 + #define CTRL_CLR_IRQ_PENDING 0x100 +#else + // Base + #define CNFG_CTRL_REG 0 + #define CNFG_VADDR_RD_REG 1 + #define CNFG_LEN_RD_REG 2 + #define CNFG_VADDR_WR_REG 3 + #define CNFG_LEN_WR_REG 4 + #define VADDR_MISS_REG 5 + #define LEN_MISS_REG 6 + #define CNFG_DATAPATH_REG_SET 7 + #define CNFG_DATAPATH_REG_CLR 8 + #define CNFG_TMR_STOP_REG 9 + #define CNFG_TMR_RD_REG 10 + #define CNFG_TMR_WR_REG 11 + #define CNFG_STAT_CMD_USED_RD_REG 
12 + #define CNFG_STAT_CMD_USED_WR_REG 13 + #define CNFG_STAT_DMA_RD_REG 14 + #define CNFG_STAT_DMA_WR_REG 15 + #define CNFG_STAT_SENT_RD_REG 16 + #define CNFG_STAT_SENT_WR_REG 17 + #define CNFG_STAT_PFAULTS_REG 18 + // RDMA + #define CNFG_RDMA_POST_REG_0 20 + #define CNFG_RDMA_POST_REG_1 21 + #define CNFG_RDMA_POST_REG_2 22 + #define CNFG_RDMA_POST_REG_3 23 + #define CNFG_RDMA_STAT_CMD_USED_REG 24 + #define CNFG_RDMA_QPN_REG 25 + + #define CTRL_START_RD 0x1 + #define CTRL_START_WR 0x2 + #define CTRL_SYNC_RD 0x4 + #define CTRL_SYNC_WR 0x8 + #define CTRL_CLR_STAT_RD 0x10 + #define CTRL_CLR_STAT_WR 0x20 + #define CTRL_CLR_IRQ_PENDING 0x40 + #define CTRL_SEND_RDMA_REQ 0x80 + #define CTRL_SEND_QP_CTX 0x100 + #define CTRL_SEND_QP_CONN 0x200 +#endif + +using namespace std::chrono; + +namespace fpga { + +// ------------------------------------------------------------------------------- +// -- Obtain regions +// ------------------------------------------------------------------------------- + +/** + * Obtain vFPGA char devices + * @param: rNum - region ID + */ +bool fDev::acquireRegion(uint32_t rNum) { + std::string region = "/dev/fpga" + std::to_string(rNum); + fd = open(region.c_str(), O_RDWR | O_SYNC); + if(fd == -1) { + std::cout << "ERR: Cannot acquire an FPGA region" << std::endl; + return false; + } + + if(!mmapFpga()) { + std::cout << "ERR: Cannot mmap an FPGA region" << std::endl; + return false; + } + + return true; +} + +/** + * Release the vFPGA handle + */ +void fDev::releaseRegion() { + close(fd); +} + +/** + * Memory map control + */ +bool fDev::mmapFpga() { +#ifdef EN_AVX + cnfg_reg = (__m256i*) mmap(NULL, CNFG_AVX_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CNFG_AVX); + if(cnfg_reg == MAP_FAILED) { + releaseRegion(); + return false; + } +#else + cnfg_reg = (uint64_t*) mmap(NULL, CNFG_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CNFG); + if(cnfg_reg == MAP_FAILED) { + releaseRegion(); + return false; + } +#endif + + ctrl_reg = 
(uint64_t*) mmap(NULL, CTRL_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_CTRL); + if(ctrl_reg == MAP_FAILED) { + releaseRegion(); + return false; + } + + return true; +} + +/** + * Unmap + */ +bool fDev::munmapFpga() { +#ifdef EN_AVX + if(munmap(cnfg_reg, CNFG_AVX_REGION_SIZE) != 0) { + releaseRegion(); + return false; + } +#else + if(munmap(cnfg_reg, CNFG_REGION_SIZE) != 0) { + releaseRegion(); + return false; + } +#endif + + if(munmap(ctrl_reg, CTRL_REGION_SIZE) != 0) { + releaseRegion(); + return false; + } + + cnfg_reg = 0; + ctrl_reg = 0; + + return true; +} + +// ------------------------------------------------------------------------------- +// -- Memory management +// ------------------------------------------------------------------------------- + +/** + * Obtain huge pages on the host memory + * @param: n_pages - number of requested large pages + */ +uint64_t* fDev::_getHostMem(uint32_t n_pages) { + uint64_t *hMem, *hMemAligned; + uint64_t n_pg = n_pages; + + ioctl(fd, IOCTL_ALLOC_HOST_USER_MEM, &n_pg); + hMem = (uint64_t*)mmap(NULL, (n_pg + 1) * LARGE_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_BUFF); + // alignment + hMemAligned = (uint64_t*)( ((((uint64_t)hMem + LARGE_PAGE_SIZE - 1) >> LARGE_PAGE_SHIFT)) << LARGE_PAGE_SHIFT); + mapped_large[hMemAligned] = hMem; + return hMemAligned; +} + +/** + * Release huge pages on the host memory + * @param: vaddr - memory pointer + * @param: n_pages - number of obtained pages + */ +void fDev::_freeHostMem(uint64_t *vaddr, uint32_t n_pages) { + uint64_t* hMem; + uint64_t n_pg = n_pages; + + hMem = mapped_large[vaddr]; + munmap(hMem, (n_pg + 1) * LARGE_PAGE_SIZE); + ioctl(fd, IOCTL_FREE_HOST_USER_MEM, &vaddr); +} + +/** + * Obtain huge pages allocated for the PR bitstreams + * @param: n_pages - number of requested large pages + */ +uint64_t* fDev::getPrMem(uint64_t n_pages) { + uint64_t *hMem, *hMemAligned; + uint64_t n_pg = n_pages; + + ioctl(fd, IOCTL_ALLOC_HOST_PR_MEM, &n_pg); + hMem = 
(uint64_t*)mmap(NULL, (n_pg + 1) * LARGE_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMAP_PR); + // alignment + hMemAligned = (uint64_t*)( ((((uint64_t)hMem + LARGE_PAGE_SIZE - 1) >> LARGE_PAGE_SHIFT)) << LARGE_PAGE_SHIFT); + mapped_large[hMemAligned] = hMem; + return hMemAligned; +} + +/** + * Release huge pages on the host memory + * @param: vaddr - memory pointer + * @param: n_pages - number of obtained pages + */ +void fDev::freePrMem(uint64_t *vaddr, uint64_t n_pages) { + uint64_t* hMem; + uint64_t n_pg = n_pages; + + hMem = mapped_large[vaddr]; + munmap(hMem, (n_pg + 1) * LARGE_PAGE_SIZE); + ioctl(fd, IOCTL_FREE_HOST_PR_MEM, &vaddr); +} + +/** + * Explicit TLB mapping + * @param: mem - memory pointer + * @param: len - length of the mapping + */ +void fDev::_userMap(uint64_t *mem, uint32_t len) { + uint64_t vdata [2]; + vdata[0] = (uint64_t)mem; + vdata[1] = len; + ioctl(fd, IOCTL_MAP_USER, &vdata); +} + +/** + * TLB unmap + * @param: mem - memory pointer + * @param: len - length of the mapping + */ +void fDev::_userUnmap(uint64_t *mem, uint32_t len) { + uint64_t vdata [2]; + vdata[0] = (uint64_t)mem; + vdata[1] = len; + ioctl(fd, IOCTL_UNMAP_USER, &vdata); +} + +// ------------------------------------------------------------------------------- +// -- PR +// ------------------------------------------------------------------------------- + +/** + * Reconfiguration ioctl call + * @param: vaddr - memory pointer of the PR stream + * @param: len - length of the stream + */ +void fDev::reconfigure(uint64_t *vaddr, uint64_t len) { + uint64_t vdata [2]; + vdata[0] = (uint64_t)vaddr; + vdata[1] = len; + ioctl(fd, IOCTL_RECONFIG_LOAD, &vdata); +} + +#ifdef EN_AVX + // ------------------------------------------------------------------------------- + // -- Bulk transfers + // ------------------------------------------------------------------------------- + + /** + * Read operation (read to FPGA user logic) + * @param: vaddr - memory pointer + * @param: len - length 
+ * @param: stream - stream from host memory + * @param: poll - blocking/non-blocking + * @param: clr_stat - prior status clear + */ + void fDev::_read(uint64_t* vaddr, uint32_t len, bool stream, bool clr_stat, bool poll) { + // Check outstanding + while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + rd_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) & 0xffffffff; + if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + uint64_t len_cmd = len; + uint64_t ctrl_cmd = CTRL_START_RD | (clr_stat ? CTRL_CLR_STAT_RD : 0x0) | (stream ? CTRL_STREAM_RD : 0x0); + cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, 0, (uint64_t)vaddr, ctrl_cmd); + + rd_cmd_cnt++; + + if(poll) { + while(checkBusyRead()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + + /** + * Write operation (write from FPGA user logic) + * @param: vaddr - memory pointer + * @param: len - length + * @param: stream - stream from host memory + * @param: poll - blocking/non-blocking + * @param: clr_stat - prior status clear + */ + void fDev::_write(uint64_t* vaddr, uint32_t len, bool stream, bool clr_stat, bool poll) { + // Check outstanding + while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + wr_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) >> 32; + if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + uint64_t len_cmd = (uint64_t)len << 32; + uint64_t ctrl_cmd = CTRL_START_WR | (clr_stat ? CTRL_CLR_STAT_WR : 0x0) | (stream ? 
CTRL_STREAM_WR : 0x0); + cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, (uint64_t)vaddr, 0, ctrl_cmd); + + wr_cmd_cnt++; + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + + /** + * transfer (read + write) + * @param: vaddr_src, _dst - memory pointer + * @param: len_src, _dst - length + * @param: stream - stream from host memory + * @param: poll - blocking/non-blocking + * @param: clr_stat - prior status clear + */ + void fDev::_transfer(uint64_t* vaddr_src, uint64_t* vaddr_dst, uint32_t len_src, uint32_t len_dst, bool stream, bool clr_stat, bool poll) { + // Check outstanding read + while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + rd_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) & 0xffffffff; + if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + // Check outstanding write + while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + wr_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) >> 32; + if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + uint64_t len_cmd = ((uint64_t)len_dst << 32) | len_src; + uint64_t ctrl_cmd = CTRL_START_WR | (clr_stat ? CTRL_CLR_STAT_WR : 0x0) | (stream ? CTRL_STREAM_WR : 0x0) | + CTRL_START_RD | (clr_stat ? CTRL_CLR_STAT_RD : 0x0) | (stream ? 
CTRL_STREAM_RD : 0x0); + + cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, (uint64_t)vaddr_dst, (uint64_t)vaddr_src, ctrl_cmd); + + rd_cmd_cnt++; + wr_cmd_cnt++; + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + +#ifdef EN_DDR + + /** + * Offload to FPGA DDR (only with local FPGA memory) + * @param: vaddr - memory pointer + * @param: len - length + * @param: poll - blocking/non-blocking + */ + void fDev::_offload(uint64_t* vaddr, uint32_t len, bool poll) { + // Check outstanding + while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + rd_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) & 0xffffffff; + if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + uint64_t len_cmd = len; + uint64_t ctrl_cmd = CTRL_START_RD | CTRL_CLR_STAT_RD | CTRL_SYNC_RD; + cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, 0, (uint64_t)vaddr, ctrl_cmd); + + rd_cmd_cnt++; + + if(poll) { + while(checkBusyRead()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + + /** + * Sync with FPGA DDR (only with local FPGA memory) + * @param: vaddr - memory pointer + * @param: len - length + * @param: poll - blocking/non-blocking + */ + void fDev::_sync(uint64_t* vaddr, uint32_t len, bool poll) { + // Check outstanding + while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + wr_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 0) >> 32; + if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + wr_cmd_cnt++; + + uint64_t len_cmd = (uint64_t)len << 32; + uint64_t ctrl_cmd = CTRL_START_WR | CTRL_CLR_STAT_WR | CTRL_SYNC_WR; + cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(len_cmd, (uint64_t)vaddr, 0, ctrl_cmd); + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + +#endif + + // 
------------------------------------------------------------------------------- + // -- Polling + // ------------------------------------------------------------------------------- + + /** + * Check whether busy read + */ + bool fDev::checkBusyRead() { + return !(_mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 1) & 0xffffffff); + } + + /** + * Check whether busy write + */ + bool fDev::checkBusyWrite() { + return !(_mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 1) >> 32); + } + + /** + * Return read completed + */ + uint32_t fDev::checkCompletedRead() { + return _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 1) & 0xffffffff; + } + + /** + * Return write completed + */ + uint32_t fDev::checkCompletedWrite() { + return _mm256_extract_epi64(cnfg_reg[CNFG_STAT_REG], 1) >> 32; + } + + /** + * Clear status + */ + void fDev::clearCompleted() { + cnfg_reg[CNFG_CTRL_REG] = _mm256_set_epi64x(0, 0, 0, CTRL_CLR_STAT_RD | CTRL_CLR_STAT_WR); + } + + // ------------------------------------------------------------------------------- + // -- Timers + // ------------------------------------------------------------------------------- + + /** + * Set timer stop at x number of completed transfers + * @param: tmr_stop_at - stop once completed reached + */ + void fDev::setTimerStopAt(uint64_t tmr_stop_at) { + cnfg_reg[CNFG_TMR_STOP_REG] = _mm256_set_epi64x(0, 0, 0, tmr_stop_at); + } + + /** + * Read timer + */ + uint64_t fDev::getReadTimer() { + return _mm256_extract_epi64(cnfg_reg[CNFG_TMR_REG], 0); + } + + /** + * Write timer + */ + uint64_t fDev::getWriteTimer() { + return _mm256_extract_epi64(cnfg_reg[CNFG_TMR_REG], 1); + } + + // ------------------------------------------------------------------------------- + // -- Debug XDMA + // ------------------------------------------------------------------------------- + + /** + * XDMA debug + */ + void fDev::printDebugXDMA() // TODO + { + std::cout << "-- XDMA STATISTICS ----------------------------" << std::endl; + std::cout << std::setw(35) 
<< "Read command FIFO used: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x0) << std::endl; + std::cout << std::setw(35) << "Write command FIFO used: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x1) << std::endl; + std::cout << std::setw(35) << "Reads completed: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x2) << std::endl; + std::cout << std::setw(35) << "Writes completed: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x3) << std::endl; + std::cout << std::setw(35) << "Read requests sent: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x4) << std::endl; + std::cout << std::setw(35) << "Write requests sent: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x5) << std::endl; + std::cout << std::setw(35) << "Page faults: \t" << _mm256_extract_epi32(cnfg_reg[CNFG_STAT_REG], 0x6) << std::endl; + std::cout << "-----------------------------------------------" << std::endl; + } + +#else + + /** + * Read operation (read to FPGA user logic) + * @param: vaddr - memory pointer + * @param: len - length + * @param: poll - blocking/non-blocking + * @param: clr_stat - prior status clear + */ + void fDev::read(uint64_t* vaddr, uint32_t len, bool clr_stat, bool poll) { + // Check outstanding + while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + rd_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_RD_REG]; + if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr; + cnfg_reg[CNFG_LEN_RD_REG] = len; + + cnfg_reg[CNFG_CTRL_REG] = CTRL_START_RD | (clr_stat ? 
CTRL_CLR_STAT_RD : 0x0); + + rd_cmd_cnt++; + + if(poll) { + while(checkBusyRead()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + + /** + * Write operation (write from FPGA user logic) + * @param: vaddr - memory pointer + * @param: len - length + * @param: poll - blocking/non-blocking + * @param: clr_stat - prior status clear + */ + void fDev::write(uint64_t* vaddr, uint32_t len, bool clr_stat, bool poll) { + // Check outstanding + while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + wr_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_WR_REG]; + if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr; + cnfg_reg[CNFG_LEN_WR_REG] = len; + + cnfg_reg[CNFG_CTRL_REG] = CTRL_START_WR | (clr_stat ? CTRL_CLR_STAT_WR : 0x0); + + wr_cmd_cnt++; + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + + /** + * transfer (read + write) + * @param: vaddr_src, _dst - memory pointer + * @param: len_src, _dst - length + * @param: poll - blocking/non-blocking + * @param: clr_stat - prior status clear + */ + void fDev::transfer(uint64_t* vaddr_src, uint64_t* vaddr_dst, uint32_t len_src, uint32_t len_dst, bool clr_stat, bool poll) { + // Check outstanding + while (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + rd_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_RD_REG]; + if (rd_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + // Check outstanding + while (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + wr_cmd_cnt = cnfg_reg[CNFG_STAT_CMD_USED_WR_REG]; + if (wr_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr_src; + cnfg_reg[CNFG_LEN_RD_REG] = len_src; + cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr_dst; + cnfg_reg[CNFG_LEN_WR_REG] = len_dst; + + 
cnfg_reg[CNFG_CTRL_REG] = CTRL_START_RD | (clr_stat ? CTRL_CLR_STAT_RD : 0x0); + cnfg_reg[CNFG_CTRL_REG] = CTRL_START_WR | (clr_stat ? CTRL_CLR_STAT_WR : 0x0); + + rd_cmd_cnt++; + wr_cmd_cnt++; + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + +#ifdef EN_DDR + + /** + * Offload to FPGA DDR (only with local FPGA memory) + * @param: vaddr - memory pointer + * @param: len - length + * @param: poll - blocking/non-blocking + */ + void fDev::offload(uint64_t* vaddr, uint32_t len, bool poll) { + cnfg_reg[CNFG_VADDR_RD_REG] = (uint64_t)vaddr; + cnfg_reg[CNFG_LEN_RD_REG] = len; + + cnfg_reg[CNFG_CTRL_REG] = CTRL_SYNC_RD | CTRL_START_RD | CTRL_CLR_STAT_RD; + + rd_cmd_cnt++; + + if(poll) { + while(checkBusyRead()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + + /** + * Sync with FPGA DDR (only with local FPGA memory) + * @param: vaddr - memory pointer + * @param: len - length + * @param: poll - blocking/non-blocking + */ + void fDev::sync(uint64_t* vaddr, uint32_t len, bool poll) { + cnfg_reg[CNFG_VADDR_WR_REG] = (uint64_t)vaddr; + cnfg_reg[CNFG_LEN_WR_REG] = len; + + cnfg_reg[CNFG_CTRL_REG] = CTRL_SYNC_WR | CTRL_START_WR | CTRL_CLR_STAT_WR; + + wr_cmd_cnt++; + + if(poll) { + while(checkBusyWrite()) nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + } + +#endif + + /** + * XDMA debug + */ + void fDev::printDebugXDMA() // TODO + { + std::cout << "-- XDMA STATISTICS ----------------------------" << std::endl; + std::cout << "-----------------------------------------------" << std::endl; + } + + // ------------------------------------------------------------------------------- + // -- Polling + // ------------------------------------------------------------------------------- + bool fDev::checkBusyRead() { + return !(cnfg_reg[CNFG_STAT_DMA_RD_REG]); + } + + bool fDev::checkBusyWrite() { + return !(cnfg_reg[CNFG_STAT_DMA_WR_REG]); + } + + uint32_t fDev::checkCompletedRead() { + return 
(cnfg_reg[CNFG_STAT_DMA_RD_REG]); + } + + uint32_t fDev::checkCompletedWrite() { + return (cnfg_reg[CNFG_STAT_DMA_WR_REG]); + } + + void fDev::clearCompleted() { + cnfg_reg[CNFG_CTRL_REG] = CTRL_CLR_STAT_RD | CTRL_CLR_STAT_WR; + } + + // ------------------------------------------------------------------------------- + // -- Timers + // ------------------------------------------------------------------------------- + + /** + * Set timer stop at x number of completed transfers + * @param: tmr_stop_at - stop once completed reached + */ + void fDev::setTimerStopAt(uint64_t tmr_stop_at) { + cnfg_reg[CNFG_TMR_STOP_REG] = tmr_stop_at; + } + + /** + * Read timer + */ + uint64_t fDev::getReadTimer() { + return cnfg_reg[CNFG_TMR_RD_REG]; + } + + /** + * Write timer + */ + uint64_t fDev::getWriteTimer() { + return cnfg_reg[CNFG_TMR_WR_REG]; + } + +#endif + +// ------------------------------------------------------------------------------- +// -- Network static +// ------------------------------------------------------------------------------- + +#ifdef EN_RDMA + +/** + * ARP lookup + */ +bool fDev::doArpLookup() { + ioctl(fd, IOCTL_ARP_LOOKUP, 0); + return true; +} + +/** + * Write QP context + * @param: pair - target queue pair + */ +void fDev::writeContext(fQPair *pair) { + uint64_t offs[3]; + offs[0] = (((uint64_t)pair->remote.psn & 0xffffff) << 31) | (((uint64_t)pair->local.qpn & 0xffffff) << 7) | (((uint64_t)pair->local.region & 0xf) << 3); + offs[1] = (((uint64_t)pair->remote.rkey & 0xffffff) << 24) | ((uint64_t)pair->local.psn & 0xffffff); + offs[2] = (uint64_t)pair->remote.vaddr; + ioctl(fd, IOCTL_WRITE_CTX, &offs); +} + +/** + * Write QP connection + * @param: pair - target queue pair + */ +void fDev::writeConnection(fQPair *pair, uint32_t port) { + uint64_t offs[3]; + offs[0] = (((uint64_t)port & 0xffff) << 40) | (((uint64_t)pair->remote.qpn & 0xffffff) << 16) | ((pair->local.qpn) & 0xffff); + offs[1] = ((htols((uint64_t)pair->remote.gidToUint(8)) & 0xffffffff) << 
32) | (htols((uint64_t)pair->remote.gidToUint(0)) & 0xffffffff); + offs[2] = ((htols((uint64_t)pair->remote.gidToUint(24)) & 0xffffffff) << 32) | (htols((uint64_t)pair->remote.gidToUint(16)) & 0xffffffff); + ioctl(fd, IOCTL_WRITE_CONN, &offs); +} + + // ------------------------------------------------------------------------------- + // -- Network + // ------------------------------------------------------------------------------- + +#ifdef EN_AVX + + /** + * RDMA write + * @param: l_addr - local virtual address + * @param: r_addr - remote virtual address + * @param: size - transfer size + */ + bool fDev::postWrite(fQPair *pair, uint64_t l_offs, uint64_t r_offs, uint32_t size) { + uint64_t l_addr = pair->local.vaddr + l_offs; + uint64_t r_addr = pair->remote.vaddr + r_offs; + + uint64_t offs_0 = (((uint64_t)pair->local.qpn & 0xffffff) << 5) | ((uint64_t)opCode::WRITE & 0x1f); + uint64_t offs_1 = (((uint64_t)r_addr & 0xffff) << 48) | ((uint64_t)l_addr & 0xffffffffffff); + uint64_t offs_2 = ((uint64_t)size << 32) | (((uint64_t)r_addr >> 16) & 0xffffffff); + uint64_t offs_3 = 0; + + postCmd(offs_3, offs_2, offs_1, offs_0); + + return 0; + } + + /** + * RDMA read + * @param: l_addr - local virtual address + * @param: r_addr - remote virtual address + * @param: size - transfer size + */ + bool fDev::postRead(fQPair *pair, uint64_t l_offs, uint64_t r_offs, uint32_t size) { + uint64_t l_addr = pair->local.vaddr + l_offs; + uint64_t r_addr = pair->remote.vaddr + r_offs; + + uint64_t offs_0 = (((uint64_t)pair->local.qpn & 0xffffff) << 5) | ((uint64_t)opCode::READ & 0x1f); + uint64_t offs_1 = (((uint64_t)r_addr & 0xffff) << 48) | ((uint64_t)l_addr & 0xffffffffffff); + uint64_t offs_2 = ((uint64_t)size << 32) | (((uint64_t)r_addr >> 16) & 0xffffffff); + uint64_t offs_3 = 0; + + postCmd(offs_3, offs_2, offs_1, offs_0); + + return 0; + } + + /** + * RDMA RPC + * @param: offs_3, _2, _1 - parameters + */ + bool fDev::postFarview(fQPair *pair, uint64_t l_offs, uint64_t r_offs, 
uint32_t size, uint64_t params) { + uint64_t l_addr = pair->local.vaddr + l_offs; + uint64_t r_addr = pair->remote.vaddr + r_offs; + + uint64_t offs_0 = (((uint64_t)pair->local.qpn & 0xffffff) << 5) | ((uint64_t)opCode::FV & 0x1f); + uint64_t offs_1 = (((uint64_t)r_addr & 0xffff) << 48) | ((uint64_t)l_addr & 0xffffffffffff); + uint64_t offs_2 = ((uint64_t)size << 32) | (((uint64_t)r_addr >> 16) & 0xffffffff); + uint64_t offs_3 = params; + + postCmd(offs_3, offs_2, offs_1, offs_0); + + return 0; + } + + /** + * RDMA RPC + * @param: offs_3, _2, _1 - parameters + */ + bool fDev::postFarviewBase(fQPair *pair, uint64_t params_0, uint64_t params_1, uint64_t params_2) { + uint64_t offs_0 = (((uint64_t)pair->local.qpn & 0xffffff) << 5) | ((uint64_t)opCode::FV & 0x1f); + uint64_t offs_1 = params_0; + uint64_t offs_2 = params_1; + uint64_t offs_3 = params_2; + + postCmd(offs_3, offs_2, offs_1, offs_0); + + return 0; + } + + /** + * Base post + */ + void fDev::postCmd(uint64_t offs_3, uint64_t offs_2, uint64_t offs_1, uint64_t offs_0) { + // Check outstanding + while (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + rdma_cmd_cnt = _mm256_extract_epi64(cnfg_reg[CNFG_RDMA_STAT_CMD_USED_REG], 0) & 0xffffffff; + if (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + cnfg_reg[CNFG_RDMA_POST_REG] = _mm256_set_epi64x(offs_3, offs_2, offs_1, offs_0); + + rdma_cmd_cnt++; + } + +#else + + // ------------------------------------------------------------------------------- + // -- Network + // ------------------------------------------------------------------------------- + + /** + * RDMA write + * @param: l_addr - local virtual address + * @param: r_addr - remote virtual address + * @param: size - transfer size + */ + bool fDev::postWrite(rQPair *pair, uint64_t *l_addr, uint64_t *r_addr, uint32_t size) { + if(qpn_attached) + postCmd(opCode::WRITE, pair, l_addr, r_addr, size); + else + return 1; + + return 0; + } + + /** + * 
RDMA read + * @param: l_addr - local virtual address + * @param: r_addr - remote virtual address + * @param: size - transfer size + */ + bool fDev::postRead(rQPair *pair, uint64_t *l_addr, uint64_t *r_addr, uint32_t size) { + if(qpn_attached) + postCmd(opCode::READ, pair, l_addr, r_addr, size); + else + return 1; + + return 0; + } + + /** + * Base post + * TODO: Change to new config + */ + void fDev::postCmd(opCode op, rQPair *pair, uint64_t *l_addr, uint64_t *r_addr, uint32_t size) { + #ifdef VERBOSE_DEBUG + std::cout << "Post, queue pair l: " << pair->local.qpn << ", r: " << pair->remote.qpn << std::endl; + #endif + + // Check outstanding + while (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { + rdma_cmd_cnt = cnfg_reg[CNFG_RDMA_STAT_CMD_USED_REG]; + if (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) + nanosleep((const struct timespec[]){{0, 100L}}, NULL); + } + + cnfg_reg[CNFG_RDMA_POST_REG_0] = (((uint64_t)size << 27)) | (((uint64_t)pair->local.qpn & 0xffffff) << 3) | (((uint64_t)op & 0x3)); + cnfg_reg[CNFG_RDMA_POST_REG_1] = (uint64_t)l_addr; + cnfg_reg[CNFG_RDMA_POST_REG_2] = (uint64_t)r_addr; + cnfg_reg[CNFG_RDMA_POST_REG_3] = 0; + + rdma_cmd_cnt++; + } + +#endif +#endif + +} diff --git a/sw/stride/src/fQ.cpp b/sw/stride/src/fQ.cpp new file mode 100644 index 00000000..b0feb5f5 --- /dev/null +++ b/sw/stride/src/fQ.cpp @@ -0,0 +1,72 @@ +#include "fQ.hpp" + +#include +#include +#include +#include +#include +#include + +namespace fpga { + +uint32_t fQ::gidToUint(int idx) { + if(idx > 24) { + std::cerr << "Invalid index for gitToUint" << std::endl; + return 0; + } + char tmp[9]; + memset(tmp, 0, 9); + uint32_t v32 = 0; + memcpy(tmp, gid+idx, 8); + sscanf(tmp, "%x", &v32); + return ntohl(v32); +} + +void fQ::uintToGid(int idx, uint32_t ip_addr) { + std::ostringstream gidStream; + gidStream << std::setfill('0') << std::setw(8) << std::hex << ip_addr; + memcpy(gid+idx, gidStream.str().c_str(), 8); +} + +void fQ::print(const char *name) { + printf("%s: LID 
0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s, REG 0x%04x, RKEY %#08x, VADDR %016lx, SIZE %08x\n", + name, 0, qpn, psn, gid, region, rkey, vaddr, size); +} + +std::string fQ::encode() { + std::uint32_t lid = 0; + std::ostringstream msgStream; + msgStream << std::setfill('0') << std::setw(4) << std::hex << lid << " "; + msgStream << std::setfill('0') << std::setw(6) << std::hex << qpn << " "; + msgStream << std::setfill('0') << std::setw(6) << std::hex << (psn & 0xFFFFFF) << " "; + msgStream << std::setfill('0') << std::setw(4) << std::hex << (region & 0xf) << " "; + msgStream << std::setfill('0') << std::setw(8) << std::hex << rkey << " "; + msgStream << std::setfill('0') << std::setw(16) << std::hex << vaddr << " "; + msgStream << gid; + + std::string msg = msgStream.str(); + return msg; +} + +void fQ::decode(char* buf, size_t len) { + if (len < 60) { + std::cerr << "ERR: unexpected length " << len << " in decode ib connection\n"; + return; + } + buf[4] = ' '; + buf[11] = ' '; + buf[18] = ' '; + buf[23] = ' '; + buf[32] = ' '; + buf[49] = ' '; + + std::uint32_t lid = 0; + //std::cout << "buf " << buf << std::endl; + std::string recvMsg(buf, len); + //std::cout << "string " << recvMsg << ", length: " << recvMsg.length() << std::endl; + std::istringstream recvStream(recvMsg); + recvStream >> std::hex >> lid >> qpn >> psn >> region; + recvStream >> std::hex >> rkey >> vaddr >> gid; +} + +} \ No newline at end of file diff --git a/sw/stride/src/fView.cpp b/sw/stride/src/fView.cpp new file mode 100644 index 00000000..956f5dbf --- /dev/null +++ b/sw/stride/src/fView.cpp @@ -0,0 +1,677 @@ + #include "fView.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace fpga; + +namespace comm { + +/** + * Constructor + * @param: fdev - array of fDev objects. Has to coincide with the number of regions. 
+ * @param: node_id - current node ID + * @param: n_nodes - number of total nodes in the system + * @param: n_qpairs - qpair organization, ex: {1, 3} => 2 node system, master node, remote node 1 shares 3 qpairs + * @param: n_regions - number of vFPGA regions + * @param: mstr_ip_addr - master node IP address + */ +fView::fView(fDev *fdev, uint32_t node_id, uint32_t n_nodes, uint32_t *n_qpairs, uint32_t n_regions, const char *mstr_ip_addr) { + // Set port + port = 18515; // ? + ib_port = 0; + this->mstr_ip_addr = mstr_ip_addr; + + // FPGA device + this->fdev = fdev; + + // Nodes + this->node_id = node_id; + this->n_nodes = n_nodes; + this->n_regions = n_regions; + + for (int i = 0; i < n_nodes; i++) { + std::vector v(n_qpairs[i], fQPair()); + pairs.push_back(v); + } + + // Connections + this->connections = new int[n_nodes]; + + // Initialize local queues + initializeLocalQueues(); + + // Queue exchange + int ret = 1; + if (node_id == 0) { + ret = masterExchangeQueues(); + } else { + std::this_thread::sleep_for(std::chrono::seconds(1)); + ret = clientExchangeQueues(); + } + if (ret) + std::cout << "Exchange failed" << std::endl; + else + std::cout << "Exchange successfull" << std::endl; + + // Load QPn + + + // Load context and connections + for(int i = 0; i < n_nodes; i++) { + if (i == node_id) continue; + + for (uint j = 0; j < pairs[i].size(); j++) { + int pair_reg = pairs[i][j].local.region; + fdev[pair_reg].writeContext(&pairs[i][j]); + fdev[pair_reg].writeConnection(&pairs[i][j], port); + } + } + + // ARP lookup + fdev[0].doArpLookup(); +} + +/** + * Destructor + */ +fView::~fView() { + for (int i = 0; i < n_nodes; i++) { + if (i == node_id) continue; + close(connections[i]); + } + + delete[] connections; +} + +void fView::closeConnections() { + for (int i = 0; i < n_nodes; i++) { + if (i == node_id) continue; + close(connections[i]); + } +} + +static unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); + +/** + * Initialization of the 
/**
 * Initialization of the local queues (no buffers allocated at this point)
 */
void fView::initializeLocalQueues() {
    std::default_random_engine rand_gen(seed);
    // Template arguments restored; they are stripped in this copy of the file
    std::uniform_int_distribution<int> distr(0, std::numeric_limits<int>::max());

    uint32_t ip_addr = base_ip_addr + node_id;

    int i = 0, j;
    for (auto it1 = pairs.begin(); it1 != pairs.end(); it1++) {
        j = 0;
        for (auto it2 = it1->begin(); it2 != it1->end(); it2++) {
            // The same address is written into all four 8-character GID slots
            it2->local.uintToGid(0, ip_addr);
            it2->local.uintToGid(8, ip_addr);
            it2->local.uintToGid(16, ip_addr);
            it2->local.uintToGid(24, ip_addr);
            it2->local.qpn = 0x3 + i++;
            it2->local.psn = distr(rand_gen) & 0xFFFFFF;
            it2->local.region = j++ % n_regions;
            it2->local.rkey = 0;
            it2->local.vaddr = 0; //TODO remove
            it2->local.size = 0;
        }
    }
}

/**
 * Exchange initial qpairs (server side)
 * @return: 0 on success, 1 on any socket or protocol error
 */
int fView::masterExchangeQueues() {
    char recv_buf[100];
    int32_t recv_node_id;
    ssize_t n;   // read()/write() return ssize_t; an unsigned type hides -1
    int sockfd = -1, connfd;
    struct sockaddr_in server;
    memset(recv_buf, 0, sizeof(recv_buf));
    memset(&server, 0, sizeof(server));

    std::cout << "Server exchange started ..." << std::endl;

    sockfd = ::socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd == -1) {
        std::cerr << "Could not create socket" << std::endl;
        return 1;
    }

    server.sin_family = AF_INET;
    server.sin_addr.s_addr = INADDR_ANY;
    server.sin_port = htons(port);

    if (::bind(sockfd, (struct sockaddr*)&server, sizeof(server)) < 0) {
        std::cerr << "Could not bind socket" << std::endl;
        ::close(sockfd);
        return 1;
    }

    // The result of listen() used to be ignored
    if (::listen(sockfd, n_nodes) < 0) {
        std::cerr << "Could not listen to port " << port << std::endl;
        ::close(sockfd);
        return 1;
    }

    const size_t msg_len = fQ::getLength();
    if (msg_len > sizeof(recv_buf)) {
        std::cerr << "Could not read message, bytes read: " << 0 << std::endl;
        ::close(sockfd);
        return 1;
    }

    // Receive queues
    for (uint32_t i = 1; i < n_nodes; i++) {
        // Accept the connection for each node
        connfd = ::accept(sockfd, NULL, 0);
        if (connfd < 0) {
            std::cerr << "Accept() failed" << std::endl;
            ::close(sockfd);
            return 1;
        }

        // Read node id
        n = ::read(connfd, &recv_node_id, sizeof(int32_t));
        if (n != (ssize_t)sizeof(int32_t)) {
            std::cerr << "Could not read initial node ID message, bytes read: " << n << std::endl;
            ::close(connfd);
            ::close(sockfd);
            return 1;
        }

        // The id arrives over the network and indexes pairs[] and connections[]
        if (recv_node_id <= 0 || (uint32_t)recv_node_id >= n_nodes) {
            std::cerr << "Could not read initial node ID message, bytes read: " << n << std::endl;
            ::close(connfd);
            ::close(sockfd);
            return 1;
        }
        std::cout << "Qpair exchange nodeid " << recv_node_id << " ... " << std::endl;

        for (uint j = 0; j < pairs[recv_node_id].size(); j++) {
            // Read remote qpair
            n = ::read(connfd, recv_buf, msg_len);
            if (n != (ssize_t)msg_len) {
                std::cerr << "Could not read message, bytes read: " << n << std::endl;
                std::cout << "Received msg: " << recv_buf << std::endl;
                ::close(connfd);
                ::close(sockfd);
                return 1;
            }

            pairs[recv_node_id][j].remote.decode(recv_buf, msg_len);
            std::cout << "Qpair nodeid " << recv_node_id << "[" << j << "]" << std::endl;
            pairs[recv_node_id][j].local.print("Local ");
            pairs[recv_node_id][j].remote.print("Remote");
        }

        connections[recv_node_id] = connfd;
    }

    std::cout << "Received all remote qpairs" << std::endl;

    // Send queues
    for (uint32_t i = 1; i < n_nodes; i++) {
        for (uint j = 0; j < pairs[i].size(); j++) {
            const std::string msg_string = pairs[i][j].local.encode();
            const size_t out_len = msg_string.length();

            // Write message
            if (::write(connections[i], msg_string.c_str(), out_len) != (ssize_t)out_len) {
                std::cerr << "Could not send local qpair" << std::endl;
                ::close(connections[i]);
                ::close(sockfd);
                return 1;
            }
        }
    }

    std::cout << "Sent all local qpairs" << std::endl;

    ::close(sockfd);
    return 0;
}

/**
 * Exchange initial qpairs (client side)
 * @return: 0 on success, 1 on any resolver, socket or protocol error
 */
int fView::clientExchangeQueues() {
    struct addrinfo *res = NULL, *t;
    struct addrinfo hints = {};
    hints.ai_family = AF_INET;
    hints.ai_socktype = SOCK_STREAM;

    char* service = NULL;
    char recv_buf[100];
    ssize_t n = 0;
    int sockfd = -1;
    memset(recv_buf, 0, sizeof(recv_buf));

    std::cout << "Client exchange" << std::endl;

    if (asprintf(&service, "%d", port) < 0) {
        std::cerr << "Service failed" << std::endl;
        return 1;
    }

    // getaddrinfo() signals failure with any non-zero code
    const int gai = getaddrinfo(mstr_ip_addr, service, &hints, &res);
    free(service);   // only needed for the lookup; freeing here covers every exit path
    if (gai != 0) {
        std::cerr << "[ERROR] getaddrinfo" << ": " << gai_strerror(gai) << std::endl;
        return 1;
    }

    // Try each resolved address until one connects
    for (t = res; t; t = t->ai_next) {
        sockfd = ::socket(t->ai_family, t->ai_socktype, t->ai_protocol);
        if (sockfd >= 0) {
            if (!::connect(sockfd, t->ai_addr, t->ai_addrlen)) {
                break;
            }
            ::close(sockfd);
            sockfd = -1;
        }
    }
    freeaddrinfo(res);   // previously leaked on every error return below

    if (sockfd < 0) {
        std::cerr << "Could not connect to master: " << mstr_ip_addr << ":" << port << std::endl;
        return 1;
    }

    // Send local node ID
    if (write(sockfd, &node_id, sizeof(int32_t)) != (ssize_t)sizeof(int32_t)) {
        std::cerr << "Could not send local node id" << std::endl;
        close(sockfd);
        return 1;
    }

    // Send local queues
    for (uint i = 0; i < pairs[0].size(); i++) {
        const std::string msg_string = pairs[0][i].local.encode();
        const size_t out_len = msg_string.length();

        if (write(sockfd, msg_string.c_str(), out_len) != (ssize_t)out_len) {
            std::cerr << "Could not send local address" << std::endl;
            close(sockfd);
            return 1;
        }
    }

    std::cout << "Sent all local qpairs" << std::endl;

    const size_t msg_len = fQ::getLength();
    if (msg_len > sizeof(recv_buf)) {
        std::cerr << "Could not read remote address" << std::endl;
        ::close(sockfd);
        return 1;
    }

    // Receive remote queues
    for (uint i = 0; i < pairs[0].size(); i++) {
        if ((n = ::read(sockfd, recv_buf, msg_len)) != (ssize_t)msg_len) {
            std::cout << "n: " << n << ", instread of " << msg_len << std::endl;
            std::cout << "Received msg: " << recv_buf << std::endl;
            std::cerr << "Could not read remote address" << std::endl;
            ::close(sockfd);
            return 1;
        }

        pairs[0][i].remote.decode(recv_buf, msg_len);
        std::cout << "Qpair nodeid " << 0 << "[" << i << "]" << std::endl;
        pairs[0][i].local.print("Local ");
        pairs[0][i].remote.print("Remote");
    }

    std::cout << "Received all remote qpairs" << std::endl;

    // Keep connection around
    connections[0] = sockfd;

    return 0;
}

/**
 * Exchange windows with target node
 * Uses the client routine when the target node is node 0, the master routine otherwise.
 * @param: node_id - target node id
 * @param: qpair_id - target qpair id
 */
int fView::exchangeWindow(int32_t node_id, int32_t qpair_id) {
    if(node_id == 0)
        return clientExchangeWindow(node_id, qpair_id);
    else
        return masterExchangeWindow(node_id, qpair_id);
}
::read(connections[node_id], &vaddr, sizeof(uint64_t)); + if (n != sizeof(uint64_t)) { + std::cerr << "Could not read window, read bytes " << n << std::endl; + ::close(connections[node_id]); + return 1; + } + // size + n = ::read(connections[node_id], &size, sizeof(uint32_t)); + if (n != sizeof(uint32_t)) { + std::cerr << "Could not read window, read bytes " << n << std::endl; + ::close(connections[node_id]); + return 1; + } + + pairs[node_id][qpair_id].remote.vaddr = vaddr; + pairs[node_id][qpair_id].remote.size = size; + + std::cout << "Qpair nodeid " << node_id << "[" << qpair_id << "]" << std::endl; + pairs[node_id][qpair_id].local.print("Local "); + pairs[node_id][qpair_id].remote.print("Remote"); + + // Send + // vaddr + if ((n = ::write(connections[node_id], &pairs[node_id][qpair_id].local.vaddr, sizeof(uint64_t))) != sizeof(uint64_t)) { + std::cerr << "Could not send" << std::endl; + ::close(connections[node_id]); + return 1; + } + // size + if ((n = ::write(connections[node_id], &pairs[node_id][qpair_id].local.size, sizeof(uint32_t))) != sizeof(uint32_t)) { + std::cerr << "Could not send" << std::endl; + ::close(connections[node_id]); + return 1; + } + + return 0; +} + +/** + * Client exhchange window + */ +int fView::clientExchangeWindow(int32_t node_id, int32_t qpair_id) { + int n; + uint64_t vaddr; + uint32_t size; + + // Send + // vaddr + if ((n = ::write(connections[node_id], &pairs[node_id][qpair_id].local.vaddr, sizeof(uint64_t))) != sizeof(uint64_t)) { + std::cerr << "Could not send" << std::endl; + ::close(connections[node_id]); + return 1; + } + // size + if ((n = ::write(connections[node_id], &pairs[node_id][qpair_id].local.size, sizeof(uint32_t))) != sizeof(uint32_t)) { + std::cerr << "Could not send" << std::endl; + ::close(connections[node_id]); + return 1; + } + + // Receive + // vaddr + n = ::read(connections[node_id], &vaddr, sizeof(uint64_t)); + if (n != sizeof(uint64_t)) { + std::cerr << "Could not read window, read bytes " << n << 
std::endl; + ::close(connections[node_id]); + return 1; + } + // size + n = ::read(connections[node_id], &size, sizeof(uint32_t)); + if (n != sizeof(uint32_t)) { + std::cerr << "Could not read window, read bytes " << n << std::endl; + ::close(connections[node_id]); + return 1; + } + + pairs[node_id][qpair_id].remote.vaddr = vaddr; + pairs[node_id][qpair_id].remote.size = size; + + std::cout << "Qpair nodeid " << node_id << "[" << qpair_id << "]" << std::endl; + pairs[node_id][qpair_id].local.print("Local "); + pairs[node_id][qpair_id].remote.print("Remote"); + + return 0; +} + +/* --------------------------------------------------------------------------------------- +/* -- Public +/* --------------------------------------------------------------------------------------- + +/** + * Allocate a window for the specific qpair + * @param: node_id - target node id + * @param: qpair_id - target qpair id + * @param: n_pages - number of large pages (2MB each) + */ +uint64_t* fView::allocWindow(uint32_t node_id, uint32_t qpair_id, uint64_t n_pages) { + int32_t region = pairs[node_id][qpair_id].local.region; + uint64_t *vaddr = fdev[region].getHostMem(n_pages); + + pairs[node_id][qpair_id].local.vaddr = (uint64_t)vaddr; + pairs[node_id][qpair_id].local.size = n_pages * LARGE_PAGE_SIZE; + + exchangeWindow(node_id, qpair_id); + + return vaddr; +} + +/** + * Free window for the specific qpair + * @param: node_id - target node id + * @param: qpair_id - target qpair id + */ +void fView::freeWindow(uint32_t node_id, uint32_t qpair_id) { + int32_t region = pairs[node_id][qpair_id].local.region; + uint64_t *vaddr = (uint64_t*)pairs[node_id][qpair_id].local.vaddr; + uint64_t n_pages = (uint64_t)(pairs[node_id][qpair_id].local.size / LARGE_PAGE_SIZE); + + fdev[region].freeHostMem(vaddr, n_pages); +} + +/** + * Write RDMA operation + * @param: node_id - target node id + * @param: qpair_id - target qpair id + * @param: src_offs - offset in the source qpair buffer + * @param: dst_offs - 
offset in teh destination qpair buffer + * @param: size - transfer size + */ +void fView::writeRemote(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t size) { + fQPair *l_qp = &pairs[node_id][qpair_id]; + int32_t l_reg = l_qp->local.region; + + if(node_id == this->node_id) { + uint64_t *l_addr = (uint64_t*)(l_qp->local.vaddr + src_offs); + uint64_t *r_addr = (uint64_t*)(l_qp->remote.vaddr + dst_offs); + + memcpy(r_addr, l_addr, size); + } else { + fdev[l_reg].postWrite(l_qp, src_offs, dst_offs, size); + } +} + +/** + * Read RDMA operation + * @param: node_id - target node id + * @param: qpair_id - target qpair id + * @param: src_offs - offset in the source qpair buffer + * @param: dst_offs - offset in teh destination qpair buffer + * @param: size - transfer size + */ +void fView::readRemote(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t size) { + fQPair *l_qp = &pairs[node_id][qpair_id]; + uint32_t l_reg = l_qp->local.region; + + if(node_id == this->node_id) { + uint64_t *l_addr = (uint64_t*)(l_qp->local.vaddr + src_offs); + uint64_t *r_addr = (uint64_t*)(l_qp->remote.vaddr + dst_offs); + + memcpy(r_addr, l_addr, size); + } else { + fdev[l_reg].postRead(l_qp, src_offs, dst_offs, size); + } +} + +/** + * RPC RDMA operation + * @param: node_id - target node id + * @param: qpair_id - target qpair id + * @param: src_offs - offset in the source qpair buffer + * @param: dst_offs - offset in teh destination qpair buffer + * @param: size - transfer size + * @param: params - arbitrary parameters (depends on the implemented operation) + */ +void fView::farviewRemote(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t size, uint64_t params) { + fQPair *l_qp = &pairs[node_id][qpair_id]; + uint32_t l_reg = l_qp->local.region; + + if(node_id == this->node_id) { + uint64_t *l_addr = (uint64_t*)(l_qp->local.vaddr + src_offs); + uint64_t *r_addr = (uint64_t*)(l_qp->remote.vaddr + 
dst_offs); + + memcpy(r_addr, l_addr, size); + } else { + fdev[l_reg].postFarview(l_qp, src_offs, dst_offs, size, params); + } +} + +/** + * Write RDMA polling function + * @param: node_id - target node id + * @param: qpair_id - target qpair id + */ +uint32_t fView::pollRemoteWrite(uint32_t node_id, uint32_t qpair_id) { + fQPair *l_qp = &pairs[node_id][qpair_id]; + int32_t l_reg = l_qp->local.region; + + return fdev[l_reg].checkCompletedWrite(); +} + +/** + * Read RDMA polling function + * @param: node_id - target node id + * @param: qpair_id - target qpair id + */ +uint32_t fView::pollLocalRead(uint32_t node_id, uint32_t qpair_id) { + fQPair *l_qp = &pairs[node_id][qpair_id]; + int32_t l_reg = l_qp->local.region; + + return fdev[l_reg].checkCompletedRead(); +} + +/** + * Sync with remote + * @param: node_id - target node id + */ +int32_t fView::waitOnReplyRemote(uint32_t node_id) { + int n; + uint32_t ack; + + // Receive ACK + n = ::read(connections[node_id], &ack, sizeof(uint32_t)); + if (n != sizeof(uint32_t)) { + std::cerr << "Could not read ACK, read bytes " << n << std::endl; + ::close(connections[node_id]); + return 1; + } + + return 0; +} + +/** + * Wait on close remote + * @param: node_id - target node id + */ +int32_t fView::waitOnCloseRemote(uint32_t node_id) { + int n; + uint32_t ack; + + // Hacky + n = ::read(connections[node_id], &ack, sizeof(uint32_t)); + if (n == 0) { + std::cerr << "Connection closed" << std::endl; + ::close(connections[node_id]); + return 0; + } + + return 1; +} + + +/** + * Sync with remote + * @param: node_id - target node id + * @param: ack - acknowledge message + */ +int32_t fView::replyRemote(uint32_t node_id, uint32_t ack) { + int n; + + if ((n = ::write(connections[node_id], &ack, sizeof(uint32_t))) != sizeof(uint32_t)) { + std::cerr << "Could not send ACK" << std::endl; + ::close(connections[node_id]); + return 1; + } + + return 0; +} + +/** + * Sync with remote + * @param: node_id - target node id + */ +int32_t 
fView::syncRemote(uint32_t node_id) { + if(this->node_id == 0) { + replyRemote(node_id, 0); + waitOnReplyRemote(node_id); + } else { + waitOnReplyRemote(node_id); + replyRemote(node_id, 0); + } + + return 0; +} + +// Base control +void fView::farviewRemoteBase(uint32_t node_id, uint32_t qpair_id, uint64_t params_0, uint64_t params_1, uint64_t params_2) { + fQPair *l_qp = &pairs[node_id][qpair_id]; + uint32_t l_reg = l_qp->local.region; + + fdev[l_reg].postFarviewBase(l_qp, params_0, params_1, params_2); +} + +// Stride +void fView::farviewStride(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t dwidth, uint32_t stride, uint32_t num_elem) { + uint32_t n_bytes = (1 << dwidth) * num_elem; + uint64_t tmp = ((uint64_t)n_bytes << 32) | stride; + farviewRemote(node_id, qpair_id, src_offs, dst_offs, dwidth, tmp); +} + +// Load the configuration in 2 transactions +void fView::farviewRegexConfigLoad(uint32_t node_id, uint32_t qpair_id, unsigned char* config_bytes) { + uint64_t* params_0 = (uint64_t*)config_bytes; + uint64_t* params_1 = (uint64_t*)config_bytes + 1; + uint64_t* params_2 = (uint64_t*)config_bytes+ 2; + + farviewRemoteBase(node_id, qpair_id, *params_0, *params_1, *params_2); + + params_0 += 3; + params_1 += 3; + params_2 += 3; + + farviewRemoteBase(node_id, qpair_id, *params_0, *params_1, *params_2); +} + +// Regex read +void fView::farviewRegexRead(uint32_t node_id, uint32_t qpair_id, uint64_t src_offs, uint64_t dst_offs, uint32_t size) { + farviewRemote(node_id, qpair_id, src_offs, dst_offs, size, ~0); +} + + +} diff --git a/sw/stride/src/main.cpp b/sw/stride/src/main.cpp new file mode 100644 index 00000000..3d531514 --- /dev/null +++ b/sw/stride/src/main.cpp @@ -0,0 +1,181 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "fDev.hpp" +#include "fView.hpp" +#include "fBench.hpp" + +using namespace std; +using namespace 
std::chrono; +using namespace fpga; +using namespace comm; + +/* Runtime */ +#define N_NODES 2 +#define N_PAGES 2 +#define N_ID_MASTER 0 +#define N_REGIONS 3 +#define N_REPS 1 +#define DEF_DWITDH 6 +#define DEF_STRIDE 64 +#define DEF_NELEM 1 + +int main(int argc, char *argv[]) +{ + // --------------------------------------------------------------- + // -- Initialization + // --------------------------------------------------------------- + const char* masterAddr = "10.1.212.121"; + + // Read arguments + boost::program_options::options_description programDescription("Options:"); + programDescription.add_options()("nnodes,n", boost::program_options::value(), "Number of system nodes") + ("npages,p", boost::program_options::value(), "Buffer size in 2MB pages") + ("nodeid,i", boost::program_options::value(), "Node ID") + ("nregions,g", boost::program_options::value(), "Number of FPGA regions") + ("reps,r", boost::program_options::value(), "Number of repetitions") + ("dwidth,d", boost::program_options::value(), "Data width") + ("stride,s", boost::program_options::value(), "Stride offset") + ("elems,e", boost::program_options::value(), "Number of elements"); + + boost::program_options::variables_map commandLineArgs; + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, programDescription), commandLineArgs); + boost::program_options::notify(commandLineArgs); + + // Stat + uint32_t n_nodes = N_NODES; + uint64_t n_pages = N_PAGES; + uint32_t node_id = N_ID_MASTER; + uint32_t n_regions = N_REGIONS; + // Runs + uint32_t n_reps = N_REPS; + uint32_t dwidth = DEF_DWITDH; + uint32_t stride = DEF_STRIDE; + uint32_t n_elem = DEF_NELEM; + + if(commandLineArgs.count("nnodes") > 0) n_nodes = commandLineArgs["nnodes"].as(); + if(commandLineArgs.count("npages") > 0) n_pages = commandLineArgs["npages"].as(); + if(commandLineArgs.count("nodeid") > 0) node_id = commandLineArgs["nodeid"].as(); + if(commandLineArgs.count("nregions") > 0) n_regions = 
commandLineArgs["nregions"].as(); + if(commandLineArgs.count("reps") > 0) n_reps = commandLineArgs["reps"].as(); + if(commandLineArgs.count("dwidth") > 0) dwidth = commandLineArgs["dwidth"].as(); + if(commandLineArgs.count("stride") > 0) stride = commandLineArgs["stride"].as(); + if(commandLineArgs.count("elems") > 0) n_elem = commandLineArgs["elems"].as(); + + // FPGA handles + fDev *fdev = new fDev[n_regions]; + + // Buffers + uint64_t *hMem[N_REGIONS]; + + uint32_t qpairs[n_nodes]; + for(int i = 0; i < n_nodes; i++) + qpairs[i] = n_regions; + + // 2 nodes example + uint32_t l_id = node_id; + uint32_t r_id = (node_id + 1) % n_nodes; + + // Obtain regions + for (int i = 0; i < n_regions; i++) { + if (!fdev[i].acquireRegion(i)) return EXIT_FAILURE; + fdev[i].clearCompleted(); + } + + // Farview + fView *fview = new fView(fdev, l_id, n_nodes, qpairs, n_regions, masterAddr); + + // Allocate buffers + for(int i = 0; i < n_regions; i++) + hMem[i] = fview->allocWindow(r_id, i, n_pages); + + // Sync up + fview->syncRemote(r_id); + + // Latency measurements ---------------------------------------------------------------------------------- + if(!l_id) { + // Sender + + // --------------------------------------------------------------- + // -- Runs + // --------------------------------------------------------------- + Bench bench(1); + uint32_t n_runs = 0; + + auto benchmark_thr = [&fview, &fdev, &hMem, &n_runs, r_id, n_reps, n_regions, dwidth, stride, n_elem]() { + bool k = false; + n_runs++; + + for(int i = 0; i < n_reps; i++) { + for(int j = 0; j < n_regions; j++) { + fview->farviewStride(r_id, j, 0, 0, dwidth, stride, n_elem); + } + } + + while(!k) { + k = true; + for(int j = 0; j < n_regions; j++) { + if(fview->pollRemoteWrite(r_id, j) != n_reps * n_runs) k = false; + } + } + }; + bench.runtime(benchmark_thr); + std::cout << "Throughput: " << ((n_regions * n_elem * 1000 * (1<farviewStride(r_id, j, 0, 0, dwidth, stride, n_elem); + while(fview->pollRemoteWrite(r_id, j) 
#!/bin/bash
# Remove a PCIe device from sysfs and trigger a bus rescan.
# Usage: hot_reset.sh <bus:dev.fn | domain:bus:dev.fn>

dev=$1

if [ -z "$dev" ]; then
    echo "Error: no device specified"
    exit 1
fi

# Accept an address without the PCI domain by prepending the default one
if [ ! -e "/sys/bus/pci/devices/$dev" ]; then
    dev="0000:$dev"
fi

if [ ! -e "/sys/bus/pci/devices/$dev" ]; then
    echo "Error: device $dev not found"
    exit 1
fi

# Parent directory of the device's sysfs link target.
# Every substitution is quoted so the path is never word-split.
port=$(basename "$(dirname "$(readlink "/sys/bus/pci/devices/$dev")")")

if [ ! -e "/sys/bus/pci/devices/$port" ]; then
    echo "Error: device $port not found"
    exit 1
fi

echo "Removing $dev..."

echo 1 > "/sys/bus/pci/devices/$dev/remove"


sleep 1
echo "Rescan PCIe devices..."
echo "1" > "/sys/bus/pci/rescan"