Skip to content

Commit

Permalink
SWDEV-487621: Fixing BW measurement in MI300
Browse files Browse the repository at this point in the history
Change-Id: Ib513009616214a1f3f3568571e58d79259692cfc
  • Loading branch information
ApoKalipse-V committed Oct 7, 2024
1 parent 7b3cccc commit 5a416bd
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 3 deletions.
7 changes: 6 additions & 1 deletion src/core/counters/derived/metrics.xml
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@
<metric name="FETCH_SIZE" expr="(TCC_EA_RDREQ_32B_sum*32+(TCC_EA_RDREQ_sum-TCC_EA_RDREQ_32B_sum)*64)/1024" descr="The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WRITE_SIZE" expr="((TCC_EA_WRREQ_sum-TCC_EA_WRREQ_64B_sum)*32+TCC_EA_WRREQ_64B_sum*64)/1024" descr="The total kilobytes written to the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WRITE_REQ_32B" expr="TCC_EA_WRREQ_64B_sum*2+(TCC_EA_WRREQ_sum-TCC_EA_WRREQ_64B_sum)" descr="The total number of 32-byte effective memory writes."></metric>
<metric name="BANDWIDTH_EA" expr="1024*(FETCH_SIZE+WRITE_SIZE)/GRBM_GUI_ACTIVE" descr="Memory Bandwidth measured at the TCC_EA interface. In units of bytes/cycle."></metric>
<metric name="OccupancyPercent" expr="400*SQ_WAVE_CYCLES/GRBM_GUI_ACTIVE/CU_NUM/32" descr="GPU occupancy as % of maximum."></metric>
</gfx90a>

<gfx940>
Expand Down Expand Up @@ -388,9 +390,11 @@
<metric name="TCC_EA0_WRREQ_DRAM_sum" expr="sum(TCC_EA0_WRREQ_DRAM,16)" descr="Number of TCC/EA write requests (either 32-byte or 64-byte) destined for DRAM (MC). Sum over TCC instances."></metric>
<metric name="TCC_EA_READBW" expr="(128*TCC_BUBBLE_sum)+64*(TCC_EA0_RDREQ_sum-TCC_BUBBLE_sum-TCC_EA0_RDREQ_32B_sum)+32*TCC_EA0_RDREQ_32B_sum" descr="EA read bandwidth."></metric>
<metric name="TCC_EA_READ_LATENCY" expr="TCC_EA0_RDREQ_LEVEL_sum/(TCC_BUBBLE_sum+TCC_EA0_RDREQ_sum)" descr="Latency of an EA read."></metric>
<metric name="FETCH_SIZE" expr="(TCC_EA0_RDREQ_32B_sum*32+(TCC_EA0_RDREQ_sum-TCC_EA0_RDREQ_32B_sum)*64)/1024" descr="The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="FETCH_SIZE" expr="(TCC_EA0_RDREQ_32B_sum*32+(TCC_EA0_RDREQ_sum-TCC_EA0_RDREQ_32B_sum)*128)/1024" descr="The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WRITE_SIZE" expr="((TCC_EA0_WRREQ_sum-TCC_EA0_WRREQ_64B_sum)*32+TCC_EA0_WRREQ_64B_sum*64)/1024" descr="The total kilobytes written to the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WRITE_REQ_32B" expr="TCC_EA0_WRREQ_64B_sum*2+(TCC_EA0_WRREQ_sum-TCC_EA0_WRREQ_64B_sum)" descr="The total number of 32-byte effective memory writes."></metric>
<metric name="BANDWIDTH_EA" expr="1024*(FETCH_SIZE+WRITE_SIZE)*XCC_NUM/GRBM_GUI_ACTIVE" descr="Memory Bandwidth measured at the TCC_EA interface. In units of bytes/cycle."></metric>
<metric name="OccupancyPercent" expr="400*SQ_WAVE_CYCLES*XCC_NUM/GRBM_GUI_ACTIVE/CU_NUM/32" descr="GPU occupancy as % of maximum."></metric>
</gfx940>

<gfx941 base="gfx940"></gfx941>
Expand Down Expand Up @@ -460,6 +464,7 @@
<metric name="FETCH_SIZE" expr="(GL2C_EA_RDREQ_32B_sum*32+GL2C_EA_RDREQ_64B_sum*64+GL2C_EA_RDREQ_96B_sum*96+GL2C_EA_RDREQ_128B_sum*128)/1024" descr="The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WriteUnitStalled" expr="100*GL2C_WRREQ_STALL_max/GRBM_GUI_ACTIVE" descr="The percentage of GPUTime the Write unit is stalled. Value range: 0% to 100% (bad)."></metric>
<metric name="LDSBankConflict" expr="100*SQC_LDS_BANK_CONFLICT/SQC_LDS_IDX_ACTIVE" descr="The percentage of GPUTime LDS is stalled by bank conflicts. Value range: 0% (optimal) to 100% (bad)."></metric>
<metric name="OccupancyPercent" expr="100*SQ_WAVE_CYCLES/GRBM_GUI_ACTIVE/CU_NUM/32" descr="GPU occupancy as % of maximum."></metric>
</gfx11>

</metrics>
7 changes: 6 additions & 1 deletion src/core/counters/metrics/derived_counters.xml
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,8 @@
<metric name="EaWrDramStallRate" expr=100*TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum/TCC_BUSY_sum descr="Unit: percent"></metric>
<metric name="EaWrStarveRate" expr=100*TCC_TOO_MANY_EA_WRREQS_STALL_sum/TCC_BUSY_sum descr="Unit: percent"></metric>
<metric name="EaAtomicLatency" expr=TCC_EA_ATOMIC_LEVEL_sum/TCC_EA_ATOMIC_sum descr="Unit: cycles"></metric>
<metric name="BANDWIDTH_EA" expr=1024*(FETCH_SIZE+WRITE_SIZE)/GRBM_GUI_ACTIVE descr="Memory Bandwidth measured at the TCC_EA interface. In units of bytes/cycle."></metric>
<metric name="OccupancyPercent" expr=400*SQ_WAVE_CYCLES/GRBM_GUI_ACTIVE/CU_NUM/32 descr="GPU occupancy as % of maximum."></metric>
</gfx90a_expr>

<gfx940_expr>
Expand Down Expand Up @@ -474,14 +476,16 @@
<metric name="TCC_EA0_WRREQ_DRAM_sum" expr=sum(TCC_EA0_WRREQ_DRAM,16) descr="Number of TCC/EA write requests (either 32-byte or 64-byte) destined for DRAM (MC). Sum over TCC instances."></metric>
<metric name="TCC_EA_READBW" expr=(128*TCC_BUBBLE_sum)+64*(TCC_EA0_RDREQ_sum-TCC_BUBBLE_sum-TCC_EA0_RDREQ_32B_sum)+32*TCC_EA0_RDREQ_32B_sum descr="EA read bandwidth."></metric>
<metric name="TCC_EA_READ_LATENCY" expr=TCC_EA0_RDREQ_LEVEL_sum/(TCC_BUBBLE_sum+TCC_EA0_RDREQ_sum) descr="Latency of an EA read."></metric>
<metric name="FETCH_SIZE" expr=(TCC_EA0_RDREQ_32B_sum*32+(TCC_EA0_RDREQ_sum-TCC_EA0_RDREQ_32B_sum)*64)/1024 descr="The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="FETCH_SIZE" expr=(TCC_EA0_RDREQ_32B_sum*32+(TCC_EA0_RDREQ_sum-TCC_EA0_RDREQ_32B_sum)*128)/1024 descr="The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WRITE_SIZE" expr=((TCC_EA0_WRREQ_sum-TCC_EA0_WRREQ_64B_sum)*32+TCC_EA0_WRREQ_64B_sum*64)/1024 descr="The total kilobytes written to the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WRITE_REQ_32B" expr=TCC_EA0_WRREQ_64B_sum*2+(TCC_EA0_WRREQ_sum-TCC_EA0_WRREQ_64B_sum) descr="The total number of 32-byte effective memory writes."></metric>
<metric name="CU_OCCUPANCY" expr=(SQ_CYCLES/(SQ_WAVE_CYCLES*4))/MAX_WAVE_SIZE descr="The ratio of active waves on a CU to the maximum number of active waves supported by the CU"></metric>
<metric name="CU_UTILIZATION" expr=GRBM_GUI_ACTIVE/GRBM_COUNT descr="The total number of active cycles divided by total number of elapsed cycles"></metric>
<metric name="TOTAL_16_OPS" expr=(SQ_INSTS_VALU_FMA_F16*2+SQ_INSTS_VALU_ADD_F16+SQ_INSTS_VALU_MUL_F16+SQ_INSTS_VALU_TRANS_F16)*64+((SQ_INSTS_VALU_MFMA_MOPS_F16+SQ_INSTS_VALU_MFMA_MOPS_BF16)*512) descr="The number of 16 bits OPS executed"></metric>
<metric name="TOTAL_32_OPS" expr=(SQ_INSTS_VALU_FMA_F32*2+SQ_INSTS_VALU_INT32+SQ_INSTS_VALU_ADD_F32+SQ_INSTS_VALU_MUL_F32+SQ_INSTS_VALU_TRANS_F32)*64+(SQ_INSTS_VALU_MFMA_MOPS_F32*512) descr="The number of 32 bits OPS executed"></metric>
<metric name="TOTAL_64_OPS" expr=(SQ_INSTS_VALU_FMA_F64*2+SQ_INSTS_VALU_INT64+SQ_INSTS_VALU_ADD_F64+SQ_INSTS_VALU_MUL_F64)*64+(SQ_INSTS_VALU_MFMA_MOPS_F64*512) descr="The number of 64 bits OPS executed"></metric>
<metric name="BANDWIDTH_EA" expr=1024*(FETCH_SIZE+WRITE_SIZE)*XCC_NUM/GRBM_GUI_ACTIVE descr="Memory Bandwidth measured at the TCC_EA interface. In units of bytes/cycle."></metric>
<metric name="OccupancyPercent" expr=400*SQ_WAVE_CYCLES*XCC_NUM/GRBM_GUI_ACTIVE/CU_NUM/32 descr="GPU occupancy as % of maximum."></metric>
</gfx940_expr>

<gfx10_expr>
Expand Down Expand Up @@ -553,6 +557,7 @@
<metric name="FETCH_SIZE" expr=(GL2C_EA_RDREQ_32B_sum*32+GL2C_EA_RDREQ_64B_sum*64+GL2C_EA_RDREQ_96B_sum*96+GL2C_EA_RDREQ_128B_sum*128)/1024 descr="The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WriteUnitStalled" expr=100*GL2C_WRREQ_STALL_max/GRBM_GUI_ACTIVE descr="The percentage of GPUTime the Write unit is stalled. Value range: 0% to 100% (bad)."></metric>
<metric name="LDSBankConflict" expr=100*SQC_LDS_BANK_CONFLICT/SQC_LDS_IDX_ACTIVE descr="The percentage of GPUTime LDS is stalled by bank conflicts. Value range: 0% (optimal) to 100% (bad)."></metric>
<metric name="OccupancyPercent" expr=100*SQ_WAVE_CYCLES/GRBM_GUI_ACTIVE/CU_NUM/32 descr="GPU occupancy as % of maximum."></metric>
</gfx11_expr>

<gfx1100_expr base="gfx11_expr"></gfx1100_expr>
Expand Down
7 changes: 6 additions & 1 deletion test/tool/metrics.xml
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@
<metric name="ACTIVE_CYCLES" expr=GRBM_GUI_ACTIVE descr="Active Cycles"></metric>
<metric name="ELAPSED_CYCLES" expr=GRBM_COUNT descr="Elapsed Cycles"></metric>
<metric name="ACTIVE_WAVES" expr=SQ_WAVES descr="Active Waves"></metric>
<metric name="BANDWIDTH_EA" expr=1024*(FETCH_SIZE+WRITE_SIZE)/GRBM_GUI_ACTIVE descr="Memory Bandwidth measured at the TCC_EA interface. In units of bytes/cycle."></metric>
<metric name="OccupancyPercent" expr=400*SQ_WAVE_CYCLES/GRBM_GUI_ACTIVE/CU_NUM/32 descr="GPU occupancy as % of maximum."></metric>
</gfx90a_expr>

<gfx940_expr>
Expand Down Expand Up @@ -347,7 +349,7 @@
<metric name="TCC_EA0_WRREQ_DRAM_sum" expr=sum(TCC_EA0_WRREQ_DRAM,16) descr="Number of TCC/EA write requests (either 32-byte or 64-byte) destined for DRAM (MC). Sum over TCC instances."></metric>
<metric name="TCC_EA_READBW" expr=(128*TCC_BUBBLE_sum)+64*(TCC_EA0_RDREQ_sum-TCC_BUBBLE_sum-TCC_EA0_RDREQ_32B_sum)+32*TCC_EA0_RDREQ_32B_sum descr="EA read bandwidth."></metric>
<metric name="TCC_EA_READ_LATENCY" expr=TCC_EA0_RDREQ_LEVEL_sum/(TCC_BUBBLE_sum+TCC_EA0_RDREQ_sum) descr="Latency of an EA read."></metric>
<metric name="FETCH_SIZE" expr=(TCC_EA0_RDREQ_32B_sum*32+(TCC_EA0_RDREQ_sum-TCC_EA0_RDREQ_32B_sum)*64)/1024 descr="The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="FETCH_SIZE" expr=(TCC_EA0_RDREQ_32B_sum*32+(TCC_EA0_RDREQ_sum-TCC_EA0_RDREQ_32B_sum)*128)/1024 descr="The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WRITE_SIZE" expr=((TCC_EA0_WRREQ_sum-TCC_EA0_WRREQ_64B_sum)*32+TCC_EA0_WRREQ_64B_sum*64)/1024 descr="The total kilobytes written to the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WRITE_REQ_32B" expr=TCC_EA0_WRREQ_64B_sum*2+(TCC_EA0_WRREQ_sum-TCC_EA0_WRREQ_64B_sum) descr="The total number of 32-byte effective memory writes."></metric>
<metric name="CU_OCCUPANCY" expr=(SQ_CYCLES/(SQ_WAVE_CYCLES*4))/MAX_WAVE_SIZE descr="The ratio of active waves on a CU to the maximum number of active waves supported by the CU"></metric>
Expand All @@ -358,6 +360,8 @@
<metric name="ACTIVE_CYCLES" expr=GRBM_GUI_ACTIVE/XCC_NUM descr="Active Cycles"></metric>
<metric name="ELAPSED_CYCLES" expr=GRBM_COUNT/XCC_NUM descr="Elapsed Cycles"></metric>
<metric name="ACTIVE_WAVES" expr=SQ_WAVES descr="Active Waves"></metric>
<metric name="BANDWIDTH_EA" expr=1024*(FETCH_SIZE+WRITE_SIZE)*XCC_NUM/GRBM_GUI_ACTIVE descr="Memory Bandwidth measured at the TCC_EA interface. In units of bytes/cycle."></metric>
<metric name="OccupancyPercent" expr=400*SQ_WAVE_CYCLES*XCC_NUM/GRBM_GUI_ACTIVE/CU_NUM/32 descr="GPU occupancy as % of maximum."></metric>
</gfx940_expr>

<gfx10_expr>
Expand Down Expand Up @@ -420,6 +424,7 @@
<metric name="FETCH_SIZE" expr=(GL2C_EA_RDREQ_32B_sum*32+GL2C_EA_RDREQ_64B_sum*64+GL2C_EA_RDREQ_96B_sum*96+GL2C_EA_RDREQ_128B_sum*128)/1024 descr="The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WriteUnitStalled" expr=100*GL2C_WRREQ_STALL_max/GRBM_GUI_ACTIVE descr="The percentage of GPUTime the Write unit is stalled. Value range: 0% to 100% (bad)."></metric>
<metric name="LDSBankConflict" expr=100*SQC_LDS_BANK_CONFLICT/SQC_LDS_IDX_ACTIVE descr="The percentage of GPUTime LDS is stalled by bank conflicts. Value range: 0% (optimal) to 100% (bad)."></metric>
<metric name="OccupancyPercent" expr=100*SQ_WAVE_CYCLES/GRBM_GUI_ACTIVE/CU_NUM/32 descr="GPU occupancy as % of maximum."></metric>
</gfx11_expr>

<gfx1100_expr base="gfx11_expr"></gfx1100_expr>
Expand Down

0 comments on commit 5a416bd

Please sign in to comment.