Skip to content

Commit

Permalink
Nchwc kcyxc (#155)
Browse files Browse the repository at this point in the history
* support kcyxc weight layout

* layout now supports kcyxc/cyxkc for weight

* modify script

* modify nchwc layout as multiple

* fix int4 config

* fix bug for int4
  • Loading branch information
carlushuang authored Feb 28, 2022
1 parent f1431e2 commit 5d8e4f8
Show file tree
Hide file tree
Showing 20 changed files with 729 additions and 456 deletions.
100 changes: 50 additions & 50 deletions config/igemm_fwd_gtc_gfx1030_nchwc_fp16x4.config

Large diffs are not rendered by default.

100 changes: 50 additions & 50 deletions config/igemm_fwd_gtc_gfx1030_nchwc_fp16x8.config

Large diffs are not rendered by default.

104 changes: 52 additions & 52 deletions config/igemm_fwd_gtc_gfx1030_nchwc_int4x16.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ tensor_b_thread_lengths = [1, 1, 2, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 64] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -43,10 +43,10 @@ tensor_b_thread_lengths = [1, 1, 2, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 64] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -67,10 +67,10 @@ tensor_b_thread_lengths = [1, 1, 4, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 64] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -90,10 +90,10 @@ tensor_b_thread_lengths = [1, 1, 4, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 64] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -114,10 +114,10 @@ tensor_b_thread_lengths = [1, 1, 2, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 64] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -137,10 +137,10 @@ tensor_b_thread_lengths = [1, 1, 2, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 64] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -161,10 +161,10 @@ tensor_b_thread_lengths = [1, 1, 3, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 64] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -184,10 +184,10 @@ tensor_b_thread_lengths = [1, 1, 3, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 64] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -208,10 +208,10 @@ tensor_b_thread_lengths = [1, 1, 1, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 64] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -231,10 +231,10 @@ tensor_b_thread_lengths = [1, 1, 1, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 64] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -255,10 +255,10 @@ tensor_b_thread_lengths = [1, 1, 3, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 64] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -278,10 +278,10 @@ tensor_b_thread_lengths = [1, 1, 3, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 64] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -302,10 +302,10 @@ tensor_b_thread_lengths = [1, 1, 2, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 64] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -325,10 +325,10 @@ tensor_b_thread_lengths = [1, 1, 2, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 64] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -349,7 +349,7 @@ tensor_b_thread_lengths = [1, 1, 3, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 32] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 32
Expand All @@ -372,7 +372,7 @@ tensor_b_thread_lengths = [1, 1, 3, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 32] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 32
Expand All @@ -396,7 +396,7 @@ tensor_b_thread_lengths = [1, 1, 4, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 32] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 32
Expand All @@ -419,7 +419,7 @@ tensor_b_thread_lengths = [1, 1, 4, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 32] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 32
Expand All @@ -443,10 +443,10 @@ tensor_b_thread_lengths = [1, 1, 2, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 32] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -466,10 +466,10 @@ tensor_b_thread_lengths = [1, 1, 2, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 32] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -490,10 +490,10 @@ tensor_b_thread_lengths = [1, 1, 4, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 32] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -513,10 +513,10 @@ tensor_b_thread_lengths = [1, 1, 4, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 32] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 64
wavefront_size = 32
cumode = 0
vector_c = 16

Expand All @@ -537,7 +537,7 @@ tensor_b_thread_lengths = [1, 1, 1, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 32] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 32
Expand All @@ -560,7 +560,7 @@ tensor_b_thread_lengths = [1, 1, 1, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 32] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 32
Expand All @@ -584,7 +584,7 @@ tensor_b_thread_lengths = [1, 1, 4, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 32] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 32
Expand All @@ -607,7 +607,7 @@ tensor_b_thread_lengths = [1, 1, 4, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 32] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 32
Expand All @@ -631,7 +631,7 @@ tensor_b_thread_lengths = [1, 1, 4, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 16] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 32
Expand All @@ -654,7 +654,7 @@ tensor_b_thread_lengths = [1, 1, 4, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 16] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 32
Expand All @@ -678,7 +678,7 @@ tensor_b_thread_lengths = [1, 1, 2, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 16] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 32
Expand All @@ -701,7 +701,7 @@ tensor_b_thread_lengths = [1, 1, 2, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 16] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 32
Expand All @@ -725,7 +725,7 @@ tensor_b_thread_lengths = [1, 1, 4, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 16] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 32
Expand All @@ -748,7 +748,7 @@ tensor_b_thread_lengths = [1, 1, 4, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 16] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 32
Expand All @@ -772,7 +772,7 @@ tensor_b_thread_lengths = [1, 1, 4, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 8] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 1
wavefront_size = 32
Expand All @@ -795,7 +795,7 @@ tensor_b_thread_lengths = [1, 1, 4, 16] # 1xCExNB0xVec-c
tensor_b_cluster_lengths = [1, 4, 1, 8] # 1xCEx1xNB1
direction = "fwd"
precision = "int4"
tensor_layout = 'nchwc'
tensor_layout = ['nchwc_cyxkc', 'nchwc_kcyxc']
nxb = 0
nxe = 0
wavefront_size = 32
Expand Down
Loading

0 comments on commit 5d8e4f8

Please sign in to comment.