-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
17 changed files
with
241 additions
and
1,988 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
63 changes: 63 additions & 0 deletions
63
crates/cubecl-linalg/src/matmul/cmma/compute_loop/accumulators_first.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
use cubecl_core as cubecl; | ||
use cubecl_core::prelude::*; | ||
|
||
use crate::matmul::cmma::{ | ||
base::{Fragments, Ids, SharedMemories}, | ||
compute_loop::base::load_into_fragment, | ||
config::ComptimeCmmaInfo, | ||
}; | ||
|
||
use super::base::ComputeLoop; | ||
|
||
pub(crate) struct AllAccumulatorsFirstComputeLoop {} | ||
|
||
#[cube] | ||
impl ComputeLoop for AllAccumulatorsFirstComputeLoop { | ||
fn compute_loop<F: Float, FC: Float>( | ||
shared_memories: SharedMemories<FC>, | ||
fragments: &mut Fragments<F, FC>, | ||
ids: Ids, | ||
#[comptime] comptime_info: ComptimeCmmaInfo, | ||
) { | ||
// Comptime values | ||
let block_size_k = comptime_info.block_size_k; | ||
let block_size_n = comptime_info.block_size_n; | ||
let tile_size = comptime_info.tile_size; | ||
let unroll = comptime_info.unroll; | ||
let num_accumulators = comptime_info.num_accumulators; | ||
let num_buffers = block_size_k / tile_size; | ||
let num_coop_per_row = (block_size_n / tile_size) / num_accumulators; | ||
|
||
// Runtime values | ||
let tile_row = ids.coop / num_coop_per_row; | ||
let tile_col_base = (ids.coop % num_coop_per_row) * num_accumulators; | ||
|
||
#[unroll(unroll)] | ||
for buffer_iter in 0..num_buffers { | ||
#[unroll] | ||
for accumulator_iter in 0..num_accumulators { | ||
load_into_fragment( | ||
tile_row * num_buffers + buffer_iter, | ||
shared_memories.lhs, | ||
&fragments.lhs, | ||
comptime_info, | ||
); | ||
|
||
load_into_fragment( | ||
(tile_col_base + accumulator_iter) * num_buffers + buffer_iter, | ||
shared_memories.rhs, | ||
&fragments.rhs, | ||
comptime_info, | ||
); | ||
|
||
let accumulator = &fragments.accumulators.index(accumulator_iter); | ||
cmma::execute::<FC, FC, F, F>( | ||
&fragments.lhs, | ||
&fragments.rhs, | ||
accumulator, | ||
accumulator, | ||
); | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
use cubecl_core as cubecl; | ||
use cubecl_core::prelude::*; | ||
|
||
use crate::matmul::cmma::{ | ||
base::{Fragments, Ids, SharedMemories}, | ||
compute_loop::{ | ||
accumulators_first::AllAccumulatorsFirstComputeLoop, | ||
buffers_first::AllBuffersFirstComputeLoop, | ||
}, | ||
config::ComptimeCmmaInfo, | ||
}; | ||
|
||
#[cube] | ||
pub(crate) fn compute_loop<F: Float, FC: Float>( | ||
shared_memories: SharedMemories<FC>, | ||
fragments: &mut Fragments<F, FC>, | ||
ids: Ids, | ||
#[comptime] comptime_info: ComptimeCmmaInfo, | ||
) { | ||
if comptime_info.compute_loop_order_strategy == 0 { | ||
AllBuffersFirstComputeLoop::compute_loop(shared_memories, fragments, ids, comptime_info); | ||
} else { | ||
AllAccumulatorsFirstComputeLoop::compute_loop( | ||
shared_memories, | ||
fragments, | ||
ids, | ||
comptime_info, | ||
); | ||
} | ||
} | ||
|
||
// Common interface for the compute-loop strategies. The free function
// `compute_loop` in this file dispatches to one of two implementors,
// `AllBuffersFirstComputeLoop` or `AllAccumulatorsFirstComputeLoop`.
#[cube] | ||
pub(crate) trait ComputeLoop { | ||
// Runs the strategy's loop over the tiles held in `shared_memories`,
// using `fragments` for the lhs/rhs fragments and the accumulators.
// `ids` identifies the invocation and `comptime_info` carries the
// compile-time configuration.
fn compute_loop<F: Float, FC: Float>( | ||
shared_memories: SharedMemories<FC>, | ||
fragments: &mut Fragments<F, FC>, | ||
ids: Ids, | ||
#[comptime] comptime_info: ComptimeCmmaInfo, | ||
); | ||
} | ||
|
||
#[cube] | ||
pub(crate) fn load_into_fragment<FC: Float>( | ||
tile: u32, | ||
smem: SharedMemory<FC>, | ||
fragment: &cmma::Matrix<FC>, | ||
#[comptime] comptime_info: ComptimeCmmaInfo, | ||
) { | ||
let tile_size = comptime_info.tile_size; | ||
let smem_stride = tile_size * tile_size; | ||
|
||
let smem_pos = tile * smem_stride; | ||
let slice = smem.slice(smem_pos, smem_pos + smem_stride); | ||
cmma::load::<FC>(fragment, slice, 16); | ||
} |
63 changes: 63 additions & 0 deletions
63
crates/cubecl-linalg/src/matmul/cmma/compute_loop/buffers_first.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
use cubecl_core as cubecl; | ||
use cubecl_core::prelude::*; | ||
|
||
use crate::matmul::cmma::{ | ||
base::{Fragments, Ids, SharedMemories}, | ||
compute_loop::base::load_into_fragment, | ||
config::ComptimeCmmaInfo, | ||
}; | ||
|
||
use super::base::ComputeLoop; | ||
|
||
pub(crate) struct AllBuffersFirstComputeLoop {} | ||
|
||
#[cube] | ||
impl ComputeLoop for AllBuffersFirstComputeLoop { | ||
fn compute_loop<F: Float, FC: Float>( | ||
shared_memories: SharedMemories<FC>, | ||
fragments: &mut Fragments<F, FC>, | ||
ids: Ids, | ||
#[comptime] comptime_info: ComptimeCmmaInfo, | ||
) { | ||
// Comptime values | ||
let block_size_k = comptime_info.block_size_k; | ||
let block_size_n = comptime_info.block_size_n; | ||
let tile_size = comptime_info.tile_size; | ||
let unroll = comptime_info.unroll; | ||
let num_accumulators = comptime_info.num_accumulators; | ||
let num_buffers = block_size_k / tile_size; | ||
let num_coop_per_row = (block_size_n / tile_size) / num_accumulators; | ||
|
||
// Runtime values | ||
let tile_row = ids.coop / num_coop_per_row; | ||
let tile_col_base = (ids.coop % num_coop_per_row) * num_accumulators; | ||
|
||
#[unroll] | ||
for accumulator_iter in 0..num_accumulators { | ||
#[unroll(unroll)] | ||
for buffer_iter in 0..num_buffers { | ||
load_into_fragment( | ||
tile_row * num_buffers + buffer_iter, | ||
shared_memories.lhs, | ||
&fragments.lhs, | ||
comptime_info, | ||
); | ||
|
||
load_into_fragment( | ||
(tile_col_base + accumulator_iter) * num_buffers + buffer_iter, | ||
shared_memories.rhs, | ||
&fragments.rhs, | ||
comptime_info, | ||
); | ||
|
||
let accumulator = &fragments.accumulators.index(accumulator_iter); | ||
cmma::execute::<FC, FC, F, F>( | ||
&fragments.lhs, | ||
&fragments.rhs, | ||
accumulator, | ||
accumulator, | ||
); | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
mod accumulators_first; | ||
pub(crate) mod base; | ||
mod buffers_first; |
Oops, something went wrong.