-
Notifications
You must be signed in to change notification settings - Fork 274
refactor: replace byte-level operations with word-level operations #692
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
AnarchistHoneybun
wants to merge
4
commits into
RustCrypto:master
from
AnarchistHoneybun:kupyna-perf
Closed
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
bd0258f
refactor: replace byte-level operations with word-level operations in…
AnarchistHoneybun 6ba7c9b
refactor: optimize S-box application in apply_s_box function
AnarchistHoneybun f8a3a97
cleanup
AnarchistHoneybun b211b56
Merge branch 'refs/heads/master' into kupyna-perf
AnarchistHoneybun File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,95 +1,140 @@ | ||
use crate::utils::{add_constant_plus, add_constant_xor, apply_s_box, mix_columns, xor_bytes}; | ||
use crate::{ | ||
long_compress::{COLS, compress, t_xor_l}, | ||
utils::{read_u64_le, write_u64_le, xor_words}, | ||
}; | ||
use core::fmt; | ||
use digest::{ | ||
HashMarker, InvalidOutputSize, Output, | ||
block_buffer::Eager, | ||
core_api::{ | ||
AlgorithmName, Block, BlockSizeUser, Buffer, BufferKindUser, OutputSizeUser, TruncSide, | ||
UpdateCore, VariableOutputCore, | ||
}, | ||
crypto_common::hazmat::{DeserializeStateError, SerializableState, SerializedState}, | ||
typenum::{U64, U128, U136, Unsigned}, | ||
}; | ||
|
||
#[cfg(feature = "zeroize")] | ||
use digest::zeroize::{Zeroize, ZeroizeOnDrop}; | ||
|
||
/// Lowest-level core hasher state of the long Kupyna variant. | ||
#[derive(Clone)] | ||
pub struct KupynaLongVarCore { | ||
state: [u64; COLS], | ||
blocks_len: u64, | ||
} | ||
|
||
impl HashMarker for KupynaLongVarCore {} | ||
|
||
pub(crate) const COLS: usize = 16; | ||
const ROUNDS: u64 = 14; | ||
impl BlockSizeUser for KupynaLongVarCore { | ||
type BlockSize = U128; | ||
} | ||
|
||
type Matrix = [[u8; 8]; 16]; | ||
impl BufferKindUser for KupynaLongVarCore { | ||
type BufferKind = Eager; | ||
} | ||
|
||
pub(crate) fn compress(prev_vector: &mut [u64; COLS], message_block: &[u8; 128]) { | ||
let mut prev_vector_u8 = [0u8; 128]; | ||
for (src, dst) in prev_vector.iter().zip(prev_vector_u8.chunks_exact_mut(8)) { | ||
dst.copy_from_slice(&src.to_be_bytes()); | ||
impl UpdateCore for KupynaLongVarCore { | ||
#[inline] | ||
fn update_blocks(&mut self, blocks: &[Block<Self>]) { | ||
self.blocks_len += blocks.len() as u64; | ||
for block in blocks { | ||
compress(&mut self.state, block.as_ref()); | ||
} | ||
} | ||
} | ||
|
||
let m_xor_p = xor_bytes(*message_block, prev_vector_u8); | ||
impl OutputSizeUser for KupynaLongVarCore { | ||
type OutputSize = U64; | ||
} | ||
|
||
let t_xor_mp = t_xor_l(m_xor_p); | ||
impl VariableOutputCore for KupynaLongVarCore { | ||
const TRUNC_SIDE: TruncSide = TruncSide::Right; | ||
|
||
let t_plus_m = t_plus_l(*message_block); | ||
#[inline] | ||
fn new(output_size: usize) -> Result<Self, InvalidOutputSize> { | ||
let min_size = Self::OutputSize::USIZE / 2; | ||
let max_size = Self::OutputSize::USIZE; | ||
if output_size < min_size || output_size > max_size { | ||
return Err(InvalidOutputSize); | ||
} | ||
let mut state = [0; COLS]; | ||
state[0] = 0x80; | ||
state[0] <<= 56; | ||
let blocks_len = 0; | ||
Ok(Self { state, blocks_len }) | ||
} | ||
|
||
prev_vector_u8 = xor_bytes(xor_bytes(t_xor_mp, t_plus_m), prev_vector_u8); | ||
#[inline] | ||
fn finalize_variable_core(&mut self, buffer: &mut Buffer<Self>, out: &mut Output<Self>) { | ||
let block_size = Self::BlockSize::USIZE as u128; | ||
let msg_len_bytes = (self.blocks_len as u128) * block_size + (buffer.get_pos() as u128); | ||
let msg_len_bits = 8 * msg_len_bytes; | ||
|
||
for (dst, src) in prev_vector.iter_mut().zip(prev_vector_u8.chunks_exact(8)) { | ||
*dst = u64::from_be_bytes(src.try_into().unwrap()); | ||
} | ||
} | ||
buffer.digest_pad(0x80, &msg_len_bits.to_le_bytes()[0..12], |block| { | ||
compress(&mut self.state, block.as_ref()); | ||
}); | ||
|
||
pub(crate) fn t_plus_l(block: [u8; 128]) -> [u8; 128] { | ||
let mut state = block_to_matrix(block); | ||
for nu in 0..ROUNDS { | ||
state = add_constant_plus(state, nu as usize); | ||
state = apply_s_box(state); | ||
state = rotate_rows(state); | ||
state = mix_columns(state); | ||
} | ||
matrix_to_block(state) | ||
} | ||
// Process final state with t_xor_l | ||
let t_xor_ult_processed_block = t_xor_l(self.state); | ||
|
||
fn block_to_matrix(block: [u8; 128]) -> Matrix { | ||
const ROWS: usize = 16; | ||
const COLS: usize = 8; | ||
let result_state = xor_words(self.state, t_xor_ult_processed_block); | ||
|
||
let mut matrix = [[0u8; COLS]; ROWS]; | ||
for i in 0..ROWS { | ||
for j in 0..COLS { | ||
matrix[i][j] = block[i * COLS + j]; | ||
let n = COLS / 2; | ||
for (chunk, v) in out.chunks_exact_mut(8).zip(result_state[n..].iter()) { | ||
chunk.copy_from_slice(&v.to_be_bytes()); | ||
} | ||
} | ||
matrix | ||
} | ||
|
||
fn matrix_to_block(matrix: Matrix) -> [u8; 128] { | ||
const ROWS: usize = 16; | ||
const COLS: usize = 8; | ||
impl AlgorithmName for KupynaLongVarCore { | ||
#[inline] | ||
fn write_alg_name(f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
f.write_str("KupynaLong") | ||
} | ||
} | ||
|
||
let mut block = [0u8; ROWS * COLS]; | ||
for i in 0..ROWS { | ||
for j in 0..COLS { | ||
block[i * COLS + j] = matrix[i][j]; | ||
} | ||
impl fmt::Debug for KupynaLongVarCore { | ||
#[inline] | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
f.write_str("KupynaLongVarCore { ... }") | ||
} | ||
block | ||
} | ||
|
||
fn rotate_rows(mut state: Matrix) -> Matrix { | ||
const ROWS: usize = 16; | ||
let cols = 8; | ||
|
||
let mut temp = [0u8; ROWS]; | ||
let mut shift: i32 = -1; | ||
for i in 0..cols { | ||
if i == cols - 1 { | ||
shift = 11; | ||
} else { | ||
shift += 1; | ||
} | ||
for col in 0..ROWS { | ||
temp[(col + shift as usize) % ROWS] = state[col][i]; | ||
} | ||
for col in 0..ROWS { | ||
state[col][i] = temp[col]; | ||
impl Drop for KupynaLongVarCore { | ||
#[inline] | ||
fn drop(&mut self) { | ||
#[cfg(feature = "zeroize")] | ||
{ | ||
self.state.zeroize(); | ||
self.blocks_len.zeroize(); | ||
} | ||
} | ||
state | ||
} | ||
|
||
pub(crate) fn t_xor_l(block: [u8; 128]) -> [u8; 128] { | ||
let mut state = block_to_matrix(block); | ||
for nu in 0..ROUNDS { | ||
state = add_constant_xor(state, nu as usize); | ||
state = apply_s_box(state); | ||
state = rotate_rows(state); | ||
state = mix_columns(state); | ||
impl SerializableState for KupynaLongVarCore { | ||
type SerializedStateSize = U136; | ||
|
||
#[inline] | ||
fn serialize(&self) -> SerializedState<Self> { | ||
let mut serialized_state = SerializedState::<Self>::default(); | ||
let (state_dst, len_dst) = serialized_state.split_at_mut(128); | ||
write_u64_le(&self.state, state_dst); | ||
len_dst.copy_from_slice(&self.blocks_len.to_le_bytes()); | ||
serialized_state | ||
} | ||
|
||
#[inline] | ||
fn deserialize( | ||
serialized_state: &SerializedState<Self>, | ||
) -> Result<Self, DeserializeStateError> { | ||
let (serialized_state, serialized_block_len) = serialized_state.split::<U128>(); | ||
Ok(Self { | ||
state: read_u64_le(&serialized_state.0), | ||
blocks_len: u64::from_le_bytes(serialized_block_len.0), | ||
}) | ||
} | ||
matrix_to_block(state) | ||
} | ||
|
||
#[cfg(feature = "zeroize")] | ||
impl ZeroizeOnDrop for KupynaLongVarCore {} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
use crate::utils::{add_constant_plus, add_constant_xor, apply_s_box, mix_columns, xor_words}; | ||
|
||
pub(crate) const COLS: usize = 16; | ||
const ROUNDS: u64 = 14; | ||
|
||
/// Folds one 128-byte message block into the chaining value `prev_vector`, in place. | ||
/// | ||
/// The block is read as `COLS` big-endian `u64` words `m`; the new chaining value is | ||
/// `xor_words(xor_words(t_xor_l(xor_words(prev, m)), t_plus_l(m)), prev)`. | ||
/// NOTE(review): `xor_words` lives in `crate::utils` and is not shown here; it is | ||
/// presumably an element-wise XOR of two word arrays — confirm. | ||
pub(crate) fn compress(prev_vector: &mut [u64; COLS], message_block: &[u8; 128]) { | ||
// Convert message block from u8 to u64 (column-major order as per paper) | ||
let mut message_u64 = [0u64; COLS]; | ||
for (chunk, v) in message_block.chunks_exact(8).zip(message_u64.iter_mut()) { | ||
// `chunks_exact(8)` only yields 8-byte slices, so the conversion to `[u8; 8]` cannot fail. | ||
*v = u64::from_be_bytes(chunk.try_into().unwrap()); | ||
AnarchistHoneybun marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
// Feed the block mixed with the previous chaining value to the XOR-constant permutation. | ||
let m_xor_p = xor_words(*prev_vector, message_u64); | ||
|
||
let t_xor_mp = t_xor_l(m_xor_p); | ||
|
||
// The add-constant permutation receives the message words alone. | ||
let t_plus_m = t_plus_l(message_u64); | ||
|
||
// Combine both permutation outputs with the previous chaining value. | ||
*prev_vector = xor_words(xor_words(t_xor_mp, t_plus_m), *prev_vector); | ||
} | ||
|
||
/// Runs `ROUNDS` rounds over the `COLS`-word state and returns the result. | ||
/// | ||
/// Each round calls, in order, `add_constant_plus` (with the round index), | ||
/// `apply_s_box`, `rotate_rows` and `mix_columns`. Apart from the first step this is | ||
/// the same round sequence as `t_xor_l`, which calls `add_constant_xor` instead. | ||
pub(crate) fn t_plus_l(state: [u64; COLS]) -> [u64; COLS] { | ||
let mut state = state; | ||
for nu in 0..ROUNDS { | ||
// `ROUNDS` is a `u64`; the round-constant helper takes the round index as `usize`. | ||
state = add_constant_plus(state, nu as usize); | ||
state = apply_s_box(state); | ||
state = rotate_rows(state); | ||
state = mix_columns(state); | ||
} | ||
state | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It may be worth to change |
||
} | ||
|
||
/// Cyclically shifts the byte rows of the state and returns the shifted state. | ||
/// | ||
/// Each `u64` word is treated as one column: byte `r` of its big-endian encoding is the | ||
/// entry in row `r` of that column. The byte at (row `r`, column `c`) moves to column | ||
/// `(c + shift) % COLS`, where `shift` is `r` for rows 0..=6 and 11 for row 7. | ||
fn rotate_rows(state: [u64; COLS]) -> [u64; COLS] { | ||
// Convert to matrix format (column-major as per paper) | ||
// `matrix[row][col]` holds big-endian byte `row` of word `col` (8 rows, `COLS` columns). | ||
let mut matrix = [[0u8; COLS]; 8]; | ||
for col in 0..COLS { | ||
let bytes = state[col].to_be_bytes(); | ||
for row in 0..8 { | ||
matrix[row][col] = bytes[row]; | ||
} | ||
} | ||
|
||
// Apply row rotation as per paper: row i rotated by i positions, row 7 by 11 positions for l=1024 | ||
let mut result_matrix = [[0u8; COLS]; 8]; | ||
|
||
for row in 0..8 { | ||
let shift = if row == 7 { 11 } else { row }; | ||
for col in 0..COLS { | ||
// Written to a separate matrix so no source byte is overwritten before it is read. | ||
result_matrix[row][(col + shift) % COLS] = matrix[row][col]; | ||
} | ||
} | ||
|
||
// Convert back to u64 array | ||
// Reassemble each column's 8 bytes into one big-endian word, mirroring the unpacking above. | ||
let mut result = [0u64; COLS]; | ||
for col in 0..COLS { | ||
let mut bytes = [0u8; 8]; | ||
for row in 0..8 { | ||
bytes[row] = result_matrix[row][col]; | ||
} | ||
result[col] = u64::from_be_bytes(bytes); | ||
} | ||
|
||
result | ||
} | ||
|
||
/// Runs `ROUNDS` rounds over the `COLS`-word state and returns the result. | ||
/// | ||
/// Each round calls, in order, `add_constant_xor` (with the round index), | ||
/// `apply_s_box`, `rotate_rows` and `mix_columns`. Apart from the first step this is | ||
/// the same round sequence as `t_plus_l`, which calls `add_constant_plus` instead. | ||
pub(crate) fn t_xor_l(state: [u64; COLS]) -> [u64; COLS] { | ||
let mut state = state; | ||
for nu in 0..ROUNDS { | ||
// `ROUNDS` is a `u64`; the round-constant helper takes the round index as `usize`. | ||
state = add_constant_xor(state, nu as usize); | ||
state = apply_s_box(state); | ||
state = rotate_rows(state); | ||
state = mix_columns(state); | ||
} | ||
state | ||
} |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think you need this dependency.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've set up a main.rs file to verify that the code works as expected while I make changes to the functions; this is the reason for both the hex-literal dependency and the print statements. I will clean them all up once I'm done making large changes to the functions. It just helps to pinpoint errors faster if some math goes wrong.