-
Notifications
You must be signed in to change notification settings - Fork 26
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement much faster sha256 and sha512. #41
Changes from 10 commits
57ef977
354bc0c
d80c79a
c7dd9e5
c705ae2
e17dee4
b685df3
9ad48b8
efec464
99627cf
43813ab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,23 +13,37 @@ | |
#[cfg(not(any(target_arch = "x86_64", target_arch = "x86", target_arch = "aarch64")))] | ||
compile_error!("crate can only be used on x86, x86-64 and aarch64 architectures"); | ||
|
||
cpufeatures::new!(cpuid_avx2, "avx2"); | ||
|
||
#[link(name = "sha256", kind = "static")] | ||
#[allow(dead_code)] | ||
extern "C" { | ||
fn sha256_compress(state: &mut [u32; 8], block: &[u8; 64]); | ||
fn sha256_transform_rorx(state: &mut [u32; 8], block: *const [u8; 64], num_blocks: u64); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You forgot to change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It seems like it's guaranteed
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Your link talks about the layout of the slice itself (i.e. about how the elements of a slice are stored in memory). In this context it's more about ABI guarantees, i.e. I don't think it's currently guaranteed that |
||
} | ||
|
||
/// Safe wrapper around assembly implementation of SHA256 compression function | ||
/// | ||
#[inline] | ||
pub fn compress256(state: &mut [u32; 8], blocks: &[[u8; 64]]) { | ||
for block in blocks { | ||
unsafe { sha256_compress(state, block) } | ||
let token: cpuid_avx2::InitToken = cpuid_avx2::init(); | ||
|
||
if token.get() { | ||
0xdeafbeef marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if !blocks.is_empty() { | ||
unsafe { sha256_transform_rorx(state, blocks.as_ptr(), blocks.len() as u64) } | ||
} | ||
} else { | ||
for block in blocks { | ||
unsafe { sha256_compress(state, block) } | ||
} | ||
} | ||
} | ||
|
||
#[cfg(not(target_arch = "aarch64"))] | ||
#[link(name = "sha512", kind = "static")] | ||
extern "C" { | ||
fn sha512_compress(state: &mut [u64; 8], block: &[u8; 128]); | ||
fn sha512_transform_rorx(state: &mut [u64; 8], block: *const [u8; 128], num_blocks: usize); | ||
} | ||
|
||
/// Safe wrapper around assembly implementation of SHA512 compression function | ||
|
@@ -38,7 +52,14 @@ extern "C" { | |
#[cfg(not(target_arch = "aarch64"))] | ||
#[inline] | ||
pub fn compress512(state: &mut [u64; 8], blocks: &[[u8; 128]]) { | ||
for block in blocks { | ||
unsafe { sha512_compress(state, block) } | ||
let token: cpuid_avx2::InitToken = cpuid_avx2::init(); | ||
if token.get() { | ||
if !blocks.is_empty() { | ||
unsafe { sha512_transform_rorx(state, blocks.as_ptr(), blocks.len()) } | ||
} | ||
} else { | ||
for block in blocks { | ||
unsafe { sha512_compress(state, block) } | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Gate this line on `#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]`. Otherwise it causes a compilation failure on AArch64 targets.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You forgot to modify the `compress256` function (see the CI failure). Currently it tries to use the `cpuid_avx2` module on all targets. I think the easiest solution would be to introduce two functions with the same name, one gated on x86(-64) and the other on AArch64.