diff --git a/src/sha3.rs b/src/sha3.rs index 958e14c..fe35faf 100644 --- a/src/sha3.rs +++ b/src/sha3.rs @@ -80,51 +80,55 @@ impl Sha3_256 { } fn keccak_permute(input: &mut [u8; TOTAL_STATE_SIZE]) { - let (lanes, _) = input.as_chunks_mut::<8>(); + // let (lanes, _) = input.as_chunks_mut::<8>(); + let (pre, lanes, post) = unsafe { input.align_to_mut::() }; + assert!(pre.len() == 0); + assert!(post.len() == 0); + assert!(lanes.len() == 25); let mut lfsr_state = 0x01_u8; for _ in 0..ROUNDS { // θ step let c: [u64; 5] = array::from_fn(|x| { - get_lane(lanes, x, 0) - ^ get_lane(lanes, x, 1) - ^ get_lane(lanes, x, 2) - ^ get_lane(lanes, x, 3) - ^ get_lane(lanes, x, 4) + get_lane2(lanes, x, 0) + ^ get_lane2(lanes, x, 1) + ^ get_lane2(lanes, x, 2) + ^ get_lane2(lanes, x, 3) + ^ get_lane2(lanes, x, 4) }); let mut d: u64; for x in 0..5 { d = c[(x + 4) % 5] ^ rol64(c[(x + 1) % 5], 1); - let mut out = [0_u64; 8]; - unsafe { - let a: __m512i = - _mm512_set_epi64(d as i64, d as i64, d as i64, d as i64, d as i64, 0, 0, 0); + // let mut out = [0_u64; 8]; + // unsafe { + // let a: __m512i = + // _mm512_set_epi64(d as i64, d as i64, d as i64, d as i64, d as i64, 0, 0, 0); - let b: __m512i = _mm512_set_epi64( - get_lane(lanes, x, 0) as i64, - get_lane(lanes, x, 1) as i64, - get_lane(lanes, x, 2) as i64, - get_lane(lanes, x, 3) as i64, - get_lane(lanes, x, 4) as i64, - 0, - 0, - 0, - ); - let res = _mm512_xor_epi64(a, b); - _mm512_storeu_epi64(out.as_mut_ptr() as *mut i64, res); - } - for i in 0..5 { - set_lane(out[i], x, i, lanes); - } - // for y in 0..5 { - // xor_lane(d, lanes, x, y); + // let b: __m512i = _mm512_set_epi64( + // get_lane2(lanes, x, 0) as i64, + // get_lane2(lanes, x, 1) as i64, + // get_lane2(lanes, x, 2) as i64, + // get_lane2(lanes, x, 3) as i64, + // get_lane2(lanes, x, 4) as i64, + // 0, + // 0, + // 0, + // ); + // let res = _mm512_xor_epi64(a, b); + // _mm512_storeu_epi64(out.as_mut_ptr() as *mut i64, res); // } + // for i in 0..5 { + // set_lane2(out[i], x, i, lanes); + // } + for y in 0..5 { + xor_lane2(d, lanes, x, y); + } } // ρ and π steps let (mut x, mut y) = (1, 0); - let mut current = get_lane(lanes, x, y); + let mut current = get_lane2(lanes, x, y); let mut temp: u64; for t in 0..24 { @@ -133,8 +137,8 @@ fn keccak_permute(input: &mut [u8; TOTAL_STATE_SIZE]) { x = y; y = y2; - temp = get_lane(lanes, x, y); - set_lane(rol64(current, r), x, y, lanes); + temp = get_lane2(lanes, x, y); + set_lane2(rol64(current, r), x, y, lanes); current = temp; } @@ -144,9 +148,9 @@ fn keccak_permute(input: &mut [u8; TOTAL_STATE_SIZE]) { // for x in 0..5 { // temp2[x] = get_lane(lanes, x, y); // } - let temp2: [u64; 5] = array::from_fn(|x| get_lane(lanes, x, y)); + let temp2: [u64; 5] = array::from_fn(|x| get_lane2(lanes, x, y)); for x in 0..5 { - set_lane( + set_lane2( temp2[x] ^ ((!temp2[(x + 1) % 5]) & temp2[(x + 2) % 5]), x, y, @@ -166,7 +170,7 @@ fn keccak_permute(input: &mut [u8; TOTAL_STATE_SIZE]) { // } if lfsr_out { - xor_lane((1 as u64) << bit_pos, lanes, 0, 0); + xor_lane2((1 as u64) << bit_pos, lanes, 0, 0); } } } @@ -182,6 +186,16 @@ fn set_lane(lane: u64, x: usize, y: usize, lanes: &mut [[u8; 8]]) { lanes[x + 5 * y] = lane.to_ne_bytes(); } +#[inline] +fn get_lane2(lanes: &[u64], x: usize, y: usize) -> u64 { + lanes[x + 5 * y] +} + +#[inline] +fn set_lane2(lane: u64, x: usize, y: usize, lanes: &mut [u64]) { + lanes[x + 5 * y] = lane; +} + #[inline] fn rol64(v: u64, off: usize) -> u64 { ((v) << off) ^ ((v) >> (64 - off)) @@ -192,6 +206,11 @@ fn xor_lane(lane: u64, lanes: &mut [[u8; 8]], x: usize, y: usize) { set_lane(get_lane(lanes, x, y) ^ lane, x, y, lanes); } +#[inline] +fn xor_lane2(lane: u64, lanes: &mut [u64], x: usize, y: usize) { + set_lane2(get_lane2(lanes, x, y) ^ lane, x, y, lanes); +} + // Function that computes the linear feedback shift register (LFSR) // I have absolutely no idea wtf is this shit. Copied from a github repo lol. // SUSCEPTIBLE TO BE CONVERTED INTO A TABLE