#[cfg(feature = "rustcrypto_api")] pub use stream_cipher::generic_array; pub use ppv_lite86::Machine; use ppv_lite86::{vec128_storage, ArithOps, BitOps32, LaneWords4, MultiLane, StoreBytes, Vec4}; pub(crate) const BLOCK: usize = 64; pub(crate) const BLOCK64: u64 = BLOCK as u64; const LOG2_BUFBLOCKS: u64 = 2; const BUFBLOCKS: u64 = 1 << LOG2_BUFBLOCKS; pub(crate) const BUFSZ64: u64 = BLOCK64 * BUFBLOCKS; pub(crate) const BUFSZ: usize = BUFSZ64 as usize; #[derive(Clone)] pub struct ChaCha { pub(crate) b: vec128_storage, pub(crate) c: vec128_storage, pub(crate) d: vec128_storage, } #[derive(Clone)] pub struct State { pub(crate) a: V, pub(crate) b: V, pub(crate) c: V, pub(crate) d: V, } #[inline(always)] pub(crate) fn round(mut x: State) -> State { x.a += x.b; x.d = (x.d ^ x.a).rotate_each_word_right16(); x.c += x.d; x.b = (x.b ^ x.c).rotate_each_word_right20(); x.a += x.b; x.d = (x.d ^ x.a).rotate_each_word_right24(); x.c += x.d; x.b = (x.b ^ x.c).rotate_each_word_right25(); x } #[inline(always)] pub(crate) fn diagonalize(mut x: State) -> State { x.b = x.b.shuffle_lane_words3012(); x.c = x.c.shuffle_lane_words2301(); x.d = x.d.shuffle_lane_words1230(); x } #[inline(always)] pub(crate) fn undiagonalize(mut x: State) -> State { x.b = x.b.shuffle_lane_words1230(); x.c = x.c.shuffle_lane_words2301(); x.d = x.d.shuffle_lane_words3012(); x } impl ChaCha { #[inline(always)] pub fn new(key: &[u8; 32], nonce: &[u8]) -> Self { init_chacha(key, nonce) } #[inline(always)] fn pos64(&self, m: M) -> u64 { let d: M::u32x4 = m.unpack(self.d); ((d.extract(1) as u64) << 32) | d.extract(0) as u64 } /// Set 64-bit block count, affecting next refill. #[inline(always)] pub(crate) fn seek64(&mut self, m: M, blockct: u64) { let d: M::u32x4 = m.unpack(self.d); self.d = d .insert((blockct >> 32) as u32, 1) .insert(blockct as u32, 0) .into(); } /// Set 32-bit block count, affecting next refill. #[inline(always)] pub(crate) fn seek32(&mut self, m: M, blockct: u32) { let d: M::u32x4 = m.unpack(self.d); self.d = d.insert(blockct, 0).into(); } /// Produce output from the current state. #[inline(always)] fn output_narrow(&mut self, m: M, x: State, out: &mut [u8; BLOCK]) { let k = m.vec([0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574]); (x.a + k).write_le(&mut out[0..16]); (x.b + m.unpack(self.b)).write_le(&mut out[16..32]); (x.c + m.unpack(self.c)).write_le(&mut out[32..48]); (x.d + m.unpack(self.d)).write_le(&mut out[48..64]); } /// Add one to the block counter (no overflow check). #[inline(always)] fn inc_block_ct(&mut self, m: M) { let mut pos = self.pos64(m); let d0: M::u32x4 = m.unpack(self.d); pos += 1; let d1 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); self.d = d1.into(); } /// Produce 4 blocks of output, advancing the state #[inline(always)] pub fn refill4(&mut self, drounds: u32, out: &mut [u8; BUFSZ]) { refill_wide(self, drounds, out) } /// Produce a block of output, advancing the state #[inline(always)] pub fn refill(&mut self, drounds: u32, out: &mut [u8; BLOCK]) { refill_narrow(self, drounds, out) } #[inline(always)] pub(crate) fn refill_rounds(&mut self, drounds: u32) -> State { refill_narrow_rounds(self, drounds) } #[inline(always)] pub fn set_stream_param(&mut self, param: u32, value: u64) { set_stream_param(self, param, value) } #[inline(always)] pub fn get_stream_param(&self, param: u32) -> u64 { get_stream_param(self, param) } } #[inline(always)] fn refill_wide_impl( m: Mach, state: &mut ChaCha, drounds: u32, out: &mut [u8; BUFSZ], ) { let k = m.vec([0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574]); let mut pos = state.pos64(m); let d0: Mach::u32x4 = m.unpack(state.d); pos += 1; let d1 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); pos += 1; let d2 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); pos += 1; let d3 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); let b = m.unpack(state.b); let c = m.unpack(state.c); let mut x = State { a: Mach::u32x4x4::from_lanes([k, k, k, k]), b: Mach::u32x4x4::from_lanes([b, b, b, b]), c: Mach::u32x4x4::from_lanes([c, c, c, c]), d: m.unpack(Mach::u32x4x4::from_lanes([d0, d1, d2, d3]).into()), }; for _ in 0..drounds { x = round(x); x = undiagonalize(round(diagonalize(x))); } let mut pos = state.pos64(m); let d0: Mach::u32x4 = m.unpack(state.d); pos += 1; let d1 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); pos += 1; let d2 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); pos += 1; let d3 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); pos += 1; let d4 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); let (a, b, c, d) = ( x.a.to_lanes(), x.b.to_lanes(), x.c.to_lanes(), x.d.to_lanes(), ); let sb = m.unpack(state.b); let sc = m.unpack(state.c); let sd = [m.unpack(state.d), d1, d2, d3]; state.d = d4.into(); let mut words = out.chunks_exact_mut(16); for ((((&a, &b), &c), &d), &sd) in a.iter().zip(&b).zip(&c).zip(&d).zip(&sd) { (a + k).write_le(words.next().unwrap()); (b + sb).write_le(words.next().unwrap()); (c + sc).write_le(words.next().unwrap()); (d + sd).write_le(words.next().unwrap()); } } dispatch!(m, Mach, { fn refill_wide(state: &mut ChaCha, drounds: u32, out: &mut [u8; BUFSZ]) { refill_wide_impl(m, state, drounds, out); } }); /// Refill the buffer from a single-block round, updating the block count. dispatch_light128!(m, Mach, { fn refill_narrow(state: &mut ChaCha, drounds: u32, out: &mut [u8; BLOCK]) { let x = refill_narrow_rounds(state, drounds); let x = State { a: m.unpack(x.a), b: m.unpack(x.b), c: m.unpack(x.c), d: m.unpack(x.d), }; state.output_narrow(m, x, out); state.inc_block_ct(m); } }); /// Single-block, rounds-only; shared by try_apply_keystream for tails shorter than BUFSZ /// and XChaCha's setup step. dispatch!(m, Mach, { fn refill_narrow_rounds(state: &mut ChaCha, drounds: u32) -> State { let k: Mach::u32x4 = m.vec([0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574]); let mut x = State { a: k, b: m.unpack(state.b), c: m.unpack(state.c), d: m.unpack(state.d), }; for _ in 0..drounds { x = round(x); x = undiagonalize(round(diagonalize(x))); } State { a: x.a.into(), b: x.b.into(), c: x.c.into(), d: x.d.into(), } } }); dispatch_light128!(m, Mach, { fn set_stream_param(state: &mut ChaCha, param: u32, value: u64) { let d: Mach::u32x4 = m.unpack(state.d); state.d = d .insert((value >> 32) as u32, (param << 1) | 1) .insert(value as u32, param << 1) .into(); } }); dispatch_light128!(m, Mach, { fn get_stream_param(state: &ChaCha, param: u32) -> u64 { let d: Mach::u32x4 = m.unpack(state.d); ((d.extract((param << 1) | 1) as u64) << 32) | d.extract(param << 1) as u64 } }); fn read_u32le(xs: &[u8]) -> u32 { assert_eq!(xs.len(), 4); u32::from(xs[0]) | (u32::from(xs[1]) << 8) | (u32::from(xs[2]) << 16) | (u32::from(xs[3]) << 24) } dispatch_light128!(m, Mach, { fn init_chacha(key: &[u8; 32], nonce: &[u8]) -> ChaCha { let ctr_nonce = [ 0, if nonce.len() == 12 { read_u32le(&nonce[0..4]) } else { 0 }, read_u32le(&nonce[nonce.len() - 8..nonce.len() - 4]), read_u32le(&nonce[nonce.len() - 4..]), ]; let key0: Mach::u32x4 = m.read_le(&key[..16]); let key1: Mach::u32x4 = m.read_le(&key[16..]); ChaCha { b: key0.into(), c: key1.into(), d: ctr_nonce.into(), } } }); dispatch_light128!(m, Mach, { fn init_chacha_x(key: &[u8; 32], nonce: &[u8; 24], rounds: u32) -> ChaCha { let key0: Mach::u32x4 = m.read_le(&key[..16]); let key1: Mach::u32x4 = m.read_le(&key[16..]); let nonce0: Mach::u32x4 = m.read_le(&nonce[..16]); let mut state = ChaCha { b: key0.into(), c: key1.into(), d: nonce0.into(), }; let x = refill_narrow_rounds(&mut state, rounds); let ctr_nonce1 = [0, 0, read_u32le(&nonce[16..20]), read_u32le(&nonce[20..24])]; state.b = x.a; state.c = x.d; state.d = ctr_nonce1.into(); state } });