1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
// Copyright 2021 Brian Smith.
// Portions Copyright (c) 2014, Google Inc.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
// Adapted from the public domain, estream code by D. Bernstein.
// Adapted from the BoringSSL crypto/chacha/chacha.c.
use super::{super::overlapping::IndexError, Counter, Key, Overlapping, BLOCK_LEN};
use crate::{bb, polyfill::sliceutil};
use core::mem::size_of;
pub(super) fn ChaCha20_ctr32(key: &Key, counter: Counter, mut in_out: Overlapping<'_>) {
const SIGMA: [u32; 4] = [
u32::from_le_bytes(*b"expa"),
u32::from_le_bytes(*b"nd 3"),
u32::from_le_bytes(*b"2-by"),
u32::from_le_bytes(*b"te k"),
];
let key = key.words_less_safe();
let counter = counter.into_words_less_safe();
let mut state = [
SIGMA[0], SIGMA[1], SIGMA[2], SIGMA[3], key[0], key[1], key[2], key[3], key[4], key[5],
key[6], key[7], counter[0], counter[1], counter[2], counter[3],
];
let mut in_out_len = in_out.len();
let mut buf = [0u8; BLOCK_LEN];
while in_out_len > 0 {
chacha_core(&mut buf, &state);
state[12] += 1;
debug_assert_eq!(in_out_len, in_out.len());
// Both branches do the same thing, but the duplication helps the
// compiler optimize (vectorize) the `BLOCK_LEN` case.
if in_out_len >= BLOCK_LEN {
in_out = in_out
.split_first_chunk::<BLOCK_LEN>(|in_out| {
bb::xor_assign_at_start(&mut buf, in_out.input());
sliceutil::overwrite_at_start(in_out.into_unwritten_output(), &buf);
})
.unwrap_or_else(|IndexError { .. }| {
// Since `in_out_len == in_out.len() && in_out_len >= BLOCK_LEN`.
unreachable!()
});
} else {
bb::xor_assign_at_start(&mut buf, in_out.input());
sliceutil::overwrite_at_start(in_out.into_unwritten_output(), &buf);
break;
}
in_out_len -= BLOCK_LEN;
}
}
// Performs 20 rounds of ChaCha on `input`, storing the result in `output`.
#[inline(always)]
fn chacha_core(output: &mut [u8; BLOCK_LEN], input: &State) {
let mut x = *input;
for _ in (0..20).step_by(2) {
quarterround(&mut x, 0, 4, 8, 12);
quarterround(&mut x, 1, 5, 9, 13);
quarterround(&mut x, 2, 6, 10, 14);
quarterround(&mut x, 3, 7, 11, 15);
quarterround(&mut x, 0, 5, 10, 15);
quarterround(&mut x, 1, 6, 11, 12);
quarterround(&mut x, 2, 7, 8, 13);
quarterround(&mut x, 3, 4, 9, 14);
}
for (x, input) in x.iter_mut().zip(input.iter()) {
*x = x.wrapping_add(*input);
}
output
.chunks_exact_mut(size_of::<u32>())
.zip(x.iter())
.for_each(|(output, &x)| output.copy_from_slice(&x.to_le_bytes()));
}
#[inline(always)]
fn quarterround(x: &mut State, a: usize, b: usize, c: usize, d: usize) {
#[inline(always)]
fn step(x: &mut State, a: usize, b: usize, c: usize, rotation: u32) {
x[a] = x[a].wrapping_add(x[b]);
x[c] = (x[c] ^ x[a]).rotate_left(rotation);
}
step(x, a, b, d, 16);
step(x, c, d, b, 12);
step(x, a, b, d, 8);
step(x, c, d, b, 7);
}
type State = [u32; BLOCK_LEN / 4];
|