Change |CRYPTO_chacha_20| to use 96-bit nonces, 32-bit counters.

The new function |CRYPTO_chacha_96_bit_nonce_from_64_bit_nonce| can be
used to adapt code that uses 64-bit nonces, in a way that is compatible
with the old semantics.
Brian Smith 2015-10-03 08:13:36 -10:00
parent 8813dea863
commit f0015bc1d5
7 changed files with 413 additions and 339 deletions
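As a minimal migration sketch (not part of this commit): a caller that previously passed a 64-bit nonce can widen it with the new helper and keep the old semantics, as long as its initial counter fits in 32 bits. The wrapper name and include paths below are assumptions for illustration.

#include <openssl/chacha.h>
#include "crypto/chacha/internal.h" /* assumed path; declares the helper */

/* Hypothetical wrapper: adapt an old-style 64-bit nonce to the new
 * 96-bit-nonce API, placing a zero where the old counter's high word was. */
static void chacha20_with_old_nonce(uint8_t *out, const uint8_t *in,
                                    size_t in_len, const uint8_t key[32],
                                    const uint8_t old_nonce[8],
                                    uint32_t counter) {
  uint8_t nonce_96[12];
  CRYPTO_chacha_96_bit_nonce_from_64_bit_nonce(nonce_96, old_nonce);
  CRYPTO_chacha_20(out, in, in_len, key, nonce_96, counter);
}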


@ -54,8 +54,8 @@ static const uint8_t sigma[16] = { 'e', 'x', 'p', 'a', 'n', 'd', ' ', '3',
#if defined(OPENSSL_ARM) && !defined(OPENSSL_NO_ASM)
/* Defined in chacha_vec.c */
void CRYPTO_chacha_20_neon(uint8_t *out, const uint8_t *in, size_t in_len,
const uint8_t key[32], const uint8_t nonce[8],
size_t counter);
const uint8_t key[32], const uint8_t nonce[12],
uint32_t counter);
#endif
/* chacha_core performs 20 rounds of ChaCha on the input words in
@ -85,8 +85,8 @@ static void chacha_core(uint8_t output[64], const uint32_t input[16]) {
}
void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
const uint8_t key[32], const uint8_t nonce[8],
size_t counter) {
const uint8_t key[32], const uint8_t nonce[12],
uint32_t counter) {
uint32_t input[16];
uint8_t buf[64];
size_t todo, i;
@ -114,9 +114,9 @@ void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
input[11] = U8TO32_LITTLE(key + 28);
input[12] = counter;
input[13] = ((uint64_t)counter) >> 32;
input[14] = U8TO32_LITTLE(nonce + 0);
input[15] = U8TO32_LITTLE(nonce + 4);
input[13] = U8TO32_LITTLE(nonce + 0);
input[14] = U8TO32_LITTLE(nonce + 4);
input[15] = U8TO32_LITTLE(nonce + 8);
while (in_len > 0) {
todo = sizeof(buf);
@ -134,9 +134,6 @@ void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
in_len -= todo;
input[12]++;
if (input[12] == 0) {
input[13]++;
}
}
}
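For reference, the hunks above amount to the following initial-state layout. This standalone sketch mirrors the diff; |u8to32_le| is a local stand-in for U8TO32_LITTLE, not a BoringSSL API.

#include <stdint.h>

static uint32_t u8to32_le(const uint8_t *p) {
  return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
         ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

static void chacha20_init_state(uint32_t state[16], const uint8_t key[32],
                                const uint8_t nonce[12], uint32_t counter) {
  static const uint8_t sigma[16] = "expand 32-byte k";
  for (int i = 0; i < 4; i++) {
    state[i] = u8to32_le(sigma + 4 * i);        /* words 0-3: constants */
    state[4 + i] = u8to32_le(key + 4 * i);      /* words 4-7: key low half */
    state[8 + i] = u8to32_le(key + 16 + 4 * i); /* words 8-11: key high half */
  }
  state[12] = counter;               /* word 12: single 32-bit block counter */
  state[13] = u8to32_le(nonce + 0);  /* words 13-15: 96-bit nonce */
  state[14] = u8to32_le(nonce + 4);
  state[15] = u8to32_le(nonce + 8);
}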


@ -154,12 +154,12 @@ void CRYPTO_chacha_20(
const uint8_t *in,
size_t inlen,
const uint8_t key[32],
const uint8_t nonce[8],
size_t counter)
const uint8_t nonce[12],
uint32_t counter)
{
unsigned iters, i, *op=(unsigned *)out, *ip=(unsigned *)in, *kp;
#if defined(__ARM_NEON__)
uint32_t np[2];
uint32_t np[3];
uint8_t alignment_buffer[16] __attribute__((aligned(16)));
#endif
vec s0, s1, s2, s3;
@ -167,20 +167,16 @@ void CRYPTO_chacha_20(
{0x61707865,0x3320646E,0x79622D32,0x6B206574};
kp = (unsigned *)key;
#if defined(__ARM_NEON__)
memcpy(np, nonce, 8);
memcpy(np, nonce, 12);
#endif
s0 = LOAD_ALIGNED(chacha_const);
s1 = LOAD(&((vec*)kp)[0]);
s2 = LOAD(&((vec*)kp)[1]);
s3 = (vec){
counter & 0xffffffff,
#if __ARM_NEON__ || defined(OPENSSL_X86)
0, /* can't right-shift 32 bits on a 32-bit system. */
#else
counter >> 32,
#endif
counter,
((uint32_t*)nonce)[0],
((uint32_t*)nonce)[1]
((uint32_t*)nonce)[1],
((uint32_t*)nonce)[2]
};
for (iters = 0; iters < inlen/(BPI*64); iters++)
@ -212,8 +208,8 @@ void CRYPTO_chacha_20(
x2 = chacha_const[2]; x3 = chacha_const[3];
x4 = kp[0]; x5 = kp[1]; x6 = kp[2]; x7 = kp[3];
x8 = kp[4]; x9 = kp[5]; x10 = kp[6]; x11 = kp[7];
x12 = counter+BPI*iters+(BPI-1); x13 = 0;
x14 = np[0]; x15 = np[1];
x12 = counter+BPI*iters+(BPI-1); x13 = np[0];
x14 = np[1]; x15 = np[2];
#endif
for (i = CHACHA_RNDS/2; i; i--)
{
@ -265,9 +261,9 @@ void CRYPTO_chacha_20(
op[10] = REVW_BE(REVW_BE(ip[10]) ^ (x10 + kp[6]));
op[11] = REVW_BE(REVW_BE(ip[11]) ^ (x11 + kp[7]));
op[12] = REVW_BE(REVW_BE(ip[12]) ^ (x12 + counter+BPI*iters+(BPI-1)));
op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13));
op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[0]));
op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[1]));
op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13 + np[0]));
op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[1]));
op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[2]));
s3 += ONE;
ip += 16;
op += 16;


@ -60,137 +60,138 @@
.thumb_func
.type CRYPTO_chacha_20_neon, %function
CRYPTO_chacha_20_neon:
@ args = 8, pretend = 0, frame = 152
@ args = 8, pretend = 0, frame = 160
@ frame_needed = 1, uses_anonymous_args = 0
push {r4, r5, r6, r7, r8, r9, r10, fp, lr}
mov r8, r3
mov r9, r3
vpush.64 {d8, d9, d10, d11, d12, d13, d14, d15}
mov r9, r2
mov r10, r2
ldr r4, .L91+16
mov fp, r0
mov r10, r1
mov lr, r8
mov fp, r1
mov r8, r9
.LPIC16:
add r4, pc
sub sp, sp, #156
sub sp, sp, #164
add r7, sp, #0
sub sp, sp, #112
add r6, r7, #144
str r0, [r7, #88]
add lr, r7, #148
str r0, [r7, #80]
str r1, [r7, #12]
str r2, [r7, #8]
ldmia r4, {r0, r1, r2, r3}
add r4, sp, #15
bic r4, r4, #15
ldr ip, [r7, #256]
str r4, [r7, #84]
ldr r6, [r7, #264]
str r4, [r7, #88]
mov r5, r4
adds r4, r4, #64
adds r5, r5, #80
str r8, [r7, #68]
add ip, r5, #80
str r9, [r7, #56]
stmia r4, {r0, r1, r2, r3}
movw r4, #43691
ldr r0, [ip] @ unaligned
ldr r0, [r6] @ unaligned
movt r4, 43690
ldr r1, [ip, #4] @ unaligned
ldr r3, [r7, #84]
ldr r2, [r8, #8] @ unaligned
mov r8, #0
stmia r6!, {r0, r1}
mov r6, r5
ldr r1, [lr, #4] @ unaligned
ldr r0, [lr] @ unaligned
vldr d24, [r3, #64]
vldr d25, [r3, #72]
ldr r3, [lr, #12] @ unaligned
str r5, [r7, #80]
stmia r5!, {r0, r1, r2, r3}
ldr r0, [lr, #16]! @ unaligned
ldr r2, [r7, #84]
umull r4, r5, r9, r4
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r9, #12] @ unaligned
str ip, [r7, #84]
stmia lr!, {r0, r1, r2}
mov lr, ip
ldr r1, [r9, #4] @ unaligned
ldr r2, [r9, #8] @ unaligned
ldr r0, [r9] @ unaligned
vldr d24, [r5, #64]
vldr d25, [r5, #72]
umull r4, r5, r10, r4
stmia ip!, {r0, r1, r2, r3}
ldr r0, [r8, #16]! @ unaligned
ldr r2, [r7, #88]
ldr r4, [r7, #268]
ldr r1, [r8, #4] @ unaligned
vldr d26, [r2, #80]
vldr d27, [r2, #88]
ldr r1, [lr, #4] @ unaligned
ldr r2, [lr, #8] @ unaligned
ldr r3, [lr, #12] @ unaligned
ldr r4, [r7, #260]
stmia r6!, {r0, r1, r2, r3}
ldr r3, [ip]
ldr r1, [r7, #84]
ldr r2, [ip, #4]
str r3, [r7, #64]
vldr d28, [r1, #80]
vldr d29, [r1, #88]
str r3, [r7, #136]
ldr r3, [r8, #12] @ unaligned
ldr r2, [r8, #8] @ unaligned
stmia lr!, {r0, r1, r2, r3}
ldr r3, [r6]
ldr r1, [r6, #4]
ldr r6, [r6, #8]
str r3, [r7, #68]
str r3, [r7, #132]
lsrs r3, r5, #7
str r6, [r7, #140]
str r6, [r7, #60]
ldr r6, [r7, #88]
str r4, [r7, #128]
str r2, [r7, #140]
str r8, [r7, #132]
str r2, [r7, #60]
str r1, [r7, #136]
str r1, [r7, #64]
vldr d28, [r6, #80]
vldr d29, [r6, #88]
vldr d22, [r7, #128]
vldr d23, [r7, #136]
beq .L26
mov r5, r6
lsls r2, r3, #8
ldr r5, [r1, #64]
sub r3, r2, r3, lsl #6
ldr r2, [r1, #68]
ldr r2, [r5, #68]
ldr r6, [r6, #64]
vldr d0, .L91
vldr d1, .L91+8
adds r4, r4, #2
str r5, [r7, #56]
str r2, [r7, #52]
ldr r5, [r1, #72]
ldr r2, [r1, #76]
str r2, [r7, #48]
ldr r2, [r5, #72]
str r3, [r7, #4]
str r5, [r7, #48]
str r6, [r7, #52]
str r2, [r7, #44]
mov r2, fp
str r4, [r7, #72]
adds r2, r4, #2
str r2, [r7, #72]
ldr r2, [r5, #76]
str fp, [r7, #76]
str r2, [r7, #40]
ldr r2, [r7, #80]
adds r3, r2, r3
str r10, [r7, #76]
str r3, [r7, #16]
.L4:
ldr r5, [r7, #68]
add r8, r7, #44
ldr r4, [r7, #72]
ldr r5, [r7, #56]
add r8, r7, #40
ldr r4, [r7, #68]
vadd.i32 q3, q11, q0
ldmia r8, {r8, r9, r10, fp}
vmov q8, q14 @ v4si
mov r1, r5
ldr r2, [r5, #4]
vmov q1, q13 @ v4si
vmov q8, q14 @ v4si
ldr r3, [r5]
vmov q1, q13 @ v4si
ldr r6, [r1, #28]
vmov q9, q12 @ v4si
ldr lr, [r5, #20]
vmov q2, q11 @ v4si
mov r0, r2
ldr r2, [r5, #8]
str r3, [r7, #108]
mov r3, r5
ldr ip, [r5, #16]
vmov q15, q14 @ v4si
mov r1, r2
ldr r2, [r5, #12]
ldr r5, [r5, #24]
vmov q5, q13 @ v4si
ldr r6, [r3, #28]
vmov q10, q12 @ v4si
ldr r3, [r7, #64]
str r5, [r7, #116]
movs r5, #10
str r6, [r7, #120]
str r4, [r7, #112]
movs r1, #10
ldr r4, [r7, #72]
vmov q2, q11 @ v4si
ldr lr, [r5, #20]
vmov q15, q14 @ v4si
str r3, [r7, #108]
vmov q5, q13 @ v4si
str r2, [r7, #116]
vmov q10, q12 @ v4si
ldr r2, [r5, #12]
ldr ip, [r5, #16]
ldr r3, [r7, #64]
ldr r5, [r5, #24]
str r6, [r7, #120]
str r1, [r7, #92]
ldr r6, [r7, #60]
str r4, [r7, #100]
ldr r1, [r7, #116]
ldr r4, [r7, #108]
str r8, [r7, #96]
mov r8, r10
ldr r4, [r7, #108]
mov r10, r9
ldr r9, [r7, #116]
str lr, [r7, #104]
mov r10, r9
mov lr, r3
str r5, [r7, #92]
movs r5, #0
mov r9, r5
str r6, [r7, #124]
str r5, [r7, #100]
b .L92
.L93:
.align 3
@ -213,25 +214,24 @@ CRYPTO_chacha_20_neon:
str r5, [r7, #116]
add r10, r10, r1
vrev32.16 q3, q3
eor lr, lr, r10
str r6, [r7, #108]
vadd.i32 q8, q8, q3
vrev32.16 q2, q2
vadd.i32 q15, q15, q2
mov fp, r3
ldr r3, [r7, #112]
ldr r3, [r7, #100]
veor q4, q8, q1
str r6, [r7, #112]
veor q6, q15, q5
add fp, fp, r2
eors r3, r3, r5
mov r5, r6
ldr r6, [r7, #100]
ldr r6, [r7, #112]
vshl.i32 q1, q4, #12
vshl.i32 q5, q6, #12
add fp, fp, r2
eors r6, r6, r5
ror r3, r3, #16
eors r6, r6, r5
eor lr, lr, r10
vsri.32 q1, q4, #20
ror lr, lr, #16
mov r5, r6
ldr r6, [r7, #124]
vsri.32 q5, q6, #20
@ -239,25 +239,26 @@ CRYPTO_chacha_20_neon:
eor r6, r6, fp
ror r5, r5, #16
vadd.i32 q9, q9, q1
add r9, r9, lr
ror lr, lr, #16
ror r3, r6, #16
ldr r6, [r7, #124]
vadd.i32 q10, q10, q5
str r3, [r7, #108]
add r9, r9, lr
veor q4, q9, q3
add ip, ip, r6
ldr r6, [r7, #104]
veor q6, q10, q2
eor r4, ip, r4
eor r1, r9, r1
str r3, [r7, #104]
vshl.i32 q3, q4, #8
eor r1, r9, r1
mov r8, r6
ldr r6, [r7, #120]
vshl.i32 q2, q6, #8
ror r4, r4, #20
add r6, r6, r3
vsri.32 q3, q4, #24
str r6, [r7, #104]
str r6, [r7, #100]
eors r2, r2, r6
ldr r6, [r7, #116]
vsri.32 q2, q6, #24
@ -268,7 +269,7 @@ CRYPTO_chacha_20_neon:
eor r0, r8, r0
vadd.i32 q15, q15, q2
mov r3, r6
ldr r6, [r7, #112]
ldr r6, [r7, #108]
veor q6, q4, q1
ror r0, r0, #20
str r3, [r7, #112]
@ -285,7 +286,7 @@ CRYPTO_chacha_20_neon:
ror r1, r1, #20
eors r5, r5, r6
vsri.32 q8, q6, #25
ldr r6, [r7, #108]
ldr r6, [r7, #104]
ror r3, r3, #24
ror r5, r5, #24
vsri.32 q1, q5, #25
@ -297,7 +298,7 @@ CRYPTO_chacha_20_neon:
vext.32 q8, q8, q8, #1
str ip, [r7, #124]
add ip, r5, r8
ldr r5, [r7, #104]
ldr r5, [r7, #100]
eor lr, r10, lr
ror r6, r6, #24
vext.32 q1, q1, q1, #1
@ -410,7 +411,7 @@ CRYPTO_chacha_20_neon:
veor q6, q15, q1
ldr r3, [r7, #116]
vshl.i32 q1, q4, #7
str r2, [r7, #112]
str r2, [r7, #100]
add r3, r3, r2
str r3, [r7, #120]
vshl.i32 q5, q6, #7
@ -423,7 +424,7 @@ CRYPTO_chacha_20_neon:
vsri.32 q5, q6, #25
ldr r3, [r7, #92]
ror r4, r4, #25
str r6, [r7, #100]
str r6, [r7, #112]
ror r0, r0, #25
subs r3, r3, #1
str r5, [r7, #104]
@ -437,308 +438,325 @@ CRYPTO_chacha_20_neon:
vext.32 q5, q5, q5, #3
vext.32 q1, q1, q1, #3
bne .L3
ldr r3, [r7, #80]
ldr r3, [r7, #84]
vadd.i32 q4, q12, q10
str r9, [r7, #116]
str r9, [r7, #92]
mov r9, r10
mov r10, r8
ldr r8, [r7, #96]
str lr, [r7, #96]
mov lr, r5
ldr r5, [r7, #56]
ldr r5, [r7, #52]
vadd.i32 q5, q13, q5
ldr r6, [r7, #76]
vadd.i32 q15, q14, q15
add fp, fp, r5
ldr r5, [r7, #52]
str r4, [r7, #108]
ldr r5, [r7, #48]
str r3, [r7, #104]
vadd.i32 q7, q14, q8
ldr r4, [r7, #112]
add r5, r10, r5
str r3, [r7, #112]
vadd.i32 q2, q11, q2
ldr r3, [r6, #12] @ unaligned
vadd.i32 q6, q12, q9
str r0, [r7, #92]
vadd.i32 q1, q13, q1
add r10, r10, r5
str r0, [r7, #36]
vadd.i32 q2, q11, q2
ldr r0, [r6] @ unaligned
vadd.i32 q6, q12, q9
ldr r5, [r7, #104]
vadd.i32 q1, q13, q1
str r1, [r7, #116]
vadd.i32 q11, q11, q0
str r1, [r7, #40]
str r2, [r7, #36]
vadd.i32 q3, q11, q3
ldr r1, [r6, #4] @ unaligned
vadd.i32 q11, q11, q0
str r2, [r7, #32]
vadd.i32 q3, q11, q3
ldr r2, [r6, #8] @ unaligned
str r5, [r7, #104]
vadd.i32 q11, q11, q0
ldr r5, [r7, #112]
ldr r10, [r7, #80]
str r4, [r7, #108]
ldr r4, [r7, #100]
vadd.i32 q11, q11, q0
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r0, [r7, #84]
ldr r2, [r7, #48]
ldr r3, [r7, #72]
vldr d20, [r0, #80]
vldr d21, [r0, #88]
add r9, r9, r2
ldr r2, [r7, #88]
ldr r3, [r7, #44]
ldr r5, [r7, #84]
vldr d20, [r2, #80]
vldr d21, [r2, #88]
add r3, r9, r3
str r3, [r7, #104]
veor q10, q10, q4
ldr r2, [r7, #44]
ldr r3, [r7, #40]
add r3, r8, r3
str r3, [r7, #100]
ldr r3, [r7, #72]
vstr d20, [r2, #80]
vstr d21, [r2, #88]
adds r1, r4, r3
str r1, [r7, #28]
add r2, r8, r2
str r2, [r7, #32]
vstr d20, [r0, #80]
vstr d21, [r0, #88]
ldmia r5!, {r0, r1, r2, r3}
ldr r4, [r7, #68]
ldr r5, [r7, #112]
ldr r8, [r7, #84]
add r5, r5, r4
ldr r4, [r7, #96]
str r5, [r7, #24]
ldr r5, [r7, #64]
add r4, r4, r5
ldr r5, [r7, #124]
ldr r5, [r7, #60]
str r4, [r7, #96]
ldr r4, [r7, #60]
add r5, r5, r4
ldr r4, [r7, #88]
str r5, [r7, #24]
mov r5, r10
ldr r4, [r7, #124]
add r4, r4, r5
str r4, [r7, #20]
ldr r4, [r7, #80]
mov r5, r8
str r0, [r4] @ unaligned
mov r0, r4
str r1, [r4, #4] @ unaligned
mov r8, r0
mov r4, r8
str r2, [r0, #8] @ unaligned
mov r4, r10
mov r8, r0
str r3, [r0, #12] @ unaligned
mov r9, r4
ldr r0, [r6, #16]! @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #76]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r3, [r7, #84]
mov r5, r8
ldr r3, [r7, #88]
vldr d20, [r3, #80]
vldr d21, [r3, #88]
veor q10, q10, q5
vstr d20, [r3, #80]
vstr d21, [r3, #88]
ldmia r4!, {r0, r1, r2, r3}
mov r4, r8
mov r4, r9
str r0, [r8, #16] @ unaligned
str r1, [r8, #20] @ unaligned
str r2, [r8, #24] @ unaligned
str r3, [r8, #28] @ unaligned
mov r8, r4
mov r8, r5
ldr r0, [r6, #32]! @ unaligned
str r10, [r7, #124]
mov r5, r9
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #76]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r2, [r7, #84]
vldr d16, [r2, #80]
vldr d17, [r2, #88]
mov r5, r8
ldr r1, [r7, #88]
vldr d16, [r1, #80]
vldr d17, [r1, #88]
veor q15, q8, q15
vstr d30, [r2, #80]
vstr d31, [r2, #88]
ldmia r10!, {r0, r1, r2, r3}
str r0, [r4, #32] @ unaligned
str r1, [r4, #36] @ unaligned
str r2, [r4, #40] @ unaligned
str r3, [r4, #44] @ unaligned
vstr d30, [r1, #80]
vstr d31, [r1, #88]
ldmia r4!, {r0, r1, r2, r3}
mov r4, r9
str r0, [r8, #32] @ unaligned
str r1, [r8, #36] @ unaligned
str r2, [r8, #40] @ unaligned
str r3, [r8, #44] @ unaligned
mov r8, r5
ldr r0, [r6, #48]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #76]
stmia r5!, {r0, r1, r2, r3}
ldr r1, [r7, #84]
stmia r4!, {r0, r1, r2, r3}
mov r4, r9
ldr r1, [r7, #88]
str r9, [r7, #112]
vldr d18, [r1, #80]
vldr d19, [r1, #88]
veor q9, q9, q2
vstr d18, [r1, #80]
vstr d19, [r1, #88]
ldr r3, [r7, #112]
ldr r5, [r7, #80]
mov r10, r3
ldmia r10!, {r0, r1, r2, r3}
str r0, [r4, #48] @ unaligned
str r1, [r4, #52] @ unaligned
str r2, [r4, #56] @ unaligned
str r3, [r4, #60] @ unaligned
ldmia r9!, {r0, r1, r2, r3}
str r0, [r5, #48] @ unaligned
str r1, [r5, #52] @ unaligned
str r2, [r5, #56] @ unaligned
str r3, [r5, #60] @ unaligned
ldr r0, [r6, #64]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #76]
stmia r5!, {r0, r1, r2, r3}
ldr r1, [r7, #84]
ldr r3, [r7, #112]
ldr r5, [r7, #80]
mov r9, r6
mov r6, r4
stmia r6!, {r0, r1, r2, r3}
mov r6, r4
ldr r1, [r7, #88]
vldr d18, [r1, #80]
vldr d19, [r1, #88]
veor q9, q9, q6
mov r10, r3
str r5, [r7, #20]
vstr d18, [r1, #80]
vstr d19, [r1, #88]
ldmia r10!, {r0, r1, r2, r3}
str r1, [r4, #68] @ unaligned
str r2, [r4, #72] @ unaligned
str r3, [r4, #76] @ unaligned
str r0, [r4, #64] @ unaligned
ldr r0, [r6, #80]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldmia r4!, {r0, r1, r2, r3}
mov r4, r6
str r3, [r5, #76] @ unaligned
mov r3, r9
str r2, [r5, #72] @ unaligned
str r0, [r5, #64] @ unaligned
str r1, [r5, #68] @ unaligned
mov r5, r4
ldr r0, [r3, #80]! @ unaligned
mov r9, r3
ldr r1, [r9, #4] @ unaligned
ldr r2, [r9, #8] @ unaligned
ldr r3, [r9, #12] @ unaligned
mov r9, r4
ldr r6, [r7, #76]
str r9, [r7, #124]
stmia r5!, {r0, r1, r2, r3}
ldr r1, [r7, #84]
ldr r3, [r7, #20]
ldr r5, [r7, #80]
mov r5, r8
ldr r1, [r7, #88]
vldr d18, [r1, #80]
vldr d19, [r1, #88]
veor q1, q9, q1
mov r10, r3
vstr d2, [r1, #80]
vstr d3, [r1, #88]
ldmia r10!, {r0, r1, r2, r3}
mov r10, r5
str r0, [r4, #80] @ unaligned
str r1, [r4, #84] @ unaligned
str r2, [r4, #88] @ unaligned
str r3, [r4, #92] @ unaligned
ldmia r4!, {r0, r1, r2, r3}
mov r4, r9
str r0, [r8, #80] @ unaligned
str r1, [r8, #84] @ unaligned
str r2, [r8, #88] @ unaligned
str r3, [r8, #92] @ unaligned
ldr r0, [r6, #96]! @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #76]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r3, [r7, #84]
stmia r4!, {r0, r1, r2, r3}
mov r4, r9
ldr r3, [r7, #88]
vldr d16, [r3, #80]
vldr d17, [r3, #88]
veor q8, q8, q7
vstr d16, [r3, #80]
vstr d17, [r3, #88]
ldmia r10!, {r0, r1, r2, r3}
str r0, [r4, #96] @ unaligned
str r1, [r4, #100] @ unaligned
str r2, [r4, #104] @ unaligned
str r3, [r4, #108] @ unaligned
ldmia r9!, {r0, r1, r2, r3}
str r0, [r5, #96] @ unaligned
str r1, [r5, #100] @ unaligned
str r2, [r5, #104] @ unaligned
str r3, [r5, #108] @ unaligned
ldr r0, [r6, #112]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
mov r6, r5
mov r6, r4
stmia r6!, {r0, r1, r2, r3}
ldr r3, [r7, #84]
mov r6, r5
ldr r3, [r7, #88]
vldr d16, [r3, #80]
vldr d17, [r3, #88]
veor q8, q8, q3
vstr d16, [r3, #80]
vstr d17, [r3, #88]
ldmia r5!, {r0, r1, r2, r3}
str r1, [r4, #116] @ unaligned
ldr r1, [r7, #76]
str r0, [r4, #112] @ unaligned
str r2, [r4, #120] @ unaligned
str r3, [r4, #124] @ unaligned
ldr r3, [r1, #128]
ldr r2, [r7, #104]
ldmia r4!, {r0, r1, r2, r3}
mov r4, r5
mov r8, r4
str r2, [r5, #120] @ unaligned
ldr r2, [r7, #76]
str r0, [r5, #112] @ unaligned
str r1, [r5, #116] @ unaligned
str r3, [r5, #124] @ unaligned
ldr r3, [r2, #128]
ldr r1, [r7, #104]
eor r3, fp, r3
str r3, [r4, #128]
ldr r3, [r1, #132]
eors r2, r2, r3
str r2, [r8, #132]
ldr r3, [r1, #136]
ldr r5, [r7, #68]
ldr r6, [r7, #32]
eor r3, r9, r3
str r3, [r4, #136]
ldr r3, [r1, #140]
ldr r0, [r7, #92]
eors r3, r3, r6
ldr r6, [r7, #108]
str r3, [r5, #128]
ldr r3, [r2, #132]
mov r5, r2
eor r3, r10, r3
str r3, [r6, #132]
ldr r3, [r2, #136]
mov r6, r5
eors r1, r1, r3
str r1, [r8, #136]
ldr r1, [r7, #56]
ldr r3, [r2, #140]
ldr r2, [r7, #100]
ldr r0, [r7, #108]
eors r3, r3, r2
str r3, [r4, #140]
ldr r3, [r5]
ldr r2, [r1, #144]
add r6, r6, r3
eors r2, r2, r6
ldr r3, [r1]
ldr r2, [r5, #144]
mov r8, r0
add r8, r8, r3
mov r5, r6
mov r3, r8
eors r2, r2, r3
str r2, [r4, #144]
ldr r2, [r5, #4]
ldr r3, [r1, #148]
add r0, r0, r2
ldr r3, [r6, #148]
ldr r2, [r1, #4]
ldr r6, [r7, #36]
eors r3, r3, r0
ldr r0, [r7, #40]
str r3, [r4, #148]
ldr r2, [r5, #8]
ldr r3, [r1, #152]
add r0, r0, r2
eors r3, r3, r0
str r3, [r4, #152]
ldr r2, [r5, #12]
mov r0, r4
ldr r3, [r1, #156]
mov r4, r1
add r6, r6, r2
mov r1, r0
eors r3, r3, r6
str r3, [r0, #156]
ldr r2, [r5, #16]
ldr r3, [r4, #160]
mov r6, r1
str r3, [r4, #148]
ldr r2, [r1, #8]
ldr r1, [r7, #116]
ldr r3, [r5, #152]
mov r8, r1
add r8, r8, r2
ldr r1, [r7, #32]
mov r2, r8
eors r3, r3, r2
str r3, [r4, #152]
mov r8, r4
ldr r2, [r6, #12]
ldr r3, [r5, #156]
add r1, r1, r2
eors r3, r3, r1
str r3, [r4, #156]
ldr r2, [r6, #16]
mov r1, r4
ldr r3, [r5, #160]
mov r4, r5
add ip, ip, r2
mov r5, r6
eor r3, ip, r3
str r3, [r1, #160]
ldr r2, [r5, #20]
ldr r2, [r6, #20]
ldr r3, [r4, #164]
add lr, lr, r2
ldr r2, [r7, #116]
ldr r2, [r7, #92]
eor r3, lr, r3
str r3, [r1, #164]
ldr r6, [r5, #24]
mov lr, r4
ldr r3, [r4, #168]
add r2, r2, r6
mov r6, r4
ldr r6, [r7, #120]
eors r3, r3, r2
str r3, [r1, #168]
ldr r5, [r5, #28]
mov r2, r1
ldr r3, [r4, #172]
ldr r0, [r7, #120]
add r0, r0, r5
ldr r5, [r7, #24]
eors r3, r3, r0
add r6, r6, r5
eors r3, r3, r6
str r3, [r1, #172]
ldr r3, [r7, #72]
ldr r4, [r4, #176]
ldr r1, [r7, #28]
eors r4, r4, r1
adds r1, r3, #3
str r4, [r2, #176]
ldr r3, [r7, #100]
ldr r0, [r7, #28]
ldr r5, [r7, #24]
eors r4, r4, r0
str r4, [r8, #176]
ldr r0, [lr, #180]
str r1, [r7, #72]
eors r3, r3, r0
mov r0, r3
mov r3, r2
str r0, [r2, #180]
adds r3, r3, #192
ldr r1, [lr, #184]
ldr r2, [r7, #96]
eors r0, r0, r5
str r0, [r8, #180]
ldr r1, [lr, #184]
ldr r4, [r7, #20]
eors r1, r1, r2
str r1, [r3, #-8]
str r1, [r8, #184]
ldr r2, [lr, #188]
mov r1, r6
adds r1, r1, #192
str r1, [r7, #76]
eors r2, r2, r5
str r2, [r3, #-4]
add r1, lr, #192
ldr r3, [r7, #72]
eors r2, r2, r4
str r2, [r8, #188]
ldr r2, [r7, #16]
str r3, [r7, #88]
adds r3, r3, #3
str r3, [r7, #72]
mov r3, r8
adds r3, r3, #192
str r1, [r7, #76]
cmp r2, r3
str r3, [r7, #80]
bne .L4
ldr r3, [r7, #12]
ldr r2, [r7, #4]
@ -757,8 +775,8 @@ CRYPTO_chacha_20_neon:
beq .L6
ldr r5, [r7, #12]
ldr r4, [r7, #16]
ldr r6, [r7, #84]
ldr lr, [r7, #80]
ldr r6, [r7, #88]
ldr lr, [r7, #84]
vldr d30, .L94
vldr d31, .L94+8
str fp, [r7, #120]
@ -964,7 +982,7 @@ CRYPTO_chacha_20_neon:
mov r9, r5
bhi .L88
vadd.i32 q12, q12, q10
ldr r3, [r7, #84]
ldr r3, [r7, #88]
vst1.64 {d24-d25}, [r3:128]
.L14:
ldr r3, [r7, #8]
@ -1001,7 +1019,7 @@ CRYPTO_chacha_20_neon:
movcs r1, ip
cmp r1, #0
beq .L17
ldr r5, [r7, #84]
ldr r5, [r7, #88]
cmp r1, #1
ldrb r0, [r0] @ zero_extendqisi2
add r3, r2, #1
@ -1136,7 +1154,7 @@ CRYPTO_chacha_20_neon:
ldr r5, [r7, #16]
cmp r6, #1
add r0, r1, r2
ldr r1, [r7, #84]
ldr r1, [r7, #88]
add r1, r1, r2
vld1.64 {d18-d19}, [r0:64]
add r2, r2, r5
@ -1174,7 +1192,7 @@ CRYPTO_chacha_20_neon:
add r3, r3, lr
beq .L1
.L19:
ldr r4, [r7, #84]
ldr r4, [r7, #88]
adds r2, r3, #1
ldr r1, [r7, #12]
cmp r2, r9
@ -1289,7 +1307,7 @@ CRYPTO_chacha_20_neon:
eor r1, r1, r0
strb r1, [r5, r2]
bls .L1
ldr r2, [r7, #84]
ldr r2, [r7, #88]
ldrb r1, [r2, r3] @ zero_extendqisi2
ldr r2, [r7, #12]
ldrb r2, [r2, r3] @ zero_extendqisi2
@ -1297,7 +1315,7 @@ CRYPTO_chacha_20_neon:
ldr r1, [r7, #16]
strb r2, [r1, r3]
.L1:
adds r7, r7, #156
adds r7, r7, #164
mov sp, r7
@ sp needed
vldm sp!, {d8-d15}
@ -1305,7 +1323,7 @@ CRYPTO_chacha_20_neon:
.L88:
ldr r5, [r7, #12]
vadd.i32 q12, q12, q10
ldr r4, [r7, #80]
ldr r4, [r7, #84]
cmp r9, #31
ldr r0, [r5] @ unaligned
ldr r1, [r5, #4] @ unaligned
@ -1313,7 +1331,7 @@ CRYPTO_chacha_20_neon:
ldr r2, [r5, #8] @ unaligned
ldr r3, [r5, #12] @ unaligned
stmia r6!, {r0, r1, r2, r3}
ldr r2, [r7, #84]
ldr r2, [r7, #88]
ldr r6, [r7, #16]
vldr d18, [r2, #80]
vldr d19, [r2, #88]
@ -1328,7 +1346,7 @@ CRYPTO_chacha_20_neon:
str r3, [r6, #12] @ unaligned
bhi .L89
vadd.i32 q13, q13, q15
ldr r3, [r7, #84]
ldr r3, [r7, #88]
vstr d26, [r3, #16]
vstr d27, [r3, #24]
b .L14
@ -1337,7 +1355,7 @@ CRYPTO_chacha_20_neon:
ldr r2, [r7, #12]
add r2, r2, r9
mov r5, r2
ldr r2, [r7, #84]
ldr r2, [r7, #88]
add r2, r2, r3
mov r3, r2
.L24:
@ -1347,17 +1365,18 @@ CRYPTO_chacha_20_neon:
eor r2, r2, r1
strb r2, [r4], #1
bne .L24
adds r7, r7, #156
adds r7, r7, #164
mov sp, r7
@ sp needed
vldm sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, fp, pc}
.L26:
str fp, [r7, #16]
ldr r3, [r7, #80]
str r3, [r7, #16]
b .L2
.L89:
mov r3, r5
ldr r4, [r7, #80]
ldr r4, [r7, #84]
ldr r0, [r3, #16]! @ unaligned
add lr, r1, #16
mov r5, r1
@ -1368,7 +1387,7 @@ CRYPTO_chacha_20_neon:
ldr r2, [r3, #8] @ unaligned
ldr r3, [r3, #12] @ unaligned
stmia r6!, {r0, r1, r2, r3}
ldr r2, [r7, #84]
ldr r2, [r7, #88]
vldr d18, [r2, #80]
vldr d19, [r2, #88]
veor q13, q9, q13
@ -1381,16 +1400,16 @@ CRYPTO_chacha_20_neon:
str r3, [lr, #12] @ unaligned
bhi .L90
vadd.i32 q8, q14, q8
ldr r3, [r7, #84]
ldr r3, [r7, #88]
vstr d16, [r3, #32]
vstr d17, [r3, #40]
b .L14
.L90:
ldr r3, [r7, #12]
add lr, r5, #32
ldr r4, [r7, #80]
ldr r4, [r7, #84]
vadd.i32 q8, q14, q8
ldr r5, [r7, #84]
ldr r5, [r7, #88]
vadd.i32 q11, q11, q3
ldr r0, [r3, #32]! @ unaligned
mov r6, r4

crypto/chacha/internal.h (new file, 53 lines)

@ -0,0 +1,53 @@
/* Copyright 2015 Brian Smith.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#ifndef OPENSSL_HEADER_CHACHA_INTERNAL_H
#define OPENSSL_HEADER_CHACHA_INTERNAL_H
#include "../internal.h"
#include "string.h"
#if defined(__cplusplus)
extern "C" {
#endif
/* CRYPTO_chacha_96_bit_nonce_from_64_bit_nonce formats a nonce for use with
* |CRYPTO_chacha_20| that is compatible with the formulation used in older
* versions. Previously |CRYPTO_chacha_20| used a 64-bit counter and took a
* 64-bit nonce, whereas the current version uses a 32-bit counter and a 96-bit
* nonce:
*
* Old: counter low || counter high || nonce low || nonce high
* New: 32-bit counter || nonce low || nonce mid || nonce high
* This: 32-bit counter || 0 || nonce low || nonce high
*
* This allows an implementation of the old construction to be implemented with
* |CRYPTO_chacha_20|, which implements the new construction, with the
* limitation that no more than 2^32 blocks may be encrypted. An implementation
* of a protocol that uses 96-bit counters as nonces cannot use this function,
* though, since this function shifts the nonce 32 bits. */
static inline void CRYPTO_chacha_96_bit_nonce_from_64_bit_nonce(
uint8_t out[12], const uint8_t in[8]) {
out[0] = 0;
out[1] = 0;
out[2] = 0;
out[3] = 0;
memcpy(out + 4, in, 8);
}
#if defined(__cplusplus)
}
#endif
#endif /* OPENSSL_HEADER_CHACHA_INTERNAL_H */
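A small standalone check of the layout described in the comment above: after the helper runs, bytes 0-3 of the 96-bit nonce (state word 13, the old counter-high position) are zero, and the old 64-bit nonce occupies bytes 4-11. The include path is an assumption.

#include <stdint.h>
#include <stdio.h>
#include "crypto/chacha/internal.h" /* assumed include path */

int main(void) {
  static const uint8_t nonce_64[8] = {0xa0, 0xa1, 0xa2, 0xa3,
                                      0xb0, 0xb1, 0xb2, 0xb3};
  uint8_t nonce_96[12];
  CRYPTO_chacha_96_bit_nonce_from_64_bit_nonce(nonce_96, nonce_64);
  /* Prints: 00 00 00 00 a0 a1 a2 a3 b0 b1 b2 b3 */
  for (int i = 0; i < 12; i++) {
    printf("%02x ", nonce_96[i]);
  }
  printf("\n");
  return 0;
}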


@ -22,10 +22,10 @@
#include <openssl/poly1305.h>
#include "internal.h"
#include "../chacha/internal.h"
#define POLY1305_TAG_LEN 16
#define CHACHA20_NONCE_LEN 8
struct aead_chacha20_poly1305_ctx {
unsigned char key[32];
@ -90,16 +90,15 @@ static void poly1305_update_with_length(poly1305_state *poly1305,
static int aead_chacha20_poly1305_seal(const EVP_AEAD_CTX *ctx, uint8_t *out,
size_t *out_len, size_t max_out_len,
const uint8_t *nonce,
const uint8_t *in, size_t in_len,
const uint8_t *ad, size_t ad_len) {
const uint8_t *nonce, const uint8_t *in,
size_t in_len, const uint8_t *ad,
size_t ad_len) {
const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state;
uint8_t poly1305_key[32] ALIGNED;
poly1305_state poly1305;
const uint64_t in_len_64 = in_len;
/* The underlying ChaCha implementation may not overflow the block
* counter into the second counter word. Therefore we disallow
/* |CRYPTO_chacha_20| uses a 32-bit block counter. Therefore we disallow
* individual operations that work on more than 256GB at a time.
* |in_len_64| is needed because, on 32-bit platforms, size_t is only
* 32-bits and this produces a warning because it's always false.
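The 256GB figure here is just the counter width worked out: 2^32 blocks of 64 bytes each is 2^38 bytes, i.e. 256GiB. A trivial check of the arithmetic:

#include <assert.h>
#include <stdint.h>

int main(void) {
  /* A 32-bit block counter can index at most 2^32 64-byte blocks. */
  uint64_t max_bytes = (UINT64_C(1) << 32) * 64;
  assert(max_bytes == UINT64_C(1) << 38);
  assert(max_bytes >> 30 == 256); /* 256 GiB */
  return 0;
}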
@ -120,13 +119,16 @@ static int aead_chacha20_poly1305_seal(const EVP_AEAD_CTX *ctx, uint8_t *out,
return 0;
}
uint8_t nonce_96[12];
CRYPTO_chacha_96_bit_nonce_from_64_bit_nonce(nonce_96, nonce);
memset(poly1305_key, 0, sizeof(poly1305_key));
CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key),
c20_ctx->key, nonce, 0);
c20_ctx->key, nonce_96, 0);
CRYPTO_poly1305_init(&poly1305, poly1305_key);
poly1305_update_with_length(&poly1305, ad, ad_len);
CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce, 1);
CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce_96, 1);
poly1305_update_with_length(&poly1305, out, in_len);
uint8_t tag[POLY1305_TAG_LEN] ALIGNED;
@ -138,9 +140,9 @@ static int aead_chacha20_poly1305_seal(const EVP_AEAD_CTX *ctx, uint8_t *out,
static int aead_chacha20_poly1305_open(const EVP_AEAD_CTX *ctx, uint8_t *out,
size_t *out_len, size_t max_out_len,
const uint8_t *nonce,
const uint8_t *in, size_t in_len,
const uint8_t *ad, size_t ad_len) {
const uint8_t *nonce, const uint8_t *in,
size_t in_len, const uint8_t *ad,
size_t ad_len) {
const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state;
uint8_t mac[POLY1305_TAG_LEN];
uint8_t poly1305_key[32] ALIGNED;
@ -153,8 +155,7 @@ static int aead_chacha20_poly1305_open(const EVP_AEAD_CTX *ctx, uint8_t *out,
return 0;
}
/* The underlying ChaCha implementation may not overflow the block
* counter into the second counter word. Therefore we disallow
/* |CRYPTO_chacha_20| uses a 32-bit block counter. Therefore we disallow
* individual operations that work on more than 256GB at a time.
* |in_len_64| is needed because, on 32-bit platforms, size_t is only
* 32-bits and this produces a warning because it's always false.
@ -165,6 +166,9 @@ static int aead_chacha20_poly1305_open(const EVP_AEAD_CTX *ctx, uint8_t *out,
return 0;
}
uint8_t nonce_96[12];
CRYPTO_chacha_96_bit_nonce_from_64_bit_nonce(nonce_96, nonce);
plaintext_len = in_len - c20_ctx->tag_len;
if (max_out_len < plaintext_len) {
@ -174,7 +178,7 @@ static int aead_chacha20_poly1305_open(const EVP_AEAD_CTX *ctx, uint8_t *out,
memset(poly1305_key, 0, sizeof(poly1305_key));
CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key),
c20_ctx->key, nonce, 0);
c20_ctx->key, nonce_96, 0);
CRYPTO_poly1305_init(&poly1305, poly1305_key);
poly1305_update_with_length(&poly1305, ad, ad_len);
@ -186,14 +190,14 @@ static int aead_chacha20_poly1305_open(const EVP_AEAD_CTX *ctx, uint8_t *out,
return 0;
}
CRYPTO_chacha_20(out, in, plaintext_len, c20_ctx->key, nonce, 1);
CRYPTO_chacha_20(out, in, plaintext_len, c20_ctx->key, nonce_96, 1);
*out_len = plaintext_len;
return 1;
}
static const EVP_AEAD aead_chacha20_poly1305 = {
32, /* key len */
CHACHA20_NONCE_LEN, /* nonce len */
8, /* nonce len */
POLY1305_TAG_LEN, /* overhead */
POLY1305_TAG_LEN, /* max tag length */
aead_chacha20_poly1305_init,
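Callers of the AEAD are unaffected by this change: the external nonce stays 8 bytes, and the widening to 96 bits happens internally. A hedged usage sketch against BoringSSL's |EVP_AEAD| API (a sketch for illustration, not code from this commit):

#include <openssl/aead.h>

/* Seal |in| under |key| with an 8-byte nonce; the 96-bit nonce used by
 * |CRYPTO_chacha_20| is derived inside the AEAD implementation. */
static int seal_example(uint8_t *out, size_t *out_len, size_t max_out_len,
                        const uint8_t key[32], const uint8_t nonce[8],
                        const uint8_t *in, size_t in_len) {
  EVP_AEAD_CTX ctx;
  if (!EVP_AEAD_CTX_init(&ctx, EVP_aead_chacha20_poly1305(), key, 32,
                         EVP_AEAD_DEFAULT_TAG_LENGTH, NULL)) {
    return 0;
  }
  int ok = EVP_AEAD_CTX_seal(&ctx, out, out_len, max_out_len, nonce, 8, in,
                             in_len, NULL /* no AD */, 0);
  EVP_AEAD_CTX_cleanup(&ctx);
  return ok;
}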


@ -23,6 +23,7 @@
#include <openssl/mem.h>
#include "internal.h"
#include "../chacha/internal.h"
#include "../internal.h"
@ -159,17 +160,21 @@ int RAND_bytes(uint8_t *buf, size_t len) {
if (todo > kMaxBytesPerCall) {
todo = kMaxBytesPerCall;
}
CRYPTO_chacha_20(buf, buf, todo, state->key,
(uint8_t *)&state->calls_used, 0);
uint8_t nonce[12];
CRYPTO_chacha_96_bit_nonce_from_64_bit_nonce(
nonce, (const uint8_t *)&state->calls_used);
CRYPTO_chacha_20(buf, buf, todo, state->key, nonce, 0);
buf += todo;
remaining -= todo;
state->calls_used++;
}
} else {
if (sizeof(state->partial_block) - state->partial_block_used < len) {
uint8_t nonce[12];
CRYPTO_chacha_96_bit_nonce_from_64_bit_nonce(
nonce, (const uint8_t *)&state->calls_used);
CRYPTO_chacha_20(state->partial_block, state->partial_block,
sizeof(state->partial_block), state->key,
(uint8_t *)&state->calls_used, 0);
sizeof(state->partial_block), state->key, nonce, 0);
state->partial_block_used = 0;
}
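Note that (const uint8_t *)&state->calls_used reads the counter in host byte order, so the derived nonce differs between little- and big-endian hosts. For illustration only, an explicitly little-endian encoding, equivalent on little-endian machines (|u64_to_le_bytes| is a hypothetical helper):

#include <stdint.h>

static void u64_to_le_bytes(uint8_t out[8], uint64_t v) {
  for (int i = 0; i < 8; i++) {
    out[i] = (uint8_t)(v >> (8 * i)); /* least-significant byte first */
  }
}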


@ -27,7 +27,7 @@ extern "C" {
* initial block counter is specified by |counter|. */
OPENSSL_EXPORT void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in,
size_t in_len, const uint8_t key[32],
const uint8_t nonce[8], size_t counter);
const uint8_t nonce[12], uint32_t counter);
#if defined(__cplusplus)
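
A minimal sketch of a direct call under the new prototype (header name assumed):

#include <openssl/chacha.h>

static void encrypt_example(uint8_t *out, const uint8_t *in, size_t in_len,
                            const uint8_t key[32], const uint8_t nonce[12]) {
  /* The 32-bit counter starts at 0 and advances once per 64-byte block. */
  CRYPTO_chacha_20(out, in, in_len, key, nonce, 0);
}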