Skip to content

Commit cfe2e31

Browse files
committed
OpenCL AES: Correction for supporting two or more AES contexts at once
This currently applies to AES-XTS, used by diskcryptor and truecrypt. Also separate Truecrypt kernel from the (potentially) shared PBKDF2-RIPE- MD160 code for good measure. Nvidia 550.144.03 bugs out on this for truecrypt so we revert that format to bitsliced AES (it doesn't matter for speed).
1 parent 4e9f5b4 commit cfe2e31

File tree

7 files changed

+146
-130
lines changed

7 files changed

+146
-130
lines changed

run/opencl/diskcryptor_aes_kernel.cl

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,7 @@
99
#define AES_SRC_TYPE MAYBE_CONSTANT
1010

1111
#include "pbkdf2_hmac_sha512_kernel.cl"
12-
13-
/*
14-
* AES_256_XTS uses two AES keys at once so need double the
15-
* shared memory.
16-
*/
17-
#define AES_SHARED_THREADS_DECREASED 1
18-
#if gpu_amd(DEVICE_INFO)
19-
#define AES_SHARED_THREADS (WARP_SIZE >> 2)
20-
#else
21-
#define AES_SHARED_THREADS (WARP_SIZE >> 1)
22-
#endif
23-
#include "opencl_aes.h"
12+
#include "opencl_aes_xts.h"
2413

2514
typedef struct {
2615
salt_t pbkdf2;

run/opencl/opencl_aes.h

Lines changed: 1 addition & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -313,74 +313,7 @@ INLINE void AES_cfb_decrypt(AES_SRC_TYPE void *_in,
313313
}
314314
}
315315

316-
INLINE void AES_256_XTS_first_sector(AES_SRC_TYPE uint *in, AES_DST_TYPE uint *out,
317-
AES_KEY_TYPE uchar *double_key,
318-
__local aes_local_t *lt1, __local aes_local_t *lt2)
319-
{
320-
uint tweak[4] = { 0 };
321-
uint buf[4];
322-
int i;
323-
AES_KEY akey1, akey2; akey1.lt = lt1; akey2.lt = lt2;
324-
325-
AES_set_decrypt_key(double_key, 256, &akey1);
326-
AES_set_encrypt_key(double_key + 32, 256, &akey2);
327-
328-
AES_encrypt((uchar*)tweak, (uchar*)tweak, &akey2);
329-
330-
for (i = 0; i < 4; i++)
331-
buf[i] = in[i] ^ tweak[i];
332-
333-
AES_decrypt((uchar*)buf, (uchar*)buf, &akey1);
334-
335-
for (i = 0; i < 4; i++)
336-
out[i] = buf[i] ^ tweak[i];
337-
}
338-
339-
INLINE void AES_256_XTS_DiskCryptor(AES_SRC_TYPE uchar *data, AES_DST_TYPE uchar *output,
340-
AES_KEY_TYPE uchar *double_key, int len,
341-
__local aes_local_t *lt1, __local aes_local_t *lt2)
342-
{
343-
uchar buf[16];
344-
int i, j, cnt;
345-
AES_KEY key1; key1.lt = lt1;
346-
AES_KEY key2; key2.lt = lt2;
347-
int bits = 256;
348-
uchar buffer[96];
349-
uchar *out = buffer;
350-
unsigned char tweak[16] = { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
351-
352-
AES_set_decrypt_key(double_key, bits, &key1);
353-
AES_set_encrypt_key(&double_key[bits / 8], bits, &key2);
354-
355-
// first aes tweak, we do it right over tweak
356-
AES_encrypt(tweak, tweak, &key2);
357-
358-
cnt = len / 16;
359-
for (j = 0;;) {
360-
for (i = 0; i < 16; ++i) buf[i] = data[i]^tweak[i];
361-
AES_decrypt(buf, out, &key1);
362-
for (i = 0; i < 16; ++i) out[i] ^= tweak[i];
363-
++j;
364-
if (j == cnt)
365-
break;
366-
else {
367-
unsigned char Cin, Cout;
368-
unsigned x;
369-
Cin = 0;
370-
for (x = 0; x < 16; ++x) {
371-
Cout = (tweak[x] >> 7) & 1;
372-
tweak[x] = ((tweak[x] << 1) + Cin) & 0xFF;
373-
Cin = Cout;
374-
}
375-
if (Cout)
376-
tweak[0] ^= 135; // GF_128_FDBK;
377-
}
378-
data += 16;
379-
out += 16;
380-
}
381-
382-
memcpy_macro(output, buffer, 96);
383-
}
316+
/* AES-XTS has its own file opencl_aes_xts.h */
384317

385318
#define N_WORDS (AES_BLOCK_SIZE / sizeof(unsigned long))
386319

run/opencl/opencl_aes_plain.h

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,22 +27,26 @@
2727
#define AES_LOCAL_TABLES 1
2828
#endif
2929

30+
/*
31+
* Formats using two or more keys at once (AES-XTS uses two) must set this. It
32+
* needs to be a power of two so is named and used as a shift.
33+
*/
34+
#ifndef AES_SIMULTANEOUS_CTX_SHIFT
35+
#define AES_SIMULTANEOUS_CTX_SHIFT 0
36+
#endif
37+
3038
/*
3139
* Even with 64K LDS, an AMD device can't fit exclusive tables to every thread
32-
* in a wavefront, so we have to decrease the number.
33-
* A format can force this if it uses two or more keys at once. (diskcryptor
34-
* does)
40+
* in a wavefront of 64 threads, so we have to decrease the number.
41+
* Also, the number of simultaneous AES contexts need to be considered per above.
3542
*/
36-
#ifndef AES_SHARED_THREADS_DECREASED
3743
#if SHARED_MEM_SIZE < (WARP_SIZE * (256*4 + 256) + 2*4 + 4)
38-
#define AES_SHARED_THREADS_DECREASED 1
39-
#define AES_SHARED_THREADS (WARP_SIZE >> 1)
44+
#define AES_SHARED_THREADS (WARP_SIZE >> (AES_SIMULTANEOUS_CTX_SHIFT + 1))
4045
#else
41-
#define AES_SHARED_THREADS WARP_SIZE
46+
#define AES_SHARED_THREADS (WARP_SIZE >> (AES_SIMULTANEOUS_CTX_SHIFT))
4247
#endif
43-
#endif /* AES_SHARED_THREADS_DECREASED */
4448

45-
#define AES_SHARED_THREADS_MASK (AES_SHARED_THREADS - 1)
49+
#define AES_SHARED_THREADS_MASK (AES_SHARED_THREADS - 1)
4650

4751
#include "opencl_aes_tables.h"
4852
#if AES_LOCAL_TABLES

run/opencl/opencl_aes_xts.h

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/*
2+
* AES OpenCL XTS functions
3+
*
4+
* Copyright (c) 2017-2025, magnum.
5+
*
6+
* This software is hereby released to the general public under
7+
* the following terms: Redistribution and use in source and binary
8+
* forms, with or without modification, are permitted.
9+
*/
10+
11+
#ifndef _OPENCL_AES_XTS_H_
12+
#define _OPENCL_AES_XTS_H_
13+
14+
#ifdef _OPENCL_AES_H_
15+
#error "opencl_aes_xts.h cannot be sourced after opencl_aes.h"
16+
#endif
17+
18+
/* Tell the AES code we use two contexts simultaneously */
19+
#define AES_SIMULTANEOUS_CTX_SHIFT 1
20+
#include "opencl_aes.h"
21+
22+
INLINE void AES_256_XTS_first_sector(AES_SRC_TYPE uint *in, AES_DST_TYPE uint *out,
23+
AES_KEY_TYPE uchar *double_key,
24+
__local aes_local_t *lt1, __local aes_local_t *lt2)
25+
{
26+
uint tweak[4] = { 0 };
27+
uint buf[4];
28+
int i;
29+
AES_KEY akey1, akey2; akey1.lt = lt1; akey2.lt = lt2;
30+
31+
AES_set_decrypt_key(double_key, 256, &akey1);
32+
AES_set_encrypt_key(double_key + 32, 256, &akey2);
33+
34+
AES_encrypt((uchar*)tweak, (uchar*)tweak, &akey2);
35+
36+
for (i = 0; i < 4; i++)
37+
buf[i] = in[i] ^ tweak[i];
38+
39+
AES_decrypt((uchar*)buf, (uchar*)buf, &akey1);
40+
41+
for (i = 0; i < 4; i++)
42+
out[i] = buf[i] ^ tweak[i];
43+
}
44+
45+
INLINE void AES_256_XTS_DiskCryptor(AES_SRC_TYPE uchar *data, AES_DST_TYPE uchar *output,
46+
AES_KEY_TYPE uchar *double_key, int len,
47+
__local aes_local_t *lt1, __local aes_local_t *lt2)
48+
{
49+
uchar buf[16];
50+
int i, j, cnt;
51+
AES_KEY key1; key1.lt = lt1;
52+
AES_KEY key2; key2.lt = lt2;
53+
int bits = 256;
54+
uchar buffer[96];
55+
uchar *out = buffer;
56+
unsigned char tweak[16] = { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
57+
58+
AES_set_decrypt_key(double_key, bits, &key1);
59+
AES_set_encrypt_key(&double_key[bits / 8], bits, &key2);
60+
61+
// first aes tweak, we do it right over tweak
62+
AES_encrypt(tweak, tweak, &key2);
63+
64+
cnt = len / 16;
65+
for (j = 0;;) {
66+
for (i = 0; i < 16; ++i) buf[i] = data[i]^tweak[i];
67+
AES_decrypt(buf, out, &key1);
68+
for (i = 0; i < 16; ++i) out[i] ^= tweak[i];
69+
++j;
70+
if (j == cnt)
71+
break;
72+
else {
73+
unsigned char Cin, Cout;
74+
unsigned x;
75+
Cin = 0;
76+
for (x = 0; x < 16; ++x) {
77+
Cout = (tweak[x] >> 7) & 1;
78+
tweak[x] = ((tweak[x] << 1) + Cin) & 0xFF;
79+
Cin = Cout;
80+
}
81+
if (Cout)
82+
tweak[0] ^= 135; // GF_128_FDBK;
83+
}
84+
data += 16;
85+
out += 16;
86+
}
87+
88+
memcpy_macro(output, buffer, 96);
89+
}
90+
91+
#endif /* _OPENCL_AES_XTS_H_ */

run/opencl/pbkdf2_ripemd160_kernel.cl renamed to run/opencl/pbkdf2_ripemd160.h

Lines changed: 1 addition & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* RIPEMD-160 implementation. Copyright (c) 2015, magnum
2+
* RIPEMD-160 implementation. Copyright (c) 2015-2025, magnum
33
* This software is hereby released to the general public under
44
* the following terms: Redistribution and use in source and binary
55
* forms, with or without modification, are permitted.
@@ -11,19 +11,6 @@
1111

1212
#include "opencl_misc.h"
1313
#include "opencl_ripemd.h"
14-
#define AES_SRC_TYPE __constant
15-
#define AES_DST_TYPE __global
16-
/*
17-
* AES_256_XTS uses two AES keys at once so need double the
18-
* shared memory. These two are only needed if we re-test with table AES
19-
*/
20-
#define AES_SHARED_THREADS_DECREASED 1
21-
#if gpu_amd(DEVICE_INFO)
22-
#define AES_SHARED_THREADS (WARP_SIZE >> 2)
23-
#else
24-
#define AES_SHARED_THREADS (WARP_SIZE >> 1)
25-
#endif
26-
#include "opencl_aes.h"
2714

2815
#define ITERATIONS 2000
2916

@@ -32,15 +19,6 @@ typedef struct {
3219
uchar v[KEYLEN];
3320
} pbkdf2_password;
3421

35-
typedef struct {
36-
uint v[16 / 4];
37-
} tc_hash;
38-
39-
typedef struct {
40-
uint salt[SALTLEN / 4];
41-
uint bin[(512 - 64) / 4];
42-
} tc_salt;
43-
4422
INLINE void preproc(__global const uchar *key, uint keylen, uint *state,
4523
uint padding)
4624
{
@@ -152,20 +130,3 @@ INLINE void pbkdf2(__global const uchar *pass, uint passlen,
152130
PUTCHAR(out, t, ((uchar*)tmp_out)[i]);
153131
}
154132
}
155-
156-
__kernel void tc_ripemd_aesxts(__global const pbkdf2_password *inbuffer,
157-
__global tc_hash *outbuffer,
158-
__constant tc_salt *salt)
159-
{
160-
__local aes_local_t lt1;
161-
__local aes_local_t lt2;
162-
uint idx = get_global_id(0);
163-
union {
164-
uint u32[64 / 4];
165-
uchar uc[64];
166-
} key;
167-
168-
pbkdf2(inbuffer[idx].v, inbuffer[idx].length, salt->salt, key.u32);
169-
170-
AES_256_XTS_first_sector(salt->bin, outbuffer[idx].v, key.uc, &lt1, &lt2);
171-
}

run/opencl/truecrypt_kernel.cl

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/*
2+
* Truecrypt implementation. Copyright (c) 2015-2025, magnum
3+
* This software is hereby released to the general public under
4+
* the following terms: Redistribution and use in source and binary
5+
* forms, with or without modification, are permitted.
6+
*/
7+
8+
#include "opencl_misc.h"
9+
#define AES_SRC_TYPE __constant
10+
#define AES_DST_TYPE __global
11+
#include "opencl_aes_xts.h"
12+
#include "pbkdf2_ripemd160.h"
13+
14+
typedef struct {
15+
uint v[16 / 4];
16+
} tc_hash;
17+
18+
typedef struct {
19+
uint salt[SALTLEN / 4];
20+
uint bin[(512 - 64) / 4];
21+
} tc_salt;
22+
23+
__kernel void tc_ripemd_aesxts(__global const pbkdf2_password *inbuffer,
24+
__global tc_hash *outbuffer,
25+
__constant tc_salt *salt)
26+
{
27+
__local aes_local_t lt1;
28+
__local aes_local_t lt2;
29+
uint idx = get_global_id(0);
30+
union {
31+
uint u32[64 / 4];
32+
uchar uc[64];
33+
} key;
34+
35+
pbkdf2(inbuffer[idx].v, inbuffer[idx].length, salt->salt, key.u32);
36+
37+
AES_256_XTS_first_sector(salt->bin, outbuffer[idx].v, key.uc, &lt1, &lt2);
38+
}

src/opencl_tc_fmt_plug.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ static void reset(struct db_main *db)
194194
(int)sizeof(inbuffer->v),
195195
(int)sizeof(currentsalt.salt),
196196
OUTLEN);
197-
opencl_init("$JOHN/opencl/pbkdf2_ripemd160_kernel.cl",
197+
opencl_init("$JOHN/opencl/truecrypt_kernel.cl",
198198
gpu_id, build_opts);
199199

200200
crypt_kernel = clCreateKernel(program[gpu_id], "tc_ripemd_aesxts",

0 commit comments

Comments
 (0)