summary | refs | log | tree | commit | diff
path: root/traces/add_threshold/traces-tbc.patch
blob: 4943c9c7ea24950388c0688f7eaefae3e3a9e73b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
diff --git a/src/add_threshold/cipher.c b/src/add_threshold/cipher.c
index 778a100..3b49db5 100644
--- a/src/add_threshold/cipher.c
+++ b/src/add_threshold/cipher.c
@@ -25,6 +25,8 @@ throughout the entire round function in order to avoid extra randomness
 generation to switch from 2 shares to 3 shares and vice versa.
 */
 
+#include "debug.h"
+
 #include <stdint.h>
 #include <string.h>
 
@@ -100,6 +102,8 @@ static void _state_init(
     uint8_t SHARES_1[BLOCK_BYTES];
     randombytes(sizeof(SHARES_0), SHARES_0);
     randombytes(sizeof(SHARES_1), SHARES_1);
+    debug_dump_buffer("SHARES_0", sizeof(SHARES_0), SHARES_0, 8);
+    debug_dump_buffer("SHARES_1", sizeof(SHARES_1), SHARES_1, 8);
 
     memcpy(X, SHARES_0, BLOCK_BYTES);
     memcpy(Y, SHARES_1, BLOCK_BYTES);
@@ -117,15 +121,25 @@ static void _compute_round_tweakeys(
     uint8_t RTK_Y[ROUNDS][ROUND_TWEAKEY_BYTES]
 )
 {
+    fprintf(DUMP, "computing %zu round sub-tweakeys\n", (size_t)ROUNDS);
+
     uint8_t TK_X[TWEAKEY_BYTES];
     uint8_t TK_Y[TWEAKEY_BYTES];
     tweakey_state_init(TK_X, TK_Y, key, tweak);
     tweakey_state_extract(TK_X, TK_Y, 0, RTK_X[0], RTK_Y[0]);
 
+    fprintf(DUMP, "    0\n");
+    debug_dump_buffer("RTK_X", ROUND_TWEAKEY_BYTES, RTK_X[0], 8);
+    debug_dump_buffer("RTK_Y", ROUND_TWEAKEY_BYTES, RTK_Y[0], 8);
+
     for (size_t i=1; i<ROUNDS; i++)
     {
         tweakey_state_update(TK_X, TK_Y);
+        debug_dump_buffer("TK_X", TWEAKEY_BYTES, TK_X, 8);
+        debug_dump_buffer("TK_Y", TWEAKEY_BYTES, TK_Y, 8);
         tweakey_state_extract(TK_X, TK_Y, i, RTK_X[i], RTK_Y[i]);
+        debug_dump_buffer("RTK_X", ROUND_TWEAKEY_BYTES, RTK_X[i], 8);
+        debug_dump_buffer("RTK_Y", ROUND_TWEAKEY_BYTES, RTK_Y[i], 8);
     }
 }
 
@@ -138,6 +152,12 @@ static void _nonlinear_layer(
     const uint8_t RTK_Y[ROUND_TWEAKEY_BYTES]
 )
 {
+    fprintf(DUMP, "        nonlinear layer\n");
+
+    debug_dump_buffer("X", BLOCK_BYTES, X, 12);
+    debug_dump_buffer("Y", BLOCK_BYTES, Y, 12);
+    debug_dump_buffer("Z", BLOCK_BYTES, Z, 12);
+
     uint8_t x_hi, y_hi, z_hi;   // High nibbles for the Feistel network
     uint8_t x_lo, y_lo, z_lo;   // Low nibbles for the Feistel network
     uint8_t tmp0, tmp1, tmp2;
@@ -152,9 +172,14 @@ static void _nonlinear_layer(
         TMP_Y[j] = Y[j] ^ RTK_Y[j];
     }
 
+    debug_dump_buffer("Xj XOR RTK_Xj", sizeof(TMP_X), TMP_X, 12);
+    debug_dump_buffer("Yj XOR RTK_Yj", sizeof(TMP_Y), TMP_Y, 12);
+
     // Threshold Implementation of the 8-bit S-box
     for (size_t j=0; j<ROUND_TWEAKEY_BYTES; j++)
     {
+        fprintf(DUMP, "        S-box (%zu/%zu)\n", j+1, (size_t)ROUND_TWEAKEY_BYTES);
+
         // Decomposition into nibbles
         x_hi = TMP_X[j] >> 4;
         x_lo = TMP_X[j] & 0xf;
@@ -162,20 +187,54 @@ static void _nonlinear_layer(
         y_lo = TMP_Y[j] & 0xf;
         z_hi = Z[j] >> 4;
         z_lo = Z[j] & 0xf;
+
+        fprintf(DUMP, "            x_hi: %u\n", x_hi);
+        fprintf(DUMP, "            x_lo: %u\n", x_lo);
+        fprintf(DUMP, "            y_hi: %u\n", y_hi);
+        fprintf(DUMP, "            y_lo: %u\n", y_lo);
+        fprintf(DUMP, "            z_hi: %u\n", z_hi);
+        fprintf(DUMP, "            z_lo: %u\n", z_lo);
+
         // First 4-bit S-box
+        fprintf(DUMP, "            First 4-bit S-box\n");
+
         tmp0 = G[(y_lo&7)>>1][z_lo];
         tmp1 = G[(z_lo&7)>>1][x_lo];
         tmp2 = G[(x_lo&7)>>1][y_lo];
         x_hi ^= F[tmp1][tmp2];
         y_hi ^= F[tmp2][tmp0];
         z_hi ^= F[tmp0][tmp1];
+
+        fprintf(DUMP, "            tmp0: %u\n", tmp0);
+        fprintf(DUMP, "            tmp1: %u\n", tmp1);
+        fprintf(DUMP, "            tmp2: %u\n", tmp2);
+        fprintf(DUMP, "            x_hi: %u\n", x_hi);
+        fprintf(DUMP, "            y_hi: %u\n", y_hi);
+        fprintf(DUMP, "            z_hi: %u\n", z_hi);
+
         // Second 4-bit S-box
+        fprintf(DUMP, "            Second 4-bit S-box\n");
+
         tmp0 = P[Q[y_hi&3 ^ (y_hi&8)>>1][z_hi]];
         tmp1 = P[Q[z_hi&3 ^ (z_hi&8)>>1][x_hi]];
         tmp2 = P[Q[x_hi&3 ^ (x_hi&8)>>1][y_hi]];
         x_lo ^= Q[tmp1&3 ^ (tmp1&8)>>1][tmp2];
         y_lo ^= Q[tmp2&3 ^ (tmp2&8)>>1][tmp0];
         z_lo ^= Q[tmp0&3 ^ (tmp0&8)>>1][tmp1];
+
+        fprintf(DUMP, "            y_hi&3 ^ (y_hi&8)>>1: %u\n", y_hi&3 ^ (y_hi&8)>>1);
+        fprintf(DUMP, "            z_hi&3 ^ (z_hi&8)>>1: %u\n", z_hi&3 ^ (z_hi&8)>>1);
+        fprintf(DUMP, "            x_hi&3 ^ (x_hi&8)>>1: %u\n", x_hi&3 ^ (x_hi&8)>>1);
+        fprintf(DUMP, "            Q[y_hi&3 ^ (y_hi&8)>>1][z_hi]: %u\n", Q[y_hi&3 ^ (y_hi&8)>>1][z_hi]);
+        fprintf(DUMP, "            Q[z_hi&3 ^ (z_hi&8)>>1][x_hi]: %u\n", Q[z_hi&3 ^ (z_hi&8)>>1][x_hi]);
+        fprintf(DUMP, "            Q[x_hi&3 ^ (x_hi&8)>>1][y_hi]: %u\n", Q[x_hi&3 ^ (x_hi&8)>>1][y_hi]);
+        fprintf(DUMP, "            tmp0: %u\n", tmp0);
+        fprintf(DUMP, "            tmp1: %u\n", tmp1);
+        fprintf(DUMP, "            tmp2: %u\n", tmp2);
+        fprintf(DUMP, "            x_lo: %u\n", x_lo);
+        fprintf(DUMP, "            y_lo: %u\n", y_lo);
+        fprintf(DUMP, "            z_lo: %u\n", z_lo);
+
         // Third 4-bit S-box
         tmp0 = G[(y_lo&7)>>1][z_lo] ^ 1;
         tmp1 = G[(z_lo&7)>>1][x_lo];
@@ -183,12 +242,28 @@ static void _nonlinear_layer(
         x_hi ^= F[tmp1][tmp2];
         y_hi ^= F[tmp2][tmp0];
         z_hi ^= F[tmp0][tmp1];
+
+        fprintf(DUMP, "            tmp0: %u\n", tmp0);
+        fprintf(DUMP, "            tmp1: %u\n", tmp1);
+        fprintf(DUMP, "            tmp2: %u\n", tmp2);
+        fprintf(DUMP, "            x_hi: %u\n", x_hi);
+        fprintf(DUMP, "            y_hi: %u\n", y_hi);
+        fprintf(DUMP, "            z_hi: %u\n", z_hi);
+
         // Build bytes from nibbles
         TMP_X[j] = (x_hi << 4 | x_lo);
         TMP_Y[j] = (y_hi << 4 | y_lo);
         TMP_Z[j] = (z_hi << 4 | z_lo);
+
+        debug_dump_buffer("TMP_X", sizeof(TMP_X), TMP_X, 12);
+        debug_dump_buffer("TMP_Y", sizeof(TMP_Y), TMP_Y, 12);
+        debug_dump_buffer("TMP_Z", sizeof(TMP_Z), TMP_Z, 12);
     }
 
+    debug_dump_buffer("TMP_X (post-S-box)", sizeof(TMP_X), TMP_X, 12);
+    debug_dump_buffer("TMP_Y (post-S-box)", sizeof(TMP_Y), TMP_Y, 12);
+    debug_dump_buffer("TMP_Z (post-S-box)", sizeof(TMP_Z), TMP_Z, 12);
+
     for (size_t j=0; j<8; j++)
     {
         size_t dest_j = 15-j;
@@ -196,10 +271,16 @@ static void _nonlinear_layer(
         Y[dest_j] ^= TMP_Y[j];
         Z[dest_j] ^= TMP_Z[j];
     }
+
+    debug_dump_buffer("X (post-XOR)", BLOCK_BYTES, X, 12);
+    debug_dump_buffer("Y (post-XOR)", BLOCK_BYTES, Y, 12);
+    debug_dump_buffer("Z (post-XOR)", BLOCK_BYTES, Z, 12);
 }
 
 static void _linear_layer(uint8_t X[BLOCK_BYTES])
 {
+    fprintf(DUMP, "        linear layer\n");
+
     X[15] ^= X[1];
     X[15] ^= X[2];
     X[15] ^= X[3];
@@ -214,6 +295,8 @@ static void _linear_layer(uint8_t X[BLOCK_BYTES])
     X[11] ^= X[7];
     X[10] ^= X[7];
     X[9]  ^= X[7];
+
+    debug_dump_buffer("X", BLOCK_BYTES, X, 12);
 }
 
 static void _permutation_layer(uint8_t X[BLOCK_BYTES], permutation p)
@@ -223,6 +306,8 @@ static void _permutation_layer(uint8_t X[BLOCK_BYTES], permutation p)
         return;
     }
 
+    fprintf(DUMP, "        permutation layer\n");
+
     uint8_t X_old[BLOCK_BYTES];
     memcpy(X_old, X, BLOCK_BYTES);
 
@@ -232,6 +317,8 @@ static void _permutation_layer(uint8_t X[BLOCK_BYTES], permutation p)
     {
         X[pi[j]] = X_old[j];
     }
+
+    debug_dump_buffer("X", BLOCK_BYTES, X, 12);
 }
 
 static void _one_round_egfn(
@@ -270,11 +357,15 @@ void lilliput_tbc_encrypt(
     _compute_round_tweakeys(key, tweak, RTK_X, RTK_Y);
 
 
+    fprintf(DUMP, "running EGFN %zu times\n", (size_t)ROUNDS);
+
     for (size_t i=0; i<ROUNDS-1; i++)
     {
+        fprintf(DUMP, "    round %zu\n", (size_t)i);
         _one_round_egfn(X, Y, Z, RTK_X[i], RTK_Y[i], PERMUTATION_ENCRYPTION);
     }
 
+    fprintf(DUMP, "    round %zu\n", (size_t)(ROUNDS-1));
     _one_round_egfn(X, Y, Z, RTK_X[ROUNDS-1], RTK_Y[ROUNDS-1], PERMUTATION_NONE);
 
 
diff --git a/src/add_threshold/random.c b/src/add_threshold/random.c
index a966a8e..8d5f2cc 100644
--- a/src/add_threshold/random.c
+++ b/src/add_threshold/random.c
@@ -21,6 +21,8 @@ This file provides a system-specific function to generate random bytes.
 
 #define _GNU_SOURCE
 
+#include "debug.h"
+
 #include <stddef.h>
 #include <stdint.h>
 
@@ -32,5 +34,6 @@ This file provides a system-specific function to generate random bytes.
 
 void randombytes(size_t nb, uint8_t out[nb])
 {
-    syscall(SYS_getrandom, out, nb, 0);
+    for (size_t i=0; i<nb; i++)
+        out[i] = i;
 }
diff --git a/src/add_threshold/tweakey.c b/src/add_threshold/tweakey.c
index 7822564..e1abbb6 100644
--- a/src/add_threshold/tweakey.c
+++ b/src/add_threshold/tweakey.c
@@ -20,6 +20,8 @@ This file provides a first-order threshold implementation of Lilliput-TBC's
 tweakey schedule, where the tweak and the key are split into two shares.
 */
 
+#include "debug.h"
+
 #include <stdint.h>
 #include <string.h>
 
@@ -43,6 +45,7 @@ void tweakey_state_init(
 {
     uint8_t SHARES_0[KEY_BYTES];
     randombytes(sizeof(SHARES_0), SHARES_0);
+    debug_dump_buffer("SHARES_0", sizeof(SHARES_0), SHARES_0, 8);
 
     memcpy(TK_Y, SHARES_0, KEY_BYTES);
     memcpy(TK_X, tweak, TWEAK_BYTES);
@@ -68,20 +71,32 @@ void tweakey_state_extract(
     {
         const uint8_t *TKj_X = TK_X + j*LANE_BYTES;
 
+        fprintf(DUMP, "        XORing lane %zu/%zu (RTK_X)\n", 1+j, (size_t)LANES_NB);
+        debug_dump_buffer("RTK_X", ROUND_TWEAKEY_BYTES, round_tweakey_X, 12);
+        debug_dump_buffer("lane[j]", LANE_BYTES, TKj_X, 12);
+
         for (size_t k=0; k<LANE_BYTES; k++)
         {
             round_tweakey_X[k] ^= TKj_X[k];
         }
+
+        debug_dump_buffer("=> RTK_X", ROUND_TWEAKEY_BYTES, round_tweakey_X, 12);
     }
 
     for (size_t j=0; j<KEY_LANES_NB; j++)
     {
         const uint8_t *TKj_Y = TK_Y + j*LANE_BYTES;
 
+        fprintf(DUMP, "        XORing lane %zu/%zu (RTK_Y)\n", 1+j, (size_t)LANES_NB);
+        debug_dump_buffer("RTK_Y", ROUND_TWEAKEY_BYTES, round_tweakey_Y, 12);
+        debug_dump_buffer("lane[j]", LANE_BYTES, TKj_Y, 12);
+
         for (size_t k=0; k<LANE_BYTES; k++)
         {
             round_tweakey_Y[k] ^= TKj_Y[k];
         }
+
+        debug_dump_buffer("=> RTK_Y", ROUND_TWEAKEY_BYTES, round_tweakey_Y, 12);
     }
 
     round_tweakey_X[0] ^= round_constant;
@@ -100,6 +115,10 @@ static const matrix_multiplication ALPHAS[7] = {
     _multiply_MR3
 };
 
+static char const * const ALPHAS_STR[7] = {
+    "M", "M²", "M³", "M⁴", "MR", "MR²", "MR³"
+};
+
 
 void tweakey_state_update(uint8_t TK_X[TWEAKEY_BYTES], uint8_t TK_Y[KEY_BYTES])
 {
@@ -111,6 +130,10 @@ void tweakey_state_update(uint8_t TK_X[TWEAKEY_BYTES], uint8_t TK_Y[KEY_BYTES])
         memcpy(TKj_old_X, TKj_X, LANE_BYTES);
 
         ALPHAS[j](TKj_old_X, TKj_X);
+
+        fprintf(DUMP, "        multiplying lane %zu/%zu by %s\n", 1+j, (size_t)LANES_NB, ALPHAS_STR[j]);
+        debug_dump_buffer("TK_j_X^i-1", LANE_BYTES, TKj_old_X, 12);
+        debug_dump_buffer("TK_j_X^i", LANE_BYTES, TKj_X, 12);
     }
 
     for (size_t j=0; j<KEY_LANES_NB; j++)
@@ -125,5 +148,11 @@ void tweakey_state_update(uint8_t TK_X[TWEAKEY_BYTES], uint8_t TK_Y[KEY_BYTES])
 
         ALPHAS[j + TWEAK_LANES_NB](TKj_X_old, TKj_X);
         ALPHAS[j + TWEAK_LANES_NB](TKj_Y_old, TKj_Y);
+
+        fprintf(DUMP, "        multiplying lane %zu/%zu by %s\n", 1+j + TWEAK_LANES_NB, (size_t)LANES_NB, ALPHAS_STR[j + TWEAK_LANES_NB]);
+        debug_dump_buffer("TK_j_X^i-1", LANE_BYTES, TKj_X_old, 12);
+        debug_dump_buffer("TK_j_X^i", LANE_BYTES, TKj_X, 12);
+        debug_dump_buffer("TK_j_Y^i-1", LANE_BYTES, TKj_Y_old, 12);
+        debug_dump_buffer("TK_j_Y^i", LANE_BYTES, TKj_Y, 12);
     }
 }