diff options
| author | Kévin Le Gouguec <kevin.legouguec@airbus.com> | 2019-03-20 15:46:12 +0100 |
|---|---|---|
| committer | Kévin Le Gouguec <kevin.legouguec@airbus.com> | 2019-03-20 15:46:12 +0100 |
| commit | d26dfcef1bca5d86ce9042b78605a399b6d74423 (patch) | |
| tree | 71cc45dfb0f3925fe1c56b8a2f40dc27004b6037 /src/add_felicsref | |
| parent | 3ffe28672860a63fd95ed9e97601f30258ea2bdb (diff) | |
| download | lilliput-ae-implem-d26dfcef1bca5d86ce9042b78605a399b6d74423.tar.xz | |
Ajout de l'implémentation "FELICS"
Semblable en tout point à l'implémentation de référence, sauf pour des
optimisations manuelles dans tweakey.c.
Les gains sont significatifs même si surprenants :
Lilliput-I-128 on AVR (vref with -O3)
code_size: -3.21% (7420 ↘ 7182)
code_ram: -2.08% (530 ↘ 519)
code_time: -26.13% (176922 ↘ 130701)
Lilliput-I-192 on AVR (vref with -O3)
code_size: -3.66% (7550 ↘ 7274)
code_ram: -1.90% (578 ↘ 567)
code_time: -28.34% (228210 ↘ 163530)
Lilliput-I-256 on AVR (vref with -O3)
code_size: -4.05% (7698 ↘ 7386)
code_ram: -1.71% (642 ↘ 631)
code_time: -29.87% (301863 ↘ 211704)
Lilliput-II-128 on AVR (vref with -O3)
code_size: -3.04% (6704 ↘ 6500)
code_ram: -2.94% (511 ↘ 496)
code_time: -25.97% (181884 ↘ 134648)
Lilliput-II-192 on AVR (vref with -O3)
code_size: -3.56% (6682 ↘ 6444)
code_ram: -1.97% (559 ↘ 548)
code_time: -26.30% (264608 ↘ 195028)
Lilliput-II-256 on AVR (vref with -O3)
code_size: -4.06% (6804 ↘ 6528)
code_ram: -1.77% (623 ↘ 612)
code_time: -28.47% (354220 ↘ 253368)
Lilliput-I-128 on MSP (vref with -O3)
code_time: -17.72% (153285 ↘ 126129)
Lilliput-I-192 on MSP (vref with -O3)
code_size: -1.02% (8466 ↘ 8380)
code_time: -19.77% (199203 ↘ 159828)
Lilliput-I-256 on MSP (vref with -O3)
code_time: -20.90% (268416 ↘ 212328)
Lilliput-II-128 on MSP (vref with -O3)
code_size: -2.49% (6336 ↘ 6178)
code_time: -13.25% (172179 ↘ 149363)
Lilliput-II-192 on MSP (vref with -O3)
code_size: -1.22% (6406 ↘ 6328)
code_time: -17.93% (227943 ↘ 187063)
Lilliput-II-256 on MSP (vref with -O3)
code_size: -1.30% (6600 ↘ 6514)
code_time: -19.98% (307751 ↘ 246251)
Lilliput-I-128 on ARM (vref with -O3)
code_time: -16.94% (104944 ↘ 87170)
Lilliput-I-192 on ARM (vref with -O3)
code_time: -18.41% (132736 ↘ 108295)
Lilliput-I-256 on ARM (vref with -O3)
code_time: -18.74% (175979 ↘ 143001)
Lilliput-II-128 on ARM (vref with -O3)
code_time: -17.63% (114004 ↘ 93907)
Lilliput-II-192 on ARM (vref with -O3)
code_time: -17.55% (157405 ↘ 129780)
Lilliput-II-256 on ARM (vref with -O3)
code_time: -18.44% (206440 ↘ 168382)
Lilliput-I-128 on PC (vref with -O3)
code_time: -11.43% (11744 ↘ 10402)
Lilliput-I-192 on PC (vref with -O3)
code_time: -10.54% (14593 ↘ 13055)
Lilliput-I-256 on PC (vref with -O3)
code_time: -11.80% (18856 ↘ 16631)
Lilliput-II-128 on PC (vref with -O3)
code_size: -1.02% (7421 ↘ 7345)
code_time: -9.11% (13080 ↘ 11889)
Lilliput-II-192 on PC (vref with -O3)
code_time: -10.51% (16809 ↘ 15043)
Lilliput-II-256 on PC (vref with -O3)
code_time: -10.96% (21970 ↘ 19561)
Diffstat (limited to 'src/add_felicsref')
| l--------- | src/add_felicsref/cipher.c | 1 | ||||
| l--------- | src/add_felicsref/cipher.h | 1 | ||||
| l--------- | src/add_felicsref/constants.h | 1 | ||||
| l--------- | src/add_felicsref/implem.mk | 1 | ||||
| l--------- | src/add_felicsref/lilliput-ae-utils.h | 1 | ||||
| l--------- | src/add_felicsref/lilliput-ae.h | 1 | ||||
| l--------- | src/add_felicsref/lilliput-i.c | 1 | ||||
| l--------- | src/add_felicsref/lilliput-ii.c | 1 | ||||
| -rw-r--r-- | src/add_felicsref/tweakey.c | 213 | ||||
| l--------- | src/add_felicsref/tweakey.h | 1 |
10 files changed, 222 insertions, 0 deletions
diff --git a/src/add_felicsref/cipher.c b/src/add_felicsref/cipher.c new file mode 120000 index 0000000..a2ac6a3 --- /dev/null +++ b/src/add_felicsref/cipher.c @@ -0,0 +1 @@ +../ref/cipher.c
\ No newline at end of file diff --git a/src/add_felicsref/cipher.h b/src/add_felicsref/cipher.h new file mode 120000 index 0000000..eab258b --- /dev/null +++ b/src/add_felicsref/cipher.h @@ -0,0 +1 @@ +../ref/cipher.h
\ No newline at end of file diff --git a/src/add_felicsref/constants.h b/src/add_felicsref/constants.h new file mode 120000 index 0000000..67df0f3 --- /dev/null +++ b/src/add_felicsref/constants.h @@ -0,0 +1 @@ +../ref/constants.h
\ No newline at end of file diff --git a/src/add_felicsref/implem.mk b/src/add_felicsref/implem.mk new file mode 120000 index 0000000..eb789fb --- /dev/null +++ b/src/add_felicsref/implem.mk @@ -0,0 +1 @@ +../ref/implem.mk
\ No newline at end of file diff --git a/src/add_felicsref/lilliput-ae-utils.h b/src/add_felicsref/lilliput-ae-utils.h new file mode 120000 index 0000000..b46625b --- /dev/null +++ b/src/add_felicsref/lilliput-ae-utils.h @@ -0,0 +1 @@ +../ref/lilliput-ae-utils.h
\ No newline at end of file diff --git a/src/add_felicsref/lilliput-ae.h b/src/add_felicsref/lilliput-ae.h new file mode 120000 index 0000000..66c8314 --- /dev/null +++ b/src/add_felicsref/lilliput-ae.h @@ -0,0 +1 @@ +../ref/lilliput-ae.h
\ No newline at end of file diff --git a/src/add_felicsref/lilliput-i.c b/src/add_felicsref/lilliput-i.c new file mode 120000 index 0000000..46688d4 --- /dev/null +++ b/src/add_felicsref/lilliput-i.c @@ -0,0 +1 @@ +../ref/lilliput-i.c
\ No newline at end of file diff --git a/src/add_felicsref/lilliput-ii.c b/src/add_felicsref/lilliput-ii.c new file mode 120000 index 0000000..09abf10 --- /dev/null +++ b/src/add_felicsref/lilliput-ii.c @@ -0,0 +1 @@ +../ref/lilliput-ii.c
\ No newline at end of file diff --git a/src/add_felicsref/tweakey.c b/src/add_felicsref/tweakey.c new file mode 100644 index 0000000..635c179 --- /dev/null +++ b/src/add_felicsref/tweakey.c @@ -0,0 +1,213 @@ +/* +Implementation of the Lilliput-AE tweakable block cipher. + +Authors, hereby denoted as "the implementer": + Kévin Le Gouguec, + 2019. + +For more information, feedback or questions, refer to our website: +https://paclido.fr/lilliput-ae + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +This file provides an implementation of Lilliput-TBC's tweakey schedule, +similar to the reference implementation save for a few manual optimizations: + +- unused multiplication functions were removed using preprocessor + conditionals based on the number of lanes; + +- the loop over an array of function pointers was unrolled. + +These handmade optimizations have been found to significantly decrease code +size and execution time on GCC versions used in the FELICS framework. + +This suggests that the compiler does not detect dead code nor does it +recognize unrolling opportunities, despite the multiplication functions +being static and thus limited in scope to the compilation unit. 
+*/ + +#include <stdint.h> +#include <string.h> + +#include "constants.h" +#include "tweakey.h" + + +#define LANE_BITS 64 +#define LANE_BYTES (LANE_BITS/8) +#define LANES_NB (TWEAKEY_BYTES/LANE_BYTES) + +/* Fill the tweakey state TK: TWEAK_BYTES bytes of tweak, followed by KEY_BYTES bytes of key. */ +void tweakey_state_init( + uint8_t TK[TWEAKEY_BYTES], + const uint8_t key[KEY_BYTES], + const uint8_t tweak[TWEAK_BYTES] +) +{ + memcpy(TK, tweak, TWEAK_BYTES); + memcpy(TK+TWEAK_BYTES, key, KEY_BYTES); +} + +/* Compute one round tweakey: zero round_tweakey, XOR the LANES_NB lanes of TK into its first LANE_BYTES bytes, then XOR round_constant into byte 0. */ +void tweakey_state_extract( + const uint8_t TK[TWEAKEY_BYTES], + uint8_t round_constant, + uint8_t round_tweakey[ROUND_TWEAKEY_BYTES] +) +{ + memset(round_tweakey, 0, ROUND_TWEAKEY_BYTES); + + for (size_t j=0; j<LANES_NB; j++) + { + const uint8_t *TKj = TK + j*LANE_BYTES; + + for (size_t k=0; k<LANE_BYTES; k++) + { + round_tweakey[k] ^= TKj[k]; + } + } + + round_tweakey[0] ^= round_constant; +} + +/* Write into y the lane x multiplied by M. y is written while x is still being read, so x and y must be distinct buffers. */ +static void _multiply_M(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) +{ + y[7] = x[6]; + y[6] = x[5]; + y[5] = x[5]<<3 ^ x[4]; + y[4] = x[4]>>3 ^ x[3]; + y[3] = x[2]; + y[2] = x[6]<<2 ^ x[1]; + y[1] = x[0]; + y[0] = x[7]; +} +/* Same result as applying _multiply_M twice; x_M_i holds byte i of the intermediate product. */ +static void _multiply_M2(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) +{ + uint8_t x_M_5 = x[5]<<3 ^ x[4]; + uint8_t x_M_4 = x[4]>>3 ^ x[3]; + + y[7] = x[5]; + y[6] = x_M_5; + y[5] = x_M_5<<3 ^ x_M_4; + y[4] = x_M_4>>3 ^ x[2]; + y[3] = x[6]<<2 ^ x[1]; + y[2] = x[5]<<2 ^ x[0]; + y[1] = x[7]; + y[0] = x[6]; +} +/* Same result as applying _multiply_M three times; x_M2_i holds byte i after two applications. */ +static void _multiply_M3(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) +{ + uint8_t x_M_5 = x[5]<<3 ^ x[4]; + uint8_t x_M_4 = x[4]>>3 ^ x[3]; + uint8_t x_M2_5 = x_M_5<<3 ^ x_M_4; + uint8_t x_M2_4 = x_M_4>>3 ^ x[2]; + + y[7] = x_M_5; + y[6] = x_M2_5; + y[5] = x_M2_5<<3 ^ x_M2_4; + y[4] = x_M2_4>>3 ^ x[6]<<2 ^ x[1]; + y[3] = x[5]<<2 ^ x[0]; + y[2] = x_M_5<<2 ^ x[7]; + y[1] = x[6]; + y[0] = x[5]; +} +/* Write into y the lane x multiplied by MR; compiled only when LANES_NB >= 5, i.e. when lane 4 exists. x and y must be distinct buffers. */ +#if LANES_NB >= 5 +static void _multiply_MR(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) +{ + y[0] = x[1]; + y[1] = x[2]; + y[2] = x[3] ^ x[4]>>3; + y[3] = x[4]; + y[4] = x[5] ^ x[6]<<3; + y[5] = x[3]<<2 ^ 
x[6]; + y[6] = x[7]; + y[7] = x[0]; +} +/* Same result as applying _multiply_MR twice; compiled only when LANES_NB >= 6. */ +#if LANES_NB >= 6 +static void _multiply_MR2(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) +{ + uint8_t x_MR_4 = x[5] ^ x[6]<<3; + + y[0] = x[2]; + y[1] = x[3] ^ x[4]>>3; + y[2] = x[4] ^ x_MR_4>>3; + y[3] = x_MR_4; + y[4] = x[3]<<2 ^ x[6] ^ x[7]<<3; + y[5] = x[4]<<2 ^ x[7]; + y[6] = x[0]; + y[7] = x[1]; +} +/* Same result as applying _multiply_MR three times; compiled only when LANES_NB >= 7. */ +#if LANES_NB >= 7 +static void _multiply_MR3(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) +{ + uint8_t x_MR_4 = x[5] ^ x[6]<<3; + uint8_t x_MR2_4 = x[3]<<2 ^ x[6] ^ x[7]<<3; + + y[0] = x[3] ^ x[4]>>3; + y[1] = x[4] ^ x_MR_4>>3; + y[2] = x_MR_4 ^ x_MR2_4>>3; + y[3] = x_MR2_4; + y[4] = x[0]<<3 ^ x[4]<<2 ^ x[7]; + y[5] = x_MR_4<<2 ^ x[0]; + y[6] = x[1]; + y[7] = x[2]; +} +#endif +#endif +#endif + +/* Advance TK by one round in place: each lane j >= 1 is copied to TKj_old, then overwritten with the product of that copy and the lane's own multiplication function. */ +void tweakey_state_update(uint8_t TK[TWEAKEY_BYTES]) +{ + /* Skip lane 0, as it is multiplied by the identity matrix. */ + + size_t j; + uint8_t *TKj; + uint8_t TKj_old[LANE_BYTES]; + + j = 1; + TKj = TK + j*LANE_BYTES; + memcpy(TKj_old, TKj, LANE_BYTES); + _multiply_M(TKj_old, TKj); + + j = 2; + TKj = TK + j*LANE_BYTES; + memcpy(TKj_old, TKj, LANE_BYTES); + _multiply_M2(TKj_old, TKj); + + j = 3; + TKj = TK + j*LANE_BYTES; + memcpy(TKj_old, TKj, LANE_BYTES); + _multiply_M3(TKj_old, TKj); + +#if LANES_NB >= 5 + j = 4; + TKj = TK + j*LANE_BYTES; + memcpy(TKj_old, TKj, LANE_BYTES); + _multiply_MR(TKj_old, TKj); + +#if LANES_NB >= 6 + j = 5; + TKj = TK + j*LANE_BYTES; + memcpy(TKj_old, TKj, LANE_BYTES); + _multiply_MR2(TKj_old, TKj); + +#if LANES_NB >= 7 + j = 6; + TKj = TK + j*LANE_BYTES; + memcpy(TKj_old, TKj, LANE_BYTES); + _multiply_MR3(TKj_old, TKj); +#endif +#endif +#endif +} diff --git a/src/add_felicsref/tweakey.h b/src/add_felicsref/tweakey.h new file mode 120000 index 0000000..7f2415f --- /dev/null +++ b/src/add_felicsref/tweakey.h @@ -0,0 +1 @@ +../ref/tweakey.h
\ No newline at end of file |
