Merge remote-tracking branch 'origin/master' into fix-vhdltbc

author: Gaetan Leplus <gaetan.leplus@airbus.com> 2019-07-05 16:16:19 +0200
committer: Gaetan Leplus <gaetan.leplus@airbus.com> 2019-07-05 16:16:19 +0200
commit: a432c19745907a96303b3a25111e0fd622202e0c (patch)
tree: 3ac352a3598fa444d45695dbb2b4cee63698ac57 /src
parent: 92893d79b36c9fb5a90644b82d16d9fa2563feb1 (diff)
parent: 4f58d99e11e1c412a600f39f32a8d181765f0246 (diff)
download: lilliput-ae-implem-a432c19745907a96303b3a25111e0fd622202e0c.tar.xz
14 files changed, 220 insertions, 150 deletions
diff --git a/src/add_felicsref/cipher.c b/src/add_felicsref/cipher.c
index 7de0a08..59bc5d8 100644
--- a/src/add_felicsref/cipher.c
+++ b/src/add_felicsref/cipher.c
@@ -150,7 +150,7 @@ void lilliput_tbc_encrypt(
     uint8_t RTK[ROUND_TWEAKEY_BYTES];
     tweakey_state_init(TK, key, tweak);
 
-    for (unsigned i=0; i<ROUNDS-1; i++)
+    for (size_t i=0; i<ROUNDS-1; i++)
     {
         tweakey_state_extract(TK, i, RTK);
         _one_round_egfn(ciphertext, RTK, PERMUTATION_ENCRYPTION);
@@ -174,7 +174,7 @@ void lilliput_tbc_decrypt(
     uint8_t RTK[ROUNDS][ROUND_TWEAKEY_BYTES];
     _compute_round_tweakeys(key, tweak, RTK);
 
-    for (uint8_t i=0; i<ROUNDS-1; i++)
+    for (size_t i=0; i<ROUNDS-1; i++)
     {
         _one_round_egfn(X, RTK[ROUNDS-1-i], PERMUTATION_DECRYPTION);
     }
diff --git a/src/add_felicsref/tweakey.c b/src/add_felicsref/tweakey.c
index 18a7792..47badde 100644
--- a/src/add_felicsref/tweakey.c
+++ b/src/add_felicsref/tweakey.c
@@ -81,11 +81,10 @@ static void _multiply(uint8_t TKj[LANE_BYTES], matrix_multiplication alpha)
 
 void tweakey_state_update(uint8_t TK[TWEAKEY_BYTES])
 {
-    /* Skip lane 0, as it is multiplied by the identity matrix. */
-
-    _multiply(TK + 1*LANE_BYTES, _multiply_M);
-    _multiply(TK + 2*LANE_BYTES, _multiply_M2);
-    _multiply(TK + 3*LANE_BYTES, _multiply_M3);
+    _multiply(TK + 0*LANE_BYTES, _multiply_M);
+    _multiply(TK + 1*LANE_BYTES, _multiply_M2);
+    _multiply(TK + 2*LANE_BYTES, _multiply_M3);
+    _multiply(TK + 3*LANE_BYTES, _multiply_M4);
 
 #if LANES_NB >= 5
     _multiply(TK + 4*LANE_BYTES, _multiply_MR);
diff --git a/src/add_python/lilliput/ae_common.py b/src/add_python/lilliput/ae_common.py
index b94be1b..db14ec3 100644
--- a/src/add_python/lilliput/ae_common.py
+++ b/src/add_python/lilliput/ae_common.py
@@ -15,7 +15,7 @@
 """Helper functions used in both Lilliput-I and Lilliput-II."""
 
 
-from .constants import BLOCK_BITS, BLOCK_BYTES
+from .constants import BLOCK_BYTES
 from .helpers import xor
 from . import tbc
 
@@ -48,11 +48,11 @@ def block_matrix_to_bytes(matrix):
 
 def pad10(X):
     zeroes = [0] * (BLOCK_BYTES-len(X)-1)
-    return zeroes + [0b10000000] + X
+    return X + [0b10000000] + zeroes
 
 
 def integer_to_byte_array(i, n):
-    return list(i.to_bytes(n, 'little'))
+    return list(i.to_bytes(n, 'big'))
 
 
 def _tweak_associated_data(t, i, padded):
@@ -61,8 +61,8 @@ def _tweak_associated_data(t, i, padded):
     prefix = 0b0110 if padded else 0b0010
 
     # Clear upper 4 bits and set them to prefix.
-    tweak[-1] &= 0b00001111
-    tweak[-1] = prefix << 4
+    tweak[0] &= 0b00001111
+    tweak[0] |= prefix << 4
 
     return tweak
 
diff --git a/src/add_python/lilliput/ae_mode_1.py b/src/add_python/lilliput/ae_mode_1.py
index 4a40b78..197bf37 100644
--- a/src/add_python/lilliput/ae_mode_1.py
+++ b/src/add_python/lilliput/ae_mode_1.py
@@ -52,27 +52,26 @@ def _lower_nibble(i):
     return i & 0b00001111
 
 
-def _byte_from_nibbles(lower, upper):
-    return upper<<4 | lower
+def _byte(high, low):
+    return high<<4 ^ low
 
 
 def _tweak_message(N, j, prefix):
-    # j is encoded on 68 bits; get 72 and clear the upper 4.
-    j_len = (TWEAK_BITS-NONCE_BITS-4)//8 + 1
-    tweak = integer_to_byte_array(j, j_len)
-    tweak[-1] &= 0b00001111
+    tweak = [_byte(prefix.value, _upper_nibble(N[0]))]
 
-    # Add nonce.
-    tweak[-1] |= _lower_nibble(N[0]) << 4
     tweak.extend(
-        _byte_from_nibbles(_upper_nibble(N[i-1]), _lower_nibble(N[i]))
+        _byte(_lower_nibble(N[i-1]), _upper_nibble(N[i]))
         for i in range(1, NONCE_BITS//8)
     )
 
-    # Add last nibble from nonce and prefix.
-    tweak.append(
-        _byte_from_nibbles(_upper_nibble(N[-1]), prefix.value)
-    )
+    # j is encoded on 68 bits; get 72 then set the upper 4 to the
+    # nonce's lower 4.
+    j_len = (TWEAK_BITS-NONCE_BITS-4)//8 + 1
+    j_array = integer_to_byte_array(j, j_len)
+    j_array[0] &= 0b00001111
+    j_array[0] |= _lower_nibble(N[-1]) << 4
+
+    tweak.extend(j_array)
 
     return tweak
 
diff --git a/src/add_python/lilliput/ae_mode_2.py b/src/add_python/lilliput/ae_mode_2.py
index 79d1bcd..a55ecb8 100644
--- a/src/add_python/lilliput/ae_mode_2.py
+++ b/src/add_python/lilliput/ae_mode_2.py
@@ -18,6 +18,8 @@ This module provides the functions for authenticated encryption and decryption
 using Lilliput-AE's nonce-misuse-resistant mode based on SCT-2.
 """
 
+from enum import Enum
+
 from .constants import BLOCK_BYTES
 from .ae_common import (
     bytes_to_block_matrix,
@@ -35,22 +37,24 @@ TWEAK_BITS = 128
 TWEAK_BYTES = TWEAK_BITS//8
 
 
-def _tweak_tag(j, padded):
-    tweak = integer_to_byte_array(j, TWEAK_BYTES)
+class _TagTweak(Enum):
+    BLOCK = 0b0000
+    PAD = 0b0100
 
-    prefix = 0b0100 if padded else 0b0000
+
+def _tweak_tag(j, prefix):
+    tweak = integer_to_byte_array(j, TWEAK_BYTES)
 
     # Clear upper 4 bits and set them to prefix.
-    tweak[-1] &= 0b00001111
-    tweak[-1] = prefix << 4
+    tweak[0] &= 0b00001111
+    tweak[0] |= prefix.value << 4
 
     return tweak
 
 
 def _add_tag_j(tag, j):
-    array_j = integer_to_byte_array(j, TWEAK_BYTES)
-    tweak = xor(tag, array_j)
-    tweak[-1] |= 0b10000000
+    tweak = xor(tag, integer_to_byte_array(j, TWEAK_BYTES))
+    tweak[0] |= 0b10000000
 
     return tweak
 
@@ -63,18 +67,16 @@ def _message_auth_tag(M, N, Auth, key):
     M = bytes_to_block_matrix(M)
 
     for j in range(0, l):
-        tweak = _tweak_tag(j, False)
+        tweak = _tweak_tag(j, _TagTweak.BLOCK)
         encryption = tbc.encrypt(tweak, key, M[j])
         tag = xor(tag, encryption)
 
     if need_padding:
-        tweak = _tweak_tag(l, True)
+        tweak = _tweak_tag(l, _TagTweak.PAD)
         encryption = tbc.encrypt(tweak, key, pad10(M[l]))
         tag = xor(tag, encryption)
 
-    tweak = N + [0b00010000]
-    encryption = tbc.encrypt(tweak, key, tag)
-    tag = encryption
+    tag = tbc.encrypt([0b00010000]+N, key, tag)
 
     return tag
 
@@ -88,12 +90,12 @@ def _message_encryption(M, N, tag, key):
 
     for j in range(0, l):
         tweak = _add_tag_j(tag, j)
-        encryption = tbc.encrypt(tweak, key, N+[0b00000000])
+        encryption = tbc.encrypt(tweak, key, [0b00000000]+N)
         C.append(xor(M[j], encryption))
 
     if need_padding:
         tweak = _add_tag_j(tag, l)
-        encryption = tbc.encrypt(tweak, key, N+[0b00000000])
+        encryption = tbc.encrypt(tweak, key, [0b00000000]+N)
         C.append(xor(M[l], encryption))
 
     return C
diff --git a/src/add_python/lilliput/multiplications.py b/src/add_python/lilliput/multiplications.py
index a5faa55..09eaa08 100644
--- a/src/add_python/lilliput/multiplications.py
+++ b/src/add_python/lilliput/multiplications.py
@@ -23,8 +23,11 @@ from functools import reduce
 from operator import xor
 
 
+def _shl(xi, n):
+    return (xi << n) & 0xff
+
 def _Sl(n):
-    return lambda xi: (xi<<n) & 0xff
+    return lambda xi: _shl(xi, n)
 
 def _Sr(n):
     return lambda xi: xi>>n
@@ -36,16 +39,25 @@ def _0(xi):
     return 0
 
 def _M1(xi):
-    return (xi<<3 ^ xi>>3) & 0xff
+    return _shl(xi, 3) ^ xi>>3
 
 def _M2(xi):
-    return (xi<<6 ^ (xi&0b11111000) ^ xi>>6) & 0xff
+    return _shl(xi, 6) ^ xi&0b11111000 ^ xi>>6
 
 def _M3(xi):
-    return xi & 0b00011111
+    return _shl(xi>>3, 6) ^ xi>>6<<3
 
 def _M4(xi):
-    return ((xi<<2) & 0xff) >> 3
+    return _shl(xi, 2) >> 3
+
+def _M5(xi):
+    return _shl(xi, 5) ^ xi>>3<<2
+
+def _M6(xi):
+    return xi & 0b00011111
+
+def _M7(xi):
+    return _shl(xi, 2) >> 3
 
 
 M = (
@@ -81,6 +93,17 @@ M3 = (
     ( _0,     _0,    _Id,     _0,     _0,     _0,  _0,  _0),
 )
 
+M4 = (
+    ( _0,     _0, _Sl(6),    _M1,    _Id,     _0,     _0,  _0),
+    ( _0,     _0,     _0,    _M2,    _M1,    _Id,     _0,  _0),
+    ( _0, _Sl(2),     _0,    _M3,    _M2,    _M1,    _Id,  _0),
+    ( _0,    _M4, _Sl(2),     _0,     _0, _Sr(6), _Sr(3), _Id),
+    (_Id,     _0, _Sl(5), _Sl(2),     _0,     _0,     _0,  _0),
+    ( _0,    _Id,     _0,    _M5, _Sl(2),     _0,     _0,  _0),
+    ( _0,     _0,    _Id,     _0,     _0,     _0,     _0,  _0),
+    ( _0,     _0, _Sl(3),    _Id,     _0,     _0,     _0,  _0),
+)
+
 # NB: shift directions are reversed with respect to the specification
 # for powers of M_R, since the specification reverses the byte order
 # for those matrices.
@@ -99,7 +122,7 @@ MR = (
 MR2 = (
     ( _0,  _0, _Id,     _0,     _0,     _0,     _0,     _0),
     ( _0,  _0,  _0,    _Id, _Sr(3),     _0,     _0,     _0),
-    ( _0,  _0,  _0,     _0,    _Id, _Sr(3),    _M3,     _0),
+    ( _0,  _0,  _0,     _0,    _Id, _Sr(3),    _M6,     _0),
     ( _0,  _0,  _0,     _0,     _0,    _Id, _Sl(3),     _0),
     ( _0,  _0,  _0, _Sl(2),     _0,     _0,    _Id, _Sl(3)),
     ( _0,  _0,  _0,     _0, _Sl(2),     _0,     _0,    _Id),
@@ -109,8 +132,8 @@ MR2 = (
 
 MR3 = (
     (    _0,  _0,  _0,    _Id, _Sr(3),     _0,     _0,     _0),
-    (    _0,  _0,  _0,     _0,    _Id, _Sr(3),    _M3,     _0),
-    (    _0,  _0,  _0,    _M4,     _0,    _Id,    _M1,    _M3),
+    (    _0,  _0,  _0,     _0,    _Id, _Sr(3),    _M6,     _0),
+    (    _0,  _0,  _0,    _M7,     _0,    _Id,    _M1,    _M6),
     (    _0,  _0,  _0, _Sl(2),     _0,     _0,    _Id, _Sl(3)),
     (_Sl(3),  _0,  _0,     _0, _Sl(2),     _0,     _0,    _Id),
     (   _Id,  _0,  _0,     _0,     _0, _Sl(2), _Sl(5),     _0),
@@ -135,10 +158,10 @@ def _multiplication(m, reverse=True):
 
 
 ALPHAS = (
-    list,                       # Identity.
     _multiplication(M),
     _multiplication(M2),
     _multiplication(M3),
+    _multiplication(M4),
     _multiplication(MR, reverse=False),
     _multiplication(MR2, reverse=False),
     _multiplication(MR3, reverse=False)
diff --git a/src/add_threshold/tweakey.c b/src/add_threshold/tweakey.c
index 8f531d9..7822564 100644
--- a/src/add_threshold/tweakey.c
+++ b/src/add_threshold/tweakey.c
@@ -90,10 +90,11 @@ void tweakey_state_extract(
 
 typedef void (*matrix_multiplication)(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]);
 
-static const matrix_multiplication ALPHAS[6] = {
+static const matrix_multiplication ALPHAS[7] = {
     _multiply_M,
     _multiply_M2,
     _multiply_M3,
+    _multiply_M4,
     _multiply_MR,
     _multiply_MR2,
     _multiply_MR3
@@ -102,16 +103,14 @@ static const matrix_multiplication ALPHAS[6] = {
 
 void tweakey_state_update(uint8_t TK_X[TWEAKEY_BYTES], uint8_t TK_Y[KEY_BYTES])
 {
-    /* Skip lane 0, as it is multiplied by the identity matrix. */
-
-    for (size_t j=1; j<TWEAK_LANES_NB; j++)
+    for (size_t j=0; j<TWEAK_LANES_NB; j++)
     {
         uint8_t *TKj_X = TK_X + j*LANE_BYTES;
 
         uint8_t TKj_old_X[LANE_BYTES];
         memcpy(TKj_old_X, TKj_X, LANE_BYTES);
 
-        ALPHAS[j-1](TKj_old_X, TKj_X);
+        ALPHAS[j](TKj_old_X, TKj_X);
     }
 
     for (size_t j=0; j<KEY_LANES_NB; j++)
@@ -124,7 +123,7 @@ void tweakey_state_update(uint8_t TK_X[TWEAKEY_BYTES], uint8_t TK_Y[KEY_BYTES])
         memcpy(TKj_X_old, TKj_X, LANE_BYTES);
         memcpy(TKj_Y_old, TKj_Y, LANE_BYTES);
 
-        ALPHAS[j-1 + TWEAK_LANES_NB](TKj_X_old, TKj_X);
-        ALPHAS[j-1 + TWEAK_LANES_NB](TKj_Y_old, TKj_Y);
+        ALPHAS[j + TWEAK_LANES_NB](TKj_X_old, TKj_X);
+        ALPHAS[j + TWEAK_LANES_NB](TKj_Y_old, TKj_Y);
     }
 }
diff --git a/src/add_tweakeyloop/multiplications.h b/src/add_tweakeyloop/multiplications.h
index 45b9eaa..650373b 100644
--- a/src/add_tweakeyloop/multiplications.h
+++ b/src/add_tweakeyloop/multiplications.h
@@ -55,6 +55,17 @@ static void _multiply_M3(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES])
     _multiply_M(M2_x, y);
 }
 
+static void _multiply_M4(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES])
+{
+    uint8_t M_x[LANE_BYTES];
+    uint8_t M2_x[LANE_BYTES];
+    uint8_t M3_x[LANE_BYTES];
+    _multiply_M(x, M_x);
+    _multiply_M(M_x, M2_x);
+    _multiply_M(M2_x, M3_x);
+    _multiply_M(M3_x, y);
+}
+
 static void _multiply_MR(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES])
 {
     y[0] = x[1];
diff --git a/src/ref/cipher.c b/src/ref/cipher.c
index 8ebbbc3..07405e1 100644
--- a/src/ref/cipher.c
+++ b/src/ref/cipher.c
@@ -148,7 +148,7 @@ void lilliput_tbc_encrypt(
     uint8_t RTK[ROUNDS][ROUND_TWEAKEY_BYTES];
     _compute_round_tweakeys(key, tweak, RTK);
 
-    for (uint8_t i=0; i<ROUNDS-1; i++)
+    for (size_t i=0; i<ROUNDS-1; i++)
     {
         _one_round_egfn(X, RTK[i], PERMUTATION_ENCRYPTION);
     }
@@ -171,7 +171,7 @@ void lilliput_tbc_decrypt(
     uint8_t RTK[ROUNDS][ROUND_TWEAKEY_BYTES];
     _compute_round_tweakeys(key, tweak, RTK);
 
-    for (uint8_t i=0; i<ROUNDS-1; i++)
+    for (size_t i=0; i<ROUNDS-1; i++)
     {
         _one_round_egfn(X, RTK[ROUNDS-1-i], PERMUTATION_DECRYPTION);
     }
diff --git a/src/ref/lilliput-ae-utils.h b/src/ref/lilliput-ae-utils.h
index 0efb776..19b4623 100644
--- a/src/ref/lilliput-ae-utils.h
+++ b/src/ref/lilliput-ae-utils.h
@@ -28,16 +28,6 @@ This file provides functions used by both authenticated encryption modes.
 #include "constants.h"
 
 
-static inline uint8_t upper_nibble(uint8_t i)
-{
-    return i >> 4;
-}
-
-static inline uint8_t lower_nibble(uint8_t i)
-{
-    return i & 0x0f;
-}
-
 static inline void encrypt(const uint8_t K[KEY_BYTES],
                            const uint8_t T[TWEAK_BYTES],
                            const uint8_t M[BLOCK_BYTES],
@@ -68,35 +58,41 @@ static inline void xor_arrays(size_t len, uint8_t out[len], const uint8_t a[len]
 
 static inline void pad10(size_t X_len, const uint8_t X[X_len], uint8_t padded[BLOCK_BYTES])
 {
-    /* pad10*(X) = X || 1 || 0^{n-|X|-1} */
-
-    /* For example, with uint8_t X[3] = { [0]=0x01, [1]=0x02, [2]=0x03 }
+    /* Assuming 0 < |X| < n:
+     *
+     * pad10*(X) = X || 1 || 0^{n-|X|-1}
+     *
+     * For example, with uint8_t X[3] = { [0]=0x01, [1]=0x02, [2]=0x03 }
      *
      * pad10*(X) =
-     *       X[2]     X[1]     X[0]   1 0*
-     *     00000011 00000010 00000001 1 0000000 00000000...
+     *       X[0]     X[1]     X[2]   1 0*
+     *     00000001 00000010 00000011 1 0000000 00000000...
      *
-     * - padded[0, 11]:  zeroes
-     * - padded[12]:     10000000
-     * - padded[13, 15]: X[0, 2]
+     * - padded[0, 2]:  X[0, 2]
+     * - padded[3]:     10000000
+     * - padded[4, 15]: zeroes
      */
 
-    /* Assume that X_len<BLOCK_BYTES. */
+    memcpy(padded, X, X_len);
+    padded[X_len] = 0x80;
 
-    size_t pad_len = BLOCK_BYTES-X_len;
+    /* memset(&padded[BLOCK_BYTES], 0, 0) may or may not constitute
+     * undefined behaviour; use a straight loop instead. */
 
-    memset(padded, 0, pad_len-1);
-    padded[pad_len-1] = 0x80;
-    memcpy(padded+pad_len, X, X_len);
+    for (size_t i=X_len+1; i<BLOCK_BYTES; i++)
+    {
+        padded[i] = 0;
+    }
 }
 
 static inline void copy_block_index(size_t index, uint8_t tweak[TWEAK_BYTES])
 {
-    /* NB: little-endian architectures can simply use:
-     *     memcpy(tweak, &index, sizeof(index)); */
-    for (size_t i=0; i<sizeof(index); i++)
+    size_t s = sizeof(index);
+    uint8_t *dest = &tweak[TWEAK_BYTES-s];
+
+    for (size_t i=0; i<s; i++)
     {
-        tweak[i] = index >> 8*i & 0xff;
+        dest[i] = index >> 8*(s-1-i);
     }
 }
 
@@ -106,19 +102,22 @@ static inline void fill_index_tweak(
     uint8_t tweak[TWEAK_BYTES]
 )
 {
-    /* With an s-bit block index, the t-bit tweak is filled as follows:
+    /* The t-bit tweak is filled as follows:
      *
-     * - bits [  1, t-4]: block index
-     *        [  1,   s]: actual block index
-     *        [s+1, t-4]: 0-padding
-     * - bits [t-3,   t]: 4-bit prefix
+     *   1    4    5         t
+     * [ prefix || block index ]
+     *
+     * The s-bit block index is encoded as follows:
+     *
+     *   5        t-s    t-s+1                t
+     * [ zero padding || block index, MSB first ]
      */
 
-    copy_block_index(block_index, tweak);
+    tweak[0] = prefix<<4;
 
     /* Assume padding bytes have already been set to 0. */
 
-    tweak[TWEAK_BYTES-1] |= prefix << 4;
+    copy_block_index(block_index, tweak);
 }
 
 static void process_associated_data(
diff --git a/src/ref/lilliput-i.c b/src/ref/lilliput-i.c
index 6f869c3..3358b10 100644
--- a/src/ref/lilliput-i.c
+++ b/src/ref/lilliput-i.c
@@ -32,58 +32,71 @@ static const uint8_t _0n[BLOCK_BYTES] = {
 };
 
 
+static uint8_t _upper_nibble(uint8_t i)
+{
+    return i >> 4;
+}
+
+static uint8_t _lower_nibble(uint8_t i)
+{
+    return i & 0x0f;
+}
+
 static void _init_msg_tweak(const uint8_t N[NONCE_BYTES], uint8_t tweak[TWEAK_BYTES])
 {
-    /* With an s-bit block index, the t-bit tweak is filled as follows:
+    /* The t-bit tweak is filled as follows:
+     *
+     *   1    4    5     |N|+4    |N|+5     t
+     * [ prefix ||  nonce      || block index ]
      *
-     * - bits [      1, t-|N|-4]: block index
-     *        [      1,       s]: actual block index
-     *        [    s+1, t-|N|-4]: 0-padding
-     * - bits [t-|N|-3,     t-4]: nonce
-     * - bits [    t-3,       t]: 4-bit prefix
+     * The s-bit block index is encoded as follows:
      *
-     * This function sets bits s+1 to t-4 once and for all.
+     *   |N|+5    t-s    t-s+1                t
+     * [ zero padding || block index, MSB first ]
+     *
+     * This function sets bits 5 to t-s once and for all.
      */
 
-    size_t N_start = TWEAK_BYTES - NONCE_BYTES - 1;
-
-    for (size_t i=sizeof(size_t); i<N_start; i++)
-    {
-        tweak[i] = 0;
-    }
-
-    tweak[N_start] = lower_nibble(N[0]) << 4;
+    tweak[0] = _upper_nibble(N[0]);
 
     for (size_t i=1; i<NONCE_BYTES; i++)
     {
-        tweak[N_start+i] = lower_nibble(N[i]) << 4 ^ upper_nibble(N[i-1]);
+        tweak[i] = _lower_nibble(N[i-1]) << 4 ^ _upper_nibble(N[i]);
     }
 
-    tweak[TWEAK_BYTES-1] = upper_nibble(N[NONCE_BYTES-1]);
+    tweak[NONCE_BYTES] = _lower_nibble(N[NONCE_BYTES-1]) << 4;
+
+    /* The number of bits we need to zero out is:
+     *     t - |N| - s - 4        - 4
+     *                   (prefix)   (zeroed out by previous assignment)
+     */
+    memset(&tweak[NONCE_BYTES+1], 0, TWEAK_BYTES-NONCE_BYTES-sizeof(size_t)-1);
 }
 
 static void _fill_msg_tweak(
-    uint8_t       prefix,
-    size_t        block_index,
-    uint8_t       tweak[TWEAK_BYTES]
+    uint8_t prefix,
+    size_t  block_index,
+    uint8_t tweak[TWEAK_BYTES]
 )
 {
-    /* With an s-bit block index, the t-bit tweak is filled as follows:
+    /* The t-bit tweak is filled as follows:
      *
-     * - bits [      1, t-|N|-4]: block index
-     *        [      1,       s]: actual block index
-     *        [    s+1, t-|N|-4]: 0-padding
-     * - bits [t-|N|-3,     t-4]: nonce
-     * - bits [    t-3,       t]: 4-bit prefix
+     *   1    4    5     |N|+4    |N|+5     t
+     * [ prefix ||  nonce      || block index ]
      *
-     * This function assumes bits s+1 to t-3 have already been set,
-     * and only sets bits 1 to s and t-3 to t.
+     * The s-bit block index is encoded as follows:
+     *
+     *   |N|+5    t-s    t-s+1                t
+     * [ zero padding || block index, MSB first ]
+     *
+     * This function assumes bits 5 to t-s have already been set, and
+     * only sets bits 1 to 4 and t-s+1 to t.
      */
 
-    copy_block_index(block_index, tweak);
+    uint8_t *msb = &tweak[0];
+    *msb = prefix<<4 ^ _lower_nibble(*msb);
 
-    uint8_t *msb = &tweak[TWEAK_BYTES-1];
-    *msb = prefix<<4 ^ lower_nibble(*msb);
+    copy_block_index(block_index, tweak);
 }
 
 static void _encrypt_message(
diff --git a/src/ref/lilliput-ii.c b/src/ref/lilliput-ii.c
index 6811d49..bb43d08 100644
--- a/src/ref/lilliput-ii.c
+++ b/src/ref/lilliput-ii.c
@@ -28,36 +28,42 @@ This file implements Lilliput-AE's nonce-misuse-resistant mode based on SCT-2.
 
 static void _init_msg_tweak(const uint8_t tag[TAG_BYTES], uint8_t tweak[TWEAK_BYTES])
 {
-    /* With an s-bit block index, the t-bit tweak is filled as follows:
+    /* The t-bit tweak is filled as follows:
+     *
+     *   1    2                      t
+     * [ 1 || tag[2,t] XOR block index  ]
+     *
+     * The s-bit block index is XORed to the tag as follows:
      *
-     * - bits [  1, t-1]: tag + block index
-     *        [  1,   s]: tag[1..s] XOR block index
-     *        [s+1, t-1]: tag[s+1..t-1]
-     * - bit t: 1
+     *   2       t-s    t-s+1                                  t
+     * [ tag[2, t-s] || tag[t-s+1, t] XOR block index, MSB first ]
      *
-     * This function sets bits s+1 to t once and for all.
+     * This function sets bits 1 to t-s once and for all.
      */
 
-    memcpy(tweak+sizeof(size_t), tag+sizeof(size_t), TAG_BYTES-sizeof(size_t));
-    tweak[TWEAK_BYTES-1] |= 0x80;
+    memcpy(tweak, tag, TAG_BYTES-sizeof(size_t));
+    tweak[0] |= 0x80;
 }
 
 static void _fill_msg_tweak(const uint8_t tag[TAG_BYTES], size_t block_index, uint8_t tweak[TWEAK_BYTES])
 {
-    /* With an s-bit block index, the t-bit tweak is filled as follows:
+    /* The t-bit tweak is filled as follows:
+     *
+     *   1    2                      t
+     * [ 1 || tag[2,t] XOR block index  ]
+     *
+     * The s-bit block index is XORed to the tag as follows:
      *
-     * - bits [  1, t-1]: tag + block index
-     *        [  1,   s]: tag[1..s] XOR block index
-     *        [s+1, t-1]: tag[s+1..t-1]
-     * - bit t: 1
+     *   2       t-s    t-s+1                                  t
+     * [ tag[2, t-s] || tag[t-s+1, t] XOR block index, MSB first ]
      *
-     * This function assumes bits s+1 to t have already been set, and
-     * only sets bits 1 to s.
+     * This function assumes bits 1 to t-s have already been set, and
+     * only sets bits t-s+1 to t.
      */
 
     copy_block_index(block_index, tweak);
 
-    for (size_t i=0; i<sizeof(block_index); i++)
+    for (size_t i=TWEAK_BYTES-sizeof(size_t); i<TWEAK_BYTES; i++)
     {
         tweak[i] ^= tag[i];
     }
@@ -67,12 +73,12 @@ static void _fill_tag_tweak(const uint8_t N[NONCE_BYTES], uint8_t tweak[TWEAK_BY
 {
     /* The t-bit tweak is filled as follows:
      *
-     * - bits [  1, t-7]: N
-     * - bits [t-7,   t]: 0001||0^4
+     *   1  4    5   8    t-|N|+1     t
+     * [ 0001 ||  0^4  ||        nonce  ]
      */
 
-    memcpy(tweak, N, TWEAK_BYTES-1);
-    tweak[TWEAK_BYTES-1] = 0x10;
+    tweak[0] = 0x10;
+    memcpy(&tweak[1], N, TWEAK_BYTES-1);
 }
 
 static void _generate_tag(
@@ -129,8 +135,8 @@ static void _encrypt_message(
     _init_msg_tweak(tag, tweak);
 
     uint8_t padded_N[BLOCK_BYTES];
-    memcpy(padded_N, N, NONCE_BYTES);
-    padded_N[BLOCK_BYTES-1] = 0;
+    padded_N[0] = 0;
+    memcpy(&padded_N[1], N, NONCE_BYTES);
 
     size_t l = M_len / BLOCK_BYTES;
     size_t rest = M_len % BLOCK_BYTES;
diff --git a/src/ref/multiplications.h b/src/ref/multiplications.h
index 4de1848..c0645b9 100644
--- a/src/ref/multiplications.h
+++ b/src/ref/multiplications.h
@@ -71,6 +71,26 @@ static void _multiply_M3(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES])
     y[0] = x[5];
 }
 
+static void _multiply_M4(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES])
+{
+    uint8_t a_5  = x[5]<<3  ^ x[4];
+    uint8_t a_4  = x[4]>>3  ^ x[3];
+    uint8_t b_5 = a_5<<3 ^ a_4;
+    uint8_t b_4 = a_4>>3 ^ x[2];
+
+    uint8_t c_4 = b_4>>3 ^ x[6]<<2 ^ x[1];
+    uint8_t c_5 = b_5<<3 ^ b_4;
+
+    y[7] = b_5;
+    y[6] = c_5;
+    y[5] = c_5<<3 ^ c_4;
+    y[4] = c_4>>3 ^ x[5]<<2 ^ x[0];
+    y[3] = a_5<<2 ^ x[7];
+    y[2] = b_5<<2 ^ x[6];
+    y[1] = x[5];
+    y[0] = a_5;
+}
+
 static void _multiply_MR(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES])
 {
     y[0] = x[1];
diff --git a/src/ref/tweakey.c b/src/ref/tweakey.c
index 2f357ca..510f35a 100644
--- a/src/ref/tweakey.c
+++ b/src/ref/tweakey.c
@@ -63,10 +63,11 @@ void tweakey_state_extract(
 
 typedef void (*matrix_multiplication)(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]);
 
-static const matrix_multiplication ALPHAS[6] = {
+static const matrix_multiplication ALPHAS[7] = {
     _multiply_M,
     _multiply_M2,
     _multiply_M3,
+    _multiply_M4,
     _multiply_MR,
     _multiply_MR2,
     _multiply_MR3
@@ -75,15 +76,13 @@ static const matrix_multiplication ALPHAS[6] = {
 
 void tweakey_state_update(uint8_t TK[TWEAKEY_BYTES])
 {
-    /* Skip lane 0, as it is multiplied by the identity matrix. */
-
-    for (size_t j=1; j<LANES_NB; j++)
+    for (size_t j=0; j<LANES_NB; j++)
     {
         uint8_t *TKj = TK + j*LANE_BYTES;
 
         uint8_t TKj_old[LANE_BYTES];
         memcpy(TKj_old, TKj, LANE_BYTES);
 
-        ALPHAS[j-1](TKj_old, TKj);
+        ALPHAS[j](TKj_old, TKj);
     }
 }
author	Gaetan Leplus <gaetan.leplus@airbus.com>	2019-07-05 16:16:19 +0200
committer	Gaetan Leplus <gaetan.leplus@airbus.com>	2019-07-05 16:16:19 +0200
commit	a432c19745907a96303b3a25111e0fd622202e0c (patch)
tree	3ac352a3598fa444d45695dbb2b4cee63698ac57 /src
parent	92893d79b36c9fb5a90644b82d16d9fa2563feb1 (diff)
parent	4f58d99e11e1c412a600f39f32a8d181765f0246 (diff)
download	lilliput-ae-implem-a432c19745907a96303b3a25111e0fd622202e0c.tar.xz