Skip to content

Commit

Permalink
[prim, rom_ctrl] Remove S&P layer from data scrambling
Browse files Browse the repository at this point in the history
As elaborated in #20788, the S&P layer is disabled in the SRAM scrambling
devices in order to improve error detection guarantees, interactions with
ECC and timing.

In order to minimize changes and keep the implementation around in case
it is needed for byte parity at some point, we just set the NumDiffRounds
parameter to zero for the modules that leverage prim_ram_1p_scr.

In case of rom_ctrl, the functionality is removed entirely.

Signed-off-by: Michael Schaffner <[email protected]>
  • Loading branch information
msfschaffner committed Jan 17, 2024
1 parent d53236c commit 8e4bd19
Show file tree
Hide file tree
Showing 13 changed files with 122 additions and 144 deletions.
1 change: 0 additions & 1 deletion hw/dv/sv/mem_bkdr_util/mem_bkdr_util__rom.sv
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ virtual function bit [38:0] rom_encrypt_read32(bit [bus_params_pkg::BUS_AW-1:0]
zero_key[i] = '0;
end

data_arr = sram_scrambler_pkg::sp_decrypt(data_arr, 39, zero_key);
for (int i = 0; i < 39; i++) begin
data[i] = data_arr[i] ^ keystream[i];
end
Expand Down
119 changes: 64 additions & 55 deletions hw/dv/sv/mem_bkdr_util/sram_scrambler_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -237,10 +237,10 @@ package sram_scrambler_pkg;
// SRAM data encryption is more involved, we need to run 2 rounds of PRINCE on the nonce and key
// and then XOR the result with the data.
//
// After that, the XORed data needs to then be passed through the S&P network one byte at a time.
// Optionally, the XORed data can be passed through the S&P network.
function automatic state_t encrypt_sram_data(logic data[], int data_width, int sp_width,
logic addr[], int addr_width,
logic key[], logic nonce[]);
logic key[], logic nonce[], bit use_sp_layer = 0);
logic keystream[] = new[SRAM_BLOCK_WIDTH];
logic data_enc[] = new[data_width];
logic byte_to_enc[] = new[8];
Expand All @@ -262,31 +262,33 @@ package sram_scrambler_pkg;
data_enc[i] = data[i] ^ keystream[i % ks_width];
end

if (data_width == sp_width) begin
// pass the entire word through the subst/perm network at once (the next cases would give the
// same results too, but this should be a bit more efficient)
data_enc = sp_encrypt(data_enc, data_width, zero_key);
end else if (sp_width == 8) begin
// pass each byte of the encoded result through the subst/perm network (special case of the
// general code below)
for (int i = 0; i < data_width / 8; i++) begin
byte_to_enc = data_enc[i*8 +: 8];
enc_byte = sp_encrypt(byte_to_enc, 8, zero_key);
data_enc[i*8 +: 8] = enc_byte;
end
end else begin
// divide the word into sp_width chunks to pass it through the subst/perm network
for (int chunk_lsb = 0; chunk_lsb < data_width; chunk_lsb += sp_width) begin
int bits_remaining = data_width - chunk_lsb;
int chunk_width = (bits_remaining < sp_width) ? bits_remaining : sp_width;
logic chunk[] = new[chunk_width];

for (int j = 0; j < chunk_width; j++) begin
chunk[j] = data_enc[chunk_lsb + j];
if (use_sp_layer) begin
if (data_width == sp_width) begin
// pass the entire word through the subst/perm network at once (the next cases would give the
// same results too, but this should be a bit more efficient)
data_enc = sp_encrypt(data_enc, data_width, zero_key);
end else if (sp_width == 8) begin
// pass each byte of the encoded result through the subst/perm network (special case of the
// general code below)
for (int i = 0; i < data_width / 8; i++) begin
byte_to_enc = data_enc[i*8 +: 8];
enc_byte = sp_encrypt(byte_to_enc, 8, zero_key);
data_enc[i*8 +: 8] = enc_byte;
end
chunk = sp_encrypt(chunk, chunk_width, zero_key);
for (int j = 0; j < chunk_width; j++) begin
data_enc[chunk_lsb + j] = chunk[j];
end else begin
// divide the word into sp_width chunks to pass it through the subst/perm network
for (int chunk_lsb = 0; chunk_lsb < data_width; chunk_lsb += sp_width) begin
int bits_remaining = data_width - chunk_lsb;
int chunk_width = (bits_remaining < sp_width) ? bits_remaining : sp_width;
logic chunk[] = new[chunk_width];

for (int j = 0; j < chunk_width; j++) begin
chunk[j] = data_enc[chunk_lsb + j];
end
chunk = sp_encrypt(chunk, chunk_width, zero_key);
for (int j = 0; j < chunk_width; j++) begin
data_enc[chunk_lsb + j] = chunk[j];
end
end
end
end
Expand All @@ -296,7 +298,7 @@ package sram_scrambler_pkg;

function automatic state_t decrypt_sram_data(logic data[], int data_width, int sp_width,
logic addr[], int addr_width,
logic key[], logic nonce[]);
logic key[], logic nonce[], bit use_sp_layer = 0);
logic keystream[] = new[SRAM_BLOCK_WIDTH];
logic data_dec[] = new[data_width];
logic byte_to_dec[] = new[8];
Expand All @@ -312,38 +314,45 @@ package sram_scrambler_pkg;
// Generate the keystream
keystream = gen_keystream(addr, addr_width, key, nonce);

if (data_width == sp_width) begin
// pass the entire word through the subst/perm network at once (the next cases would give the
// same results too, but this should be a bit more efficient)
data_dec = sp_decrypt(data, data_width, zero_key);
end else if (sp_width == 8) begin
// pass each byte of the data through the subst/perm network (special case of the general code
// below)
for (int i = 0; i < data_width / 8; i++) begin
byte_to_dec = data[i*8 +: 8];
dec_byte = sp_decrypt(byte_to_dec, 8, zero_key);
data_dec[i*8 +: 8] = dec_byte;
end
end else begin
// divide the word into sp_width chunks to pass it through the subst/perm network
for (int chunk_lsb = 0; chunk_lsb < data_width; chunk_lsb += sp_width) begin
int bits_remaining = data_width - chunk_lsb;
int chunk_width = (bits_remaining < sp_width) ? bits_remaining : sp_width;
logic chunk[] = new[chunk_width];

for (int j = 0; j < chunk_width; j++) begin
chunk[j] = data[chunk_lsb + j];
if (use_sp_layer) begin
if (data_width == sp_width) begin
// pass the entire word through the subst/perm network at once (the next cases would give the
// same results too, but this should be a bit more efficient)
data_dec = sp_decrypt(data, data_width, zero_key);
end else if (sp_width == 8) begin
// pass each byte of the data through the subst/perm network (special case of the general code
// below)
for (int i = 0; i < data_width / 8; i++) begin
byte_to_dec = data[i*8 +: 8];
dec_byte = sp_decrypt(byte_to_dec, 8, zero_key);
data_dec[i*8 +: 8] = dec_byte;
end
chunk = sp_decrypt(chunk, chunk_width, zero_key);
for (int j = 0; j < chunk_width; j++) begin
data_dec[chunk_lsb + j] = chunk[j];
end else begin
// divide the word into sp_width chunks to pass it through the subst/perm network
for (int chunk_lsb = 0; chunk_lsb < data_width; chunk_lsb += sp_width) begin
int bits_remaining = data_width - chunk_lsb;
int chunk_width = (bits_remaining < sp_width) ? bits_remaining : sp_width;
logic chunk[] = new[chunk_width];

for (int j = 0; j < chunk_width; j++) begin
chunk[j] = data[chunk_lsb + j];
end
chunk = sp_decrypt(chunk, chunk_width, zero_key);
for (int j = 0; j < chunk_width; j++) begin
data_dec[chunk_lsb + j] = chunk[j];
end
end
end
end

// XOR result data with the keystream
for (int i = 0; i < data_width; i++) begin
data_dec[i] = data_dec[i] ^ keystream[i % ks_width];
// XOR result data with the keystream
for (int i = 0; i < data_width; i++) begin
data_dec[i] = data_dec[i] ^ keystream[i % ks_width];
end
end else begin
// XOR result data with the keystream
for (int i = 0; i < data_width; i++) begin
data_dec[i] = data[i] ^ keystream[i % ks_width];
end
end

return data_dec;
Expand Down
10 changes: 6 additions & 4 deletions hw/dv/verilator/cpp/scrambled_ecc32_mem_area.cc
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,10 @@ void ScrambledEcc32MemArea::WriteBuffer(uint8_t buf[SV_MEM_WIDTH_BYTES],
std::vector<uint8_t> ScrambledEcc32MemArea::ReadUnscrambled(
const uint8_t buf[SV_MEM_WIDTH_BYTES], uint32_t src_word) const {
std::vector<uint8_t> scrambled_data(buf, buf + GetPhysWidthByte());
return scramble_decrypt_data(
scrambled_data, GetPhysWidth(), 39, AddrIntToBytes(src_word, addr_width_),
addr_width_, GetScrambleNonce(), GetScrambleKey(), repeat_keystream_);
return scramble_decrypt_data(scrambled_data, GetPhysWidth(), 39,
AddrIntToBytes(src_word, addr_width_),
addr_width_, GetScrambleNonce(),
GetScrambleKey(), repeat_keystream_, false);
}

void ScrambledEcc32MemArea::ReadBuffer(std::vector<uint8_t> &data,
Expand Down Expand Up @@ -196,7 +197,8 @@ void ScrambledEcc32MemArea::ScrambleBuffer(uint8_t buf[SV_MEM_WIDTH_BYTES],
// Scramble data with integrity
scramble_buf = scramble_encrypt_data(
scramble_buf, GetPhysWidth(), 39, AddrIntToBytes(dst_word, addr_width_),
addr_width_, GetScrambleNonce(), GetScrambleKey(), repeat_keystream_);
addr_width_, GetScrambleNonce(), GetScrambleKey(), repeat_keystream_,
false);

// Copy scrambled data to write buffer
std::copy(scramble_buf.begin(), scramble_buf.end(), &buf[0]);
Expand Down
4 changes: 1 addition & 3 deletions hw/ip/otbn/rtl/otbn.sv
Original file line number Diff line number Diff line change
Expand Up @@ -332,8 +332,7 @@ module otbn
.Width (39),
.Depth (ImemSizeWords),
.DataBitsPerMask(39),
.EnableParity (0),
.DiffWidth (39)
.EnableParity (0)
) u_imem (
.clk_i,
.rst_ni(rst_n),
Expand Down Expand Up @@ -535,7 +534,6 @@ module otbn
.Depth (DmemSizeWords),
.DataBitsPerMask (39),
.EnableParity (0),
.DiffWidth (39),
.ReplicateKeyStream(1)
) u_dmem (
.clk_i,
Expand Down
30 changes: 17 additions & 13 deletions hw/ip/prim/dv/prim_ram_scr/cpp/scramble_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ std::vector<uint8_t> scramble_encrypt_data(
const std::vector<uint8_t> &data_in, uint32_t data_width,
uint32_t subst_perm_width, const std::vector<uint8_t> &addr,
uint32_t addr_width, const std::vector<uint8_t> &nonce,
const std::vector<uint8_t> &key, bool repeat_keystream) {
const std::vector<uint8_t> &key, bool repeat_keystream, bool use_sp_layer) {
assert(data_in.size() == ((data_width + 7) / 8));
assert(addr.size() == ((addr_width + 7) / 8));

Expand All @@ -335,28 +335,32 @@ std::vector<uint8_t> scramble_encrypt_data(

auto data_enc = xor_vectors(data_in, keystream);

return scramble_subst_perm_full_width(data_enc, data_width, subst_perm_width,
true);
if (use_sp_layer) {
return scramble_subst_perm_full_width(data_enc, data_width,
subst_perm_width, true);
} else {
return data_enc;
}
}

std::vector<uint8_t> scramble_decrypt_data(
const std::vector<uint8_t> &data_in, uint32_t data_width,
uint32_t subst_perm_width, const std::vector<uint8_t> &addr,
uint32_t addr_width, const std::vector<uint8_t> &nonce,
const std::vector<uint8_t> &key, bool repeat_keystream) {
const std::vector<uint8_t> &key, bool repeat_keystream, bool use_sp_layer) {
assert(data_in.size() == ((data_width + 7) / 8));
assert(addr.size() == ((addr_width + 7) / 8));

// Data is decrypted by reversing substitution/permutation layer then XORing
// with keystream
auto data_sp_out = scramble_subst_perm_full_width(data_in, data_width,
subst_perm_width, false);

auto keystream =
scramble_gen_keystream(addr, addr_width, nonce, key, data_width,
kNumPrinceHalfRounds, repeat_keystream);

auto data_dec = xor_vectors(data_sp_out, keystream);

return data_dec;
if (use_sp_layer) {
// Data is decrypted by reversing substitution/permutation layer then XORing
// with keystream
auto data_sp_out = scramble_subst_perm_full_width(data_in, data_width,
subst_perm_width, false);
return xor_vectors(data_sp_out, keystream);
} else {
return xor_vectors(data_in, keystream);
}
}
10 changes: 8 additions & 2 deletions hw/ip/prim/dv/prim_ram_scr/cpp/scramble_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,16 @@ std::vector<uint8_t> scramble_addr(const std::vector<uint8_t> &addr_in,
* @param repeat_keystream Repeat the keystream of one single PRINCE instance if
* set to true. Otherwise multiple PRINCE instances are
* used.
* @param use_sp_layer Use the S&P layer for data diffusion. In HW this is
* disabled by default since it interacts adversely with
* the end-to-end integrity scheme.
* @return Byte vector with decrypted data
*/
std::vector<uint8_t> scramble_decrypt_data(
const std::vector<uint8_t> &data_in, uint32_t data_width,
uint32_t subst_perm_width, const std::vector<uint8_t> &addr,
uint32_t addr_width, const std::vector<uint8_t> &nonce,
const std::vector<uint8_t> &key, bool repeat_keystream);
const std::vector<uint8_t> &key, bool repeat_keystream, bool use_sp_layer);

/** Encrypt scrambled data
* @param data_in Byte vector of data to encrypt
Expand All @@ -60,12 +63,15 @@ std::vector<uint8_t> scramble_decrypt_data(
* @param repeat_keystream Repeat the keystream of one single PRINCE instance if
* set to true. Otherwise multiple PRINCE instances are
* used.
* @param use_sp_layer Use the S&P layer for data diffusion. In HW this is
* disabled by default since it interacts adversely with
* the end-to-end integrity scheme.
* @return Byte vector with encrypted data
*/
std::vector<uint8_t> scramble_encrypt_data(
const std::vector<uint8_t> &data_in, uint32_t data_width,
uint32_t subst_perm_width, const std::vector<uint8_t> &addr,
uint32_t addr_width, const std::vector<uint8_t> &nonce,
const std::vector<uint8_t> &key, bool repeat_keystream);
const std::vector<uint8_t> &key, bool repeat_keystream, bool use_sp_layer);

#endif // OPENTITAN_HW_IP_PRIM_DV_PRIM_RAM_SCR_CPP_SCRAMBLE_MODEL_H_
7 changes: 5 additions & 2 deletions hw/ip/prim/rtl/prim_ram_1p_scr.sv
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,11 @@ module prim_ram_1p_scr import prim_ram_1p_pkg::*; #(
// to 2*5 + 1 effective rounds. Setting this to 2 halves this to approximately 5 effective rounds.
// Number of PRINCE half rounds, can be [1..5]
parameter int NumPrinceRoundsHalf = 2,
// Number of extra diffusion rounds. Setting this to 0 to disable diffusion.
parameter int NumDiffRounds = 2,
// Number of extra diffusion rounds. Setting this to 0 disables diffusion.
// NOTE: this is zero by default, since the non-linear transformation of data bits can interact
// adversely with end-to-end ECC integrity. Only enable this if you know what you are doing
// (e.g. using this primitive in a different context with byte parity). See #20788 for context.
parameter int NumDiffRounds = 0,
// This parameter governs the block-width of additional diffusion layers.
// For intra-byte diffusion, set this parameter to 8.
parameter int DiffWidth = DataBitsPerMask,
Expand Down
Loading

0 comments on commit 8e4bd19

Please sign in to comment.