From a49156432b1e2974f1f50599b8d7cdcb3b14fdc4 Mon Sep 17 00:00:00 2001 From: Michael Schaffner Date: Tue, 16 Jan 2024 16:58:27 -0800 Subject: [PATCH] [prim, rom_ctrl] Remove S&P layer from data scrambling As elaborated on #20788, the S&P layer is disabled in the SRAM scrambling devices in order to improve error detection guarantees, interactions with ECC and timing. In order to minimize changes and keep the implementation around in case it is needed for byte parity at some point, we just set the NumDiffRounds parameter to zero for the modules that leverage prim_ram_1p_scr. In case of rom_ctrl, the functionality is removed entirely. Signed-off-by: Michael Schaffner --- hw/dv/sv/mem_bkdr_util/mem_bkdr_util__rom.sv | 1 - hw/dv/sv/mem_bkdr_util/sram_scrambler_pkg.sv | 106 ++++++++++--------- hw/ip/otbn/rtl/otbn.sv | 4 +- hw/ip/prim/rtl/prim_ram_1p_scr.sv | 5 +- hw/ip/rom_ctrl/doc/rom_ctrl_blockdiag.svg | 57 +++------- hw/ip/rom_ctrl/doc/theory_of_operation.md | 2 +- hw/ip/rom_ctrl/rtl/rom_ctrl_scrambled_rom.sv | 16 +-- hw/ip/sram_ctrl/doc/sram_ctrl_blockdiag.svg | 2 +- hw/ip/sram_ctrl/doc/theory_of_operation.md | 5 +- hw/ip/sram_ctrl/rtl/sram_ctrl.sv | 3 +- 10 files changed, 81 insertions(+), 120 deletions(-) diff --git a/hw/dv/sv/mem_bkdr_util/mem_bkdr_util__rom.sv b/hw/dv/sv/mem_bkdr_util/mem_bkdr_util__rom.sv index d021ca569d10cf..e96c15bc4171fa 100644 --- a/hw/dv/sv/mem_bkdr_util/mem_bkdr_util__rom.sv +++ b/hw/dv/sv/mem_bkdr_util/mem_bkdr_util__rom.sv @@ -59,7 +59,6 @@ virtual function bit [38:0] rom_encrypt_read32(bit [bus_params_pkg::BUS_AW-1:0] zero_key[i] = '0; end - data_arr = sram_scrambler_pkg::sp_decrypt(data_arr, 39, zero_key); for (int i = 0; i < 39; i++) begin data[i] = data_arr[i] ^ keystream[i]; end diff --git a/hw/dv/sv/mem_bkdr_util/sram_scrambler_pkg.sv b/hw/dv/sv/mem_bkdr_util/sram_scrambler_pkg.sv index f832a5f5b319b6..41c1ee4eb84b0f 100644 --- a/hw/dv/sv/mem_bkdr_util/sram_scrambler_pkg.sv +++ b/hw/dv/sv/mem_bkdr_util/sram_scrambler_pkg.sv @@ 
-237,10 +237,10 @@ package sram_scrambler_pkg; // SRAM data encryption is more involved, we need to run 2 rounds of PRINCE on the nonce and key // and then XOR the result with the data. // - // After that, the XORed data neeeds to them be passed through the S&P network one byte at a time. + // Optionally, the XORed data can be passed through the S&P network. function automatic state_t encrypt_sram_data(logic data[], int data_width, int sp_width, logic addr[], int addr_width, - logic key[], logic nonce[]); + logic key[], logic nonce[], bit use_sp_layer = 0); logic keystream[] = new[SRAM_BLOCK_WIDTH]; logic data_enc[] = new[data_width]; logic byte_to_enc[] = new[8]; @@ -262,31 +262,33 @@ package sram_scrambler_pkg; data_enc[i] = data[i] ^ keystream[i % ks_width]; end - if (data_width == sp_width) begin - // pass the entire word through the subst/perm network at once (the next cases would give the - // same results too, but this should be a bit more efficient) - data_enc = sp_encrypt(data_enc, data_width, zero_key); - end else if (sp_width == 8) begin - // pass each byte of the encoded result through the subst/perm network (special case of the - // general code below) - for (int i = 0; i < data_width / 8; i++) begin - byte_to_enc = data_enc[i*8 +: 8]; - enc_byte = sp_encrypt(byte_to_enc, 8, zero_key); - data_enc[i*8 +: 8] = enc_byte; - end - end else begin - // divide the word into sp_width chunks to pass it through the subst/perm network - for (int chunk_lsb = 0; chunk_lsb < data_width; chunk_lsb += sp_width) begin - int bits_remaining = data_width - chunk_lsb; - int chunk_width = (bits_remaining < sp_width) ? 
bits_remaining : sp_width; - logic chunk[] = new[chunk_width]; - - for (int j = 0; j < chunk_width; j++) begin - chunk[j] = data_enc[chunk_lsb + j]; + if (use_sp_layer) begin + if (data_width == sp_width) begin + // pass the entire word through the subst/perm network at once (the next cases would give the + // same results too, but this should be a bit more efficient) + data_enc = sp_encrypt(data_enc, data_width, zero_key); + end else if (sp_width == 8) begin + // pass each byte of the encoded result through the subst/perm network (special case of the + // general code below) + for (int i = 0; i < data_width / 8; i++) begin + byte_to_enc = data_enc[i*8 +: 8]; + enc_byte = sp_encrypt(byte_to_enc, 8, zero_key); + data_enc[i*8 +: 8] = enc_byte; end - chunk = sp_encrypt(chunk, chunk_width, zero_key); - for (int j = 0; j < chunk_width; j++) begin - data_enc[chunk_lsb + j] = chunk[j]; + end else begin + // divide the word into sp_width chunks to pass it through the subst/perm network + for (int chunk_lsb = 0; chunk_lsb < data_width; chunk_lsb += sp_width) begin + int bits_remaining = data_width - chunk_lsb; + int chunk_width = (bits_remaining < sp_width) ? 
bits_remaining : sp_width; + logic chunk[] = new[chunk_width]; + + for (int j = 0; j < chunk_width; j++) begin + chunk[j] = data_enc[chunk_lsb + j]; + end + chunk = sp_encrypt(chunk, chunk_width, zero_key); + for (int j = 0; j < chunk_width; j++) begin + data_enc[chunk_lsb + j] = chunk[j]; + end end end end @@ -296,7 +298,7 @@ package sram_scrambler_pkg; function automatic state_t decrypt_sram_data(logic data[], int data_width, int sp_width, logic addr[], int addr_width, - logic key[], logic nonce[]); + logic key[], logic nonce[], bit use_sp_layer = 0); logic keystream[] = new[SRAM_BLOCK_WIDTH]; logic data_dec[] = new[data_width]; logic byte_to_dec[] = new[8]; @@ -312,31 +314,33 @@ package sram_scrambler_pkg; // Generate the keystream keystream = gen_keystream(addr, addr_width, key, nonce); - if (data_width == sp_width) begin - // pass the entire word through the subst/perm network at once (the next cases would give the - // same results too, but this should be a bit more efficient) - data_dec = sp_decrypt(data, data_width, zero_key); - end else if (sp_width == 8) begin - // pass each byte of the data through the subst/perm network (special case of the general code - // below) - for (int i = 0; i < data_width / 8; i++) begin - byte_to_dec = data[i*8 +: 8]; - dec_byte = sp_decrypt(byte_to_dec, 8, zero_key); - data_dec[i*8 +: 8] = dec_byte; - end - end else begin - // divide the word into sp_width chunks to pass it through the subst/perm network - for (int chunk_lsb = 0; chunk_lsb < data_width; chunk_lsb += sp_width) begin - int bits_remaining = data_width - chunk_lsb; - int chunk_width = (bits_remaining < sp_width) ? 
bits_remaining : sp_width; - logic chunk[] = new[chunk_width]; - - for (int j = 0; j < chunk_width; j++) begin - chunk[j] = data[chunk_lsb + j]; + if (use_sp_layer) begin + if (data_width == sp_width) begin + // pass the entire word through the subst/perm network at once (the next cases would give the + // same results too, but this should be a bit more efficient) + data_dec = sp_decrypt(data, data_width, zero_key); + end else if (sp_width == 8) begin + // pass each byte of the data through the subst/perm network (special case of the general code + // below) + for (int i = 0; i < data_width / 8; i++) begin + byte_to_dec = data[i*8 +: 8]; + dec_byte = sp_decrypt(byte_to_dec, 8, zero_key); + data_dec[i*8 +: 8] = dec_byte; end - chunk = sp_decrypt(chunk, chunk_width, zero_key); - for (int j = 0; j < chunk_width; j++) begin - data_dec[chunk_lsb + j] = chunk[j]; + end else begin + // divide the word into sp_width chunks to pass it through the subst/perm network + for (int chunk_lsb = 0; chunk_lsb < data_width; chunk_lsb += sp_width) begin + int bits_remaining = data_width - chunk_lsb; + int chunk_width = (bits_remaining < sp_width) ? 
bits_remaining : sp_width; + logic chunk[] = new[chunk_width]; + + for (int j = 0; j < chunk_width; j++) begin + chunk[j] = data[chunk_lsb + j]; + end + chunk = sp_decrypt(chunk, chunk_width, zero_key); + for (int j = 0; j < chunk_width; j++) begin + data_dec[chunk_lsb + j] = chunk[j]; + end end end end diff --git a/hw/ip/otbn/rtl/otbn.sv b/hw/ip/otbn/rtl/otbn.sv index 03c0b01cc13fe0..d35f08c9aa6b18 100644 --- a/hw/ip/otbn/rtl/otbn.sv +++ b/hw/ip/otbn/rtl/otbn.sv @@ -332,8 +332,7 @@ module otbn .Width (39), .Depth (ImemSizeWords), .DataBitsPerMask(39), - .EnableParity (0), - .DiffWidth (39) + .EnableParity (0) ) u_imem ( .clk_i, .rst_ni(rst_n), @@ -535,7 +534,6 @@ module otbn .Depth (DmemSizeWords), .DataBitsPerMask (39), .EnableParity (0), - .DiffWidth (39), .ReplicateKeyStream(1) ) u_dmem ( .clk_i, diff --git a/hw/ip/prim/rtl/prim_ram_1p_scr.sv b/hw/ip/prim/rtl/prim_ram_1p_scr.sv index 30655819c6d7ac..fa9fd6ee5311aa 100644 --- a/hw/ip/prim/rtl/prim_ram_1p_scr.sv +++ b/hw/ip/prim/rtl/prim_ram_1p_scr.sv @@ -35,7 +35,10 @@ module prim_ram_1p_scr import prim_ram_1p_pkg::*; #( // Number of PRINCE half rounds, can be [1..5] parameter int NumPrinceRoundsHalf = 2, // Number of extra diffusion rounds. Setting this to 0 to disable diffusion. - parameter int NumDiffRounds = 2, + // NOTE: this setting is disabled by default, since it can interact adversely when using this + // primitive with end-to-end ECC integrity. Only enable this if you know what you are doing. See + // also #20788 for more context. + parameter int NumDiffRounds = 0, // This parameter governs the block-width of additional diffusion layers. // For intra-byte diffusion, set this parameter to 8. 
parameter int DiffWidth = DataBitsPerMask, diff --git a/hw/ip/rom_ctrl/doc/rom_ctrl_blockdiag.svg b/hw/ip/rom_ctrl/doc/rom_ctrl_blockdiag.svg index 969c1f57c57822..f4069c8d40ec1c 100644 --- a/hw/ip/rom_ctrl/doc/rom_ctrl_blockdiag.svg +++ b/hw/ip/rom_ctrl/doc/rom_ctrl_blockdiag.svg @@ -1,12 +1,5 @@ + height="779.99994" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns="http://www.w3.org/2000/svg" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:dc="http://purl.org/dc/elements/1.1/"> @@ -716,13 +716,13 @@ guidetolerance="10" inkscape:pageopacity="0" inkscape:pageshadow="2" - inkscape:window-width="1920" - inkscape:window-height="1043" + inkscape:window-width="3440" + inkscape:window-height="1376" id="namedview2851" showgrid="false" inkscape:zoom="1.7927814" - inkscape:cx="1069.3263" - inkscape:cy="296.72112" + inkscape:cx="1069.5671" + inkscape:cy="296.46671" inkscape:window-x="0" inkscape:window-y="0" inkscape:window-maximized="1" @@ -733,7 +733,8 @@ fit-margin-top="0" fit-margin-left="0" fit-margin-right="0" - fit-margin-bottom="0"> + fit-margin-bottom="0" + inkscape:pagecheckerboard="0"> S&P - - - S&P - (trunc) - \ No newline at end of file + \ No newline at end of file diff --git a/hw/ip/sram_ctrl/doc/theory_of_operation.md b/hw/ip/sram_ctrl/doc/theory_of_operation.md index f3de13bd8554cc..206d8345554294 100644 --- a/hw/ip/sram_ctrl/doc/theory_of_operation.md +++ b/hw/ip/sram_ctrl/doc/theory_of_operation.md @@ -26,11 +26,8 @@ The individual mechanisms are explained in more detail in the subsections below. ## Scrambling Primitive As explained in [`prim_ram_1p_scr`](../../prim/doc/prim_ram_1p_scr.md) the scrambling mechanism employs a reduced-round PRINCE block cipher in CTR mode to scramble the data. 
-Since plain CTR mode does not diffuse the data bits due to the bitwise XOR, the scheme is augmented by passing each word through a shallow substitution-permutation (S&P) network implemented with the `prim_subst_perm` primitive. -The S&P network employed is similar to the one employed in PRESENT and is explained in more detail [here](../../prim/doc/prim_ram_1p_scr.md#custom-substitution-permutation-network). -Another CTR mode augmentation that is aimed at breaking the linear address space is SRAM address scrambling. -The same S&P network construction that is used for intra-word diffusion is leveraged to non-linearly remap the SRAM address as shown in the block diagram above. +In order to break the linear address space, the CTR mode is augmented with an S&P network to non-linearly remap the SRAM address as shown in the block diagram above. The S&P network employed is similar to the one employed in PRESENT and is explained in more detail [here](../../prim/doc/prim_ram_1p_scr.md#custom-substitution-permutation-network). ### Integrity Error Handling diff --git a/hw/ip/sram_ctrl/rtl/sram_ctrl.sv b/hw/ip/sram_ctrl/rtl/sram_ctrl.sv index e2185e0214b19c..7a1035e5dd0437 100644 --- a/hw/ip/sram_ctrl/rtl/sram_ctrl.sv +++ b/hw/ip/sram_ctrl/rtl/sram_ctrl.sv @@ -492,8 +492,7 @@ module sram_ctrl .Width(DataWidth), .Depth(Depth), .EnableParity(0), - .DataBitsPerMask(DataWidth), - .DiffWidth(DataWidth) + .DataBitsPerMask(DataWidth) ) u_prim_ram_1p_scr ( .clk_i, .rst_ni,