diff --git a/hw/dv/sv/mem_bkdr_util/mem_bkdr_util__rom.sv b/hw/dv/sv/mem_bkdr_util/mem_bkdr_util__rom.sv index d021ca569d10cf..e96c15bc4171fa 100644 --- a/hw/dv/sv/mem_bkdr_util/mem_bkdr_util__rom.sv +++ b/hw/dv/sv/mem_bkdr_util/mem_bkdr_util__rom.sv @@ -59,7 +59,6 @@ virtual function bit [38:0] rom_encrypt_read32(bit [bus_params_pkg::BUS_AW-1:0] zero_key[i] = '0; end - data_arr = sram_scrambler_pkg::sp_decrypt(data_arr, 39, zero_key); for (int i = 0; i < 39; i++) begin data[i] = data_arr[i] ^ keystream[i]; end diff --git a/hw/dv/sv/mem_bkdr_util/sram_scrambler_pkg.sv b/hw/dv/sv/mem_bkdr_util/sram_scrambler_pkg.sv index f832a5f5b319b6..41c1ee4eb84b0f 100644 --- a/hw/dv/sv/mem_bkdr_util/sram_scrambler_pkg.sv +++ b/hw/dv/sv/mem_bkdr_util/sram_scrambler_pkg.sv @@ -237,10 +237,10 @@ package sram_scrambler_pkg; // SRAM data encryption is more involved, we need to run 2 rounds of PRINCE on the nonce and key // and then XOR the result with the data. // - // After that, the XORed data neeeds to them be passed through the S&P network one byte at a time. + // Optionally, the XORed data can be passed through the S&P network. function automatic state_t encrypt_sram_data(logic data[], int data_width, int sp_width, logic addr[], int addr_width, - logic key[], logic nonce[]); + logic key[], logic nonce[], bit use_sp_layer = 0); logic keystream[] = new[SRAM_BLOCK_WIDTH]; logic data_enc[] = new[data_width]; logic byte_to_enc[] = new[8]; @@ -262,31 +262,33 @@ package sram_scrambler_pkg; data_enc[i] = data[i] ^ keystream[i % ks_width]; end - if (data_width == sp_width) begin - // pass the entire word through the subst/perm network at once (the next cases would give the - // same results too, but this should be a bit more efficient) - data_enc = sp_encrypt(data_enc, data_width, zero_key); - end else if (sp_width == 8) begin - // pass each byte of the encoded result through the subst/perm network (special case of the - // general code below) - for (int i = 0; i < data_width / 8; i++) begin - byte_to_enc = data_enc[i*8 +: 8]; - enc_byte = sp_encrypt(byte_to_enc, 8, zero_key); - data_enc[i*8 +: 8] = enc_byte; - end - end else begin - // divide the word into sp_width chunks to pass it through the subst/perm network - for (int chunk_lsb = 0; chunk_lsb < data_width; chunk_lsb += sp_width) begin - int bits_remaining = data_width - chunk_lsb; - int chunk_width = (bits_remaining < sp_width) ? bits_remaining : sp_width; - logic chunk[] = new[chunk_width]; - - for (int j = 0; j < chunk_width; j++) begin - chunk[j] = data_enc[chunk_lsb + j]; + if (use_sp_layer) begin + if (data_width == sp_width) begin + // pass the entire word through the subst/perm network at once (the next cases would give the + // same results too, but this should be a bit more efficient) + data_enc = sp_encrypt(data_enc, data_width, zero_key); + end else if (sp_width == 8) begin + // pass each byte of the encoded result through the subst/perm network (special case of the + // general code below) + for (int i = 0; i < data_width / 8; i++) begin + byte_to_enc = data_enc[i*8 +: 8]; + enc_byte = sp_encrypt(byte_to_enc, 8, zero_key); + data_enc[i*8 +: 8] = enc_byte; end - chunk = sp_encrypt(chunk, chunk_width, zero_key); - for (int j = 0; j < chunk_width; j++) begin - data_enc[chunk_lsb + j] = chunk[j]; + end else begin + // divide the word into sp_width chunks to pass it through the subst/perm network + for (int chunk_lsb = 0; chunk_lsb < data_width; chunk_lsb += sp_width) begin + int bits_remaining = data_width - chunk_lsb; + int chunk_width = (bits_remaining < sp_width) ? bits_remaining : sp_width; + logic chunk[] = new[chunk_width]; + + for (int j = 0; j < chunk_width; j++) begin + chunk[j] = data_enc[chunk_lsb + j]; + end + chunk = sp_encrypt(chunk, chunk_width, zero_key); + for (int j = 0; j < chunk_width; j++) begin + data_enc[chunk_lsb + j] = chunk[j]; + end end end end @@ -296,7 +298,7 @@ package sram_scrambler_pkg; function automatic state_t decrypt_sram_data(logic data[], int data_width, int sp_width, logic addr[], int addr_width, - logic key[], logic nonce[]); + logic key[], logic nonce[], bit use_sp_layer = 0); logic keystream[] = new[SRAM_BLOCK_WIDTH]; logic data_dec[] = new[data_width]; logic byte_to_dec[] = new[8]; @@ -312,31 +314,33 @@ package sram_scrambler_pkg; // Generate the keystream keystream = gen_keystream(addr, addr_width, key, nonce); - if (data_width == sp_width) begin - // pass the entire word through the subst/perm network at once (the next cases would give the - // same results too, but this should be a bit more efficient) - data_dec = sp_decrypt(data, data_width, zero_key); - end else if (sp_width == 8) begin - // pass each byte of the data through the subst/perm network (special case of the general code - // below) - for (int i = 0; i < data_width / 8; i++) begin - byte_to_dec = data[i*8 +: 8]; - dec_byte = sp_decrypt(byte_to_dec, 8, zero_key); - data_dec[i*8 +: 8] = dec_byte; - end - end else begin - // divide the word into sp_width chunks to pass it through the subst/perm network - for (int chunk_lsb = 0; chunk_lsb < data_width; chunk_lsb += sp_width) begin - int bits_remaining = data_width - chunk_lsb; - int chunk_width = (bits_remaining < sp_width) ? bits_remaining : sp_width; - logic chunk[] = new[chunk_width]; - - for (int j = 0; j < chunk_width; j++) begin - chunk[j] = data[chunk_lsb + j]; + if (use_sp_layer) begin + if (data_width == sp_width) begin + // pass the entire word through the subst/perm network at once (the next cases would give the + // same results too, but this should be a bit more efficient) + data_dec = sp_decrypt(data, data_width, zero_key); + end else if (sp_width == 8) begin + // pass each byte of the data through the subst/perm network (special case of the general code + // below) + for (int i = 0; i < data_width / 8; i++) begin + byte_to_dec = data[i*8 +: 8]; + dec_byte = sp_decrypt(byte_to_dec, 8, zero_key); + data_dec[i*8 +: 8] = dec_byte; end - chunk = sp_decrypt(chunk, chunk_width, zero_key); - for (int j = 0; j < chunk_width; j++) begin - data_dec[chunk_lsb + j] = chunk[j]; + end else begin + // divide the word into sp_width chunks to pass it through the subst/perm network + for (int chunk_lsb = 0; chunk_lsb < data_width; chunk_lsb += sp_width) begin + int bits_remaining = data_width - chunk_lsb; + int chunk_width = (bits_remaining < sp_width) ? bits_remaining : sp_width; + logic chunk[] = new[chunk_width]; + + for (int j = 0; j < chunk_width; j++) begin + chunk[j] = data[chunk_lsb + j]; + end + chunk = sp_decrypt(chunk, chunk_width, zero_key); + for (int j = 0; j < chunk_width; j++) begin + data_dec[chunk_lsb + j] = chunk[j]; + end end end end diff --git a/hw/dv/verilator/cpp/scrambled_ecc32_mem_area.cc b/hw/dv/verilator/cpp/scrambled_ecc32_mem_area.cc index 65f2b36b610ada..1206c8656990c0 100644 --- a/hw/dv/verilator/cpp/scrambled_ecc32_mem_area.cc +++ b/hw/dv/verilator/cpp/scrambled_ecc32_mem_area.cc @@ -162,9 +162,10 @@ void ScrambledEcc32MemArea::WriteBuffer(uint8_t buf[SV_MEM_WIDTH_BYTES], std::vector ScrambledEcc32MemArea::ReadUnscrambled( const uint8_t buf[SV_MEM_WIDTH_BYTES], uint32_t src_word) const { std::vector scrambled_data(buf, buf + GetPhysWidthByte()); - return scramble_decrypt_data( - scrambled_data, GetPhysWidth(), 39, AddrIntToBytes(src_word, addr_width_), - addr_width_, GetScrambleNonce(), GetScrambleKey(), repeat_keystream_); + return scramble_decrypt_data(scrambled_data, GetPhysWidth(), 39, + AddrIntToBytes(src_word, addr_width_), + addr_width_, GetScrambleNonce(), + GetScrambleKey(), repeat_keystream_, false); } void ScrambledEcc32MemArea::ReadBuffer(std::vector &data, @@ -196,7 +197,8 @@ void ScrambledEcc32MemArea::ScrambleBuffer(uint8_t buf[SV_MEM_WIDTH_BYTES], // Scramble data with integrity scramble_buf = scramble_encrypt_data( scramble_buf, GetPhysWidth(), 39, AddrIntToBytes(dst_word, addr_width_), - addr_width_, GetScrambleNonce(), GetScrambleKey(), repeat_keystream_); + addr_width_, GetScrambleNonce(), GetScrambleKey(), repeat_keystream_, + false); // Copy scrambled data to write buffer std::copy(scramble_buf.begin(), scramble_buf.end(), &buf[0]); diff --git a/hw/ip/otbn/rtl/otbn.sv b/hw/ip/otbn/rtl/otbn.sv index 03c0b01cc13fe0..d35f08c9aa6b18 100644 --- a/hw/ip/otbn/rtl/otbn.sv +++ b/hw/ip/otbn/rtl/otbn.sv @@ -332,8 +332,7 @@ module otbn .Width (39), .Depth (ImemSizeWords), .DataBitsPerMask(39), - .EnableParity (0), - .DiffWidth (39) + .EnableParity (0) ) u_imem ( .clk_i, .rst_ni(rst_n), @@ -535,7 +534,6 @@ module otbn .Depth (DmemSizeWords), .DataBitsPerMask (39), .EnableParity (0), - .DiffWidth (39), .ReplicateKeyStream(1) ) u_dmem ( .clk_i, diff --git a/hw/ip/prim/dv/prim_ram_scr/cpp/scramble_model.cc b/hw/ip/prim/dv/prim_ram_scr/cpp/scramble_model.cc index e8f7d7af8b053c..223703bf1f2ea4 100644 --- a/hw/ip/prim/dv/prim_ram_scr/cpp/scramble_model.cc +++ b/hw/ip/prim/dv/prim_ram_scr/cpp/scramble_model.cc @@ -322,7 +322,7 @@ std::vector scramble_encrypt_data( const std::vector &data_in, uint32_t data_width, uint32_t subst_perm_width, const std::vector &addr, uint32_t addr_width, const std::vector &nonce, - const std::vector &key, bool repeat_keystream) { + const std::vector &key, bool repeat_keystream, bool use_sp_layer) { assert(data_in.size() == ((data_width + 7) / 8)); assert(addr.size() == ((addr_width + 7) / 8)); @@ -335,22 +335,30 @@ std::vector scramble_encrypt_data( auto data_enc = xor_vectors(data_in, keystream); - return scramble_subst_perm_full_width(data_enc, data_width, subst_perm_width, - true); + if (use_sp_layer) { + return scramble_subst_perm_full_width(data_enc, data_width, + subst_perm_width, true); + } else { + return data_enc; + } } std::vector scramble_decrypt_data( const std::vector &data_in, uint32_t data_width, uint32_t subst_perm_width, const std::vector &addr, uint32_t addr_width, const std::vector &nonce, - const std::vector &key, bool repeat_keystream) { + const std::vector &key, bool repeat_keystream, bool use_sp_layer) { assert(data_in.size() == ((data_width + 7) / 8)); assert(addr.size() == ((addr_width + 7) / 8)); - // Data is decrypted by reversing substitution/permutation layer then XORing - // with keystream - auto data_sp_out = scramble_subst_perm_full_width(data_in, data_width, - subst_perm_width, false); + if (use_sp_layer) { + // Data is decrypted by reversing substitution/permutation layer then XORing + // with keystream + auto data_sp_out = scramble_subst_perm_full_width(data_in, data_width, + subst_perm_width, false); + } else { + auto data_sp_out = data_in; + } auto keystream = scramble_gen_keystream(addr, addr_width, nonce, key, data_width, diff --git a/hw/ip/prim/rtl/prim_ram_1p_scr.sv b/hw/ip/prim/rtl/prim_ram_1p_scr.sv index 30655819c6d7ac..d69dfd68d925b8 100644 --- a/hw/ip/prim/rtl/prim_ram_1p_scr.sv +++ b/hw/ip/prim/rtl/prim_ram_1p_scr.sv @@ -34,8 +34,12 @@ module prim_ram_1p_scr import prim_ram_1p_pkg::*; #( // to 2*5 + 1 effective rounds. Setting this to 2 halves this to approximately 5 effective rounds. // Number of PRINCE half rounds, can be [1..5] parameter int NumPrinceRoundsHalf = 2, - // Number of extra diffusion rounds. Setting this to 0 to disable diffusion. - parameter int NumDiffRounds = 2, + // Number of extra diffusion rounds. Setting this to 0 to disables diffusion. + // NOTE: this is disabled by default, since the non-linear transformation of data bits can + // interact adversely with end-to-end ECC integrity. Only enable this if you know what you are + // doing (e.g. using this primitive in a different context with byte parity). + // See also #20788 for more context. + parameter int NumDiffRounds = 0, // This parameter governs the block-width of additional diffusion layers. // For intra-byte diffusion, set this parameter to 8. parameter int DiffWidth = DataBitsPerMask, diff --git a/hw/ip/rom_ctrl/doc/rom_ctrl_blockdiag.svg b/hw/ip/rom_ctrl/doc/rom_ctrl_blockdiag.svg index 969c1f57c57822..f4069c8d40ec1c 100644 --- a/hw/ip/rom_ctrl/doc/rom_ctrl_blockdiag.svg +++ b/hw/ip/rom_ctrl/doc/rom_ctrl_blockdiag.svg @@ -1,12 +1,5 @@ + height="779.99994" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns="http://www.w3.org/2000/svg" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:dc="http://purl.org/dc/elements/1.1/"> @@ -716,13 +716,13 @@ guidetolerance="10" inkscape:pageopacity="0" inkscape:pageshadow="2" - inkscape:window-width="1920" - inkscape:window-height="1043" + inkscape:window-width="3440" + inkscape:window-height="1376" id="namedview2851" showgrid="false" inkscape:zoom="1.7927814" - inkscape:cx="1069.3263" - inkscape:cy="296.72112" + inkscape:cx="1069.5671" + inkscape:cy="296.46671" inkscape:window-x="0" inkscape:window-y="0" inkscape:window-maximized="1" @@ -733,7 +733,8 @@ fit-margin-top="0" fit-margin-left="0" fit-margin-right="0" - fit-margin-bottom="0"> + fit-margin-bottom="0" + inkscape:pagecheckerboard="0"> S&P - - - S&P - (trunc) - \ No newline at end of file + \ No newline at end of file diff --git a/hw/ip/sram_ctrl/doc/theory_of_operation.md b/hw/ip/sram_ctrl/doc/theory_of_operation.md index f3de13bd8554cc..206d8345554294 100644 --- a/hw/ip/sram_ctrl/doc/theory_of_operation.md +++ b/hw/ip/sram_ctrl/doc/theory_of_operation.md @@ -26,11 +26,8 @@ The individual mechanisms are explained in more detail in the subsections below. ## Scrambling Primitive As explained in [`prim_ram_1p_scr`](../../prim/doc/prim_ram_1p_scr.md) the scrambling mechanism employs a reduced-round PRINCE block cipher in CTR mode to scramble the data. -Since plain CTR mode does not diffuse the data bits due to the bitwise XOR, the scheme is augmented by passing each word through a shallow substitution-permutation (S&P) network implemented with the `prim_subst_perm` primitive. -The S&P network employed is similar to the one employed in PRESENT and is explained in more detail [here](../../prim/doc/prim_ram_1p_scr.md#custom-substitution-permutation-network). -Another CTR mode augmentation that is aimed at breaking the linear address space is SRAM address scrambling. -The same S&P network construction that is used for intra-word diffusion is leveraged to non-linearly remap the SRAM address as shown in the block diagram above. +In order to break the linear address space, the CTR mode is augmented with an S&P network to non-linearly remap the SRAM address as shown in the block diagram above. The S&P network employed is similar to the one employed in PRESENT and is explained in more detail [here](../../prim/doc/prim_ram_1p_scr.md#custom-substitution-permutation-network). ### Integrity Error Handling diff --git a/hw/ip/sram_ctrl/rtl/sram_ctrl.sv b/hw/ip/sram_ctrl/rtl/sram_ctrl.sv index e2185e0214b19c..7a1035e5dd0437 100644 --- a/hw/ip/sram_ctrl/rtl/sram_ctrl.sv +++ b/hw/ip/sram_ctrl/rtl/sram_ctrl.sv @@ -492,8 +492,7 @@ module sram_ctrl .Width(DataWidth), .Depth(Depth), .EnableParity(0), - .DataBitsPerMask(DataWidth), - .DiffWidth(DataWidth) + .DataBitsPerMask(DataWidth) ) u_prim_ram_1p_scr ( .clk_i, .rst_ni,