diff --git a/DataFormats/Provenance/interface/CompactHash.h b/DataFormats/Provenance/interface/CompactHash.h new file mode 100644 index 0000000000000..7f9517d60a5b4 --- /dev/null +++ b/DataFormats/Provenance/interface/CompactHash.h @@ -0,0 +1,163 @@ +#ifndef DataFormats_Provenance_CompactHash_h +#define DataFormats_Provenance_CompactHash_h + +#include +#include +#include + +namespace cms { + class Digest; +} + +namespace edm { + + namespace detail { + // This array is the 16-byte, non-printable version. + std::array const& InvalidCompactHash(); + } // namespace detail + + namespace compact_hash_detail { + using value_type = std::array; + void toString_(std::string& result, value_type const& hash); + void toDigest_(cms::Digest& digest, value_type const& hash); + std::ostream& print_(std::ostream& os, value_type const& hash); + bool isValid_(value_type const& hash); + size_t smallHash_(value_type const& hash); + value_type fromHex_(std::string_view); + void throwIfIllFormed(std::string_view v); + } // namespace compact_hash_detail + + template + class CompactHash { + public: + typedef compact_hash_detail::value_type value_type; + + CompactHash(); + explicit CompactHash(value_type const& v); + explicit CompactHash(std::string_view v); + + CompactHash(CompactHash const&) = default; + CompactHash& operator=(CompactHash const& iRHS) = default; + + CompactHash(CompactHash&&) = default; + CompactHash& operator=(CompactHash&&) = default; + + void reset(); + + // For now, just check the most basic: a default constructed + // CompactHash is not valid. This is very crude: we are + // assuming that nobody created a CompactHash from an empty + // string, nor from any string that is not a valid string + // representation of an MD5 checksum. 
+ bool isValid() const; + + bool operator<(CompactHash const& other) const; + bool operator>(CompactHash const& other) const; + bool operator==(CompactHash const& other) const; + bool operator!=(CompactHash const& other) const; + std::ostream& print(std::ostream& os) const; + void toString(std::string& result) const; + void toDigest(cms::Digest& digest) const; + + // Return the 16-byte (non-printable) array form. + value_type const& compactForm() const; + + ///returns a short hash which can be used with hashing containers + size_t smallHash() const; + + //Used by ROOT storage + // CMS_CLASS_VERSION(3) // This macro is not defined here, so expand it. + static short Class_Version() { return 3; } + + private: + template + bool compareUsing(CompactHash const& iOther, Op op) const { + return op(this->hash_, iOther.hash_); + } + + value_type hash_; + }; + + //-------------------------------------------------------------------- + // + // Implementation details follow... + //-------------------------------------------------------------------- + + template + inline CompactHash::CompactHash() : hash_(detail::InvalidCompactHash()) {} + + template + inline CompactHash::CompactHash(value_type const& v) : hash_(v) {} + + template + inline CompactHash::CompactHash(std::string_view v) { + if (v.size() == 32) { + hash_ = compact_hash_detail::fromHex_(v); + } else { + compact_hash_detail::throwIfIllFormed(v); + std::copy(v.begin(), v.end(), hash_.begin()); + } + } + + template + inline void CompactHash::reset() { + hash_ = detail::InvalidCompactHash(); + } + + template + inline bool CompactHash::isValid() const { + return compact_hash_detail::isValid_(hash_); + } + + template + inline bool CompactHash::operator<(CompactHash const& other) const { + return this->compareUsing(other, std::less()); + } + + template + inline bool CompactHash::operator>(CompactHash const& other) const { + return this->compareUsing(other, std::greater()); + } + + template + inline bool 
CompactHash::operator==(CompactHash const& other) const { + return this->compareUsing(other, std::equal_to()); + } + + template + inline bool CompactHash::operator!=(CompactHash const& other) const { + return this->compareUsing(other, std::not_equal_to()); + } + + template + inline std::ostream& CompactHash::print(std::ostream& os) const { + return compact_hash_detail::print_(os, hash_); + } + + template + inline void CompactHash::toString(std::string& result) const { + compact_hash_detail::toString_(result, hash_); + } + + template + inline void CompactHash::toDigest(cms::Digest& digest) const { + compact_hash_detail::toDigest_(digest, hash_); + } + + template + inline typename CompactHash::value_type const& CompactHash::compactForm() const { + return hash_; + } + + template + inline size_t CompactHash::smallHash() const { + return compact_hash_detail::smallHash_(hash_); + } + + template + inline std::ostream& operator<<(std::ostream& os, CompactHash const& h) { + return h.print(os); + } + +} // namespace edm +#endif diff --git a/DataFormats/Provenance/interface/ParentageID.h b/DataFormats/Provenance/interface/ParentageID.h index 4721d99b0525a..7d1b3e75a4f0f 100644 --- a/DataFormats/Provenance/interface/ParentageID.h +++ b/DataFormats/Provenance/interface/ParentageID.h @@ -2,10 +2,13 @@ #define DataFormats_Provenance_ParentageID_h #include "DataFormats/Provenance/interface/HashedTypes.h" +#include "DataFormats/Provenance/interface/CompactHash.h" #include "DataFormats/Provenance/interface/Hash.h" namespace edm { - typedef Hash ParentageID; -} + using ParentageID = CompactHash; + + using StoredParentageID = Hash; +} // namespace edm #endif diff --git a/DataFormats/Provenance/interface/ProductProvenance.h b/DataFormats/Provenance/interface/ProductProvenance.h index 585e392470f44..32eab1d5b0dac 100644 --- a/DataFormats/Provenance/interface/ProductProvenance.h +++ b/DataFormats/Provenance/interface/ProductProvenance.h @@ -39,8 +39,7 @@ namespace edm { ParentageID 
const& parentageID() const { return parentageID_; } Parentage const& parentage() const; - void set(ParentageID id) { parentageID_ = std::move(id); } - ParentageID moveParentageID() { return std::move(parentageID_); } + void set(ParentageID const& id) { parentageID_ = id; } private: BranchID branchID_; diff --git a/DataFormats/Provenance/interface/ProductProvenanceLookup.h b/DataFormats/Provenance/interface/ProductProvenanceLookup.h index 5b1311cad7cb9..8d820eff723a8 100644 --- a/DataFormats/Provenance/interface/ProductProvenanceLookup.h +++ b/DataFormats/Provenance/interface/ProductProvenanceLookup.h @@ -33,7 +33,7 @@ namespace edm { ProductProvenanceLookup& operator=(ProductProvenanceLookup const&) = delete; ProductProvenance const* branchIDToProvenance(BranchID const& bid) const; - void insertIntoSet(ProductProvenance provenanceProduct) const; + void insertIntoSet(ProductProvenance const& provenanceProduct) const; ProductProvenance const* branchIDToProvenanceForProducedOnly(BranchID const& bid) const; void update(edm::ProductRegistry const&); @@ -78,8 +78,8 @@ namespace edm { bool isParentageSet() const noexcept { return isParentageSet_.load(std::memory_order_acquire); } - void threadsafe_set(ParentageID id) const { - provenance_.set(std::move(id)); + void threadsafe_set(ParentageID const& id) const { + provenance_.set(id); isParentageSet_.store(true, std::memory_order_release); } diff --git a/DataFormats/Provenance/src/CompactHash.cc b/DataFormats/Provenance/src/CompactHash.cc new file mode 100644 index 0000000000000..593544069a1b8 --- /dev/null +++ b/DataFormats/Provenance/src/CompactHash.cc @@ -0,0 +1,71 @@ +#include "DataFormats/Provenance/interface/CompactHash.h" +#include "FWCore/Utilities/interface/Algorithms.h" +#include "FWCore/Utilities/interface/Digest.h" +#include "FWCore/Utilities/interface/EDMException.h" + +#include +#include + +namespace { + std::array convert(std::string const& v) { + assert(v.size() == 16); + std::array retValue; + 
std::copy(v.begin(), v.end(), retValue.begin()); + return retValue; + } +} // namespace +namespace edm { + namespace detail { + // This string is the 16-byte, non-printable version. + std::array const& InvalidCompactHash() { + static std::array const invalid = convert(cms::MD5Result().compactForm()); + return invalid; + } + } // namespace detail + + namespace compact_hash_detail { + size_t smallHash_(value_type const& hash) { + //NOTE: In future we could try to xor the first 8bytes into the second 8bytes of the string to make the hash + std::hash h; + return h(std::string_view(reinterpret_cast(hash.data()), hash.size())); + } + + std::array fromHex_(std::string_view v) { + cms::MD5Result temp; + temp.fromHexifiedString(v); + auto hash = temp.compactForm(); + std::array ret; + std::copy(hash.begin(), hash.end(), ret.begin()); + return ret; + } + + bool isValid_(value_type const& hash) { return hash != detail::InvalidCompactHash(); } + + void throwIfIllFormed(std::string_view v) { + // Fixup not needed here. + if (v.size() != 16) { + throw Exception(errors::LogicError) << "Ill-formed CompactHash instance. 
" + << "A string_view of size " << v.size() << " passed to constructor."; + } + } + + void toString_(std::string& result, value_type const& hash) { + cms::MD5Result temp; + copy_all(hash, temp.bytes.begin()); + result += temp.toString(); + } + + void toDigest_(cms::Digest& digest, value_type const& hash) { + cms::MD5Result temp; + copy_all(hash, temp.bytes.begin()); + digest.append(temp.toString()); + } + + std::ostream& print_(std::ostream& os, value_type const& hash) { + cms::MD5Result temp; + copy_all(hash, temp.bytes.begin()); + os << temp.toString(); + return os; + } + } // namespace compact_hash_detail +} // namespace edm diff --git a/DataFormats/Provenance/src/Parentage.cc b/DataFormats/Provenance/src/Parentage.cc index 60e7b2bfc416b..43c3f5b7f1e81 100644 --- a/DataFormats/Provenance/src/Parentage.cc +++ b/DataFormats/Provenance/src/Parentage.cc @@ -1,7 +1,8 @@ #include "DataFormats/Provenance/interface/Parentage.h" #include "FWCore/Utilities/interface/Digest.h" -#include +#include #include +//#include /*---------------------------------------------------------------------- @@ -15,14 +16,17 @@ namespace edm { Parentage::Parentage(std::vector&& parents) : parents_(std::move(parents)) {} ParentageID Parentage::id() const { - std::ostringstream oss; + //10 is the maximum number of digits for a 2^32 number + std::array buf; + cms::Digest md5alg; for (auto const& parent : parents_) { - oss << parent << ' '; + //assert(start < end); + auto res = std::to_chars(buf.data(), buf.data() + buf.size(), parent.id()); + //assert(res.ec == std::errc()); + *res.ptr = ' '; + md5alg.append(buf.data(), res.ptr - buf.data() + 1); } - - std::string stringrep = oss.str(); - cms::Digest md5alg(stringrep); - ParentageID id(md5alg.digest().toString()); + ParentageID id(md5alg.digest().bytes); return id; } diff --git a/DataFormats/Provenance/src/ProductProvenanceLookup.cc b/DataFormats/Provenance/src/ProductProvenanceLookup.cc index 366406e509aad..78d598e03f4ab 100644 --- 
a/DataFormats/Provenance/src/ProductProvenanceLookup.cc +++ b/DataFormats/Provenance/src/ProductProvenanceLookup.cc @@ -39,7 +39,7 @@ namespace edm { setupEntryInfoSet(iReg); } - void ProductProvenanceLookup::insertIntoSet(ProductProvenance entryInfo) const { + void ProductProvenanceLookup::insertIntoSet(ProductProvenance const& entryInfo) const { //NOTE:do not read provenance here because we only need the full // provenance when someone tries to access it not when doing the insert // doing the delay saves 20% of time when doing an analysis job @@ -53,7 +53,7 @@ namespace edm { throw edm::Exception(edm::errors::LogicError) << "ProductProvenanceLookup::insertIntoSet passed a BranchID " << entryInfo.branchID().id() << " that has not been pre-registered"; } - itFound->threadsafe_set(entryInfo.moveParentageID()); + itFound->threadsafe_set(entryInfo.parentageID()); } ProductProvenance const* ProductProvenanceLookup::branchIDToProvenance(BranchID const& bid) const { diff --git a/DataFormats/Provenance/src/classes_def.xml b/DataFormats/Provenance/src/classes_def.xml index 0cd4c91e3409d..cdb733eff0e4a 100644 --- a/DataFormats/Provenance/src/classes_def.xml +++ b/DataFormats/Provenance/src/classes_def.xml @@ -73,7 +73,8 @@ - + + @@ -162,6 +163,7 @@ + @@ -259,5 +261,10 @@ newObj->initializeTransients(); ]]> + + (onfile.hash_); + ]]> + diff --git a/DataFormats/Provenance/test/BuildFile.xml b/DataFormats/Provenance/test/BuildFile.xml index d5cb05c7beeec..688b612b633ba 100644 --- a/DataFormats/Provenance/test/BuildFile.xml +++ b/DataFormats/Provenance/test/BuildFile.xml @@ -1,12 +1,15 @@ + - - + + + + diff --git a/DataFormats/Provenance/test/CompactHash_t.cpp b/DataFormats/Provenance/test/CompactHash_t.cpp new file mode 100644 index 0000000000000..5cef47f2a6a8e --- /dev/null +++ b/DataFormats/Provenance/test/CompactHash_t.cpp @@ -0,0 +1,35 @@ +#include "catch.hpp" + +#include "DataFormats/Provenance/interface/CompactHash.h" +#include "FWCore/Utilities/interface/Digest.h" + 
+namespace { + using TestHash = edm::CompactHash<100>; +} + +TEST_CASE("CompactHash", "[CompactHash]") { + SECTION("Default construction is invalid") { REQUIRE(TestHash{}.isValid() == false); } + + SECTION("Basic operations") { + cms::Digest d("foo"); + auto result = d.digest().bytes; + + TestHash id{result}; + REQUIRE(id.isValid() == true); + REQUIRE(id.compactForm() == result); + + TestHash id2 = id; + REQUIRE(id2.isValid() == true); + REQUIRE(id2.compactForm() == result); + + cms::Digest b("bar"); + auto diffResult = b.digest().bytes; + REQUIRE(id2 == TestHash{result}); + REQUIRE(id2 != TestHash{diffResult}); + + REQUIRE(id2 > TestHash{diffResult}); + REQUIRE(TestHash{diffResult} < id2); + + REQUIRE(not(id2 < id2)); + } +} diff --git a/DataFormats/Provenance/test/Parentage_t.cpp b/DataFormats/Provenance/test/Parentage_t.cpp index 347f4a8964efd..e65d3f2c7f1fd 100644 --- a/DataFormats/Provenance/test/Parentage_t.cpp +++ b/DataFormats/Provenance/test/Parentage_t.cpp @@ -32,4 +32,35 @@ TEST_CASE("test Parentage", "[Parentage]") { edm::ParentageID id4 = ed4.id(); CHECK(ed4 == ed2); CHECK(id4 == id2); + + SECTION("ParentageID unchanging") { + { + const std::string idString = "d41d8cd98f00b204e9800998ecf8427e"; + std::string toString; + id1.toString(toString); + CHECK(toString == idString); + } + + { + const std::string idString = "2e5751b7cfd7f053cd29e946fb2649a4"; + std::string toString; + id2.toString(toString); + CHECK(toString == idString); + } + { + const std::string idString = "20e13ca818af45e50e369e50db3914b8"; + std::string toString; + id3.toString(toString); + CHECK(toString == idString); + } + { + edm::Parentage ed_mult; + ed_mult.setParents(std::vector({edm::BranchID(1), edm::BranchID(2), edm::BranchID(3)})); + auto id_mult = ed_mult.id(); + const std::string idString = "6a5cf1697e50ec8e8dbe7a28ccad348b"; + std::string toString; + id_mult.toString(toString); + CHECK(toString == idString); + } + } } diff --git a/FWCore/Utilities/interface/Digest.h 
b/FWCore/Utilities/interface/Digest.h index 05dc6927db724..37d8de4322e64 100644 --- a/FWCore/Utilities/interface/Digest.h +++ b/FWCore/Utilities/interface/Digest.h @@ -5,6 +5,7 @@ #include #include +#include #include namespace cms { @@ -27,7 +28,7 @@ namespace cms { std::string compactForm() const; // Set our data from the given hexdigest string. - void fromHexifiedString(std::string const& s); + void fromHexifiedString(std::string_view); bool isValid() const; }; @@ -48,9 +49,12 @@ namespace cms { public: Digest(); explicit Digest(std::string const& s); + explicit Digest(std::string_view); + explicit Digest(const char*); void append(std::string const& s); void append(const char* data, size_t size); + void append(std::string_view v); MD5Result digest(); diff --git a/FWCore/Utilities/src/Digest.cc b/FWCore/Utilities/src/Digest.cc index aedb8fafe7fa5..ba4b26ec69194 100644 --- a/FWCore/Utilities/src/Digest.cc +++ b/FWCore/Utilities/src/Digest.cc @@ -114,13 +114,13 @@ namespace cms { return std::string(p, p + bytes.size()); } - void MD5Result::fromHexifiedString(std::string const& hexy) { + void MD5Result::fromHexifiedString(std::string_view hexy) { switch (hexy.size()) { case 0: { set_to_default(*this); } break; case 32: { - std::string::const_iterator it = hexy.begin(); + auto it = hexy.cbegin(); for (size_t i = 0; i != 16; ++i) { // first nybble bytes[i] = (unhexify(*it++) << 4); @@ -158,11 +158,26 @@ namespace cms { this->append(s); } + Digest::Digest(std::string_view v) : state_() { + md5_init(&state_); + this->append(v); + } + + Digest::Digest(const char* s) : state_() { + md5_init(&state_); + this->append(s, strlen(s)); + } + void Digest::append(std::string const& s) { const md5_byte_t* data = reinterpret_cast(s.data()); md5_append(&state_, const_cast(data), s.size()); } + void Digest::append(std::string_view v) { + const md5_byte_t* data = reinterpret_cast(v.data()); + md5_append(&state_, const_cast(data), v.size()); + } + void Digest::append(const char* s, 
size_t size) { const md5_byte_t* data = reinterpret_cast(s); md5_append(&state_, const_cast(data), size); diff --git a/FWCore/Utilities/test/test_catch2_Digest.cc b/FWCore/Utilities/test/test_catch2_Digest.cc index ef834dbd85316..f5aeb10418de7 100644 --- a/FWCore/Utilities/test/test_catch2_Digest.cc +++ b/FWCore/Utilities/test/test_catch2_Digest.cc @@ -26,7 +26,7 @@ namespace { TEST_CASE("Test cms::Digest", "[Digest]") { SECTION("Identical") { Digest dig1; - dig1.append("hello"); + dig1.append(std::string_view("hello")); Digest dig2("hello"); MD5Result r1 = dig1.digest(); @@ -81,4 +81,18 @@ TEST_CASE("Test cms::Digest", "[Digest]") { } } } + SECTION("append equal") { + std::string full("aldjfakl\tsdjf34234 \najdf"); + Digest full_digest{full}; + MD5Result full_r = full_digest.digest(); + REQUIRE(full_r.isValid()); + + Digest append_digest; + append_digest.append(std::string_view(full.data(), 10)); + append_digest.append(std::string_view(full.data() + 10, 10)); + append_digest.append(std::string_view(full.data() + 20, 4)); + MD5Result append_r = append_digest.digest(); + REQUIRE(append_r.isValid()); + REQUIRE(full_r == append_r); + } }