Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid allocations in ParentageID #46729

Merged
merged 4 commits into from
Nov 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 163 additions & 0 deletions DataFormats/Provenance/interface/CompactHash.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
#ifndef DataFormats_Provenance_CompactHash_h
#define DataFormats_Provenance_CompactHash_h

#include <string_view>
#include <array>
#include <functional>

namespace cms {
class Digest;
}

namespace edm {

namespace detail {
// This string is the 16-byte, non-printable version.
std::array<unsigned char, 16> const& InvalidCompactHash();
} // namespace detail

namespace compact_hash_detail {
using value_type = std::array<unsigned char, 16>;
void toString_(std::string& result, value_type const& hash);
void toDigest_(cms::Digest& digest, value_type const& hash);
std::ostream& print_(std::ostream& os, value_type const& hash);
bool isValid_(value_type const& hash);
size_t smallHash_(value_type const& hash);
value_type fromHex_(std::string_view);
void throwIfIllFormed(std::string_view v);
} // namespace compact_hash_detail

template <int I>
class CompactHash {
public:
typedef compact_hash_detail::value_type value_type;

CompactHash();
explicit CompactHash(value_type const& v);
explicit CompactHash(std::string_view v);

CompactHash(CompactHash<I> const&) = default;
CompactHash<I>& operator=(CompactHash<I> const& iRHS) = default;

CompactHash(CompactHash<I>&&) = default;
CompactHash<I>& operator=(CompactHash<I>&&) = default;

void reset();

// For now, just check the most basic: a default constructed
// ParameterSetID is not valid. This is very crude: we are
// assuming that nobody created a ParameterSetID from an empty
// string, nor from any string that is not a valid string
// representation of an MD5 checksum.
bool isValid() const;

bool operator<(CompactHash<I> const& other) const;
bool operator>(CompactHash<I> const& other) const;
bool operator==(CompactHash<I> const& other) const;
bool operator!=(CompactHash<I> const& other) const;
std::ostream& print(std::ostream& os) const;
void toString(std::string& result) const;
void toDigest(cms::Digest& digest) const;

// Return the 16-byte (non-printable) string form.
value_type const& compactForm() const;

///returns a short hash which can be used with hashing containers
size_t smallHash() const;

//Used by ROOT storage
// CMS_CLASS_VERSION(3) // This macro is not defined here, so expand it.
static short Class_Version() { return 3; }

private:
template <typename Op>
bool compareUsing(CompactHash<I> const& iOther, Op op) const {
return op(this->hash_, iOther.hash_);
}

value_type hash_;
};

//--------------------------------------------------------------------
//
// Implementation details follow...
//--------------------------------------------------------------------

template <int I>
inline CompactHash<I>::CompactHash() : hash_(detail::InvalidCompactHash()) {}

template <int I>
inline CompactHash<I>::CompactHash(value_type const& v) : hash_(v) {}

template <int I>
inline CompactHash<I>::CompactHash(std::string_view v) {
if (v.size() == 32) {
hash_ = compact_hash_detail::fromHex_(v);
} else {
compact_hash_detail::throwIfIllFormed(v);
std::copy(v.begin(), v.end(), hash_.begin());
}
}

template <int I>
inline void CompactHash<I>::reset() {
hash_ = detail::InvalidCompactHash();
}

template <int I>
inline bool CompactHash<I>::isValid() const {
return compact_hash_detail::isValid_(hash_);
}

template <int I>
inline bool CompactHash<I>::operator<(CompactHash<I> const& other) const {
return this->compareUsing(other, std::less<value_type>());
}

template <int I>
inline bool CompactHash<I>::operator>(CompactHash<I> const& other) const {
return this->compareUsing(other, std::greater<value_type>());
}

template <int I>
inline bool CompactHash<I>::operator==(CompactHash<I> const& other) const {
return this->compareUsing(other, std::equal_to<value_type>());
}

template <int I>
inline bool CompactHash<I>::operator!=(CompactHash<I> const& other) const {
return this->compareUsing(other, std::not_equal_to<value_type>());
}

template <int I>
inline std::ostream& CompactHash<I>::print(std::ostream& os) const {
return compact_hash_detail::print_(os, hash_);
}

template <int I>
inline void CompactHash<I>::toString(std::string& result) const {
compact_hash_detail::toString_(result, hash_);
}

template <int I>
inline void CompactHash<I>::toDigest(cms::Digest& digest) const {
compact_hash_detail::toDigest_(digest, hash_);
}

template <int I>
inline typename CompactHash<I>::value_type const& CompactHash<I>::compactForm() const {
return hash_;
}

template <int I>
inline size_t CompactHash<I>::smallHash() const {
return compact_hash_detail::smallHash_(hash_);
}

template <int I>
inline std::ostream& operator<<(std::ostream& os, CompactHash<I> const& h) {
return h.print(os);
}

} // namespace edm
#endif
7 changes: 5 additions & 2 deletions DataFormats/Provenance/interface/ParentageID.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
#define DataFormats_Provenance_ParentageID_h

#include "DataFormats/Provenance/interface/HashedTypes.h"
#include "DataFormats/Provenance/interface/CompactHash.h"
#include "DataFormats/Provenance/interface/Hash.h"

namespace edm {
typedef Hash<ParentageType> ParentageID;
}
using ParentageID = CompactHash<ParentageType>;

using StoredParentageID = Hash<ParentageType>;
} // namespace edm

#endif
3 changes: 1 addition & 2 deletions DataFormats/Provenance/interface/ProductProvenance.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ namespace edm {
ParentageID const& parentageID() const { return parentageID_; }
Parentage const& parentage() const;

void set(ParentageID id) { parentageID_ = std::move(id); }
ParentageID moveParentageID() { return std::move(parentageID_); }
void set(ParentageID const& id) { parentageID_ = id; }

private:
BranchID branchID_;
Expand Down
6 changes: 3 additions & 3 deletions DataFormats/Provenance/interface/ProductProvenanceLookup.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ namespace edm {
ProductProvenanceLookup& operator=(ProductProvenanceLookup const&) = delete;

ProductProvenance const* branchIDToProvenance(BranchID const& bid) const;
void insertIntoSet(ProductProvenance provenanceProduct) const;
void insertIntoSet(ProductProvenance const& provenanceProduct) const;
ProductProvenance const* branchIDToProvenanceForProducedOnly(BranchID const& bid) const;

void update(edm::ProductRegistry const&);
Expand Down Expand Up @@ -78,8 +78,8 @@ namespace edm {

bool isParentageSet() const noexcept { return isParentageSet_.load(std::memory_order_acquire); }

void threadsafe_set(ParentageID id) const {
provenance_.set(std::move(id));
void threadsafe_set(ParentageID const& id) const {
provenance_.set(id);
isParentageSet_.store(true, std::memory_order_release);
}

Expand Down
71 changes: 71 additions & 0 deletions DataFormats/Provenance/src/CompactHash.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#include "DataFormats/Provenance/interface/CompactHash.h"
#include "FWCore/Utilities/interface/Algorithms.h"
#include "FWCore/Utilities/interface/Digest.h"
#include "FWCore/Utilities/interface/EDMException.h"

#include <functional>
#include <cassert>

namespace {
std::array<unsigned char, 16> convert(std::string const& v) {
assert(v.size() == 16);
std::array<unsigned char, 16> retValue;
std::copy(v.begin(), v.end(), retValue.begin());
return retValue;
}
} // namespace
namespace edm {
namespace detail {
// This string is the 16-byte, non-printable version.
std::array<unsigned char, 16> const& InvalidCompactHash() {
static std::array<unsigned char, 16> const invalid = convert(cms::MD5Result().compactForm());
return invalid;
}
} // namespace detail

namespace compact_hash_detail {
size_t smallHash_(value_type const& hash) {
//NOTE: In future we could try to xor the first 8bytes into the second 8bytes of the string to make the hash
std::hash<std::string_view> h;
return h(std::string_view(reinterpret_cast<const char*>(hash.data()), hash.size()));
}

std::array<unsigned char, 16> fromHex_(std::string_view v) {
cms::MD5Result temp;
temp.fromHexifiedString(v);
auto hash = temp.compactForm();
std::array<unsigned char, 16> ret;
std::copy(hash.begin(), hash.end(), ret.begin());
return ret;
}

bool isValid_(value_type const& hash) { return hash != detail::InvalidCompactHash(); }

void throwIfIllFormed(std::string_view v) {
// Fixup not needed here.
if (v.size() != 16) {
throw Exception(errors::LogicError) << "Ill-formed CompactHash instance. "
<< "A string_view of size " << v.size() << " passed to constructor.";
}
}

void toString_(std::string& result, value_type const& hash) {
cms::MD5Result temp;
copy_all(hash, temp.bytes.begin());
result += temp.toString();
}

void toDigest_(cms::Digest& digest, value_type const& hash) {
cms::MD5Result temp;
copy_all(hash, temp.bytes.begin());
digest.append(temp.toString());
}

std::ostream& print_(std::ostream& os, value_type const& hash) {
cms::MD5Result temp;
copy_all(hash, temp.bytes.begin());
os << temp.toString();
return os;
}
} // namespace compact_hash_detail
} // namespace edm
18 changes: 11 additions & 7 deletions DataFormats/Provenance/src/Parentage.cc
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#include "DataFormats/Provenance/interface/Parentage.h"
#include "FWCore/Utilities/interface/Digest.h"
#include <ostream>
#include <charconv>
#include <sstream>
//#include <cassert>

/*----------------------------------------------------------------------

Expand All @@ -15,14 +16,17 @@ namespace edm {
Parentage::Parentage(std::vector<BranchID>&& parents) : parents_(std::move(parents)) {}

ParentageID Parentage::id() const {
std::ostringstream oss;
//10 is the maximum number of digits for a 2^32 number
std::array<char, 10 + 1> buf;
cms::Digest md5alg;
for (auto const& parent : parents_) {
oss << parent << ' ';
//assert(start < end);
auto res = std::to_chars(buf.data(), buf.data() + buf.size(), parent.id());
//assert(res.ec == std::errc());
*res.ptr = ' ';
md5alg.append(buf.data(), res.ptr - buf.data() + 1);
}

std::string stringrep = oss.str();
cms::Digest md5alg(stringrep);
ParentageID id(md5alg.digest().toString());
ParentageID id(md5alg.digest().bytes);
return id;
}

Expand Down
4 changes: 2 additions & 2 deletions DataFormats/Provenance/src/ProductProvenanceLookup.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ namespace edm {
setupEntryInfoSet(iReg);
}

void ProductProvenanceLookup::insertIntoSet(ProductProvenance entryInfo) const {
void ProductProvenanceLookup::insertIntoSet(ProductProvenance const& entryInfo) const {
//NOTE:do not read provenance here because we only need the full
// provenance when someone tries to access it not when doing the insert
// doing the delay saves 20% of time when doing an analysis job
Expand All @@ -53,7 +53,7 @@ namespace edm {
throw edm::Exception(edm::errors::LogicError) << "ProductProvenanceLookup::insertIntoSet passed a BranchID "
<< entryInfo.branchID().id() << " that has not been pre-registered";
}
itFound->threadsafe_set(entryInfo.moveParentageID());
itFound->threadsafe_set(entryInfo.parentageID());
}

ProductProvenance const* ProductProvenanceLookup::branchIDToProvenance(BranchID const& bid) const {
Expand Down
9 changes: 8 additions & 1 deletion DataFormats/Provenance/src/classes_def.xml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@
<class name="edm::BranchChildren" ClassVersion="10">
<version ClassVersion="10" checksum="137742168"/>
</class>
<class name="edm::ProductProvenance" ClassVersion="11">
<class name="edm::ProductProvenance" ClassVersion="12">
<version ClassVersion="12" checksum="737844716"/>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we store edm::ProductProvenance in the present file format, or only in some old file format?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From what I can tell, we do not store it now. I had changed the class without updating the iorule and only our backwards compatible file format test failed.

<version ClassVersion="11" checksum="3594205707"/>
<version ClassVersion="10" checksum="3685381930"/>
</class>
Expand Down Expand Up @@ -162,6 +163,7 @@
<class name="std::pair<const edm::ParameterSetID,edm::ParameterSetBlob>"/>
<class name="edm::ProcessConfigurationID"/>
<class name="std::vector<edm::ProcessConfigurationID>"/>
<class name="edm::StoredParentageID"/>
<class name="edm::ParentageID"/>
<class name="edm::Parentage" ClassVersion="11">
<version ClassVersion="11" checksum="3091321815"/>
Expand Down Expand Up @@ -259,5 +261,10 @@
newObj->initializeTransients();
]]>
</ioread>
<ioread sourceClass="edm::Hash<5>" targetClass="edm::CompactHash<5>" version="[1-]" source="std::string hash_" target="hash_">
<![CDATA[
*newObj = edm::CompactHash<5>(onfile.hash_);
]]>
</ioread>

</lcgdict>
7 changes: 5 additions & 2 deletions DataFormats/Provenance/test/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
<bin name="testDataFormatsProvenance" file="testRunner.cpp,eventid_t.cppunit.cc,timestamp_t.cppunit.cc,parametersetid_t.cppunit.cc,indexIntoFile_t.cppunit.cc,indexIntoFile1_t.cppunit.cc,indexIntoFile2_t.cppunit.cc,indexIntoFile3_t.cppunit.cc,indexIntoFile4_t.cppunit.cc,indexIntoFile5_t.cppunit.cc,lumirange_t.cppunit.cc,eventrange_t.cppunit.cc,compactEventAuxiliaryVector_t.cppunit.cc">
<use name="cppunit"/>
<use name="DataFormats/Provenance"/>
<bin name="testDataFormatsProvenance" file="testRunner.cpp,eventid_t.cppunit.cc,timestamp_t.cppunit.cc,parametersetid_t.cppunit.cc,indexIntoFile_t.cppunit.cc,indexIntoFile1_t.cppunit.cc,indexIntoFile2_t.cppunit.cc,indexIntoFile3_t.cppunit.cc,indexIntoFile4_t.cppunit.cc,indexIntoFile5_t.cppunit.cc,lumirange_t.cppunit.cc,eventrange_t.cppunit.cc,compactEventAuxiliaryVector_t.cppunit.cc">
</bin>

<bin name="testDataFormatsProvenanceCatch2" file="ElementID_t.cpp,Parentage_t.cpp,StoredProcessBlockHelper_t.cpp">
<bin name="testDataFormatsProvenanceCatch2" file="ElementID_t.cpp,Parentage_t.cpp,StoredProcessBlockHelper_t.cpp,CompactHash_t.cpp">
<use name="DataFormats/Provenance"/>
<use name="catch2"/>
</bin>

<bin name="testProductResolverIndexHelper" file="testRunner.cpp productResolverIndexHelper_t.cppunit.cc">
<use name="DataFormats/TestObjects"/>
<use name="cppunit"/>
<use name="DataFormats/Provenance"/>
</bin>
Loading