Skip to content

Commit

Permalink
Fix #148 Binary SDP format is larger than json format if blocks conta…
Browse files Browse the repository at this point in the history
…in many zeros

If a BigFloat is zero, we write only a boolean flag indicating that.
Otherwise, we write the flag (= false) followed by the serialized bytes.

This makes zeros ~100x more compact (for precision=768, BigFloat needs ~100B).

TODO: maybe we can use the same trick for MPI communication
  • Loading branch information
vasdommes committed Nov 15, 2023
1 parent 0a3b37a commit 855e839
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 14 deletions.
27 changes: 26 additions & 1 deletion src/boost_serialization.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,31 @@ namespace boost::serialization

template <class Archive>
void serialize(Archive &ar, El::BigFloat &f,
const boost::serialization::version_type &)
const boost::serialization::version_type &version)
{
// SDP blocks may contain many zeros.
// In that case binary format can be even less compact than json,
// see e.g. https://github.com/davidsd/sdpb/issues/148
// To optimize storing zeros, we use boolean flag is_zero.
// if (is_zero == true), we don't serialize all BigFloat bytes.
// Optimization introduced since version = 1

const El::BigFloat zero(0);
bool is_zero = false;
if(version > 0)
{
if(Archive::is_saving::value)
is_zero = (f == zero);
ar & is_zero;
}

if(is_zero)
{
if(Archive::is_loading::value)
f = zero;
return;
}

auto size = f.SerializedSize();
std::vector<El::byte> vec(size);
El::byte *buffer = vec.data();
Expand Down Expand Up @@ -63,4 +86,6 @@ namespace boost::serialization
}
}

BOOST_CLASS_VERSION(El::BigFloat, 1)

BOOST_SERIALIZATION_SPLIT_FREE(El::Matrix<El::BigFloat>)
38 changes: 36 additions & 2 deletions test/src/unit_tests/cases/block_data_serialization.test.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using Test_Util::random_bigfloat;
using Test_Util::random_matrix;
using Test_Util::random_vector;
using Test_Util::zero_matrix;
using Test_Util::REQUIRE_Equal::diff;

void write_block_data(std::ostream &os, const Dual_Constraint_Group &group,
Expand Down Expand Up @@ -62,6 +63,22 @@ namespace
group.bilinear_bases[1] = random_matrix(12, 24);
return group;
}

// Builds a Dual_Constraint_Group whose matrices and constants are all zero.
// Sizes match block_0 of the integration test
// end-to-end_tests/SingletScalar_cT_test_nmax6/primal_dual_optimal
Dual_Constraint_Group zero_group_from_singlet_scalar_block_0()
{
  // Height and width of the constraint matrix.
  const int num_rows = 24;
  const int num_cols = 20;

  Dual_Constraint_Group result;
  result.dim = 1;
  result.num_points = 24;

  // Zero-filled constraint data: matrix via zero_matrix(), constants via
  // the vector fill constructor.
  result.constraint_matrix = zero_matrix(num_rows, num_cols);
  result.constraint_constants = std::vector<El::BigFloat>(num_rows, 0);

  // Both bilinear bases are 12 x 24 zero matrices.
  result.bilinear_bases[0] = zero_matrix(12, 24);
  result.bilinear_bases[1] = zero_matrix(12, 24);

  return result;
}
}

TEST_CASE("block_data serialization")
Expand All @@ -72,12 +89,15 @@ TEST_CASE("block_data serialization")

El::InitializeRandom(true);
Dual_Constraint_Group group = random_group_from_singlet_scalar_block_0();
Dual_Constraint_Group zero_group = zero_group_from_singlet_scalar_block_0();

Block_File_Format format = GENERATE(bin, json);
DYNAMIC_SECTION((format == bin ? ".bin" : ".json"))
{
auto other = serialize_deserialize(group, format);
DIFF(group, other);
other = serialize_deserialize(zero_group, format);
DIFF(zero_group, other);
}
}

Expand All @@ -89,11 +109,14 @@ TEST_CASE("benchmark block_data write+parse", "[!benchmark]")

El::InitializeRandom(true);
Dual_Constraint_Group group = random_group_from_singlet_scalar_block_0();
Dual_Constraint_Group zero_group = zero_group_from_singlet_scalar_block_0();

// Change constraint_matrix size to see how bin/json scales
int B_width = GENERATE(20, 100, 1000, 10000);
group.constraint_matrix
= random_matrix(group.constraint_matrix.Height(), B_width);
zero_group.constraint_matrix
= zero_matrix(zero_group.constraint_matrix.Height(), B_width);

int total_count = 0;
total_count += group.constraint_constants.size();
Expand All @@ -108,8 +131,19 @@ TEST_CASE("benchmark block_data write+parse", "[!benchmark]")
// We could put these benchmarks into different DYNAMIC_SECTION's,
// using Block_File_Format format = GENERATE(bin, json);
// But it would make output less concise
BENCHMARK("write+parse bin") { return serialize_deserialize(group, bin); };
BENCHMARK("write+parse JSON")
BENCHMARK("write+parse bin zero")
{
return serialize_deserialize(zero_group, bin);
};
BENCHMARK("write+parse bin nonzero")
{
return serialize_deserialize(group, bin);
};
BENCHMARK("write+parse JSON zero")
{
return serialize_deserialize(zero_group, json);
};
BENCHMARK("write+parse JSON nonzero")
{
return serialize_deserialize(group, json);
};
Expand Down
42 changes: 31 additions & 11 deletions test/src/unit_tests/cases/boost_serialization.test.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,40 @@ TEST_CASE("Boost serialization")

SECTION("El::BigFloat")
{
El::BigFloat value = random_bigfloat();
El::BigFloat other = serialize_deserialize(value);
REQUIRE(value == other);
auto zero = El::BigFloat(0);
auto nonzero = random_bigfloat();

for(auto &value : {zero, nonzero})
{
CAPTURE(value);
El::BigFloat other = serialize_deserialize(value);
REQUIRE(value == other);
}

{
INFO("Check that zero serialization is more compact");
std::stringstream ss_zero, ss_nonzero;
boost::archive::binary_oarchive ar_zero(ss_zero), ar_nonzero(ss_nonzero);
ar_zero << zero;
ar_nonzero << nonzero;
REQUIRE(ss_zero.str().size() < ss_nonzero.str().size());
}
}

SECTION("El::Matrix<BigFloat>")
{
int height = 2;
int width = 3;
auto matrix = random_matrix(height, width);

El::Matrix<El::BigFloat> other = serialize_deserialize(matrix);
// Sanity check: deserialized_matrix is not the same as matrix
REQUIRE(matrix.LockedBuffer() != other.LockedBuffer());
DIFF(matrix, other);
int height = 100;
int width = 10;
auto rand_matrix = random_matrix(height, width);
El::Matrix<El::BigFloat> zeros(height, width);
El::Zero(zeros);

for(auto &matrix : {zeros, rand_matrix})
{
El::Matrix<El::BigFloat> other = serialize_deserialize(matrix);
// Sanity check: deserialized_matrix is not the same as matrix
REQUIRE(matrix.LockedBuffer() != other.LockedBuffer());
DIFF(matrix, other);
}
}
}
7 changes: 7 additions & 0 deletions test/src/unit_tests/util/util.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,11 @@ namespace Test_Util

return matrix;
}

// Creates a height x width BigFloat matrix and sets every entry to zero
// via El::Zero.
inline El::Matrix<El::BigFloat> zero_matrix(int height, int width)
{
  El::Matrix<El::BigFloat> result(height, width);
  El::Zero(result);
  return result;
}
}

0 comments on commit 855e839

Please sign in to comment.