forked from containers/bootc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chunking: Bin packing algorithm that minimizes
layer deltas using historical builds. Revamp basic_packing to follow the prior packing structure if the --prior-build flag exists. This simply modifies existing layers with upgrades/downgrades/removals of packages. The last layer contains any newly added packages. In the case where the --prior-build flag does not exist, the update frequency of the packages (frequencyinfo) and their size are used to segment packages into different partitions (all combinations of low, medium, high frequency and low, medium, high size). The partition that each package falls into is decided by its deviation from the mean. Then the packages are allotted to different layers to ensure that 1) low frequency packages don't mix with high frequency packages, 2) large packages are allotted separate bins, and 3) small packages can be put together in the same bin. This problem is also known as the multi-objective bin packing problem with constraints, or the multiple knapsack problem. The objectives conflict given our constraints, and hence a compromise is made to minimize layer deltas while respecting the hard limit of overlayfs that the kernel can handle.
- Loading branch information
1 parent
95f2366
commit 5159164
Showing
11 changed files
with
745 additions
and
88 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
//! This module holds implementations of some basic statistical properties, such as mean and standard deviation. | ||
/// Computes the arithmetic mean of `data`.
///
/// Returns `None` when `data` is empty, because the mean is undefined there.
///
/// The elements are accumulated in `u128`, so the sum cannot overflow even
/// when the values are close to `u64::MAX`. The conversion to `f64` happens
/// once, on the exact total.
pub(crate) fn mean(data: &[u64]) -> Option<f64> {
    if data.is_empty() {
        return None;
    }
    // A slice cannot hold enough `u64` values for their sum to exceed
    // `u128::MAX`, so this widening sum is always exact.
    let total: u128 = data.iter().map(|&value| u128::from(value)).sum();
    Some(total as f64 / data.len() as f64)
}
|
||
pub(crate) fn std_deviation(data: &[u64]) -> Option<f64> { | ||
match (mean(data), data.len()) { | ||
(Some(data_mean), count) if count > 0 => { | ||
let variance = data | ||
.iter() | ||
.map(|value| { | ||
let diff = data_mean - (*value as f64); | ||
diff * diff | ||
}) | ||
.sum::<f64>() | ||
/ count as f64; | ||
Some(variance.sqrt()) | ||
} | ||
_ => None, | ||
} | ||
} | ||
|
||
/// Computes the median of `data` and its median absolute deviation (MAD).
///
/// Returns `None` when `data` is empty, otherwise `Some((median, mad))`,
/// where `mad` is the median of `|x - median|` over all elements.
///
/// The input does not have to be sorted: the function works on a sorted
/// copy, so the caller's slice is left untouched. The `&mut` in the
/// signature is kept only so existing callers continue to compile.
pub(crate) fn median_absolute_deviation(data: &mut [u64]) -> Option<(f64, f64)> {
    /// Median of a non-empty slice that is already sorted in ascending order.
    fn median_of_sorted(sorted: &[f64]) -> f64 {
        let mid = sorted.len() / 2;
        if sorted.len() % 2 == 1 {
            sorted[mid]
        } else {
            0.5 * (sorted[mid - 1] + sorted[mid])
        }
    }

    if data.is_empty() {
        return None;
    }

    // Convert to f64 before any arithmetic: adding the two middle elements
    // as u64 could overflow for values near `u64::MAX`.
    let mut values: Vec<f64> = data.iter().map(|&value| value as f64).collect();
    values.sort_unstable_by(|a, b| a.partial_cmp(b).expect("values cast from u64 are never NaN"));
    let median = median_of_sorted(&values);

    let mut deviations: Vec<f64> = values.iter().map(|value| (value - median).abs()).collect();
    deviations
        .sort_unstable_by(|a, b| a.partial_cmp(b).expect("absolute deviations are never NaN"));
    let mad = median_of_sorted(&deviations);

    Some((median, mad))
}
|
||
/// Checks `mean` on empty, single-element and multi-element inputs.
#[test]
fn test_mean() {
    // No elements, no mean.
    assert_eq!(mean(&[]), None);

    // A lone value is its own mean.
    for &single in &[0u64, 1, 5, 100] {
        assert_eq!(mean(&[single]), Some(single as f64));
    }

    // Each entry pairs an input with its expected mean.
    let cases: [(&[u64], f64); 3] = [
        (&[0, 1], 0.5),
        (&[0, 5, 100], 35.0),
        (&[7, 4, 30, 14], 13.75),
    ];
    for &(input, expected) in &cases {
        assert_eq!(mean(input), Some(expected));
    }
}
|
||
/// Checks `std_deviation` on empty, single-element and multi-element inputs.
#[test]
fn test_std_deviation() {
    // No elements, no deviation.
    assert_eq!(std_deviation(&[]), None);

    // A single value never deviates from its own mean.
    for &single in &[0u64, 1, 5, 100] {
        assert_eq!(std_deviation(&[single]), Some(0.0));
    }

    // Each entry pairs an input with its expected standard deviation.
    let cases: [(&[u64], f64); 3] = [
        (&[1, 4], 1.5),
        (&[2, 2, 2, 2], 0.0),
        (
            &[1, 20, 300, 4000, 50000, 600000, 7000000, 80000000],
            26193874.56387471,
        ),
    ];
    for &(input, expected) in &cases {
        assert_eq!(std_deviation(input), Some(expected));
    }
}
|
||
/// Checks `median_absolute_deviation` on empty, single-element, even-length
/// and odd-length inputs.
///
/// Every fixture is listed in ascending order, so the expected values do not
/// depend on how the function treats unsorted input.
#[test]
fn test_median_absolute_deviation() {
    assert_eq!(median_absolute_deviation(&mut []), None);
    for v in [0u64, 1, 5, 100] {
        assert_eq!(median_absolute_deviation(&mut [v]), Some((v as f64, 0.0)));
    }
    assert_eq!(median_absolute_deviation(&mut [1, 4]), Some((2.5, 1.5)));
    assert_eq!(
        median_absolute_deviation(&mut [2, 2, 2, 2]),
        Some((2.0, 0.0))
    );
    assert_eq!(
        median_absolute_deviation(&mut [
            1, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 8, 9, 12, 52, 90
        ]),
        Some((6.0, 2.0))
    );

    // If more than half of the data has the same value, MAD = 0, thus any
    // value different from the residual median is classified as an outlier.
    assert_eq!(
        median_absolute_deviation(&mut [0, 0, 1, 1, 1, 1, 1, 1, 1]),
        Some((1.0, 0.0))
    );
}
Oops, something went wrong.