Skip to content

Commit

Permalink
feat(backups): implement long term retention
Browse files Browse the repository at this point in the history
Long term retention, sometimes called GFS (Grandfather / Father / Son),
is a way to promote some backups so that they are kept for a long time.

That way, the user can find the best balance between
storage and security.

This commit adds the code mechanics to identify backups that
can be deleted safely. It is intended to be used with a form
that asks the user for the number of days, weeks, months, and years
for which XO will keep the most recent backup.

It extends the current system of keeping the n most recent backups.

Keep in mind that the backups promoted by week and month can be offset (shifted).
  • Loading branch information
fbeauchamp committed Sep 20, 2024
1 parent f969056 commit ee9212a
Show file tree
Hide file tree
Showing 2 changed files with 216 additions and 2 deletions.
86 changes: 84 additions & 2 deletions @xen-orchestra/backups/_getOldEntries.mjs
Original file line number Diff line number Diff line change
@@ -1,4 +1,86 @@
// Definitions of the long term retention (LTR) time buckets.
//
// Each definition exposes `makeDateFormatter(settings)`, which returns a function
// turning a Date into the key of the bucket it belongs to: two dates producing
// the same key are in the same bucket. Keys are built from local-time getters,
// so the month component is zero-based (January is 0).
//
// `ancestor` is the name of another definition; it is not read by the code
// visible in this file.
const LTR_DEFINITIONS = {
  daily: {
    // `firstHourOfTheDay` shifts the day boundary: dates before that hour
    // fall into the previous day's bucket.
    makeDateFormatter(options = {}) {
      const { firstHourOfTheDay = 0 } = options
      return function formatDay(date) {
        const shifted = new Date(date)
        shifted.setHours(shifted.getHours() - firstHourOfTheDay)
        return [shifted.getFullYear(), shifted.getMonth(), shifted.getDate()].join('-')
      }
    },
  },
  weekly: {
    // The key is the calendar day on which the week of `date` started.
    // Sunday is 0 for getDay(); Monday (1) is used as the default first day.
    makeDateFormatter(options = {}) {
      const { firstDayOfWeek = 1 } = options
      return function formatWeek(date) {
        const weekStart = new Date(date)
        const daysSinceWeekStart = (date.getDay() + 7 - firstDayOfWeek) % 7
        weekStart.setDate(date.getDate() - daysSinceWeekStart)
        return [weekStart.getFullYear(), weekStart.getMonth(), weekStart.getDate()].join('-')
      }
    },
    ancestor: 'daily',
  },
  monthly: {
    // `firstDayOfMonth` is a number of days subtracted from the date before
    // the year and month are read.
    makeDateFormatter(options = {}) {
      const { firstDayOfMonth = 0 } = options
      return function formatMonth(date) {
        const shifted = new Date(date)
        shifted.setDate(shifted.getDate() - firstDayOfMonth)
        return [shifted.getFullYear(), shifted.getMonth()].join('-')
      }
    },
    ancestor: 'weekly',
  },
  yearly: {
    // No setting: the key is simply the local year.
    makeDateFormatter() {
      return function formatYear(date) {
        return String(date.getFullYear())
      }
    },
    ancestor: 'monthly',
  },
}

// returns all entries but the last retention-th
export function getOldEntries(retention, entries) {
return entries === undefined ? [] : retention > 0 ? entries.slice(0, -retention) : entries
/**
 * Returns the entries that are too old to be kept.
 *
 * Entries are walked from the last one to the first one, and each long term
 * retention type keeps track of how many distinct time buckets it may still keep:
 * - if multiple entries are in the same time bucket: only the most recent one is kept
 * - if an entry is valid in any of the buckets OR is within the minRetentionCount last
 *   entries: it is kept
 * - if a bucket is completely empty: it does not count as one, thus it may extend the
 *   retention
 *
 * @param {number} minRetentionCount - number of entries, counted from the end of
 *   `entries`, that are always kept
 * @param {Array<Backup>|undefined} entries - backups, expected to be ordered from the
 *   oldest to the most recent (the order is not checked); each entry must have a
 *   `timestamp` accepted by `new Date()`
 * @param {object} [options]
 * @param {object} [options.longTermRetention] - maps a retention type (a key of
 *   LTR_DEFINITIONS) to `{ retention, settings }`, where `retention` is the number of
 *   buckets to keep and `settings` is forwarded to the matching `makeDateFormatter`
 * @returns {Array<Backup>} the entries that can be deleted, in the same order as in `entries`
 * @throws {Error} if a key of `longTermRetention` is not a known retention type
 */
export function getOldEntries(minRetentionCount, entries, { longTermRetention = {} } = {}) {
  const dateBuckets = {}
  for (const [duration, { retention, settings }] of Object.entries(longTermRetention)) {
    if (LTR_DEFINITIONS[duration] === undefined) {
      // report the unknown retention type, not its count
      throw new Error(`Retention of type ${duration} is invalid`)
    }
    dateBuckets[duration] = {
      remaining: retention,
      lastMatchingBucket: null,
      formatter: LTR_DEFINITIONS[duration].makeDateFormatter(settings),
    }
  }

  // no entries at all: nothing can be deleted
  if (entries === undefined) {
    return []
  }

  const nb = entries.length
  const oldEntries = []

  // start from the end of the list so that the first entry seen in a bucket
  // is the most recent one of this bucket
  for (let i = nb - 1; i >= 0; i--) {
    const entry = entries[i]
    const entryDate = new Date(entry.timestamp)
    let shouldBeKept = false
    for (const state of Object.values(dateBuckets)) {
      if (state.remaining === 0) {
        // this retention type has already promoted all the buckets it is allowed to
        continue
      }
      const bucket = state.formatter(entryDate)
      if (state.lastMatchingBucket !== bucket) {
        // first entry seen in a new bucket: promote it
        shouldBeKept = true
        state.remaining -= 1
        state.lastMatchingBucket = bucket
      }
    }
    // the minRetentionCount last entries are always kept
    if (i >= nb - minRetentionCount) {
      shouldBeKept = true
    }
    if (!shouldBeKept) {
      oldEntries.push(entry)
    }
  }
  // entries were collected while walking backward: restore the original order
  return oldEntries.reverse()
}
132 changes: 132 additions & 0 deletions @xen-orchestra/backups/_getOldEntries.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import { describe, it } from 'node:test'
import assert from 'node:assert/strict'

import { getOldEntries } from './_getOldEntries.mjs'

// Table-driven tests: each case lists the arguments given to getOldEntries() and
// the ids of the entries expected to be returned (i.e. the ones that can be deleted).
// Dates are parsed in local time; weeks start on Monday by default.
describe('_getOldEntries() should succeed', () => {
  const tests = [
    {
      args: [
        1,
        [
          { timestamp: 1, id: 1 },
          { timestamp: 3, id: 2 },
          { timestamp: 2, id: 3 },
        ],
      ],
      expectedIds: [1, 2],
      testLabel: 'should handle number based retention ',
    },

    {
      args: [
        0,
        [
          { timestamp: +new Date('2024-09-01 00:01:00'), id: 1 }, // too old
          { timestamp: +new Date('2024-09-01 00:00:00'), id: 2 }, // too old
          { timestamp: +new Date('2024-09-02 00:09:00'), id: 3 }, // older in same day
          { timestamp: +new Date('2024-09-02 00:10:00'), id: 4 },
          { timestamp: +new Date('2024-09-03 00:09:00'), id: 5 },
          { timestamp: +new Date('2024-09-04 00:09:00'), id: 6 }, // older in same day
          { timestamp: +new Date('2024-09-04 00:10:00'), id: 7 },
        ],
        {
          longTermRetention: {
            daily: { retention: 3 },
          },
        },
      ],
      expectedIds: [1, 2, 3, 6],
      testLabel: 'should handle day based retention ',
    },
    {
      args: [
        0,
        [
          { timestamp: +new Date('2024-09-01 00:01:00'), id: 1 }, // week n-3, too old
          { timestamp: +new Date('2024-09-02 00:00:00'), id: 2 }, // week n-2, older in same week
          { timestamp: +new Date('2024-09-03 00:09:00'), id: 3 }, // week n-2, older in same week
          { timestamp: +new Date('2024-09-04 00:09:00'), id: 4 }, // week n-2, older in same week
          { timestamp: +new Date('2024-09-05 00:09:00'), id: 5 }, // week n-2, older in same week
          { timestamp: +new Date('2024-09-06 00:09:00'), id: 6 }, // week n-2, older in same week
          { timestamp: +new Date('2024-09-07 00:09:00'), id: 7 }, // week n-2, most recent, kept
          { timestamp: +new Date('2024-09-09 00:09:00'), id: 8 }, // week n-1, older in same week
          { timestamp: +new Date('2024-09-15 00:09:00'), id: 9 }, // week n-1, kept
          { timestamp: +new Date('2024-09-22 00:09:00'), id: 10 }, // week n, kept
        ],
        {
          longTermRetention: {
            weekly: { retention: 3 },
          },
        },
      ],
      expectedIds: [1, 2, 3, 4, 5, 6, 8],
      testLabel: 'should handle week based retention ',
    },
    {
      args: [
        0,
        [
          { timestamp: +new Date('2024-06-22 00:09:00'), id: 1 }, // too old
          { timestamp: +new Date('2024-07-31 00:09:00'), id: 2 }, // most recent of july
          { timestamp: +new Date('2024-08-01 00:09:00'), id: 3 }, // older of august
          { timestamp: +new Date('2024-08-05 00:09:00'), id: 4 }, // older of august
          { timestamp: +new Date('2024-08-07 00:09:00'), id: 5 }, // most recent of august
          { timestamp: +new Date('2024-09-09 00:09:00'), id: 6 }, // older of september
          { timestamp: +new Date('2024-09-15 00:09:00'), id: 7 }, // older of september
          { timestamp: +new Date('2024-09-22 00:09:00'), id: 8 }, // most recent of september
        ],
        {
          longTermRetention: {
            // this case must exercise the monthly buckets, not the weekly ones
            monthly: { retention: 3 },
          },
        },
      ],
      expectedIds: [1, 3, 4, 6, 7],
      testLabel: 'should handle month based retention',
    },
    {
      // NOTE(review): the timestamps of ids 11 to 14 are not in chronological order;
      // the expectation below reflects the array order — confirm this is intended
      args: [
        0,
        [
          { timestamp: +new Date('2023-05-18 00:09:00'), id: 1 }, // too old
          { timestamp: +new Date('2024-06-15 00:09:00'), id: 2 }, // too old in same year
          { timestamp: +new Date('2024-07-04 00:09:00'), id: 3 },
          { timestamp: +new Date('2024-08-12 00:09:00'), id: 4 },
          { timestamp: +new Date('2024-09-05 00:09:00'), id: 5 },
          { timestamp: +new Date('2024-10-02 00:09:00'), id: 6 },
          { timestamp: +new Date('2024-11-01 00:09:00'), id: 7 },
          { timestamp: +new Date('2024-12-17 00:09:00'), id: 8 },
          { timestamp: +new Date('2024-12-24 00:09:00'), id: 10 },
          { timestamp: +new Date('2025-12-31 00:09:00'), id: 11 }, // same day/week/month/year
          { timestamp: +new Date('2025-12-31 00:09:00'), id: 12 }, // new month /year
          { timestamp: +new Date('2025-01-01 00:09:00'), id: 13 }, // same day/week/month/year
          { timestamp: +new Date('2025-01-01 00:10:00'), id: 14 }, // new year /
        ],
        {
          longTermRetention: {
            daily: { retention: 2 },
            weekly: { retention: 4 },
            monthly: { retention: 8 },
            yearly: { retention: 2 },
          },
        },
      ],
      expectedIds: [1, 2, 11, 13],
      testLabel: 'complete test ',
    },
  ]

  for (const { args, expectedIds, testLabel } of tests) {
    it(testLabel, () => {
      const oldEntries = getOldEntries(...args)
      // compare the whole list at once so that a failure shows every id
      assert.deepStrictEqual(
        oldEntries.map(({ id }) => id),
        expectedIds
      )
    })
  }
})

// Placeholder suites: they are declared but do not contain any test case yet.
// TODO: add cases for invalid calls (e.g. an unknown retention type)
describe('_getOldEntries() should fail when called incorrectly', () => {})
// TODO: add cases for picking a specific backup to promote
describe('_getOldEntries() should handle picking specific backup to promote', () => {})

0 comments on commit ee9212a

Please sign in to comment.