diff --git a/docs/blocks-storage/compactor.md b/docs/blocks-storage/compactor.md index 5fb44208ec..35dfe63615 100644 --- a/docs/blocks-storage/compactor.md +++ b/docs/blocks-storage/compactor.md @@ -285,15 +285,33 @@ compactor: # CLI flag: -compactor.ring.wait-active-instance-timeout [wait_active_instance_timeout: <duration> | default = 10m] - # How long block visit marker file should be considered as expired and able to - # be picked up by compactor again. - # CLI flag: -compactor.block-visit-marker-timeout - [block_visit_marker_timeout: <duration> | default = 5m] + # How long shuffle sharding planner would wait before running planning code. + # CLI flag: -compactor.sharding-planner-delay + [sharding_planner_delay: <duration> | default = 10s] - # How frequently block visit marker file should be updated duration + # The compaction mode to use. Supported values are: default, partitioning. + # CLI flag: -compactor.compaction-mode + [compaction_mode: <string> | default = "default"] + + # How long compaction visit marker file should be considered as expired and + # able to be picked up by compactor again. + # CLI flag: -compactor.compaction-visit-marker-timeout + [compaction_visit_marker_timeout: <duration> | default = 1m30s] + + # How frequently compaction visit marker file should be updated duration # compaction. - # CLI flag: -compactor.block-visit-marker-file-update-interval - [block_visit_marker_file_update_interval: <duration> | default = 1m] + # CLI flag: -compactor.compaction-visit-marker-file-update-interval + [compaction_visit_marker_file_update_interval: <duration> | default = 1m] + + # How long cleaner visit marker file should be considered as expired and able + # to be picked up by cleaner again. + # CLI flag: -compactor.cleaner-visit-marker-timeout + [cleaner_visit_marker_timeout: <duration> | default = 10m] + + # How frequently cleaner visit marker file should be updated when cleaning + # user. + # CLI flag: -compactor.cleaner-visit-marker-file-update-interval + [cleaner_visit_marker_file_update_interval: <duration> | default = 5m] # When enabled, index verification will ignore out of order label names. # CLI flag: -compactor.accept-malformed-index diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 24240c36bf..2693197c85 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -2205,14 +2205,32 @@ sharding_ring: # CLI flag: -compactor.ring.wait-active-instance-timeout [wait_active_instance_timeout: <duration> | default = 10m] -# How long block visit marker file should be considered as expired and able to -# be picked up by compactor again. -# CLI flag: -compactor.block-visit-marker-timeout -[block_visit_marker_timeout: <duration> | default = 5m] +# How long shuffle sharding planner would wait before running planning code. +# CLI flag: -compactor.sharding-planner-delay +[sharding_planner_delay: <duration> | default = 10s] -# How frequently block visit marker file should be updated duration compaction. -# CLI flag: -compactor.block-visit-marker-file-update-interval -[block_visit_marker_file_update_interval: <duration> | default = 1m] +# The compaction mode to use. Supported values are: default, partitioning. +# CLI flag: -compactor.compaction-mode +[compaction_mode: <string> | default = "default"] + +# How long compaction visit marker file should be considered as expired and able +# to be picked up by compactor again. 
+# CLI flag: -compactor.compaction-visit-marker-timeout +[compaction_visit_marker_timeout: <duration> | default = 1m30s] + +# How frequently compaction visit marker file should be updated duration +# compaction. +# CLI flag: -compactor.compaction-visit-marker-file-update-interval +[compaction_visit_marker_file_update_interval: <duration> | default = 1m] + +# How long cleaner visit marker file should be considered as expired and able to +# be picked up by cleaner again. +# CLI flag: -compactor.cleaner-visit-marker-timeout +[cleaner_visit_marker_timeout: <duration> | default = 10m] + +# How frequently cleaner visit marker file should be updated when cleaning user. +# CLI flag: -compactor.cleaner-visit-marker-file-update-interval +[cleaner_visit_marker_file_update_interval: <duration> | default = 5m] # When enabled, index verification will ignore out of order label names. # CLI flag: -compactor.accept-malformed-index @@ -3342,6 +3360,24 @@ query_rejection: # CLI flag: -compactor.tenant-shard-size [compactor_tenant_shard_size: <int> | default = 0] +# Index size limit in bytes for each compaction partition. 0 means no limit +# CLI flag: -compactor.partition-index-size-limit-in-bytes +[compactor_partition_index_size_limit_in_bytes: <int> | default = 0] + +# Time series count limit for each compaction partition. 0 means no limit +# CLI flag: -compactor.partition-series-count-limit +[compactor_partition_series_count_limit: <int> | default = 0] + +# Index size limit in bytes for each level 1 compaction partition. 0 means no +# limit +# CLI flag: -compactor.partition-level1-index-size-limit-in-bytes +[compactor_partition_level1_index_size_limit_in_bytes: <int> | default = 0] + +# Time series count limit for each level 1 compaction partition. 0 means no +# limit +# CLI flag: -compactor.partition-level1-series-count-limit +[compactor_partition_level1_series_count_limit: <int> | default = 0] + # S3 server-side encryption type. Required to enable server-side encryption # overrides for a specific tenant. If not set, the default S3 client settings # are used. 
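Reviewer note: the two documentation hunks above add a compaction mode switch, compaction and cleaner visit marker timeouts with their update intervals, and per-tenant partition size/series limits. As a quick orientation aid (not part of the PR), the sketch below registers the documented flag names and defaults on a standard `flag.FlagSet`; the `compactorOptions` struct and its field names are illustrative stand-ins, not the PR's actual `Config` fields.

```go
package docsketch

import (
	"flag"
	"time"
)

// compactorOptions is a stand-in for the real compactor Config; only the flag
// names and default values below are taken from the documentation above.
type compactorOptions struct {
	ShardingPlannerDelay                    time.Duration
	CompactionMode                          string
	CompactionVisitMarkerTimeout            time.Duration
	CompactionVisitMarkerFileUpdateInterval time.Duration
	CleanerVisitMarkerTimeout               time.Duration
	CleanerVisitMarkerFileUpdateInterval    time.Duration
}

func (o *compactorOptions) registerFlags(f *flag.FlagSet) {
	f.DurationVar(&o.ShardingPlannerDelay, "compactor.sharding-planner-delay", 10*time.Second, "How long the shuffle sharding planner waits before running planning code.")
	f.StringVar(&o.CompactionMode, "compactor.compaction-mode", "default", "The compaction mode to use. Supported values are: default, partitioning.")
	f.DurationVar(&o.CompactionVisitMarkerTimeout, "compactor.compaction-visit-marker-timeout", 90*time.Second, "How long a compaction visit marker is considered valid before it expires.")
	f.DurationVar(&o.CompactionVisitMarkerFileUpdateInterval, "compactor.compaction-visit-marker-file-update-interval", time.Minute, "How frequently the compaction visit marker file is updated during compaction.")
	f.DurationVar(&o.CleanerVisitMarkerTimeout, "compactor.cleaner-visit-marker-timeout", 10*time.Minute, "How long a cleaner visit marker is considered valid before it expires.")
	f.DurationVar(&o.CleanerVisitMarkerFileUpdateInterval, "compactor.cleaner-visit-marker-file-update-interval", 5*time.Minute, "How frequently the cleaner visit marker file is updated while cleaning a tenant.")
}
```

The partition limit flags (`-compactor.partition-index-size-limit-in-bytes`, `-compactor.partition-series-count-limit` and their level-1 variants) appear next to `compactor_tenant_shard_size` in the limits block above, so they would likely be registered with the per-tenant limits configuration rather than here.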
diff --git a/pkg/compactor/background_chunks_series_set.go b/pkg/compactor/background_chunks_series_set.go new file mode 100644 index 0000000000..bca40f73d2 --- /dev/null +++ b/pkg/compactor/background_chunks_series_set.go @@ -0,0 +1,60 @@ +package compactor + +import ( + "context" + + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/annotations" +) + +type backgrounChunkSeriesSet struct { + nextSet chan storage.ChunkSeries + actual storage.ChunkSeries + cs storage.ChunkSeriesSet +} + +func (b *backgrounChunkSeriesSet) Next() bool { + s, ok := <-b.nextSet + b.actual = s + return ok +} + +func (b *backgrounChunkSeriesSet) At() storage.ChunkSeries { + return b.actual +} + +func (b *backgrounChunkSeriesSet) Err() error { + return b.cs.Err() +} + +func (b *backgrounChunkSeriesSet) Warnings() annotations.Annotations { + return b.cs.Warnings() +} + +func (b *backgrounChunkSeriesSet) run(ctx context.Context) { + for { + if !b.cs.Next() { + close(b.nextSet) + return + } + + select { + case b.nextSet <- b.cs.At(): + case <-ctx.Done(): + return + } + } +} + +func NewBackgroundChunkSeriesSet(ctx context.Context, cs storage.ChunkSeriesSet) storage.ChunkSeriesSet { + r := &backgrounChunkSeriesSet{ + cs: cs, + nextSet: make(chan storage.ChunkSeries, 1000), + } + + go func() { + r.run(ctx) + }() + + return r +} diff --git a/pkg/compactor/block_visit_marker.go b/pkg/compactor/block_visit_marker.go index b391421fd3..347a6f8e55 100644 --- a/pkg/compactor/block_visit_marker.go +++ b/pkg/compactor/block_visit_marker.go @@ -49,8 +49,12 @@ func (b *BlockVisitMarker) isVisitedByCompactor(blockVisitMarkerTimeout time.Dur return b.CompactorID == compactorID && time.Now().Before(time.Unix(b.VisitTime, 0).Add(blockVisitMarkerTimeout)) } +func GetBlockVisitMarkerFile(blockID string) string { + return path.Join(blockID, BlockVisitMarkerFile) +} + func ReadBlockVisitMarker(ctx context.Context, bkt objstore.InstrumentedBucketReader, logger log.Logger, blockID string, blockVisitMarkerReadFailed prometheus.Counter) (*BlockVisitMarker, error) { - visitMarkerFile := path.Join(blockID, BlockVisitMarkerFile) + visitMarkerFile := GetBlockVisitMarkerFile(blockID) visitMarkerFileReader, err := bkt.ReaderWithExpectedErrs(bkt.IsObjNotFoundErr).Get(ctx, visitMarkerFile) if err != nil { if bkt.IsObjNotFoundErr(err) { diff --git a/pkg/compactor/blocks_cleaner.go b/pkg/compactor/blocks_cleaner.go index d1a81f401c..92036fbeea 100644 --- a/pkg/compactor/blocks_cleaner.go +++ b/pkg/compactor/blocks_cleaner.go @@ -15,6 +15,7 @@ import ( "github.com/thanos-io/objstore" "github.com/thanos-io/thanos/pkg/block" "github.com/thanos-io/thanos/pkg/block/metadata" + "go.uber.org/atomic" "github.com/cortexproject/cortex/pkg/storage/bucket" cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" @@ -27,6 +28,9 @@ import ( const ( defaultDeleteBlocksConcurrency = 16 + reasonLabelValue = "retention" + activeStatus = "active" + deletedStatus = "deleted" ) type BlocksCleanerConfig struct { @@ -35,6 +39,8 @@ type BlocksCleanerConfig struct { CleanupConcurrency int BlockDeletionMarksMigrationEnabled bool // TODO Discuss whether we should remove it in Cortex 1.8.0 and document that upgrading to 1.7.0 before 1.8.0 is required. TenantCleanupDelay time.Duration // Delay before removing tenant deletion mark and "debug". 
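Referring back to the new `background_chunks_series_set.go` above: `NewBackgroundChunkSeriesSet` wraps a `storage.ChunkSeriesSet` and advances it in a goroutine, buffering up to 1000 series in a channel so the consumer's `Next()`/`At()` loop overlaps with the producer's work. The sketch below is a hypothetical caller (not part of the PR), assumed to sit in the same `compactor` package:

```go
package compactor

import (
	"context"

	"github.com/prometheus/prometheus/storage"
)

// countSeries drains a ChunkSeriesSet through the background wrapper defined
// above. The wrapped set is iterated by a separate goroutine, so a slow
// consumer and a slow producer no longer block each other directly.
func countSeries(ctx context.Context, cs storage.ChunkSeriesSet) (int, error) {
	bg := NewBackgroundChunkSeriesSet(ctx, cs)
	n := 0
	for bg.Next() {
		_ = bg.At() // the storage.ChunkSeries produced by the wrapped set
		n++
	}
	return n, bg.Err()
}
```

As written above, cancelling `ctx` stops the producer goroutine without closing `nextSet`, so a caller that keeps calling `Next()` after cancellation can block once the buffer drains; consumers are expected to stop iterating when the context is done.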
+ ShardingStrategy string + CompactionMode string } type BlocksCleaner struct { @@ -46,47 +52,97 @@ type BlocksCleaner struct { bucketClient objstore.InstrumentedBucket usersScanner *cortex_tsdb.UsersScanner + ringLifecyclerID string + // Keep track of the last owned users. lastOwnedUsers []string + cleanerVisitMarkerTimeout time.Duration + cleanerVisitMarkerFileUpdateInterval time.Duration + partitionVisitMarkerTimeout time.Duration + // Metrics. - runsStarted prometheus.Counter - runsCompleted prometheus.Counter - runsFailed prometheus.Counter - runsLastSuccess prometheus.Gauge + runsStarted *prometheus.CounterVec + runsCompleted *prometheus.CounterVec + runsFailed *prometheus.CounterVec + runsLastSuccess *prometheus.GaugeVec blocksCleanedTotal prometheus.Counter blocksFailedTotal prometheus.Counter - blocksMarkedForDeletion prometheus.Counter + blocksMarkedForDeletion *prometheus.CounterVec + CleanerVisitMarkerReadFailed prometheus.Counter + CleanerVisitMarkerWriteFailed prometheus.Counter tenantBlocks *prometheus.GaugeVec tenantBlocksMarkedForDelete *prometheus.GaugeVec tenantBlocksMarkedForNoCompaction *prometheus.GaugeVec tenantPartialBlocks *prometheus.GaugeVec tenantBucketIndexLastUpdate *prometheus.GaugeVec + tenantBlocksCleanedTotal *prometheus.CounterVec + tenantCleanDuration *prometheus.GaugeVec + compactorPartitionError *prometheus.CounterVec + partitionedGroupInfoReadFailed prometheus.Counter + compactionVisitMarkerReadFailed prometheus.Counter + compactionVisitMarkerWriteFailed prometheus.Counter + remainingPlannedCompactions *prometheus.GaugeVec + inProgressCompactions *prometheus.GaugeVec + oldestPartitionGroupOffset *prometheus.GaugeVec } -func NewBlocksCleaner(cfg BlocksCleanerConfig, bucketClient objstore.InstrumentedBucket, usersScanner *cortex_tsdb.UsersScanner, cfgProvider ConfigProvider, logger log.Logger, reg prometheus.Registerer) *BlocksCleaner { +func NewBlocksCleaner( + cfg BlocksCleanerConfig, + bucketClient objstore.InstrumentedBucket, + usersScanner *cortex_tsdb.UsersScanner, + partitionVisitMarkerTimeout time.Duration, + cfgProvider ConfigProvider, + logger log.Logger, + reg prometheus.Registerer, + cleanerVisitMarkerTimeout time.Duration, + cleanerVisitMarkerFileUpdateInterval time.Duration, + blocksMarkedForDeletion *prometheus.CounterVec, + partitionedGroupInfoReadFailed prometheus.Counter, + compactionVisitMarkerReadFailed prometheus.Counter, + compactionVisitMarkerWriteFailed prometheus.Counter, + remainingPlannedCompactions *prometheus.GaugeVec, +) *BlocksCleaner { + + var inProgressCompactions *prometheus.GaugeVec + var oldestPartitionGroupOffset *prometheus.GaugeVec + if cfg.ShardingStrategy == util.ShardingStrategyShuffle { + inProgressCompactions = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_compactor_in_progress_compactions", + Help: "Total number of in progress compactions. 
Only available with shuffle-sharding strategy", + }, CommonLabels) + oldestPartitionGroupOffset = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_compactor_oldest_partition_offset", + Help: "Time in seconds between now and the oldest created partition group not completed.", + }, CommonLabels) + } + c := &BlocksCleaner{ - cfg: cfg, - bucketClient: bucketClient, - usersScanner: usersScanner, - cfgProvider: cfgProvider, - logger: log.With(logger, "component", "cleaner"), - runsStarted: promauto.With(reg).NewCounter(prometheus.CounterOpts{ + cfg: cfg, + bucketClient: bucketClient, + usersScanner: usersScanner, + partitionVisitMarkerTimeout: partitionVisitMarkerTimeout, + cfgProvider: cfgProvider, + logger: log.With(logger, "component", "cleaner"), + ringLifecyclerID: "default-cleaner", + cleanerVisitMarkerTimeout: cleanerVisitMarkerTimeout, + cleanerVisitMarkerFileUpdateInterval: cleanerVisitMarkerFileUpdateInterval, + runsStarted: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ Name: "cortex_compactor_block_cleanup_started_total", Help: "Total number of blocks cleanup runs started.", - }), - runsCompleted: promauto.With(reg).NewCounter(prometheus.CounterOpts{ + }, []string{"tenant_status"}), + runsCompleted: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ Name: "cortex_compactor_block_cleanup_completed_total", Help: "Total number of blocks cleanup runs successfully completed.", - }), - runsFailed: promauto.With(reg).NewCounter(prometheus.CounterOpts{ + }, []string{"tenant_status"}), + runsFailed: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ Name: "cortex_compactor_block_cleanup_failed_total", Help: "Total number of blocks cleanup runs failed.", - }), - runsLastSuccess: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ + }, []string{"tenant_status"}), + runsLastSuccess: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ Name: "cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds", Help: "Unix timestamp of the last successful blocks cleanup run.", - }), + }, []string{"tenant_status"}), blocksCleanedTotal: promauto.With(reg).NewCounter(prometheus.CounterOpts{ Name: "cortex_compactor_blocks_cleaned_total", Help: "Total number of blocks deleted.", @@ -95,10 +151,14 @@ func NewBlocksCleaner(cfg BlocksCleanerConfig, bucketClient objstore.Instrumente Name: "cortex_compactor_block_cleanup_failures_total", Help: "Total number of blocks failed to be deleted.", }), - blocksMarkedForDeletion: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: blocksMarkedForDeletionName, - Help: blocksMarkedForDeletionHelp, - ConstLabels: prometheus.Labels{"reason": "retention"}, + blocksMarkedForDeletion: blocksMarkedForDeletion, + CleanerVisitMarkerReadFailed: promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "cortex_compactor_cleaner_visit_marker_read_failed", + Help: "Number of cleaner visit marker file failed to be read.", + }), + CleanerVisitMarkerWriteFailed: promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "cortex_compactor_cleaner_visit_marker_write_failed", + Help: "Number of cleaner visit marker file failed to be written.", }), // The following metrics don't have the "cortex_compactor" prefix because not strictly related to @@ -107,65 +167,214 @@ func NewBlocksCleaner(cfg BlocksCleanerConfig, bucketClient objstore.Instrumente tenantBlocks: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ Name: "cortex_bucket_blocks_count", Help: "Total number of blocks in the bucket. 
Includes blocks marked for deletion, but not partial blocks.", - }, []string{"user"}), + }, CommonLabels), tenantBlocksMarkedForDelete: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ Name: "cortex_bucket_blocks_marked_for_deletion_count", Help: "Total number of blocks marked for deletion in the bucket.", - }, []string{"user"}), + }, CommonLabels), tenantBlocksMarkedForNoCompaction: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ Name: "cortex_bucket_blocks_marked_for_no_compaction_count", Help: "Total number of blocks marked for no compaction in the bucket.", - }, []string{"user"}), + }, CommonLabels), tenantPartialBlocks: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ Name: "cortex_bucket_blocks_partials_count", Help: "Total number of partial blocks.", - }, []string{"user"}), + }, CommonLabels), tenantBucketIndexLastUpdate: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ Name: "cortex_bucket_index_last_successful_update_timestamp_seconds", Help: "Timestamp of the last successful update of a tenant's bucket index.", - }, []string{"user"}), + }, CommonLabels), + tenantBlocksCleanedTotal: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_bucket_blocks_cleaned_total", + Help: "Total number of blocks deleted for a tenant.", + }, CommonLabels), + tenantCleanDuration: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_bucket_clean_duration_seconds", + Help: "Duration of cleaner runtime for a tenant in seconds", + }, CommonLabels), + compactorPartitionError: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_compactor_partition_error", + Help: "Count of errors happened during partitioning compaction.", + ConstLabels: prometheus.Labels{"reason": "parent-block-mismatch"}, + }, CommonLabels), + partitionedGroupInfoReadFailed: partitionedGroupInfoReadFailed, + compactionVisitMarkerReadFailed: compactionVisitMarkerReadFailed, + compactionVisitMarkerWriteFailed: compactionVisitMarkerWriteFailed, + remainingPlannedCompactions: remainingPlannedCompactions, + inProgressCompactions: inProgressCompactions, + oldestPartitionGroupOffset: oldestPartitionGroupOffset, } - c.Service = services.NewTimerService(cfg.CleanupInterval, c.starting, c.ticker, nil) + c.Service = services.NewBasicService(c.starting, c.loop, nil) return c } +type cleanerJob struct { + users []string + timestamp int64 +} + +func (c *BlocksCleaner) SetRingLifecyclerID(ringLifecyclerID string) { + c.ringLifecyclerID = ringLifecyclerID +} + func (c *BlocksCleaner) starting(ctx context.Context) error { // Run a cleanup so that any other service depending on this service // is guaranteed to start once the initial cleanup has been done. 
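A note on the metric changes above: the cleaner's run counters (`runsStarted`, `runsCompleted`, `runsFailed`, `runsLastSuccess`) move from plain counters/gauges to vectors keyed by a `tenant_status` label, matching the `activeStatus`/`deletedStatus` constants introduced earlier, so cleanup of active tenants and of deleted tenants is accounted for separately. A minimal, self-contained sketch of that pattern (variable names here are illustrative):

```go
package docsketch

import (
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

func exampleRunMetrics(reg prometheus.Registerer) {
	runsStarted := promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
		Name: "cortex_compactor_block_cleanup_started_total",
		Help: "Total number of blocks cleanup runs started.",
	}, []string{"tenant_status"})

	// The active-tenant pass and the deleted-tenant pass now increment
	// separate series of the same metric ("active" / "deleted").
	runsStarted.WithLabelValues("active").Inc()
	runsStarted.WithLabelValues("deleted").Inc()
}
```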
- c.runCleanup(ctx, true) + activeUsers, deletedUsers, err := c.scanUsers(ctx) + if err != nil { + level.Error(c.logger).Log("msg", "failed to scan users on startup", "err", err.Error()) + c.runsFailed.WithLabelValues(deletedStatus).Inc() + c.runsFailed.WithLabelValues(activeStatus).Inc() + return nil + } + if err = c.cleanUpActiveUsers(ctx, activeUsers, true); err != nil { + c.runsFailed.WithLabelValues(activeStatus).Inc() + } + if err = c.cleanDeletedUsers(ctx, deletedUsers); err != nil { + c.runsFailed.WithLabelValues(deletedStatus).Inc() + } return nil } -func (c *BlocksCleaner) ticker(ctx context.Context) error { - c.runCleanup(ctx, false) +func (c *BlocksCleaner) loop(ctx context.Context) error { + t := time.NewTicker(c.cfg.CleanupInterval) + defer t.Stop() - return nil + usersChan := make(chan *cleanerJob) + deleteChan := make(chan *cleanerJob) + defer close(usersChan) + defer close(deleteChan) + + go func() { + c.runActiveUserCleanup(ctx, usersChan) + }() + go func() { + c.runDeleteUserCleanup(ctx, deleteChan) + }() + + for { + select { + case <-t.C: + activeUsers, deletedUsers, err := c.scanUsers(ctx) + if err != nil { + level.Error(c.logger).Log("msg", "failed to scan users blocks cleanup and maintenance", "err", err.Error()) + c.runsFailed.WithLabelValues(deletedStatus).Inc() + c.runsFailed.WithLabelValues(activeStatus).Inc() + continue + } + cleanJobTimestamp := time.Now().Unix() + usersChan <- &cleanerJob{ + users: activeUsers, + timestamp: cleanJobTimestamp, + } + deleteChan <- &cleanerJob{ + users: deletedUsers, + timestamp: cleanJobTimestamp, + } + + case <-ctx.Done(): + return nil + } + } +} + +func (c *BlocksCleaner) runActiveUserCleanup(ctx context.Context, jobChan chan *cleanerJob) { + for job := range jobChan { + if job.timestamp < time.Now().Add(-c.cfg.CleanupInterval).Unix() { + level.Warn(c.logger).Log("Active user cleaner job too old. 
Ignoring to get recent data") + continue + } + c.cleanUpActiveUsers(ctx, job.users, false) //nolint:errcheck + } } -func (c *BlocksCleaner) runCleanup(ctx context.Context, firstRun bool) { - level.Info(c.logger).Log("msg", "started blocks cleanup and maintenance") - c.runsStarted.Inc() +func (c *BlocksCleaner) cleanUpActiveUsers(ctx context.Context, users []string, firstRun bool) error { + level.Info(c.logger).Log("msg", "started blocks cleanup and maintenance for active users") + c.runsStarted.WithLabelValues(activeStatus).Inc() + + err := concurrency.ForEachUser(ctx, users, c.cfg.CleanupConcurrency, func(ctx context.Context, userID string) error { + userLogger := util_log.WithUserID(userID, c.logger) + userBucket := bucket.NewUserBucketClient(userID, c.bucketClient, c.cfgProvider) + visitMarkerManager, err := c.obtainVisitMarkerManager(ctx, userLogger, userBucket) + if err != nil { + return err + } + if visitMarkerManager == nil { + return nil + } + errChan := make(chan error, 1) + go visitMarkerManager.HeartBeat(ctx, errChan, c.cleanerVisitMarkerFileUpdateInterval, true) + defer func() { + errChan <- nil + }() + return errors.Wrapf(c.cleanUser(ctx, userLogger, userBucket, userID, firstRun), "failed to delete blocks for user: %s", userID) + }) - if err := c.cleanUsers(ctx, firstRun); err == nil { - level.Info(c.logger).Log("msg", "successfully completed blocks cleanup and maintenance") - c.runsCompleted.Inc() - c.runsLastSuccess.SetToCurrentTime() + if err == nil { + level.Info(c.logger).Log("msg", "successfully completed blocks cleanup and maintenance for active users") + c.runsCompleted.WithLabelValues(activeStatus).Inc() + c.runsLastSuccess.WithLabelValues(activeStatus).SetToCurrentTime() } else if errors.Is(err, context.Canceled) { - level.Info(c.logger).Log("msg", "canceled blocks cleanup and maintenance", "err", err) - return + level.Info(c.logger).Log("msg", "canceled blocks cleanup and maintenance for active users", "err", err) } else { - level.Error(c.logger).Log("msg", "failed to run blocks cleanup and maintenance", "err", err.Error()) - c.runsFailed.Inc() + level.Error(c.logger).Log("msg", "failed to run blocks cleanup and maintenance for active users", "err", err.Error()) + c.runsFailed.WithLabelValues(activeStatus).Inc() } + return err } -func (c *BlocksCleaner) cleanUsers(ctx context.Context, firstRun bool) error { +func (c *BlocksCleaner) runDeleteUserCleanup(ctx context.Context, jobChan chan *cleanerJob) { + for job := range jobChan { + if job.timestamp < time.Now().Add(-c.cfg.CleanupInterval).Unix() { + level.Warn(c.logger).Log("Delete users cleaner job too old. 
Ignoring to get recent data") + continue + } + c.cleanDeletedUsers(ctx, job.users) //nolint:errcheck + } +} + +func (c *BlocksCleaner) cleanDeletedUsers(ctx context.Context, users []string) error { + level.Info(c.logger).Log("msg", "started blocks cleanup and maintenance for deleted users") + c.runsStarted.WithLabelValues(deletedStatus).Inc() + + err := concurrency.ForEachUser(ctx, users, c.cfg.CleanupConcurrency, func(ctx context.Context, userID string) error { + userLogger := util_log.WithUserID(userID, c.logger) + userBucket := bucket.NewUserBucketClient(userID, c.bucketClient, c.cfgProvider) + visitMarkerManager, err := c.obtainVisitMarkerManager(ctx, userLogger, userBucket) + if err != nil { + return err + } + if visitMarkerManager == nil { + return nil + } + errChan := make(chan error, 1) + go visitMarkerManager.HeartBeat(ctx, errChan, c.cleanerVisitMarkerFileUpdateInterval, true) + defer func() { + errChan <- nil + }() + return errors.Wrapf(c.deleteUserMarkedForDeletion(ctx, userLogger, userBucket, userID), "failed to delete user marked for deletion: %s", userID) + }) + + if err == nil { + level.Info(c.logger).Log("msg", "successfully completed blocks cleanup and maintenance for deleted users") + c.runsCompleted.WithLabelValues(deletedStatus).Inc() + c.runsLastSuccess.WithLabelValues(deletedStatus).SetToCurrentTime() + } else if errors.Is(err, context.Canceled) { + level.Info(c.logger).Log("msg", "canceled blocks cleanup and maintenance for deleted users", "err", err) + } else { + level.Error(c.logger).Log("msg", "failed to run blocks cleanup and maintenance for deleted users", "err", err.Error()) + c.runsFailed.WithLabelValues(deletedStatus).Inc() + } + return err +} + +func (c *BlocksCleaner) scanUsers(ctx context.Context) ([]string, []string, error) { users, deleted, err := c.usersScanner.ScanUsers(ctx) if err != nil { - return errors.Wrap(err, "failed to discover users from bucket") + return nil, nil, errors.Wrap(err, "failed to discover users from bucket") } isActive := util.StringsMap(users) @@ -182,22 +391,35 @@ func (c *BlocksCleaner) cleanUsers(ctx context.Context, firstRun bool) error { c.tenantBlocksMarkedForNoCompaction.DeleteLabelValues(userID) c.tenantPartialBlocks.DeleteLabelValues(userID) c.tenantBucketIndexLastUpdate.DeleteLabelValues(userID) + if c.cfg.ShardingStrategy == util.ShardingStrategyShuffle { + c.inProgressCompactions.DeleteLabelValues(userID) + c.remainingPlannedCompactions.DeleteLabelValues(userID) + c.oldestPartitionGroupOffset.DeleteLabelValues(userID) + } } } c.lastOwnedUsers = allUsers - return concurrency.ForEachUser(ctx, allUsers, c.cfg.CleanupConcurrency, func(ctx context.Context, userID string) error { - if isDeleted[userID] { - return errors.Wrapf(c.deleteUserMarkedForDeletion(ctx, userID), "failed to delete user marked for deletion: %s", userID) - } - return errors.Wrapf(c.cleanUser(ctx, userID, firstRun), "failed to delete blocks for user: %s", userID) - }) + return users, deleted, nil +} + +func (c *BlocksCleaner) obtainVisitMarkerManager(ctx context.Context, userLogger log.Logger, userBucket objstore.InstrumentedBucket) (*VisitMarkerManager, error) { + cleanerVisitMarker := NewCleanerVisitMarker(c.ringLifecyclerID) + visitMarkerManager := NewVisitMarkerManager(userBucket, userLogger, c.ringLifecyclerID, cleanerVisitMarker, c.CleanerVisitMarkerReadFailed, c.CleanerVisitMarkerWriteFailed) + + existingCleanerVisitMarker := &CleanerVisitMarker{} + err := visitMarkerManager.ReadVisitMarker(ctx, existingCleanerVisitMarker) + if err != nil && 
!errors.Is(err, ErrorVisitMarkerNotFound) { + return nil, errors.Wrapf(err, "failed to read cleaner visit marker") + } + if errors.Is(err, ErrorVisitMarkerNotFound) || !existingCleanerVisitMarker.IsVisited(c.cleanerVisitMarkerTimeout) { + return visitMarkerManager, nil + } + return nil, nil } // Remove blocks and remaining data for tenant marked for deletion. -func (c *BlocksCleaner) deleteUserMarkedForDeletion(ctx context.Context, userID string) error { - userLogger := util_log.WithUserID(userID, c.logger) - userBucket := bucket.NewUserBucketClient(userID, c.bucketClient, c.cfgProvider) +func (c *BlocksCleaner) deleteUserMarkedForDeletion(ctx context.Context, userLogger log.Logger, userBucket objstore.InstrumentedBucket, userID string) error { level.Info(userLogger).Log("msg", "deleting blocks for tenant marked for deletion") @@ -213,7 +435,7 @@ func (c *BlocksCleaner) deleteUserMarkedForDeletion(ctx context.Context, userID } c.tenantBucketIndexLastUpdate.DeleteLabelValues(userID) - var deletedBlocks, failed int + var blocksToDelete []interface{} err := userBucket.Iter(ctx, "", func(name string) error { if err := ctx.Err(); err != nil { return err @@ -223,34 +445,43 @@ func (c *BlocksCleaner) deleteUserMarkedForDeletion(ctx context.Context, userID if !ok { return nil } + blocksToDelete = append(blocksToDelete, id) + return nil + }) + if err != nil { + return err + } - err := block.Delete(ctx, userLogger, userBucket, id) + var deletedBlocks, failed atomic.Int64 + err = concurrency.ForEach(ctx, blocksToDelete, defaultDeleteBlocksConcurrency, func(ctx context.Context, job interface{}) error { + blockID := job.(ulid.ULID) + err := block.Delete(ctx, userLogger, userBucket, blockID) if err != nil { - failed++ + failed.Add(1) c.blocksFailedTotal.Inc() - level.Warn(userLogger).Log("msg", "failed to delete block", "block", id, "err", err) + level.Warn(userLogger).Log("msg", "failed to delete block", "block", blockID, "err", err) return nil // Continue with other blocks. } - deletedBlocks++ + deletedBlocks.Add(1) c.blocksCleanedTotal.Inc() - level.Info(userLogger).Log("msg", "deleted block", "block", id) + c.tenantBlocksCleanedTotal.WithLabelValues(userID).Inc() + level.Info(userLogger).Log("msg", "deleted block", "block", blockID) return nil }) - if err != nil { return err } - if failed > 0 { + if failed.Load() > 0 { // The number of blocks left in the storage is equal to the number of blocks we failed // to delete. We also consider them all marked for deletion given the next run will try // to delete them again. - c.tenantBlocks.WithLabelValues(userID).Set(float64(failed)) - c.tenantBlocksMarkedForDelete.WithLabelValues(userID).Set(float64(failed)) + c.tenantBlocks.WithLabelValues(userID).Set(float64(failed.Load())) + c.tenantBlocksMarkedForDelete.WithLabelValues(userID).Set(float64(failed.Load())) c.tenantPartialBlocks.WithLabelValues(userID).Set(0) - return errors.Errorf("failed to delete %d blocks", failed) + return errors.Errorf("failed to delete %d blocks", failed.Load()) } // Given all blocks have been deleted, we can also remove the metrics. 
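The `obtainVisitMarkerManager` helper above, together with the `HeartBeat` goroutine started in `cleanUpActiveUsers`/`cleanDeletedUsers`, is the coordination mechanism that lets multiple cleaners share tenants: a cleaner only proceeds if the tenant's cleaner visit marker is missing or expired, and it keeps the marker fresh while it works. The sketch below distills that pattern into one hypothetical helper (`cleanWithVisitMarker` is not part of the PR), assumed to live in the `compactor` package:

```go
package compactor

import (
	"context"

	"github.com/go-kit/log"
	"github.com/thanos-io/objstore"
)

// cleanWithVisitMarker claims a tenant via the cleaner visit marker, keeps the
// claim alive with a heartbeat goroutine, and releases it when doCleanup
// returns. doCleanup stands in for the per-tenant cleanup work.
func cleanWithVisitMarker(ctx context.Context, c *BlocksCleaner, userLogger log.Logger, userBucket objstore.InstrumentedBucket, doCleanup func() error) error {
	visitMarkerManager, err := c.obtainVisitMarkerManager(ctx, userLogger, userBucket)
	if err != nil {
		return err
	}
	if visitMarkerManager == nil {
		// Another cleaner holds a non-expired visit marker for this tenant; skip it.
		return nil
	}

	errChan := make(chan error, 1)
	go visitMarkerManager.HeartBeat(ctx, errChan, c.cleanerVisitMarkerFileUpdateInterval, true)
	defer func() {
		errChan <- nil // stop the heartbeat once cleanup is done
	}()

	return doCleanup()
}
```

The `true` argument to `HeartBeat` simply mirrors the call sites above; its exact meaning is not visible in this diff.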
@@ -259,8 +490,8 @@ func (c *BlocksCleaner) deleteUserMarkedForDeletion(ctx context.Context, userID c.tenantBlocksMarkedForNoCompaction.DeleteLabelValues(userID) c.tenantPartialBlocks.DeleteLabelValues(userID) - if deletedBlocks > 0 { - level.Info(userLogger).Log("msg", "deleted blocks for tenant marked for deletion", "deletedBlocks", deletedBlocks) + if deletedBlocks.Load() > 0 { + level.Info(userLogger).Log("msg", "deleted blocks for tenant marked for deletion", "deletedBlocks", deletedBlocks.Load()) } mark, err := cortex_tsdb.ReadTenantDeletionMark(ctx, c.bucketClient, userID) @@ -274,7 +505,7 @@ func (c *BlocksCleaner) deleteUserMarkedForDeletion(ctx context.Context, userID // If we have just deleted some blocks, update "finished" time. Also update "finished" time if it wasn't set yet, but there are no blocks. // Note: this UPDATES the tenant deletion mark. Components that use caching bucket will NOT SEE this update, // but that is fine -- they only check whether tenant deletion marker exists or not. - if deletedBlocks > 0 || mark.FinishedTime == 0 { + if deletedBlocks.Load() > 0 || mark.FinishedTime == 0 { level.Info(userLogger).Log("msg", "updating finished time in tenant deletion mark") mark.FinishedTime = time.Now().Unix() return errors.Wrap(cortex_tsdb.WriteTenantDeletionMark(ctx, c.bucketClient, userID, mark), "failed to update tenant deletion mark") @@ -293,6 +524,15 @@ func (c *BlocksCleaner) deleteUserMarkedForDeletion(ctx context.Context, userID level.Info(userLogger).Log("msg", "deleted files under "+block.DebugMetas+" for tenant marked for deletion", "count", deleted) } + if c.cfg.CompactionMode == util.CompactionModePartitioning { + // Clean up partitioned group info files + if deleted, err := bucket.DeletePrefix(ctx, userBucket, PartitionedGroupDirectory, userLogger); err != nil { + return errors.Wrap(err, "failed to delete "+PartitionedGroupDirectory) + } else if deleted > 0 { + level.Info(userLogger).Log("msg", "deleted files under "+PartitionedGroupDirectory+" for tenant marked for deletion", "count", deleted) + } + } + if deleted, err := bucket.DeletePrefix(ctx, userBucket, bucketindex.MarkersPathname, userLogger); err != nil { return errors.Wrap(err, "failed to delete marker files") } else if deleted > 0 { @@ -306,9 +546,8 @@ func (c *BlocksCleaner) deleteUserMarkedForDeletion(ctx context.Context, userID return nil } -func (c *BlocksCleaner) cleanUser(ctx context.Context, userID string, firstRun bool) (returnErr error) { - userLogger := util_log.WithUserID(userID, c.logger) - userBucket := bucket.NewUserBucketClient(userID, c.bucketClient, c.cfgProvider) +func (c *BlocksCleaner) cleanUser(ctx context.Context, userLogger log.Logger, userBucket objstore.InstrumentedBucket, userID string, firstRun bool) (returnErr error) { + c.blocksMarkedForDeletion.WithLabelValues(userID, reasonLabelValue) startTime := time.Now() level.Info(userLogger).Log("msg", "started blocks cleanup and maintenance") @@ -318,6 +557,7 @@ func (c *BlocksCleaner) cleanUser(ctx context.Context, userID string, firstRun b } else { level.Info(userLogger).Log("msg", "completed blocks cleanup and maintenance", "duration", time.Since(startTime)) } + c.tenantCleanDuration.WithLabelValues(userID).Set(time.Since(startTime).Seconds()) }() // Migrate block deletion marks to the global markers location. This operation is a best-effort. @@ -341,6 +581,7 @@ func (c *BlocksCleaner) cleanUser(ctx context.Context, userID string, firstRun b idxs.SyncTime = time.Now().Unix() // Read the bucket index. 
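The deletion path for tenants marked for deletion now collects block IDs first and then deletes them through a bounded worker pool, tracking results with atomic counters instead of plain ints (which would race once the loop body runs concurrently). A distilled, hypothetical sketch of that pattern, assumed to sit in the `compactor` package:

```go
package compactor

import (
	"context"

	"github.com/go-kit/log"
	"github.com/oklog/ulid"
	"github.com/thanos-io/objstore"
	"github.com/thanos-io/thanos/pkg/block"
	"go.uber.org/atomic"

	"github.com/cortexproject/cortex/pkg/util/concurrency"
)

// deleteBlocksConcurrently fans block deletions out over a bounded pool and
// tallies successes/failures with atomic counters, mirroring the logic above.
func deleteBlocksConcurrently(ctx context.Context, userLogger log.Logger, userBucket objstore.InstrumentedBucket, ids []ulid.ULID) (deleted, failed int64) {
	jobs := make([]interface{}, 0, len(ids))
	for _, id := range ids {
		jobs = append(jobs, id)
	}

	var deletedBlocks, failedBlocks atomic.Int64
	_ = concurrency.ForEach(ctx, jobs, defaultDeleteBlocksConcurrency, func(ctx context.Context, job interface{}) error {
		blockID := job.(ulid.ULID)
		if err := block.Delete(ctx, userLogger, userBucket, blockID); err != nil {
			failedBlocks.Add(1)
			return nil // keep going with the remaining blocks
		}
		deletedBlocks.Add(1)
		return nil
	})
	return deletedBlocks.Load(), failedBlocks.Load()
}
```

The `concurrency` import path is inferred from Cortex's existing `pkg/util/concurrency` package and is not shown in this hunk; the `go.uber.org/atomic` import is the one added above.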
+ begin := time.Now() idx, err := bucketindex.ReadIndex(ctx, c.bucketClient, userID, c.cfgProvider, c.logger) defer func() { @@ -365,6 +606,7 @@ func (c *BlocksCleaner) cleanUser(ctx context.Context, userID string, firstRun b idxs.Status = bucketindex.GenericError return err } + level.Info(userLogger).Log("msg", "finish reading index", "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) // Mark blocks for future deletion based on the retention period for the user. // Note doing this before UpdateIndex, so it reads in the deletion marks. @@ -374,19 +616,22 @@ func (c *BlocksCleaner) cleanUser(ctx context.Context, userID string, firstRun b // We do not want to stop the remaining work in the cleaner if an // error occurs here. Errors are logged in the function. retention := c.cfgProvider.CompactorBlocksRetentionPeriod(userID) - c.applyUserRetentionPeriod(ctx, idx, retention, userBucket, userLogger) + c.applyUserRetentionPeriod(ctx, idx, retention, userBucket, userLogger, userID) } // Generate an updated in-memory version of the bucket index. + begin = time.Now() w := bucketindex.NewUpdater(c.bucketClient, userID, c.cfgProvider, c.logger) idx, partials, totalBlocksBlocksMarkedForNoCompaction, err := w.UpdateIndex(ctx, idx) if err != nil { idxs.Status = bucketindex.GenericError return err } + level.Info(userLogger).Log("msg", "finish updating index", "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) // Delete blocks marked for deletion. We iterate over a copy of deletion marks because // we'll need to manipulate the index (removing blocks which get deleted). + begin = time.Now() blocksToDelete := make([]interface{}, 0, len(idx.BlockDeletionMarks)) var mux sync.Mutex for _, mark := range idx.BlockDeletionMarks.Clone() { @@ -395,8 +640,10 @@ func (c *BlocksCleaner) cleanUser(ctx context.Context, userID string, firstRun b } blocksToDelete = append(blocksToDelete, mark.ID) } + level.Info(userLogger).Log("msg", "finish getting blocks to be deleted", "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) // Concurrently deletes blocks marked for deletion, and removes blocks from index. + begin = time.Now() _ = concurrency.ForEach(ctx, blocksToDelete, defaultDeleteBlocksConcurrency, func(ctx context.Context, job interface{}) error { blockID := job.(ulid.ULID) @@ -412,32 +659,126 @@ func (c *BlocksCleaner) cleanUser(ctx context.Context, userID string, firstRun b mux.Unlock() c.blocksCleanedTotal.Inc() + c.tenantBlocksCleanedTotal.WithLabelValues(userID).Inc() level.Info(userLogger).Log("msg", "deleted block marked for deletion", "block", blockID) return nil }) + level.Info(userLogger).Log("msg", "finish deleting blocks", "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) // Partial blocks with a deletion mark can be cleaned up. This is a best effort, so we don't return // error if the cleanup of partial blocks fail. if len(partials) > 0 { - c.cleanUserPartialBlocks(ctx, partials, idx, userBucket, userLogger) + begin = time.Now() + c.cleanUserPartialBlocks(ctx, userID, partials, idx, userBucket, userLogger) + level.Info(userLogger).Log("msg", "finish cleaning partial blocks", "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) } // Upload the updated index to the storage. 
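Each stage of `cleanUser` above (reading the index, updating it, deleting marked blocks, cleaning partial blocks, writing the index back) is now bracketed with a `begin := time.Now()` / `level.Info(...).Log(...)` pair. A hypothetical helper (not part of the PR) that captures the same logging convention:

```go
package compactor

import (
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
)

// timeStage runs one cleanup stage and logs how long it took, using the same
// "duration" / "duration_ms" keys as the inline instrumentation above.
func timeStage(userLogger log.Logger, stage string, fn func() error) error {
	begin := time.Now()
	err := fn()
	level.Info(userLogger).Log("msg", "finish "+stage, "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds())
	return err
}
```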
+ begin = time.Now() if err := bucketindex.WriteIndex(ctx, c.bucketClient, userID, c.cfgProvider, idx); err != nil { return err } + level.Info(userLogger).Log("msg", "finish writing new index", "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) + + if c.cfg.ShardingStrategy == util.ShardingStrategyShuffle && c.cfg.CompactionMode == util.CompactionModePartitioning { + begin = time.Now() + c.cleanPartitionedGroupInfo(ctx, userBucket, userLogger, userID) + level.Info(userLogger).Log("msg", "finish cleaning partitioned group info files", "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) + } c.tenantBlocks.WithLabelValues(userID).Set(float64(len(idx.Blocks))) c.tenantBlocksMarkedForDelete.WithLabelValues(userID).Set(float64(len(idx.BlockDeletionMarks))) c.tenantBlocksMarkedForNoCompaction.WithLabelValues(userID).Set(float64(totalBlocksBlocksMarkedForNoCompaction)) c.tenantBucketIndexLastUpdate.WithLabelValues(userID).SetToCurrentTime() c.tenantPartialBlocks.WithLabelValues(userID).Set(float64(len(partials))) + return nil } +func (c *BlocksCleaner) cleanPartitionedGroupInfo(ctx context.Context, userBucket objstore.InstrumentedBucket, userLogger log.Logger, userID string) { + deletePartitionedGroupInfo := make(map[*PartitionedGroupInfo]struct { + path string + status PartitionedGroupStatus + }) + err := userBucket.Iter(ctx, PartitionedGroupDirectory, func(file string) error { + partitionedGroupInfo, err := ReadPartitionedGroupInfoFile(ctx, userBucket, userLogger, file, c.partitionedGroupInfoReadFailed) + if err != nil { + level.Warn(userLogger).Log("msg", "failed to read partitioned group info", "partitioned_group_info", file) + return nil + } + + status := partitionedGroupInfo.getPartitionedGroupStatus(ctx, userBucket, c.partitionVisitMarkerTimeout, userLogger, c.compactionVisitMarkerReadFailed, c.compactionVisitMarkerWriteFailed) + level.Info(userLogger).Log("msg", "got partitioned group status", "partitioned_group_status", status.String()) + deletePartitionedGroupInfo[partitionedGroupInfo] = struct { + path string + status PartitionedGroupStatus + }{ + path: file, + status: status, + } + return nil + }) + + if err != nil { + level.Warn(userLogger).Log("msg", "error return when going through partitioned group directory", "err", err) + } + + remainingCompactions := 0 + inProgressCompactions := 0 + var oldestPartitionGroup *PartitionedGroupInfo + defer func() { + c.remainingPlannedCompactions.WithLabelValues(userID).Set(float64(remainingCompactions)) + c.inProgressCompactions.WithLabelValues(userID).Set(float64(inProgressCompactions)) + if c.oldestPartitionGroupOffset != nil { + if oldestPartitionGroup != nil { + c.oldestPartitionGroupOffset.WithLabelValues(userID).Set(float64(time.Now().Unix() - oldestPartitionGroup.CreationTime)) + level.Info(userLogger).Log("msg", "partition group info with oldest creation time", "partitioned_group_id", oldestPartitionGroup.PartitionedGroupID, "creation_time", oldestPartitionGroup.CreationTime) + } else { + c.oldestPartitionGroupOffset.WithLabelValues(userID).Set(0) + } + } + }() + for partitionedGroupInfo, extraInfo := range deletePartitionedGroupInfo { + partitionedGroupInfoFile := extraInfo.path + + remainingCompactions += extraInfo.status.PendingPartitions + inProgressCompactions += extraInfo.status.InProgressPartitions + if oldestPartitionGroup == nil || partitionedGroupInfo.CreationTime < oldestPartitionGroup.CreationTime { + oldestPartitionGroup = partitionedGroupInfo + } + if 
extraInfo.status.CanDelete { + if extraInfo.status.IsCompleted { + // Try to remove all blocks included in partitioned group info + if err := partitionedGroupInfo.markAllBlocksForDeletion(ctx, userBucket, userLogger, c.blocksMarkedForDeletion, userID); err != nil { + level.Warn(userLogger).Log("msg", "unable to mark all blocks in partitioned group info for deletion", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID) + // if one block can not be marked for deletion, we should + // skip delete this partitioned group. next iteration + // would try it again. + continue + } + } + + if err := userBucket.Delete(ctx, partitionedGroupInfoFile); err != nil { + level.Warn(userLogger).Log("msg", "failed to delete partitioned group info", "partitioned_group_info", partitionedGroupInfoFile, "err", err) + } else { + level.Info(userLogger).Log("msg", "deleted partitioned group info", "partitioned_group_info", partitionedGroupInfoFile) + } + } + + if extraInfo.status.CanDelete || extraInfo.status.DeleteVisitMarker { + // Remove partition visit markers + if _, err := bucket.DeletePrefix(ctx, userBucket, GetPartitionVisitMarkerDirectoryPath(partitionedGroupInfo.PartitionedGroupID), userLogger); err != nil { + level.Warn(userLogger).Log("msg", "failed to delete partition visit markers for partitioned group", "partitioned_group_info", partitionedGroupInfoFile, "err", err) + } else { + level.Info(userLogger).Log("msg", "deleted partition visit markers for partitioned group", "partitioned_group_info", partitionedGroupInfoFile) + } + } + } +} + // cleanUserPartialBlocks delete partial blocks which are safe to be deleted. The provided partials map // and index are updated accordingly. -func (c *BlocksCleaner) cleanUserPartialBlocks(ctx context.Context, partials map[ulid.ULID]error, idx *bucketindex.Index, userBucket objstore.InstrumentedBucket, userLogger log.Logger) { +func (c *BlocksCleaner) cleanUserPartialBlocks(ctx context.Context, userID string, partials map[ulid.ULID]error, idx *bucketindex.Index, userBucket objstore.InstrumentedBucket, userLogger log.Logger) { // Collect all blocks with missing meta.json into buffered channel. blocks := make([]interface{}, 0, len(partials)) @@ -492,13 +833,14 @@ func (c *BlocksCleaner) cleanUserPartialBlocks(ctx context.Context, partials map mux.Unlock() c.blocksCleanedTotal.Inc() + c.tenantBlocksCleanedTotal.WithLabelValues(userID).Inc() level.Info(userLogger).Log("msg", "deleted partial block marked for deletion", "block", blockID) return nil }) } // applyUserRetentionPeriod marks blocks for deletion which have aged past the retention period. -func (c *BlocksCleaner) applyUserRetentionPeriod(ctx context.Context, idx *bucketindex.Index, retention time.Duration, userBucket objstore.Bucket, userLogger log.Logger) { +func (c *BlocksCleaner) applyUserRetentionPeriod(ctx context.Context, idx *bucketindex.Index, retention time.Duration, userBucket objstore.Bucket, userLogger log.Logger, userID string) { // The retention period of zero is a special value indicating to never delete. if retention <= 0 { return @@ -511,7 +853,7 @@ func (c *BlocksCleaner) applyUserRetentionPeriod(ctx context.Context, idx *bucke // the cleaner will retry applying the retention in its next cycle. 
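To summarize the `cleanPartitionedGroupInfo` loop above: for each partitioned group the cleaner accumulates pending/in-progress partition counts and tracks the oldest group for the new gauges, then acts on the group's status. The hypothetical sketch below (not part of the PR) condenses that per-group decision and leaves the metric accounting out; `PartitionedGroupStatus` and its `CanDelete`/`IsCompleted`/`DeleteVisitMarker` fields are the ones used above, while the closure parameters stand in for the bucket operations:

```go
package compactor

// handlePartitionedGroup condenses the per-group branch of
// cleanPartitionedGroupInfo: completed groups get their blocks marked for
// deletion before the group file is removed, and partition visit markers are
// removed whenever the group can be deleted or the status asks for it.
func handlePartitionedGroup(status PartitionedGroupStatus, markBlocksForDeletion, deleteGroupFile, deleteVisitMarkers func() error) {
	if status.CanDelete {
		if status.IsCompleted {
			if err := markBlocksForDeletion(); err != nil {
				// Leave everything in place; the next cleaner cycle retries.
				return
			}
		}
		_ = deleteGroupFile()
	}
	if status.CanDelete || status.DeleteVisitMarker {
		_ = deleteVisitMarkers()
	}
}
```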
for _, b := range blocks { level.Info(userLogger).Log("msg", "applied retention: marking block for deletion", "block", b.ID, "maxTime", b.MaxTime) - if err := block.MarkForDeletion(ctx, userLogger, userBucket, b.ID, fmt.Sprintf("block exceeding retention of %v", retention), c.blocksMarkedForDeletion); err != nil { + if err := block.MarkForDeletion(ctx, userLogger, userBucket, b.ID, fmt.Sprintf("block exceeding retention of %v", retention), c.blocksMarkedForDeletion.WithLabelValues(userID, reasonLabelValue)); err != nil { level.Warn(userLogger).Log("msg", "failed to mark block for deletion", "block", b.ID, "err", err) } } diff --git a/pkg/compactor/blocks_cleaner_test.go b/pkg/compactor/blocks_cleaner_test.go index 98f9565fd1..f4b22fb520 100644 --- a/pkg/compactor/blocks_cleaner_test.go +++ b/pkg/compactor/blocks_cleaner_test.go @@ -3,6 +3,7 @@ package compactor import ( "context" "crypto/rand" + "errors" "fmt" "path" "strings" @@ -12,16 +13,21 @@ import ( "github.com/go-kit/log" "github.com/oklog/ulid" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/testutil" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" "github.com/thanos-io/thanos/pkg/block" "github.com/thanos-io/thanos/pkg/block/metadata" + "github.com/cortexproject/cortex/pkg/storage/bucket" "github.com/cortexproject/cortex/pkg/storage/tsdb" "github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex" cortex_testutil "github.com/cortexproject/cortex/pkg/storage/tsdb/testutil" + "github.com/cortexproject/cortex/pkg/util" + util_log "github.com/cortexproject/cortex/pkg/util/log" "github.com/cortexproject/cortex/pkg/util/services" ) @@ -74,17 +80,26 @@ func TestBlockCleaner_KeyPermissionDenied(t *testing.T) { DeletionDelay: deletionDelay, CleanupInterval: time.Minute, CleanupConcurrency: 1, + ShardingStrategy: util.ShardingStrategyShuffle, } logger := log.NewNopLogger() scanner := tsdb.NewUsersScanner(mbucket, tsdb.AllUsers, logger) cfgProvider := newMockConfigProvider() + blocksMarkedForDeletion := prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: blocksMarkedForDeletionName, + Help: blocksMarkedForDeletionHelp, + }, append(CommonLabels, ReasonLabelName)) + dummyCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + dummyGaugeVec := prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"test"}) - cleaner := NewBlocksCleaner(cfg, mbucket, scanner, cfgProvider, logger, nil) + cleaner := NewBlocksCleaner(cfg, mbucket, scanner, 10*time.Second, cfgProvider, logger, nil, time.Minute, 30*time.Second, blocksMarkedForDeletion, dummyCounter, dummyCounter, dummyCounter, dummyGaugeVec) // Clean User with no error cleaner.bucketClient = bkt - err := cleaner.cleanUser(ctx, userID, false) + userLogger := util_log.WithUserID(userID, cleaner.logger) + userBucket := bucket.NewUserBucketClient(userID, cleaner.bucketClient, cleaner.cfgProvider) + err := cleaner.cleanUser(ctx, userLogger, userBucket, userID, false) require.NoError(t, err) s, err := bucketindex.ReadSyncStatus(ctx, bkt, userID, logger) require.NoError(t, err) @@ -93,7 +108,9 @@ func TestBlockCleaner_KeyPermissionDenied(t *testing.T) { // Clean with cmk error cleaner.bucketClient = mbucket - err = cleaner.cleanUser(ctx, userID, false) + userLogger = util_log.WithUserID(userID, cleaner.logger) + userBucket = 
bucket.NewUserBucketClient(userID, cleaner.bucketClient, cleaner.cfgProvider) + err = cleaner.cleanUser(ctx, userLogger, userBucket, userID, false) require.NoError(t, err) s, err = bucketindex.ReadSyncStatus(ctx, bkt, userID, logger) require.NoError(t, err) @@ -102,7 +119,9 @@ func TestBlockCleaner_KeyPermissionDenied(t *testing.T) { // Re grant access to the key cleaner.bucketClient = bkt - err = cleaner.cleanUser(ctx, userID, false) + userLogger = util_log.WithUserID(userID, cleaner.logger) + userBucket = bucket.NewUserBucketClient(userID, cleaner.bucketClient, cleaner.cfgProvider) + err = cleaner.cleanUser(ctx, userLogger, userBucket, userID, false) require.NoError(t, err) s, err = bucketindex.ReadSyncStatus(ctx, bkt, userID, logger) require.NoError(t, err) @@ -138,7 +157,7 @@ func testBlocksCleanerWithOptions(t *testing.T, options testBlocksCleanerOptions createDeletionMark(t, bucketClient, "user-1", block4, now.Add(-deletionDelay).Add(time.Hour)) // Partial block hasn't reached the deletion threshold yet. createDeletionMark(t, bucketClient, "user-1", block5, now.Add(-deletionDelay).Add(-time.Hour)) // Partial block reached the deletion threshold. require.NoError(t, bucketClient.Delete(ctx, path.Join("user-1", block6.String(), metadata.MetaFilename))) // Partial block without deletion mark. - createBlockVisitMarker(t, bucketClient, "user-1", block11) // Partial block only has visit marker. + createLegacyBlockVisitMarker(t, bucketClient, "user-1", block11) // Partial block only has visit marker. createDeletionMark(t, bucketClient, "user-2", block7, now.Add(-deletionDelay).Add(-time.Hour)) // Block reached the deletion threshold. // Blocks for user-3, marked for deletion. @@ -170,14 +189,21 @@ func testBlocksCleanerWithOptions(t *testing.T, options testBlocksCleanerOptions CleanupConcurrency: options.concurrency, BlockDeletionMarksMigrationEnabled: options.markersMigrationEnabled, TenantCleanupDelay: options.tenantDeletionDelay, + ShardingStrategy: util.ShardingStrategyShuffle, } reg := prometheus.NewPedanticRegistry() logger := log.NewNopLogger() scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger) cfgProvider := newMockConfigProvider() - - cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, cfgProvider, logger, reg) + blocksMarkedForDeletion := promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: blocksMarkedForDeletionName, + Help: blocksMarkedForDeletionHelp, + }, append(CommonLabels, ReasonLabelName)) + dummyCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + dummyGaugeVec := prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"test"}) + + cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, 60*time.Second, cfgProvider, logger, reg, time.Minute, 30*time.Second, blocksMarkedForDeletion, dummyCounter, dummyCounter, dummyCounter, dummyGaugeVec) require.NoError(t, services.StartAndAwaitRunning(ctx, cleaner)) defer services.StopAndAwaitTerminated(ctx, cleaner) //nolint:errcheck @@ -203,7 +229,7 @@ func testBlocksCleanerWithOptions(t *testing.T, options testBlocksCleanerOptions // Should not delete a partial block without deletion mark. {path: path.Join("user-1", block6.String(), "index"), expectedExists: true}, // Should delete a partial block with only visit marker. 
- {path: path.Join("user-1", block11.String(), BlockVisitMarkerFile), expectedExists: false}, + {path: path.Join("user-1", GetBlockVisitMarkerFile(block11.String())), expectedExists: false}, // Should completely delete blocks for user-3, marked for deletion {path: path.Join("user-3", block9.String(), metadata.MetaFilename), expectedExists: false}, {path: path.Join("user-3", block9.String(), "index"), expectedExists: false}, @@ -229,9 +255,27 @@ func testBlocksCleanerWithOptions(t *testing.T, options testBlocksCleanerOptions assert.Equal(t, tc.expectedExists, exists, tc.user) } - assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsStarted)) - assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsCompleted)) - assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.runsFailed)) + runsStartedActive, err := cleaner.runsStarted.GetMetricWithLabelValues(activeStatus) + require.NoError(t, err) + assert.Equal(t, float64(1), testutil.ToFloat64(runsStartedActive)) + runsStartedDeleted, err := cleaner.runsStarted.GetMetricWithLabelValues(deletedStatus) + require.NoError(t, err) + assert.Equal(t, float64(1), testutil.ToFloat64(runsStartedDeleted)) + + runsCompletedActive, err := cleaner.runsCompleted.GetMetricWithLabelValues(activeStatus) + require.NoError(t, err) + assert.Equal(t, float64(1), testutil.ToFloat64(runsCompletedActive)) + runsCompletedDeleted, err := cleaner.runsCompleted.GetMetricWithLabelValues(deletedStatus) + require.NoError(t, err) + assert.Equal(t, float64(1), testutil.ToFloat64(runsCompletedDeleted)) + + runsFailedActive, err := cleaner.runsFailed.GetMetricWithLabelValues(activeStatus) + require.NoError(t, err) + assert.Equal(t, float64(0), testutil.ToFloat64(runsFailedActive)) + runsFailedDeleted, err := cleaner.runsFailed.GetMetricWithLabelValues(deletedStatus) + require.NoError(t, err) + assert.Equal(t, float64(0), testutil.ToFloat64(runsFailedDeleted)) + assert.Equal(t, float64(7), testutil.ToFloat64(cleaner.blocksCleanedTotal)) assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.blocksFailedTotal)) @@ -319,22 +363,29 @@ func TestBlocksCleaner_ShouldContinueOnBlockDeletionFailure(t *testing.T) { createDeletionMark(t, bucketClient, userID, block4, now.Add(-deletionDelay).Add(-time.Hour)) // To emulate a failure deleting a block, we wrap the bucket client in a mocked one. 
- bucketClient = &cortex_testutil.MockBucketFailure{ + bucketClient = objstore.WithNoopInstr(&mockBucketFailure{ Bucket: bucketClient, DeleteFailures: []string{path.Join(userID, block3.String(), metadata.MetaFilename)}, - } + }) cfg := BlocksCleanerConfig{ DeletionDelay: deletionDelay, CleanupInterval: time.Minute, CleanupConcurrency: 1, + ShardingStrategy: util.ShardingStrategyShuffle, } logger := log.NewNopLogger() scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger) cfgProvider := newMockConfigProvider() - - cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, cfgProvider, logger, nil) + blocksMarkedForDeletion := prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: blocksMarkedForDeletionName, + Help: blocksMarkedForDeletionHelp, + }, append(CommonLabels, ReasonLabelName)) + dummyCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + dummyGaugeVec := prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"test"}) + + cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, 60*time.Second, cfgProvider, logger, nil, time.Minute, 30*time.Second, blocksMarkedForDeletion, dummyCounter, dummyCounter, dummyCounter, dummyGaugeVec) require.NoError(t, services.StartAndAwaitRunning(ctx, cleaner)) defer services.StopAndAwaitTerminated(ctx, cleaner) //nolint:errcheck @@ -352,9 +403,27 @@ func TestBlocksCleaner_ShouldContinueOnBlockDeletionFailure(t *testing.T) { assert.Equal(t, tc.expectedExists, exists, tc.path) } - assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsStarted)) - assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsCompleted)) - assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.runsFailed)) + runsStartedActive, err := cleaner.runsStarted.GetMetricWithLabelValues(activeStatus) + require.NoError(t, err) + assert.Equal(t, float64(1), testutil.ToFloat64(runsStartedActive)) + runsStartedDeleted, err := cleaner.runsStarted.GetMetricWithLabelValues(deletedStatus) + require.NoError(t, err) + assert.Equal(t, float64(1), testutil.ToFloat64(runsStartedDeleted)) + + runsCompletedActive, err := cleaner.runsCompleted.GetMetricWithLabelValues(activeStatus) + require.NoError(t, err) + assert.Equal(t, float64(1), testutil.ToFloat64(runsCompletedActive)) + runsCompletedDeleted, err := cleaner.runsCompleted.GetMetricWithLabelValues(deletedStatus) + require.NoError(t, err) + assert.Equal(t, float64(1), testutil.ToFloat64(runsCompletedDeleted)) + + runsFailedActive, err := cleaner.runsFailed.GetMetricWithLabelValues(activeStatus) + require.NoError(t, err) + assert.Equal(t, float64(0), testutil.ToFloat64(runsFailedActive)) + runsFailedDeleted, err := cleaner.runsFailed.GetMetricWithLabelValues(deletedStatus) + require.NoError(t, err) + assert.Equal(t, float64(0), testutil.ToFloat64(runsFailedDeleted)) + assert.Equal(t, float64(2), testutil.ToFloat64(cleaner.blocksCleanedTotal)) assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.blocksFailedTotal)) @@ -388,13 +457,20 @@ func TestBlocksCleaner_ShouldRebuildBucketIndexOnCorruptedOne(t *testing.T) { DeletionDelay: deletionDelay, CleanupInterval: time.Minute, CleanupConcurrency: 1, + ShardingStrategy: util.ShardingStrategyShuffle, } logger := log.NewNopLogger() scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger) cfgProvider := newMockConfigProvider() - - cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, cfgProvider, logger, nil) + blocksMarkedForDeletion := prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: blocksMarkedForDeletionName, + Help: blocksMarkedForDeletionHelp, + }, 
append(CommonLabels, ReasonLabelName)) + dummyCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + dummyGaugeVec := prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"test"}) + + cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, 60*time.Second, cfgProvider, logger, nil, time.Minute, 30*time.Second, blocksMarkedForDeletion, dummyCounter, dummyCounter, dummyCounter, dummyGaugeVec) require.NoError(t, services.StartAndAwaitRunning(ctx, cleaner)) defer services.StopAndAwaitTerminated(ctx, cleaner) //nolint:errcheck @@ -411,9 +487,27 @@ func TestBlocksCleaner_ShouldRebuildBucketIndexOnCorruptedOne(t *testing.T) { assert.Equal(t, tc.expectedExists, exists, tc.path) } - assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsStarted)) - assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsCompleted)) - assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.runsFailed)) + runsStartedActive, err := cleaner.runsStarted.GetMetricWithLabelValues(activeStatus) + require.NoError(t, err) + assert.Equal(t, float64(1), testutil.ToFloat64(runsStartedActive)) + runsStartedDeleted, err := cleaner.runsStarted.GetMetricWithLabelValues(deletedStatus) + require.NoError(t, err) + assert.Equal(t, float64(1), testutil.ToFloat64(runsStartedDeleted)) + + runsCompletedActive, err := cleaner.runsCompleted.GetMetricWithLabelValues(activeStatus) + require.NoError(t, err) + assert.Equal(t, float64(1), testutil.ToFloat64(runsCompletedActive)) + runsCompletedDeleted, err := cleaner.runsCompleted.GetMetricWithLabelValues(deletedStatus) + require.NoError(t, err) + assert.Equal(t, float64(1), testutil.ToFloat64(runsCompletedDeleted)) + + runsFailedActive, err := cleaner.runsFailed.GetMetricWithLabelValues(activeStatus) + require.NoError(t, err) + assert.Equal(t, float64(0), testutil.ToFloat64(runsFailedActive)) + runsFailedDeleted, err := cleaner.runsFailed.GetMetricWithLabelValues(deletedStatus) + require.NoError(t, err) + assert.Equal(t, float64(0), testutil.ToFloat64(runsFailedDeleted)) + assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.blocksCleanedTotal)) assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.blocksFailedTotal)) @@ -440,6 +534,7 @@ func TestBlocksCleaner_ShouldRemoveMetricsForTenantsNotBelongingAnymoreToTheShar DeletionDelay: time.Hour, CleanupInterval: time.Minute, CleanupConcurrency: 1, + ShardingStrategy: util.ShardingStrategyShuffle, } ctx := context.Background() @@ -447,9 +542,18 @@ func TestBlocksCleaner_ShouldRemoveMetricsForTenantsNotBelongingAnymoreToTheShar reg := prometheus.NewPedanticRegistry() scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger) cfgProvider := newMockConfigProvider() - - cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, cfgProvider, logger, reg) - require.NoError(t, cleaner.cleanUsers(ctx, true)) + blocksMarkedForDeletion := promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: blocksMarkedForDeletionName, + Help: blocksMarkedForDeletionHelp, + }, append(CommonLabels, ReasonLabelName)) + dummyCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + dummyGaugeVec := prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"test"}) + + cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, 60*time.Second, cfgProvider, logger, reg, time.Minute, 30*time.Second, blocksMarkedForDeletion, dummyCounter, dummyCounter, dummyCounter, dummyGaugeVec) + activeUsers, deleteUsers, err := cleaner.scanUsers(ctx) + require.NoError(t, err) + require.NoError(t, cleaner.cleanUpActiveUsers(ctx, activeUsers, true)) + 
require.NoError(t, cleaner.cleanDeletedUsers(ctx, deleteUsers)) assert.NoError(t, prom_testutil.GatherAndCompare(reg, strings.NewReader(` # HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. @@ -459,10 +563,10 @@ func TestBlocksCleaner_ShouldRemoveMetricsForTenantsNotBelongingAnymoreToTheShar # HELP cortex_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket. # TYPE cortex_bucket_blocks_marked_for_deletion_count gauge cortex_bucket_blocks_marked_for_deletion_count{user="user-1"} 0 - cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 - # HELP cortex_bucket_blocks_partials_count Total number of partial blocks. - # TYPE cortex_bucket_blocks_partials_count gauge - cortex_bucket_blocks_partials_count{user="user-1"} 0 + cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 + # HELP cortex_bucket_blocks_partials_count Total number of partial blocks. + # TYPE cortex_bucket_blocks_partials_count gauge + cortex_bucket_blocks_partials_count{user="user-1"} 0 cortex_bucket_blocks_partials_count{user="user-2"} 0 `), "cortex_bucket_blocks_count", @@ -477,7 +581,10 @@ func TestBlocksCleaner_ShouldRemoveMetricsForTenantsNotBelongingAnymoreToTheShar createTSDBBlock(t, bucketClient, "user-1", 40, 50, nil) createTSDBBlock(t, bucketClient, "user-2", 50, 60, nil) - require.NoError(t, cleaner.cleanUsers(ctx, false)) + activeUsers, deleteUsers, err = cleaner.scanUsers(ctx) + require.NoError(t, err) + require.NoError(t, cleaner.cleanUpActiveUsers(ctx, activeUsers, false)) + require.NoError(t, cleaner.cleanDeletedUsers(ctx, deleteUsers)) assert.NoError(t, prom_testutil.GatherAndCompare(reg, strings.NewReader(` # HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. @@ -571,6 +678,7 @@ func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) { DeletionDelay: time.Hour, CleanupInterval: time.Minute, CleanupConcurrency: 1, + ShardingStrategy: util.ShardingStrategyShuffle, } ctx := context.Background() @@ -578,8 +686,14 @@ func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) { reg := prometheus.NewPedanticRegistry() scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger) cfgProvider := newMockConfigProvider() + blocksMarkedForDeletion := promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: blocksMarkedForDeletionName, + Help: blocksMarkedForDeletionHelp, + }, append(CommonLabels, ReasonLabelName)) + dummyCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + dummyGaugeVec := prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"test"}) - cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, cfgProvider, logger, reg) + cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, 60*time.Second, cfgProvider, logger, reg, time.Minute, 30*time.Second, blocksMarkedForDeletion, dummyCounter, dummyCounter, dummyCounter, dummyGaugeVec) assertBlockExists := func(user string, block ulid.ULID, expectExists bool) { exists, err := bucketClient.Exists(ctx, path.Join(user, block.String(), metadata.MetaFilename)) @@ -589,10 +703,17 @@ func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) { // Existing behaviour - retention period disabled. 
{ + // clean up cleaner visit marker before running test + bucketClient.Delete(ctx, path.Join("user-1", GetCleanerVisitMarkerFilePath())) //nolint:errcheck + bucketClient.Delete(ctx, path.Join("user-2", GetCleanerVisitMarkerFilePath())) //nolint:errcheck + cfgProvider.userRetentionPeriods["user-1"] = 0 cfgProvider.userRetentionPeriods["user-2"] = 0 - require.NoError(t, cleaner.cleanUsers(ctx, true)) + activeUsers, deleteUsers, err := cleaner.scanUsers(ctx) + require.NoError(t, err) + require.NoError(t, cleaner.cleanUpActiveUsers(ctx, activeUsers, true)) + require.NoError(t, cleaner.cleanDeletedUsers(ctx, deleteUsers)) assertBlockExists("user-1", block1, true) assertBlockExists("user-1", block2, true) assertBlockExists("user-2", block3, true) @@ -609,7 +730,8 @@ func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) { cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 # HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. # TYPE cortex_compactor_blocks_marked_for_deletion_total counter - cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 0 + cortex_compactor_blocks_marked_for_deletion_total{reason="retention",user="user-1"} 0 + cortex_compactor_blocks_marked_for_deletion_total{reason="retention",user="user-2"} 0 `), "cortex_bucket_blocks_count", "cortex_bucket_blocks_marked_for_deletion_count", @@ -619,9 +741,16 @@ func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) { // Retention enabled only for a single user, but does nothing. { + // clean up cleaner visit marker before running test + bucketClient.Delete(ctx, path.Join("user-1", GetCleanerVisitMarkerFilePath())) //nolint:errcheck + bucketClient.Delete(ctx, path.Join("user-2", GetCleanerVisitMarkerFilePath())) //nolint:errcheck + cfgProvider.userRetentionPeriods["user-1"] = 9 * time.Hour - require.NoError(t, cleaner.cleanUsers(ctx, false)) + activeUsers, deleteUsers, err := cleaner.scanUsers(ctx) + require.NoError(t, err) + require.NoError(t, cleaner.cleanUpActiveUsers(ctx, activeUsers, false)) + require.NoError(t, cleaner.cleanDeletedUsers(ctx, deleteUsers)) assertBlockExists("user-1", block1, true) assertBlockExists("user-1", block2, true) assertBlockExists("user-2", block3, true) @@ -631,9 +760,16 @@ func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) { // Retention enabled only for a single user, marking a single block. // Note the block won't be deleted yet due to deletion delay. { + // clean up cleaner visit marker before running test + bucketClient.Delete(ctx, path.Join("user-1", GetCleanerVisitMarkerFilePath())) //nolint:errcheck + bucketClient.Delete(ctx, path.Join("user-2", GetCleanerVisitMarkerFilePath())) //nolint:errcheck + cfgProvider.userRetentionPeriods["user-1"] = 7 * time.Hour - require.NoError(t, cleaner.cleanUsers(ctx, false)) + activeUsers, deleteUsers, err := cleaner.scanUsers(ctx) + require.NoError(t, err) + require.NoError(t, cleaner.cleanUpActiveUsers(ctx, activeUsers, false)) + require.NoError(t, cleaner.cleanDeletedUsers(ctx, deleteUsers)) assertBlockExists("user-1", block1, true) assertBlockExists("user-1", block2, true) assertBlockExists("user-2", block3, true) @@ -650,7 +786,8 @@ func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) { cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 # HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. 
# TYPE cortex_compactor_blocks_marked_for_deletion_total counter - cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 1 + cortex_compactor_blocks_marked_for_deletion_total{reason="retention",user="user-1"} 1 + cortex_compactor_blocks_marked_for_deletion_total{reason="retention",user="user-2"} 0 `), "cortex_bucket_blocks_count", "cortex_bucket_blocks_marked_for_deletion_count", @@ -660,7 +797,14 @@ func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) { // Marking the block again, before the deletion occurs, should not cause an error. { - require.NoError(t, cleaner.cleanUsers(ctx, false)) + // clean up cleaner visit marker before running test + bucketClient.Delete(ctx, path.Join("user-1", GetCleanerVisitMarkerFilePath())) //nolint:errcheck + bucketClient.Delete(ctx, path.Join("user-2", GetCleanerVisitMarkerFilePath())) //nolint:errcheck + + activeUsers, deleteUsers, err := cleaner.scanUsers(ctx) + require.NoError(t, err) + require.NoError(t, cleaner.cleanUpActiveUsers(ctx, activeUsers, false)) + require.NoError(t, cleaner.cleanDeletedUsers(ctx, deleteUsers)) assertBlockExists("user-1", block1, true) assertBlockExists("user-1", block2, true) assertBlockExists("user-2", block3, true) @@ -669,9 +813,16 @@ func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) { // Reduce the deletion delay. Now the block will be deleted. { + // clean up cleaner visit marker before running test + bucketClient.Delete(ctx, path.Join("user-1", GetCleanerVisitMarkerFilePath())) //nolint:errcheck + bucketClient.Delete(ctx, path.Join("user-2", GetCleanerVisitMarkerFilePath())) //nolint:errcheck + cleaner.cfg.DeletionDelay = 0 - require.NoError(t, cleaner.cleanUsers(ctx, false)) + activeUsers, deleteUsers, err := cleaner.scanUsers(ctx) + require.NoError(t, err) + require.NoError(t, cleaner.cleanUpActiveUsers(ctx, activeUsers, false)) + require.NoError(t, cleaner.cleanDeletedUsers(ctx, deleteUsers)) assertBlockExists("user-1", block1, false) assertBlockExists("user-1", block2, true) assertBlockExists("user-2", block3, true) @@ -688,7 +839,8 @@ func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) { cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 # HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. # TYPE cortex_compactor_blocks_marked_for_deletion_total counter - cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 1 + cortex_compactor_blocks_marked_for_deletion_total{reason="retention",user="user-1"} 1 + cortex_compactor_blocks_marked_for_deletion_total{reason="retention",user="user-2"} 0 `), "cortex_bucket_blocks_count", "cortex_bucket_blocks_marked_for_deletion_count", @@ -698,9 +850,16 @@ func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) { // Retention enabled for other user; test deleting multiple blocks. 
{ + // clean up cleaner visit marker before running test + bucketClient.Delete(ctx, path.Join("user-1", GetCleanerVisitMarkerFilePath())) //nolint:errcheck + bucketClient.Delete(ctx, path.Join("user-2", GetCleanerVisitMarkerFilePath())) //nolint:errcheck + cfgProvider.userRetentionPeriods["user-2"] = 5 * time.Hour - require.NoError(t, cleaner.cleanUsers(ctx, false)) + activeUsers, deleteUsers, err := cleaner.scanUsers(ctx) + require.NoError(t, err) + require.NoError(t, cleaner.cleanUpActiveUsers(ctx, activeUsers, false)) + require.NoError(t, cleaner.cleanDeletedUsers(ctx, deleteUsers)) assertBlockExists("user-1", block1, false) assertBlockExists("user-1", block2, true) assertBlockExists("user-2", block3, false) @@ -717,7 +876,8 @@ func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) { cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 # HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. # TYPE cortex_compactor_blocks_marked_for_deletion_total counter - cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 3 + cortex_compactor_blocks_marked_for_deletion_total{reason="retention",user="user-1"} 1 + cortex_compactor_blocks_marked_for_deletion_total{reason="retention",user="user-2"} 2 `), "cortex_bucket_blocks_count", "cortex_bucket_blocks_marked_for_deletion_count", @@ -726,6 +886,96 @@ func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) { } } +func TestBlocksCleaner_CleanPartitionedGroupInfo(t *testing.T) { + bucketClient, _ := cortex_testutil.PrepareFilesystemBucket(t) + bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient) + + ts := func(hours int) int64 { + return time.Now().Add(time.Duration(hours)*time.Hour).Unix() * 1000 + } + + userID := "user-1" + partitionedGroupID := uint32(123) + partitionCount := 1 + startTime := ts(-10) + endTime := ts(-8) + block1 := createTSDBBlock(t, bucketClient, userID, startTime, endTime, nil) + + cfg := BlocksCleanerConfig{ + DeletionDelay: time.Hour, + CleanupInterval: time.Minute, + CleanupConcurrency: 1, + ShardingStrategy: util.ShardingStrategyShuffle, + } + + ctx := context.Background() + logger := log.NewNopLogger() + reg := prometheus.NewPedanticRegistry() + scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger) + cfgProvider := newMockConfigProvider() + blocksMarkedForDeletion := promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: blocksMarkedForDeletionName, + Help: blocksMarkedForDeletionHelp, + }, append(CommonLabels, ReasonLabelName)) + dummyCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + dummyGaugeVec := prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"test"}) + + cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, 60*time.Second, cfgProvider, logger, reg, time.Minute, 30*time.Second, blocksMarkedForDeletion, dummyCounter, dummyCounter, dummyCounter, dummyGaugeVec) + + userBucket := bucket.NewUserBucketClient(userID, bucketClient, cfgProvider) + + partitionedGroupInfo := PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: partitionCount, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{block1}, + }, + }, + RangeStart: startTime, + RangeEnd: endTime, + CreationTime: time.Now().Add(-5 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + } + _, err := UpdatePartitionedGroupInfo(ctx, userBucket, logger, partitionedGroupInfo, dummyCounter, dummyCounter) + require.NoError(t, err) + + 
partitionVisitMarker := &PartitionVisitMarker{ + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + } + visitMarkerManager := NewVisitMarkerManager(userBucket, logger, "dummy-cleaner", partitionVisitMarker, dummyCounter, dummyCounter) + err = visitMarkerManager.updateVisitMarker(ctx) + require.NoError(t, err) + + cleaner.cleanPartitionedGroupInfo(ctx, userBucket, logger, userID) + + partitionedGroupFileExists, err := userBucket.Exists(ctx, GetPartitionedGroupFile(partitionedGroupID)) + require.NoError(t, err) + require.False(t, partitionedGroupFileExists) + + block1DeletionMarkerExists, err := userBucket.Exists(ctx, path.Join(block1.String(), metadata.DeletionMarkFilename)) + require.NoError(t, err) + require.True(t, block1DeletionMarkerExists) + +} + +type mockBucketFailure struct { + objstore.Bucket + + DeleteFailures []string +} + +func (m *mockBucketFailure) Delete(ctx context.Context, name string) error { + if util.StringsContain(m.DeleteFailures, name) { + return errors.New("mocked delete failure") + } + return m.Bucket.Delete(ctx, name) +} + type mockConfigProvider struct { userRetentionPeriods map[string]time.Duration } @@ -754,3 +1004,10 @@ func (m *mockConfigProvider) S3SSEKMSKeyID(userID string) string { func (m *mockConfigProvider) S3SSEKMSEncryptionContext(userID string) string { return "" } + +func createLegacyBlockVisitMarker(t *testing.T, bkt objstore.Bucket, userID string, blockID ulid.ULID) { + content := mockBlockVisitMarker() + markPath := path.Join(userID, GetBlockVisitMarkerFile(blockID.String())) + + require.NoError(t, bkt.Upload(context.Background(), markPath, strings.NewReader(content))) +} diff --git a/pkg/compactor/cleaner_visit_marker.go b/pkg/compactor/cleaner_visit_marker.go new file mode 100644 index 0000000000..89d9cd4ffb --- /dev/null +++ b/pkg/compactor/cleaner_visit_marker.go @@ -0,0 +1,97 @@ +package compactor + +import ( + "path" + "time" + + "github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex" +) + +const ( + // CleanerVisitMarkerName is the name of cleaner visit marker file. + CleanerVisitMarkerName = "cleaner-visit-marker.json" + // CleanerVisitMarkerVersion1 is the current supported version of cleaner visit mark file. + CleanerVisitMarkerVersion1 = 1 +) + +type CleanerVisitMarker struct { + CompactorID string `json:"compactorID"` + Status VisitStatus `json:"status"` + // VisitTime is a unix timestamp of when the partition was visited (mark updated). + VisitTime int64 `json:"visitTime"` + // Version of the file. 
+ Version int `json:"version"` +} + +func NewCleanerVisitMarker(compactorID string) *CleanerVisitMarker { + return &CleanerVisitMarker{ + CompactorID: compactorID, + } +} + +func (b *CleanerVisitMarker) IsExpired(cleanerVisitMarkerTimeout time.Duration) bool { + return !time.Now().Before(time.Unix(b.VisitTime, 0).Add(cleanerVisitMarkerTimeout)) +} + +func (b *CleanerVisitMarker) IsVisited(cleanerVisitMarkerTimeout time.Duration) bool { + return !b.IsCompleted() && !b.IsFailed() && !b.IsExpired(cleanerVisitMarkerTimeout) +} + +func (b *CleanerVisitMarker) IsCompleted() bool { + return b.Status == Completed +} + +func (b *CleanerVisitMarker) IsFailed() bool { + return b.Status == Failed +} + +func (b *CleanerVisitMarker) IsInProgress() bool { + return b.Status == InProgress +} + +func (b *CleanerVisitMarker) IsPending() bool { + return b.Status == Pending +} + +func (b *CleanerVisitMarker) GetVisitMarkerFilePath() string { + return GetCleanerVisitMarkerFilePath() +} + +func (b *CleanerVisitMarker) MarkInProgress(ownerIdentifier string) { + b.CompactorID = ownerIdentifier + b.Status = InProgress + b.VisitTime = time.Now().Unix() +} + +func (b *CleanerVisitMarker) MarkPending(ownerIdentifier string) { + b.CompactorID = ownerIdentifier + b.Status = Pending + b.VisitTime = time.Now().Unix() +} + +func (b *CleanerVisitMarker) MarkCompleted(ownerIdentifier string) { + b.CompactorID = ownerIdentifier + b.Status = Completed + b.VisitTime = time.Now().Unix() +} + +func (b *CleanerVisitMarker) MarkFailed(ownerIdentifier string) { + b.CompactorID = ownerIdentifier + b.Status = Failed + b.VisitTime = time.Now().Unix() +} + +func (b *CleanerVisitMarker) LogInfo() []string { + return []string{ + "compactor_id", + b.CompactorID, + "status", + string(b.Status), + "visit_time", + time.Unix(b.VisitTime, 0).String(), + } +} + +func GetCleanerVisitMarkerFilePath() string { + return path.Join(bucketindex.MarkersPathname, CleanerVisitMarkerName) +} diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index d366bda557..955045bbf1 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -2,6 +2,7 @@ package compactor import ( "context" + crypto_rand "crypto/rand" "flag" "fmt" "hash/fnv" @@ -13,6 +14,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/oklog/ulid" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" @@ -51,51 +53,86 @@ var ( errInvalidBlockRanges = "compactor block range periods should be divisible by the previous one, but %s is not divisible by %s" RingOp = ring.NewOp([]ring.InstanceState{ring.ACTIVE}, nil) - supportedShardingStrategies = []string{util.ShardingStrategyDefault, util.ShardingStrategyShuffle} - errInvalidShardingStrategy = errors.New("invalid sharding strategy") - errInvalidTenantShardSize = errors.New("invalid tenant shard size, the value must be greater than 0") + supportedShardingStrategies = []string{util.ShardingStrategyDefault, util.ShardingStrategyShuffle} + errInvalidShardingStrategy = errors.New("invalid sharding strategy") + errInvalidTenantShardSize = errors.New("invalid tenant shard size, the value must be greater than 0") + supportedCompactionModes = []string{util.CompactionModeDefault, util.CompactionModePartitioning} + errInvalidCompactionMode = errors.New("invalid compaction mode") + errInvalidCompactionModePartitioning = errors.New("compaction mode partitioning can only be enabled when shuffle sharding is enabled") - 
DefaultBlocksGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, reg prometheus.Registerer, blocksMarkedForDeletion, blocksMarkedForNoCompaction, garbageCollectedBlocks prometheus.Counter, _ prometheus.Gauge, _ prometheus.Counter, _ prometheus.Counter, _ *ring.Ring, _ *ring.Lifecycler, _ Limits, _ string, _ *compact.GatherNoCompactionMarkFilter) compact.Grouper { - return compact.NewDefaultGrouper( + DefaultBlocksGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, blocksMarkedForNoCompaction prometheus.Counter, _ prometheus.Counter, _ prometheus.Counter, _ prometheus.Counter, _ prometheus.Counter, syncerMetrics *compact.SyncerMetrics, compactorMetrics *compactorMetrics, _ *ring.Ring, _ *ring.Lifecycler, _ Limits, _ string, _ *compact.GatherNoCompactionMarkFilter) compact.Grouper { + return compact.NewDefaultGrouperWithMetrics( logger, bkt, cfg.AcceptMalformedIndex, true, // Enable vertical compaction - reg, - blocksMarkedForDeletion, - garbageCollectedBlocks, + compactorMetrics.compactions, + compactorMetrics.compactionRunsStarted, + compactorMetrics.compactionRunsCompleted, + compactorMetrics.compactionFailures, + compactorMetrics.verticalCompactions, + syncerMetrics.BlocksMarkedForDeletion, + syncerMetrics.GarbageCollectedBlocks, blocksMarkedForNoCompaction, metadata.NoneFunc, cfg.BlockFilesConcurrency, cfg.BlocksFetchConcurrency) } - ShuffleShardingGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, reg prometheus.Registerer, blocksMarkedForDeletion, blocksMarkedForNoCompaction, garbageCollectedBlocks prometheus.Counter, remainingPlannedCompactions prometheus.Gauge, blockVisitMarkerReadFailed prometheus.Counter, blockVisitMarkerWriteFailed prometheus.Counter, ring *ring.Ring, ringLifecycle *ring.Lifecycler, limits Limits, userID string, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter) compact.Grouper { - return NewShuffleShardingGrouper( - ctx, - logger, - bkt, - cfg.AcceptMalformedIndex, - true, // Enable vertical compaction - reg, - blocksMarkedForDeletion, - blocksMarkedForNoCompaction, - garbageCollectedBlocks, - remainingPlannedCompactions, - metadata.NoneFunc, - cfg, - ring, - ringLifecycle.Addr, - ringLifecycle.ID, - limits, - userID, - cfg.BlockFilesConcurrency, - cfg.BlocksFetchConcurrency, - cfg.CompactionConcurrency, - cfg.BlockVisitMarkerTimeout, - blockVisitMarkerReadFailed, - blockVisitMarkerWriteFailed, - noCompactionMarkFilter.NoCompactMarkedBlocks) + ShuffleShardingGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, blocksMarkedForNoCompaction prometheus.Counter, compactionVisitMarkerReadFailed prometheus.Counter, compactionVisitMarkerWriteFailed prometheus.Counter, partitionedGroupInfoReadFailed prometheus.Counter, partitionedGroupInfoWriteFailed prometheus.Counter, syncerMetrics *compact.SyncerMetrics, compactorMetrics *compactorMetrics, ring *ring.Ring, ringLifecycle *ring.Lifecycler, limits Limits, userID string, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter) compact.Grouper { + if cfg.CompactionMode == util.CompactionModePartitioning { + return NewPartitionCompactionGrouper( + ctx, + logger, + bkt, + cfg.AcceptMalformedIndex, + true, // Enable vertical compaction + blocksMarkedForNoCompaction, + syncerMetrics, + compactorMetrics, + metadata.NoneFunc, + cfg, + ring, + ringLifecycle.Addr, + ringLifecycle.ID, + limits, + 
userID, + cfg.BlockFilesConcurrency, + cfg.BlocksFetchConcurrency, + cfg.CompactionConcurrency, + true, + cfg.CompactionVisitMarkerTimeout, + compactionVisitMarkerReadFailed, + compactionVisitMarkerWriteFailed, + partitionedGroupInfoReadFailed, + partitionedGroupInfoWriteFailed, + noCompactionMarkFilter.NoCompactMarkedBlocks, + ) + } else { + return NewShuffleShardingGrouper( + ctx, + logger, + bkt, + cfg.AcceptMalformedIndex, + true, // Enable vertical compaction + blocksMarkedForNoCompaction, + metadata.NoneFunc, + syncerMetrics, + compactorMetrics, + cfg, + ring, + ringLifecycle.Addr, + ringLifecycle.ID, + limits, + userID, + cfg.BlockFilesConcurrency, + cfg.BlocksFetchConcurrency, + cfg.CompactionConcurrency, + cfg.CompactionVisitMarkerTimeout, + compactionVisitMarkerReadFailed, + compactionVisitMarkerWriteFailed, + noCompactionMarkFilter.NoCompactMarkedBlocks) + } } DefaultBlocksCompactorFactory = func(ctx context.Context, cfg Config, logger log.Logger, reg prometheus.Registerer) (compact.Compactor, PlannerFactory, error) { @@ -104,7 +141,7 @@ var ( return nil, nil, err } - plannerFactory := func(ctx context.Context, bkt objstore.InstrumentedBucket, logger log.Logger, cfg Config, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter, ringLifecycle *ring.Lifecycler, _ prometheus.Counter, _ prometheus.Counter) compact.Planner { + plannerFactory := func(ctx context.Context, bkt objstore.InstrumentedBucket, logger log.Logger, cfg Config, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter, ringLifecycle *ring.Lifecycler, _ string, _ prometheus.Counter, _ prometheus.Counter, _ prometheus.Counter, _ *compactorMetrics) compact.Planner { return compact.NewPlanner(logger, cfg.BlockRanges.ToMilliseconds(), noCompactionMarkFilter) } @@ -117,12 +154,41 @@ var ( return nil, nil, err } - plannerFactory := func(ctx context.Context, bkt objstore.InstrumentedBucket, logger log.Logger, cfg Config, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter, ringLifecycle *ring.Lifecycler, blockVisitMarkerReadFailed prometheus.Counter, blockVisitMarkerWriteFailed prometheus.Counter) compact.Planner { + plannerFactory := func(ctx context.Context, bkt objstore.InstrumentedBucket, logger log.Logger, cfg Config, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter, ringLifecycle *ring.Lifecycler, userID string, compactionVisitMarkerReadFailed prometheus.Counter, compactionVisitMarkerWriteFailed prometheus.Counter, partitionedGroupInfoReadFailed prometheus.Counter, compactorMetrics *compactorMetrics) compact.Planner { - return NewShuffleShardingPlanner(ctx, bkt, logger, cfg.BlockRanges.ToMilliseconds(), noCompactionMarkFilter.NoCompactMarkedBlocks, ringLifecycle.ID, cfg.BlockVisitMarkerTimeout, cfg.BlockVisitMarkerFileUpdateInterval, blockVisitMarkerReadFailed, blockVisitMarkerWriteFailed) + if cfg.CompactionMode == util.CompactionModePartitioning { + return NewPartitionCompactionPlanner(ctx, bkt, logger, cfg.BlockRanges.ToMilliseconds(), noCompactionMarkFilter.NoCompactMarkedBlocks, ringLifecycle.ID, userID, cfg.ShardingPlannerDelay, cfg.CompactionVisitMarkerTimeout, cfg.CompactionVisitMarkerFileUpdateInterval, compactionVisitMarkerReadFailed, compactionVisitMarkerWriteFailed, partitionedGroupInfoReadFailed, compactorMetrics) + } else { + return NewShuffleShardingPlanner(ctx, bkt, logger, cfg.BlockRanges.ToMilliseconds(), noCompactionMarkFilter.NoCompactMarkedBlocks, ringLifecycle.ID, cfg.CompactionVisitMarkerTimeout, cfg.CompactionVisitMarkerFileUpdateInterval, 
compactionVisitMarkerReadFailed, compactionVisitMarkerWriteFailed) + } } return compactor, plannerFactory, nil } + + DefaultBlockDeletableCheckerFactory = func(_ context.Context, _ objstore.InstrumentedBucket, _ log.Logger, _ prometheus.Counter, _ prometheus.Counter) compact.BlockDeletableChecker { + return compact.DefaultBlockDeletableChecker{} + } + + PartitionCompactionBlockDeletableCheckerFactory = func(ctx context.Context, bkt objstore.InstrumentedBucket, logger log.Logger, partitionVisitMarkerReadFailed prometheus.Counter, partitionedGroupInfoWriteFailed prometheus.Counter) compact.BlockDeletableChecker { + return NewPartitionCompactionBlockDeletableChecker() + } + + DefaultCompactionLifecycleCallbackFactory = func(_ context.Context, _ objstore.InstrumentedBucket, _ log.Logger, _ int, _ string, _ string, _ prometheus.Counter, _ *compactorMetrics) compact.CompactionLifecycleCallback { + return compact.DefaultCompactionLifecycleCallback{} + } + + ShardedCompactionLifecycleCallbackFactory = func(ctx context.Context, userBucket objstore.InstrumentedBucket, logger log.Logger, metaSyncConcurrency int, compactDir string, userID string, partitionedGroupInfoReadFailed prometheus.Counter, compactorMetrics *compactorMetrics) compact.CompactionLifecycleCallback { + return NewShardedCompactionLifecycleCallback( + ctx, + userBucket, + logger, + metaSyncConcurrency, + compactDir, + userID, + partitionedGroupInfoReadFailed, + compactorMetrics, + ) + } ) // BlocksGrouperFactory builds and returns the grouper to use to compact a tenant's blocks. @@ -131,13 +197,13 @@ type BlocksGrouperFactory func( cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, - reg prometheus.Registerer, - blocksMarkedForDeletion prometheus.Counter, blocksMarkedForNoCompact prometheus.Counter, - garbageCollectedBlocks prometheus.Counter, - remainingPlannedCompactions prometheus.Gauge, - blockVisitMarkerReadFailed prometheus.Counter, - blockVisitMarkerWriteFailed prometheus.Counter, + compactionVisitMarkerReadFailed prometheus.Counter, + compactionVisitMarkerWriteFailed prometheus.Counter, + partitionedGroupInfoReadFailed prometheus.Counter, + partitionedGroupInfoWriteFailed prometheus.Counter, + syncerMetrics *compact.SyncerMetrics, + compactorMetrics *compactorMetrics, ring *ring.Ring, ringLifecycler *ring.Lifecycler, limit Limits, @@ -160,13 +226,39 @@ type PlannerFactory func( cfg Config, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter, ringLifecycle *ring.Lifecycler, - blockVisitMarkerReadFailed prometheus.Counter, - blockVisitMarkerWriteFailed prometheus.Counter, + userID string, + compactionVisitMarkerReadFailed prometheus.Counter, + compactionVisitMarkerWriteFailed prometheus.Counter, + partitionedGroupInfoReadFailed prometheus.Counter, + compactorMetrics *compactorMetrics, ) compact.Planner +type CompactionLifecycleCallbackFactory func( + ctx context.Context, + userBucket objstore.InstrumentedBucket, + logger log.Logger, + metaSyncConcurrency int, + compactDir string, + userID string, + partitionedGroupInfoReadFailed prometheus.Counter, + compactorMetrics *compactorMetrics, +) compact.CompactionLifecycleCallback + +type BlockDeletableCheckerFactory func( + ctx context.Context, + bkt objstore.InstrumentedBucket, + logger log.Logger, + partitionVisitMarkerReadFailed prometheus.Counter, + partitionedGroupInfoReadFailed prometheus.Counter, +) compact.BlockDeletableChecker + // Limits defines limits used by the Compactor. 
type Limits interface { CompactorTenantShardSize(userID string) int + CompactorPartitionIndexSizeLimitInBytes(userID string) int64 + CompactorPartitionSeriesCountLimit(userID string) int64 + CompactorPartitionLevel1IndexSizeLimitInBytes(userID string) int64 + CompactorPartitionLevel1SeriesCountLimit(userID string) int64 } // Config holds the Compactor config. @@ -194,9 +286,13 @@ type Config struct { DisabledTenants flagext.StringSliceCSV `yaml:"disabled_tenants"` // Compactors sharding. - ShardingEnabled bool `yaml:"sharding_enabled"` - ShardingStrategy string `yaml:"sharding_strategy"` - ShardingRing RingConfig `yaml:"sharding_ring"` + ShardingEnabled bool `yaml:"sharding_enabled"` + ShardingStrategy string `yaml:"sharding_strategy"` + ShardingRing RingConfig `yaml:"sharding_ring"` + ShardingPlannerDelay time.Duration `yaml:"sharding_planner_delay"` + + // Compaction mode. + CompactionMode string `yaml:"compaction_mode"` // No need to add options to customize the retry backoff, // given the defaults should be fine, but allow to override @@ -208,9 +304,13 @@ type Config struct { BlocksGrouperFactory BlocksGrouperFactory `yaml:"-"` BlocksCompactorFactory BlocksCompactorFactory `yaml:"-"` - // Block visit marker file config - BlockVisitMarkerTimeout time.Duration `yaml:"block_visit_marker_timeout"` - BlockVisitMarkerFileUpdateInterval time.Duration `yaml:"block_visit_marker_file_update_interval"` + // Compaction visit marker file config + CompactionVisitMarkerTimeout time.Duration `yaml:"compaction_visit_marker_timeout"` + CompactionVisitMarkerFileUpdateInterval time.Duration `yaml:"compaction_visit_marker_file_update_interval"` + + // Cleaner visit marker file config + CleanerVisitMarkerTimeout time.Duration `yaml:"cleaner_visit_marker_timeout"` + CleanerVisitMarkerFileUpdateInterval time.Duration `yaml:"cleaner_visit_marker_file_update_interval"` AcceptMalformedIndex bool `yaml:"accept_malformed_index"` CachingBucketEnabled bool `yaml:"caching_bucket_enabled"` @@ -236,6 +336,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.IntVar(&cfg.CleanupConcurrency, "compactor.cleanup-concurrency", 20, "Max number of tenants for which blocks cleanup and maintenance should run concurrently.") f.BoolVar(&cfg.ShardingEnabled, "compactor.sharding-enabled", false, "Shard tenants across multiple compactor instances. Sharding is required if you run multiple compactor instances, in order to coordinate compactions and avoid race conditions leading to the same tenant blocks simultaneously compacted by different instances.") f.StringVar(&cfg.ShardingStrategy, "compactor.sharding-strategy", util.ShardingStrategyDefault, fmt.Sprintf("The sharding strategy to use. Supported values are: %s.", strings.Join(supportedShardingStrategies, ", "))) + f.StringVar(&cfg.CompactionMode, "compactor.compaction-mode", util.CompactionModeDefault, fmt.Sprintf("The compaction mode to use. Supported values are: %s.", strings.Join(supportedCompactionModes, ", "))) f.DurationVar(&cfg.DeletionDelay, "compactor.deletion-delay", 12*time.Hour, "Time before a block marked for deletion is deleted from bucket. "+ "If not 0, blocks will be marked for deletion and compactor component will permanently delete blocks marked for deletion from the bucket. "+ "If 0, blocks will be deleted straight away. 
Note that deleting blocks immediately can cause query failures.") @@ -248,11 +349,16 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.Var(&cfg.EnabledTenants, "compactor.enabled-tenants", "Comma separated list of tenants that can be compacted. If specified, only these tenants will be compacted by compactor, otherwise all tenants can be compacted. Subject to sharding.") f.Var(&cfg.DisabledTenants, "compactor.disabled-tenants", "Comma separated list of tenants that cannot be compacted by this compactor. If specified, and compactor would normally pick given tenant for compaction (via -compactor.enabled-tenants or sharding), it will be ignored instead.") - f.DurationVar(&cfg.BlockVisitMarkerTimeout, "compactor.block-visit-marker-timeout", 5*time.Minute, "How long block visit marker file should be considered as expired and able to be picked up by compactor again.") - f.DurationVar(&cfg.BlockVisitMarkerFileUpdateInterval, "compactor.block-visit-marker-file-update-interval", 1*time.Minute, "How frequently block visit marker file should be updated duration compaction.") + f.DurationVar(&cfg.CompactionVisitMarkerTimeout, "compactor.compaction-visit-marker-timeout", 90*time.Second, "How long compaction visit marker file should be considered as expired and able to be picked up by compactor again.") + f.DurationVar(&cfg.CompactionVisitMarkerFileUpdateInterval, "compactor.compaction-visit-marker-file-update-interval", 1*time.Minute, "How frequently compaction visit marker file should be updated duration compaction.") + + f.DurationVar(&cfg.CleanerVisitMarkerTimeout, "compactor.cleaner-visit-marker-timeout", 10*time.Minute, "How long cleaner visit marker file should be considered as expired and able to be picked up by cleaner again.") + f.DurationVar(&cfg.CleanerVisitMarkerFileUpdateInterval, "compactor.cleaner-visit-marker-file-update-interval", 5*time.Minute, "How frequently cleaner visit marker file should be updated when cleaning user.") f.BoolVar(&cfg.AcceptMalformedIndex, "compactor.accept-malformed-index", false, "When enabled, index verification will ignore out of order label names.") f.BoolVar(&cfg.CachingBucketEnabled, "compactor.caching-bucket-enabled", false, "When enabled, caching bucket will be used for compactor, except cleaner service, which serves as the source of truth for block status") + + f.DurationVar(&cfg.ShardingPlannerDelay, "compactor.sharding-planner-delay", 10*time.Second, "How long shuffle sharding planner would wait before running planning code.") } func (cfg *Config) Validate(limits validation.Limits) error { @@ -279,6 +385,15 @@ func (cfg *Config) Validate(limits validation.Limits) error { } } + // Make sure a valid compaction mode is being used + if !util.StringsContain(supportedCompactionModes, cfg.CompactionMode) { + return errInvalidCompactionMode + } + + if !cfg.ShardingEnabled && cfg.CompactionMode == util.CompactionModePartitioning { + return errInvalidCompactionModePartitioning + } + return nil } @@ -317,6 +432,10 @@ type Compactor struct { blocksPlannerFactory PlannerFactory + blockDeletableCheckerFactory BlockDeletableCheckerFactory + + compactionLifecycleCallbackFactory CompactionLifecycleCallbackFactory + // Client used to run operations on the bucket storing blocks. bucketClient objstore.InstrumentedBucket @@ -327,26 +446,25 @@ type Compactor struct { ringSubservicesWatcher *services.FailureWatcher // Metrics. 
- CompactorStartDurationSeconds prometheus.Gauge - compactionRunsStarted prometheus.Counter - compactionRunsInterrupted prometheus.Counter - compactionRunsCompleted prometheus.Counter - compactionRunsFailed prometheus.Counter - compactionRunsLastSuccess prometheus.Gauge - compactionRunDiscoveredTenants prometheus.Gauge - compactionRunSkippedTenants prometheus.Gauge - compactionRunSucceededTenants prometheus.Gauge - compactionRunFailedTenants prometheus.Gauge - compactionRunInterval prometheus.Gauge - blocksMarkedForDeletion prometheus.Counter - blocksMarkedForNoCompaction prometheus.Counter - garbageCollectedBlocks prometheus.Counter - remainingPlannedCompactions prometheus.Gauge - blockVisitMarkerReadFailed prometheus.Counter - blockVisitMarkerWriteFailed prometheus.Counter - - // TSDB syncer metrics - syncerMetrics *syncerMetrics + CompactorStartDurationSeconds prometheus.Gauge + CompactionRunsStarted prometheus.Counter + CompactionRunsInterrupted prometheus.Counter + CompactionRunsCompleted prometheus.Counter + CompactionRunsFailed prometheus.Counter + CompactionRunsLastSuccess prometheus.Gauge + CompactionRunDiscoveredTenants prometheus.Gauge + CompactionRunSkippedTenants prometheus.Gauge + CompactionRunSucceededTenants prometheus.Gauge + CompactionRunFailedTenants prometheus.Gauge + CompactionRunInterval prometheus.Gauge + BlocksMarkedForNoCompaction prometheus.Counter + partitionVisitMarkerReadFailed prometheus.Counter + partitionVisitMarkerWriteFailed prometheus.Counter + PartitionedGroupInfoReadFailed prometheus.Counter + PartitionedGroupInfoWriteFailed prometheus.Counter + + // Thanos compactor metrics per user + compactorMetrics *compactorMetrics } // NewCompactor makes a new Compactor. @@ -373,7 +491,21 @@ func NewCompactor(compactorCfg Config, storageCfg cortex_tsdb.BlocksStorageConfi } } - cortexCompactor, err := newCompactor(compactorCfg, storageCfg, logger, registerer, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, limits) + var blockDeletableCheckerFactory BlockDeletableCheckerFactory + if compactorCfg.ShardingStrategy == util.ShardingStrategyShuffle && compactorCfg.CompactionMode == util.CompactionModePartitioning { + blockDeletableCheckerFactory = PartitionCompactionBlockDeletableCheckerFactory + } else { + blockDeletableCheckerFactory = DefaultBlockDeletableCheckerFactory + } + + var compactionLifecycleCallbackFactory CompactionLifecycleCallbackFactory + if compactorCfg.ShardingStrategy == util.ShardingStrategyShuffle && compactorCfg.CompactionMode == util.CompactionModePartitioning { + compactionLifecycleCallbackFactory = ShardedCompactionLifecycleCallbackFactory + } else { + compactionLifecycleCallbackFactory = DefaultCompactionLifecycleCallbackFactory + } + + cortexCompactor, err := newCompactor(compactorCfg, storageCfg, logger, registerer, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, blockDeletableCheckerFactory, compactionLifecycleCallbackFactory, limits) if err != nil { return nil, errors.Wrap(err, "failed to create Cortex blocks compactor") } @@ -389,94 +521,95 @@ func newCompactor( bucketClientFactory func(ctx context.Context) (objstore.InstrumentedBucket, error), blocksGrouperFactory BlocksGrouperFactory, blocksCompactorFactory BlocksCompactorFactory, + blockDeletableCheckerFactory BlockDeletableCheckerFactory, + compactionLifecycleCallbackFactory CompactionLifecycleCallbackFactory, limits *validation.Overrides, ) (*Compactor, error) { - var remainingPlannedCompactions prometheus.Gauge + var compactorMetrics 
*compactorMetrics if compactorCfg.ShardingStrategy == util.ShardingStrategyShuffle { - remainingPlannedCompactions = promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ - Name: "cortex_compactor_remaining_planned_compactions", - Help: "Total number of plans that remain to be compacted. Only available with shuffle-sharding strategy", - }) + compactorMetrics = newCompactorMetrics(registerer) + } else { + compactorMetrics = newDefaultCompactorMetrics(registerer) } c := &Compactor{ - compactorCfg: compactorCfg, - storageCfg: storageCfg, - parentLogger: logger, - logger: log.With(logger, "component", "compactor"), - registerer: registerer, - syncerMetrics: newSyncerMetrics(registerer), - bucketClientFactory: bucketClientFactory, - blocksGrouperFactory: blocksGrouperFactory, - blocksCompactorFactory: blocksCompactorFactory, - allowedTenants: util.NewAllowedTenants(compactorCfg.EnabledTenants, compactorCfg.DisabledTenants), + compactorCfg: compactorCfg, + storageCfg: storageCfg, + parentLogger: logger, + logger: log.With(logger, "component", "compactor"), + registerer: registerer, + bucketClientFactory: bucketClientFactory, + blocksGrouperFactory: blocksGrouperFactory, + blocksCompactorFactory: blocksCompactorFactory, + blockDeletableCheckerFactory: blockDeletableCheckerFactory, + compactionLifecycleCallbackFactory: compactionLifecycleCallbackFactory, + allowedTenants: util.NewAllowedTenants(compactorCfg.EnabledTenants, compactorCfg.DisabledTenants), CompactorStartDurationSeconds: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ Name: "cortex_compactor_start_duration_seconds", Help: "Time in seconds spent by compactor running start function", }), - compactionRunsStarted: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ + CompactionRunsStarted: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ Name: "cortex_compactor_runs_started_total", Help: "Total number of compaction runs started.", }), - compactionRunsInterrupted: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ + CompactionRunsInterrupted: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ Name: "cortex_compactor_runs_interrupted_total", Help: "Total number of compaction runs interrupted.", }), - compactionRunsCompleted: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ + CompactionRunsCompleted: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ Name: "cortex_compactor_runs_completed_total", Help: "Total number of compaction runs successfully completed.", }), - compactionRunsFailed: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ + CompactionRunsFailed: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ Name: "cortex_compactor_runs_failed_total", Help: "Total number of compaction runs failed.", }), - compactionRunsLastSuccess: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ + CompactionRunsLastSuccess: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ Name: "cortex_compactor_last_successful_run_timestamp_seconds", Help: "Unix timestamp of the last successful compaction run.", }), - compactionRunDiscoveredTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ + CompactionRunDiscoveredTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ Name: "cortex_compactor_tenants_discovered", Help: "Number of tenants discovered during the current compaction run. 
Reset to 0 when compactor is idle.", }), - compactionRunSkippedTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ + CompactionRunSkippedTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ Name: "cortex_compactor_tenants_skipped", Help: "Number of tenants skipped during the current compaction run. Reset to 0 when compactor is idle.", }), - compactionRunSucceededTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ + CompactionRunSucceededTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ Name: "cortex_compactor_tenants_processing_succeeded", Help: "Number of tenants successfully processed during the current compaction run. Reset to 0 when compactor is idle.", }), - compactionRunFailedTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ + CompactionRunFailedTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ Name: "cortex_compactor_tenants_processing_failed", Help: "Number of tenants failed processing during the current compaction run. Reset to 0 when compactor is idle.", }), - compactionRunInterval: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ + CompactionRunInterval: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ Name: "cortex_compactor_compaction_interval_seconds", Help: "The configured interval on which compaction is run in seconds. Useful when compared to the last successful run metric to accurately detect multiple failed compaction runs.", }), - blocksMarkedForDeletion: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ - Name: blocksMarkedForDeletionName, - Help: blocksMarkedForDeletionHelp, - ConstLabels: prometheus.Labels{"reason": "compaction"}, - }), - blocksMarkedForNoCompaction: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ + BlocksMarkedForNoCompaction: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ Name: "cortex_compactor_blocks_marked_for_no_compaction_total", Help: "Total number of blocks marked for no compact during a compaction run.", }), - garbageCollectedBlocks: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ - Name: "cortex_compactor_garbage_collected_blocks_total", - Help: "Total number of blocks marked for deletion by compactor.", - }), - blockVisitMarkerReadFailed: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ + partitionVisitMarkerReadFailed: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ Name: "cortex_compactor_block_visit_marker_read_failed", Help: "Number of block visit marker file failed to be read.", }), - blockVisitMarkerWriteFailed: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ + partitionVisitMarkerWriteFailed: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ Name: "cortex_compactor_block_visit_marker_write_failed", Help: "Number of block visit marker file failed to be written.", }), - remainingPlannedCompactions: remainingPlannedCompactions, - limits: limits, + PartitionedGroupInfoReadFailed: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ + Name: "cortex_compactor_partitioned_group_info_read_failed", + Help: "Number of partitioned group info file failed to be read.", + }), + PartitionedGroupInfoWriteFailed: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ + Name: "cortex_compactor_partitioned_group_info_write_failed", + Help: "Number of partitioned group info file failed to be written.", + }), + limits: limits, + compactorMetrics: compactorMetrics, } if len(compactorCfg.EnabledTenants) > 0 { @@ -486,10 +619,20 @@ func 
newCompactor( level.Info(c.logger).Log("msg", "compactor using disabled users", "disabled", strings.Join(compactorCfg.DisabledTenants, ", ")) } + if c.registerer != nil { + // Copied from Thanos, pkg/block/fetcher.go + promauto.With(c.registerer).NewGaugeFunc(prometheus.GaugeOpts{ + Name: "cortex_compactor_meta_sync_consistency_delay_seconds", + Help: "Configured consistency delay in seconds.", + }, func() float64 { + return c.compactorCfg.ConsistencyDelay.Seconds() + }) + } + c.Service = services.NewBasicService(c.starting, c.running, c.stopping) // The last successful compaction run metric is exposed as seconds since epoch, so we need to use seconds for this metric. - c.compactionRunInterval.Set(c.compactorCfg.CompactionInterval.Seconds()) + c.CompactionRunInterval.Set(c.compactorCfg.CompactionInterval.Seconds()) return c, nil } @@ -529,7 +672,10 @@ func (c *Compactor) starting(ctx context.Context) error { CleanupConcurrency: c.compactorCfg.CleanupConcurrency, BlockDeletionMarksMigrationEnabled: c.compactorCfg.BlockDeletionMarksMigrationEnabled, TenantCleanupDelay: c.compactorCfg.TenantCleanupDelay, - }, c.bucketClient, c.usersScanner, c.limits, c.parentLogger, c.registerer) + ShardingStrategy: c.compactorCfg.ShardingStrategy, + CompactionMode: c.compactorCfg.CompactionMode, + }, c.bucketClient, c.usersScanner, c.compactorCfg.CompactionVisitMarkerTimeout, c.limits, c.parentLogger, c.registerer, c.compactorCfg.CleanerVisitMarkerTimeout, c.compactorCfg.CleanerVisitMarkerFileUpdateInterval, + c.compactorMetrics.syncerBlocksMarkedForDeletion, c.PartitionedGroupInfoReadFailed, c.partitionVisitMarkerReadFailed, c.partitionVisitMarkerWriteFailed, c.compactorMetrics.remainingPlannedCompactions) // Initialize the compactors ring if sharding is enabled. if c.compactorCfg.ShardingEnabled { @@ -539,6 +685,8 @@ func (c *Compactor) starting(ctx context.Context) error { return errors.Wrap(err, "unable to initialize compactor ring lifecycler") } + c.blocksCleaner.SetRingLifecyclerID(c.ringLifecycler.ID) + c.ring, err = ring.New(lifecyclerCfg.RingConfig, "compactor", ringKey, c.logger, prometheus.WrapRegistererWithPrefix("cortex_", c.registerer)) if err != nil { return errors.Wrap(err, "unable to initialize compactor ring") @@ -588,12 +736,6 @@ func (c *Compactor) starting(ctx context.Context) error { } } - // Ensure an initial cleanup occurred before starting the compactor. - if err := services.StartAndAwaitRunning(ctx, c.blocksCleaner); err != nil { - c.ringSubservices.StopAsync() - return errors.Wrap(err, "failed to start the blocks cleaner") - } - if c.compactorCfg.CachingBucketEnabled { matchers := cortex_tsdb.NewMatchers() // Do not cache tenant deletion marker and block deletion marker for compactor @@ -624,15 +766,26 @@ func (c *Compactor) stopping(_ error) error { } func (c *Compactor) running(ctx context.Context) error { + // Ensure an initial cleanup occurred as first thing when running compactor. + if err := services.StartAndAwaitRunning(ctx, c.blocksCleaner); err != nil { + c.ringSubservices.StopAsync() + return errors.Wrap(err, "failed to start the blocks cleaner") + } + // Run an initial compaction before starting the interval. 
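+	// The sleep below draws a uniform delay in [0, 10% of CompactionInterval), so compactor
+	// replicas started at roughly the same time do not begin their compaction runs in lockstep.
+	// A minimal sketch of the same computation, assuming a 1h compaction interval purely for
+	// illustration (not the configured default):
+	//
+	//	interval := time.Hour
+	//	jitter := time.Duration(rand.Int63n(int64(float64(interval) * 0.1))) // up to ~6m
+	//	time.Sleep(jitter)
+	//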
+ // Insert jitter right before compaction starts to avoid multiple starting compactor to be in sync + time.Sleep(time.Duration(rand.Int63n(int64(float64(c.compactorCfg.CompactionInterval) * 0.1)))) c.compactUsers(ctx) - ticker := time.NewTicker(util.DurationWithJitter(c.compactorCfg.CompactionInterval, 0.05)) + ticker := time.NewTicker(c.compactorCfg.CompactionInterval) defer ticker.Stop() for { select { case <-ticker.C: + // Insert jitter right before compaction starts, so that there will always + // have jitter even compaction time is longer than CompactionInterval + time.Sleep(time.Duration(rand.Int63n(int64(float64(c.compactorCfg.CompactionInterval) * 0.1)))) c.compactUsers(ctx) case <-ctx.Done(): return nil @@ -643,43 +796,38 @@ func (c *Compactor) running(ctx context.Context) error { } func (c *Compactor) compactUsers(ctx context.Context) { - failed := false + succeeded := false interrupted := false + compactionErrorCount := 0 - c.compactionRunsStarted.Inc() + c.CompactionRunsStarted.Inc() defer func() { - // interruptions and successful runs are considered - // mutually exclusive but we consider a run failed if any - // tenant runs failed even if later runs are interrupted - if !interrupted && !failed { - c.compactionRunsCompleted.Inc() - c.compactionRunsLastSuccess.SetToCurrentTime() - } - if interrupted { - c.compactionRunsInterrupted.Inc() - } - if failed { - c.compactionRunsFailed.Inc() + if succeeded && compactionErrorCount == 0 { + c.CompactionRunsCompleted.Inc() + c.CompactionRunsLastSuccess.SetToCurrentTime() + } else if interrupted { + c.CompactionRunsInterrupted.Inc() + } else { + c.CompactionRunsFailed.Inc() } // Reset progress metrics once done. - c.compactionRunDiscoveredTenants.Set(0) - c.compactionRunSkippedTenants.Set(0) - c.compactionRunSucceededTenants.Set(0) - c.compactionRunFailedTenants.Set(0) + c.CompactionRunDiscoveredTenants.Set(0) + c.CompactionRunSkippedTenants.Set(0) + c.CompactionRunSucceededTenants.Set(0) + c.CompactionRunFailedTenants.Set(0) }() level.Info(c.logger).Log("msg", "discovering users from bucket") users, err := c.discoverUsersWithRetries(ctx) if err != nil { - failed = true level.Error(c.logger).Log("msg", "failed to discover users from bucket", "err", err) return } level.Info(c.logger).Log("msg", "discovered users from bucket", "users", len(users)) - c.compactionRunDiscoveredTenants.Set(float64(len(users))) + c.CompactionRunDiscoveredTenants.Set(float64(len(users))) // When starting multiple compactor replicas nearly at the same time, running in a cluster with // a large number of tenants, we may end up in a situation where the 1st user is compacted by @@ -694,17 +842,17 @@ func (c *Compactor) compactUsers(ctx context.Context) { // Ensure the context has not been canceled (ie. compactor shutdown has been triggered). if ctx.Err() != nil { interrupted = true - level.Info(c.logger).Log("msg", "interrupting compaction of user blocks", "user", userID) + level.Info(c.logger).Log("msg", "interrupting compaction of user blocks", "err", err) return } // Ensure the user ID belongs to our shard. 
if owned, err := c.ownUserForCompaction(userID); err != nil { - c.compactionRunSkippedTenants.Inc() + c.CompactionRunSkippedTenants.Inc() level.Warn(c.logger).Log("msg", "unable to check if user is owned by this shard", "user", userID, "err", err) continue } else if !owned { - c.compactionRunSkippedTenants.Inc() + c.CompactionRunSkippedTenants.Inc() level.Debug(c.logger).Log("msg", "skipping user because it is not owned by this shard", "user", userID) continue } @@ -712,7 +860,7 @@ func (c *Compactor) compactUsers(ctx context.Context) { // Skipping compaction if the bucket index failed to sync due to CMK errors. if idxs, err := bucketindex.ReadSyncStatus(ctx, c.bucketClient, userID, util_log.WithUserID(userID, c.logger)); err == nil { if idxs.Status == bucketindex.CustomerManagedKeyError { - c.compactionRunSkippedTenants.Inc() + c.CompactionRunSkippedTenants.Inc() level.Info(c.logger).Log("msg", "skipping compactUser due CustomerManagedKeyError", "user", userID) continue } @@ -721,11 +869,12 @@ func (c *Compactor) compactUsers(ctx context.Context) { ownedUsers[userID] = struct{}{} if markedForDeletion, err := cortex_tsdb.TenantDeletionMarkExists(ctx, c.bucketClient, userID); err != nil { - c.compactionRunSkippedTenants.Inc() + c.CompactionRunSkippedTenants.Inc() level.Warn(c.logger).Log("msg", "unable to check if user is marked for deletion", "user", userID, "err", err) continue } else if markedForDeletion { - c.compactionRunSkippedTenants.Inc() + c.CompactionRunSkippedTenants.Inc() + c.compactorMetrics.deleteMetricsForDeletedTenant(userID) level.Debug(c.logger).Log("msg", "skipping user because it is marked for deletion", "user", userID) continue } @@ -740,13 +889,13 @@ func (c *Compactor) compactUsers(ctx context.Context) { return } - c.compactionRunFailedTenants.Inc() - failed = true + c.CompactionRunFailedTenants.Inc() + compactionErrorCount++ level.Error(c.logger).Log("msg", "failed to compact user blocks", "user", userID, "err", err) continue } - c.compactionRunSucceededTenants.Inc() + c.CompactionRunSucceededTenants.Inc() level.Info(c.logger).Log("msg", "successfully compacted user blocks", "user", userID) } @@ -776,6 +925,8 @@ func (c *Compactor) compactUsers(ctx context.Context) { } } } + + succeeded = true } func (c *Compactor) compactUserWithRetries(ctx context.Context, userID string) error { @@ -792,28 +943,40 @@ func (c *Compactor) compactUserWithRetries(ctx context.Context, userID string) e if lastErr == nil { return nil } + if ctx.Err() != nil { + level.Warn(util_log.WithContext(ctx, c.logger)).Log("msg", "received context error during compaction", "err", ctx.Err()) + return ctx.Err() + } if c.isCausedByPermissionDenied(lastErr) { - level.Warn(c.logger).Log("msg", "skipping compactUser due to PermissionDenied", "user", userID, "err", lastErr) + level.Warn(c.logger).Log("msg", "skipping compactUser due to PermissionDenied", "org_id", userID, "err", lastErr) return nil } - + if compact.IsHaltError(lastErr) { + level.Error(c.logger).Log("msg", "compactor returned critical error", "org_id", userID, "err", lastErr) + c.compactorMetrics.compactionHaltErrors.WithLabelValues(userID).Inc() + return lastErr + } + c.compactorMetrics.compactionRetryErrors.WithLabelValues(userID).Inc() retries.Wait() } + err := errors.Unwrap(errors.Cause(lastErr)) + if errors.Is(err, PlannerCompletedPartitionError) || errors.Is(err, PlannerVisitedPartitionError) { + return nil + } + return lastErr } func (c *Compactor) compactUser(ctx context.Context, userID string) error { bucket := 
bucket.NewUserBucketClient(userID, c.bucketClient, c.limits) - reg := prometheus.NewRegistry() - defer c.syncerMetrics.gatherThanosSyncerMetrics(reg) - ulogger := util_log.WithUserID(userID, c.logger) + ulogger = util_log.WithExecutionID(ulid.MustNew(ulid.Now(), crypto_rand.Reader).String(), ulogger) // Filters out duplicate blocks that can be formed from two or more overlapping // blocks that fully submatches the source blocks of the older blocks. - deduplicateBlocksFilter := block.NewDeduplicateFilter(c.compactorCfg.BlockSyncConcurrency) + deduplicateBlocksFilter := &DisabledDeduplicateFilter{} // While fetching blocks, we filter out blocks that were marked for deletion by using IgnoreDeletionMarkFilter. // No delay is used -- all blocks with deletion marker are ignored, and not considered for compaction. @@ -842,19 +1005,20 @@ func (c *Compactor) compactUser(ctx context.Context, userID string) error { return cortex_tsdb.ErrBlockDiscoveryStrategy } - fetcher, err := block.NewMetaFetcher( + fetcher, err := block.NewMetaFetcherWithMetrics( ulogger, c.compactorCfg.MetaSyncConcurrency, bucket, blockLister, c.metaSyncDirForUser(userID), - reg, + c.compactorMetrics.getBaseFetcherMetrics(userID), + c.compactorMetrics.getMetaFetcherMetrics(userID), // List of filters to apply (order matters). []block.MetadataFilter{ // Remove the ingester ID because we don't shard blocks anymore, while still // honoring the shard ID if sharding was done in the past. NewLabelRemoverFilter([]string{cortex_tsdb.IngesterIDExternalLabel}), - block.NewConsistencyDelayMetaFilter(ulogger, c.compactorCfg.ConsistencyDelay, reg), + block.NewConsistencyDelayMetaFilterWithoutMetrics(ulogger, c.compactorCfg.ConsistencyDelay), ignoreDeletionMarkFilter, deduplicateBlocksFilter, noCompactMarkerFilter, @@ -864,28 +1028,27 @@ func (c *Compactor) compactUser(ctx context.Context, userID string) error { return err } - syncer, err := compact.NewMetaSyncer( + syncerMetrics := c.compactorMetrics.getSyncerMetrics(userID) + syncer, err := compact.NewMetaSyncerWithMetrics( ulogger, - reg, + syncerMetrics, bucket, fetcher, deduplicateBlocksFilter, ignoreDeletionMarkFilter, - c.blocksMarkedForDeletion, - c.garbageCollectedBlocks, ) if err != nil { return errors.Wrap(err, "failed to create syncer") } - currentCtx, cancel := context.WithCancel(ctx) - defer cancel() - compactor, err := compact.NewBucketCompactor( + compactor, err := compact.NewBucketCompactorWithCheckerAndCallback( ulogger, syncer, - c.blocksGrouperFactory(currentCtx, c.compactorCfg, bucket, ulogger, reg, c.blocksMarkedForDeletion, c.blocksMarkedForNoCompaction, c.garbageCollectedBlocks, c.remainingPlannedCompactions, c.blockVisitMarkerReadFailed, c.blockVisitMarkerWriteFailed, c.ring, c.ringLifecycler, c.limits, userID, noCompactMarkerFilter), - c.blocksPlannerFactory(currentCtx, bucket, ulogger, c.compactorCfg, noCompactMarkerFilter, c.ringLifecycler, c.blockVisitMarkerReadFailed, c.blockVisitMarkerWriteFailed), + c.blocksGrouperFactory(ctx, c.compactorCfg, bucket, ulogger, c.BlocksMarkedForNoCompaction, c.partitionVisitMarkerReadFailed, c.partitionVisitMarkerWriteFailed, c.PartitionedGroupInfoReadFailed, c.PartitionedGroupInfoWriteFailed, syncerMetrics, c.compactorMetrics, c.ring, c.ringLifecycler, c.limits, userID, noCompactMarkerFilter), + c.blocksPlannerFactory(ctx, bucket, ulogger, c.compactorCfg, noCompactMarkerFilter, c.ringLifecycler, userID, c.partitionVisitMarkerReadFailed, c.partitionVisitMarkerWriteFailed, c.PartitionedGroupInfoReadFailed, c.compactorMetrics), 
c.blocksCompactor, + c.blockDeletableCheckerFactory(ctx, bucket, ulogger, c.partitionVisitMarkerReadFailed, c.PartitionedGroupInfoReadFailed), + c.compactionLifecycleCallbackFactory(ctx, bucket, ulogger, c.compactorCfg.MetaSyncConcurrency, c.compactDirForUser(userID), userID, c.PartitionedGroupInfoReadFailed, c.compactorMetrics), c.compactDirForUser(userID), bucket, c.compactorCfg.CompactionConcurrency, @@ -896,6 +1059,7 @@ func (c *Compactor) compactUser(ctx context.Context, userID string) error { } if err := compactor.Compact(ctx); err != nil { + level.Warn(ulogger).Log("msg", "compaction failed with error", "err", err) return errors.Wrap(err, "compaction") } @@ -1062,3 +1226,15 @@ func (c *Compactor) isPermissionDeniedErr(err error) bool { } return s.Code() == codes.PermissionDenied } + +type DisabledDeduplicateFilter struct { +} + +func (f *DisabledDeduplicateFilter) Filter(ctx context.Context, metas map[ulid.ULID]*metadata.Meta, synced block.GaugeVec, modified block.GaugeVec) error { + // don't do any deduplicate filtering + return nil +} + +func (f *DisabledDeduplicateFilter) DuplicateIDs() []ulid.ULID { + return nil +} diff --git a/pkg/compactor/compactor_metrics.go b/pkg/compactor/compactor_metrics.go new file mode 100644 index 0000000000..bc2ecf7f76 --- /dev/null +++ b/pkg/compactor/compactor_metrics.go @@ -0,0 +1,263 @@ +package compactor + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/thanos-io/thanos/pkg/block" + "github.com/thanos-io/thanos/pkg/compact" + "github.com/thanos-io/thanos/pkg/extprom" +) + +type compactorMetrics struct { + reg prometheus.Registerer + commonLabels []string + compactionLabels []string + + // block.BaseFetcherMetrics + baseFetcherSyncs *prometheus.CounterVec + + // block.FetcherMetrics + metaFetcherSyncs *prometheus.CounterVec + metaFetcherSyncFailures *prometheus.CounterVec + metaFetcherSyncDuration *prometheus.HistogramVec + metaFetcherSynced *extprom.TxGaugeVec + metaFetcherModified *extprom.TxGaugeVec + + // compact.SyncerMetrics + syncerGarbageCollectedBlocks *prometheus.CounterVec + syncerGarbageCollections *prometheus.CounterVec + syncerGarbageCollectionFailures *prometheus.CounterVec + syncerGarbageCollectionDuration *prometheus.HistogramVec + syncerBlocksMarkedForDeletion *prometheus.CounterVec + + compactions *prometheus.CounterVec + compactionPlanned *prometheus.CounterVec + compactionRunsStarted *prometheus.CounterVec + compactionRunsCompleted *prometheus.CounterVec + compactionFailures *prometheus.CounterVec + verticalCompactions *prometheus.CounterVec + partitionCount *prometheus.GaugeVec + compactionsNotPlanned *prometheus.CounterVec + compactionDuration *prometheus.GaugeVec + partitionGroupDuration *prometheus.GaugeVec + blockGroupDuration *prometheus.GaugeVec + compactionRetryErrors *prometheus.CounterVec + compactionHaltErrors *prometheus.CounterVec + remainingPlannedCompactions *prometheus.GaugeVec +} + +const ( + UserLabelName = "user" + TimeRangeLabelName = "time_range_milliseconds" + ReasonLabelName = "reason" +) + +var ( + CommonLabels = []string{UserLabelName} + CompactionLabels = []string{TimeRangeLabelName} +) + +func newDefaultCompactorMetrics(reg prometheus.Registerer) *compactorMetrics { + return newCompactorMetricsWithLabels(reg, CommonLabels, []string{"resolution"}) +} + +func newCompactorMetrics(reg prometheus.Registerer) *compactorMetrics { + return newCompactorMetricsWithLabels(reg, CommonLabels, append(CommonLabels, 
CompactionLabels...)) +} + +func newCompactorMetricsWithLabels(reg prometheus.Registerer, commonLabels []string, compactionLabels []string) *compactorMetrics { + var m compactorMetrics + m.reg = reg + m.commonLabels = commonLabels + m.compactionLabels = compactionLabels + + // Copied from Thanos, pkg/block/fetcher.go + m.baseFetcherSyncs = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Subsystem: block.FetcherSubSys, + Name: "cortex_compactor_meta_base_syncs_total", + Help: "Total blocks metadata synchronization attempts by base Fetcher.", + }, nil) + + // Copied from Thanos, pkg/block/fetcher.go + m.metaFetcherSyncs = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Subsystem: block.FetcherSubSys, + Name: "cortex_compactor_meta_syncs_total", + Help: "Total blocks metadata synchronization attempts.", + }, nil) + m.metaFetcherSyncFailures = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Subsystem: block.FetcherSubSys, + Name: "cortex_compactor_meta_sync_failures_total", + Help: "Total blocks metadata synchronization failures.", + }, nil) + m.metaFetcherSyncDuration = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ + Subsystem: block.FetcherSubSys, + Name: "cortex_compactor_meta_sync_duration_seconds", + Help: "Duration of the blocks metadata synchronization in seconds.", + Buckets: []float64{0.01, 1, 10, 100, 300, 600, 1000}, + }, nil) + m.metaFetcherSynced = extprom.NewTxGaugeVec( + reg, + prometheus.GaugeOpts{ + Subsystem: block.FetcherSubSys, + Name: "cortex_compactor_meta_synced", + Help: "Number of block metadata synced", + }, + []string{"state"}, + block.DefaultSyncedStateLabelValues()..., + ) + m.metaFetcherModified = extprom.NewTxGaugeVec( + reg, + prometheus.GaugeOpts{ + Subsystem: block.FetcherSubSys, + Name: "cortex_compactor_meta_modified", + Help: "Number of blocks whose metadata changed", + }, + []string{"modified"}, + block.DefaultModifiedLabelValues()..., + ) + + // Copied from Thanos, pkg/compact/compact.go. 
+ m.syncerGarbageCollectedBlocks = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_compactor_garbage_collected_blocks_total", + Help: "Total number of blocks marked for deletion by compactor.", + }, nil) + m.syncerGarbageCollections = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_compactor_garbage_collection_total", + Help: "Total number of garbage collection operations.", + }, nil) + m.syncerGarbageCollectionFailures = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_compactor_garbage_collection_failures_total", + Help: "Total number of failed garbage collection operations.", + }, nil) + m.syncerGarbageCollectionDuration = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ + Name: "cortex_compactor_garbage_collection_duration_seconds", + Help: "Time it took to perform garbage collection iteration.", + }, nil) + m.syncerBlocksMarkedForDeletion = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: blocksMarkedForDeletionName, + Help: blocksMarkedForDeletionHelp, + }, append(commonLabels, ReasonLabelName)) + + m.compactions = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_compactor_group_compactions_total", + Help: "Total number of group compaction attempts that resulted in a new block.", + }, compactionLabels) + m.compactionPlanned = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_compact_group_compaction_planned_total", + Help: "Total number of compaction planned.", + }, compactionLabels) + m.compactionRunsStarted = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_compactor_group_compaction_runs_started_total", + Help: "Total number of group compaction attempts.", + }, compactionLabels) + m.compactionRunsCompleted = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_compactor_group_compaction_runs_completed_total", + Help: "Total number of group completed compaction runs. This also includes compactor group runs that resulted with no compaction.", + }, compactionLabels) + m.compactionFailures = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_compactor_group_compactions_failures_total", + Help: "Total number of failed group compactions.", + }, compactionLabels) + m.verticalCompactions = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_compactor_group_vertical_compactions_total", + Help: "Total number of group compaction attempts that resulted in a new block based on overlapping blocks.", + }, compactionLabels) + m.partitionCount = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_compact_group_partition_count", + Help: "Number of partitions.", + }, compactionLabels) + m.compactionsNotPlanned = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_compact_group_compactions_not_planned", + Help: "Total number of group compaction not planned due to non-critical error (ie. 
group is currently visited by other compactor).", + }, compactionLabels) + m.compactionDuration = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_compact_group_compaction_duration_seconds", + Help: "Duration of completed compactions in seconds", + }, compactionLabels) + m.partitionGroupDuration = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_compact_partition_group_duration_seconds", + Help: "Duration of sharding partition grouper in seconds", + }, commonLabels) + m.blockGroupDuration = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_compact_block_group_duration_seconds", + Help: "Duration of sharding grouper in seconds", + }, commonLabels) + m.compactionRetryErrors = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_compactor_compaction_retry_error_total", + Help: "Total number of retry errors from compactions.", + }, CommonLabels) + m.compactionHaltErrors = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_compactor_compaction_halt_error_total", + Help: "Total number of halt errors from compactions.", + }, CommonLabels) + m.remainingPlannedCompactions = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_compactor_remaining_planned_compactions", + Help: "Total number of plans that remain to be compacted. Only available with shuffle-sharding strategy", + }, CommonLabels) + + return &m +} + +func (m *compactorMetrics) getBaseFetcherMetrics(userID string) *block.BaseFetcherMetrics { + var baseFetcherMetrics block.BaseFetcherMetrics + baseFetcherMetrics.Syncs = m.baseFetcherSyncs.WithLabelValues() + return &baseFetcherMetrics +} + +func (m *compactorMetrics) getMetaFetcherMetrics(userID string) *block.FetcherMetrics { + var fetcherMetrics block.FetcherMetrics + fetcherMetrics.Syncs = m.metaFetcherSyncs.WithLabelValues() + fetcherMetrics.SyncFailures = m.metaFetcherSyncFailures.WithLabelValues() + fetcherMetrics.SyncDuration = m.metaFetcherSyncDuration.WithLabelValues() + fetcherMetrics.Synced = m.metaFetcherSynced + fetcherMetrics.Modified = m.metaFetcherModified + return &fetcherMetrics +} + +func (m *compactorMetrics) getSyncerMetrics(userID string) *compact.SyncerMetrics { + var syncerMetrics compact.SyncerMetrics + labelValues := m.getCommonLabelValues(userID) + syncerMetrics.GarbageCollectedBlocks = m.syncerGarbageCollectedBlocks.WithLabelValues() + syncerMetrics.GarbageCollections = m.syncerGarbageCollections.WithLabelValues() + syncerMetrics.GarbageCollectionFailures = m.syncerGarbageCollectionFailures.WithLabelValues() + syncerMetrics.GarbageCollectionDuration = m.syncerGarbageCollectionDuration.WithLabelValues() + syncerMetrics.BlocksMarkedForDeletion = m.syncerBlocksMarkedForDeletion.WithLabelValues(append(labelValues, "compaction")...) + return &syncerMetrics +} + +func (m *compactorMetrics) getCommonLabelValues(userID string) []string { + var labelValues []string + if len(m.commonLabels) > 0 { + labelValues = append(labelValues, userID) + } + return labelValues +} + +func (m *compactorMetrics) initMetricWithCompactionLabelValues(labelValue ...string) { + if len(m.compactionLabels) != len(CommonLabels)+len(CompactionLabels) { + return + } + + m.compactions.WithLabelValues(labelValue...) + m.compactionPlanned.WithLabelValues(labelValue...) + m.compactionRunsStarted.WithLabelValues(labelValue...) + m.compactionRunsCompleted.WithLabelValues(labelValue...) + m.compactionFailures.WithLabelValues(labelValue...) 
+ m.verticalCompactions.WithLabelValues(labelValue...) + m.partitionCount.WithLabelValues(labelValue...) + m.compactionsNotPlanned.WithLabelValues(labelValue...) + m.compactionDuration.WithLabelValues(labelValue...) +} + +func (m *compactorMetrics) deleteMetricsForDeletedTenant(userID string) { + m.syncerBlocksMarkedForDeletion.DeleteLabelValues(userID) + m.compactions.DeleteLabelValues(userID) + m.compactionPlanned.DeleteLabelValues(userID) + m.compactionRunsStarted.DeleteLabelValues(userID) + m.compactionRunsCompleted.DeleteLabelValues(userID) + m.compactionFailures.DeleteLabelValues(userID) + m.verticalCompactions.DeleteLabelValues(userID) + m.partitionCount.DeleteLabelValues(userID) + m.compactionsNotPlanned.DeleteLabelValues(userID) + m.compactionDuration.DeleteLabelValues(userID) +} diff --git a/pkg/compactor/compactor_paritioning_test.go b/pkg/compactor/compactor_paritioning_test.go new file mode 100644 index 0000000000..250c793f84 --- /dev/null +++ b/pkg/compactor/compactor_paritioning_test.go @@ -0,0 +1,1781 @@ +package compactor + +import ( + "context" + "crypto/rand" + "encoding/json" + "flag" + "fmt" + "io" + "os" + "path" + "strings" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/prometheus/tsdb" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" + "github.com/thanos-io/thanos/pkg/block" + "github.com/thanos-io/thanos/pkg/block/metadata" + "github.com/thanos-io/thanos/pkg/compact" + "gopkg.in/yaml.v2" + + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/ring/kv/consul" + "github.com/cortexproject/cortex/pkg/storage/bucket" + cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" + "github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex" + cortex_storage_testutil "github.com/cortexproject/cortex/pkg/storage/tsdb/testutil" + "github.com/cortexproject/cortex/pkg/util" + "github.com/cortexproject/cortex/pkg/util/concurrency" + "github.com/cortexproject/cortex/pkg/util/flagext" + "github.com/cortexproject/cortex/pkg/util/services" + cortex_testutil "github.com/cortexproject/cortex/pkg/util/test" + "github.com/cortexproject/cortex/pkg/util/validation" +) + +// TODO: test CompactionMode config here +func TestPartitionConfig_ShouldSupportYamlConfig(t *testing.T) { + yamlCfg := ` +block_ranges: [2h, 48h] +consistency_delay: 1h +block_sync_concurrency: 123 +data_dir: /tmp +compaction_interval: 15m +compaction_retries: 123 +` + + cfg := Config{} + flagext.DefaultValues(&cfg) + assert.NoError(t, yaml.Unmarshal([]byte(yamlCfg), &cfg)) + assert.Equal(t, cortex_tsdb.DurationList{2 * time.Hour, 48 * time.Hour}, cfg.BlockRanges) + assert.Equal(t, time.Hour, cfg.ConsistencyDelay) + assert.Equal(t, 123, cfg.BlockSyncConcurrency) + assert.Equal(t, "/tmp", cfg.DataDir) + assert.Equal(t, 15*time.Minute, cfg.CompactionInterval) + assert.Equal(t, 123, cfg.CompactionRetries) +} + +// TODO: test CompactionMode config here +func TestPartitionConfig_ShouldSupportCliFlags(t *testing.T) { + fs := flag.NewFlagSet("", flag.PanicOnError) + cfg := Config{} + cfg.RegisterFlags(fs) + require.NoError(t, fs.Parse([]string{ + "-compactor.block-ranges=2h,48h", + "-compactor.consistency-delay=1h", + "-compactor.block-sync-concurrency=123", + "-compactor.data-dir=/tmp", + 
"-compactor.compaction-interval=15m", + "-compactor.compaction-retries=123", + })) + + assert.Equal(t, cortex_tsdb.DurationList{2 * time.Hour, 48 * time.Hour}, cfg.BlockRanges) + assert.Equal(t, time.Hour, cfg.ConsistencyDelay) + assert.Equal(t, 123, cfg.BlockSyncConcurrency) + assert.Equal(t, "/tmp", cfg.DataDir) + assert.Equal(t, 15*time.Minute, cfg.CompactionInterval) + assert.Equal(t, 123, cfg.CompactionRetries) +} + +// TODO: test CompactionMode config here +func TestPartitionConfig_Validate(t *testing.T) { + tests := map[string]struct { + setup func(cfg *Config) + initLimits func(*validation.Limits) + expected string + }{ + "should pass with the default config": { + setup: func(cfg *Config) {}, + initLimits: func(_ *validation.Limits) {}, + expected: "", + }, + "should pass with only 1 block range period": { + setup: func(cfg *Config) { + cfg.BlockRanges = cortex_tsdb.DurationList{time.Hour} + }, + initLimits: func(_ *validation.Limits) {}, + expected: "", + }, + "should fail with non divisible block range periods": { + setup: func(cfg *Config) { + cfg.BlockRanges = cortex_tsdb.DurationList{2 * time.Hour, 12 * time.Hour, 24 * time.Hour, 30 * time.Hour} + }, + + initLimits: func(_ *validation.Limits) {}, + expected: errors.Errorf(errInvalidBlockRanges, 30*time.Hour, 24*time.Hour).Error(), + }, + "should fail with duration values of zero": { + setup: func(cfg *Config) { + cfg.BlockRanges = cortex_tsdb.DurationList{2 * time.Hour, 0, 24 * time.Hour, 30 * time.Hour} + }, + initLimits: func(_ *validation.Limits) {}, + expected: errors.Errorf("compactor block range period cannot be zero").Error(), + }, + "should pass with valid shuffle sharding config": { + setup: func(cfg *Config) { + cfg.ShardingStrategy = util.ShardingStrategyShuffle + cfg.ShardingEnabled = true + }, + initLimits: func(limits *validation.Limits) { + limits.CompactorTenantShardSize = 1 + }, + expected: "", + }, + "should fail with bad compactor tenant shard size": { + setup: func(cfg *Config) { + cfg.ShardingStrategy = util.ShardingStrategyShuffle + cfg.ShardingEnabled = true + }, + initLimits: func(_ *validation.Limits) {}, + expected: errInvalidTenantShardSize.Error(), + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + cfg := &Config{} + limits := validation.Limits{} + flagext.DefaultValues(cfg, &limits) + testData.setup(cfg) + testData.initLimits(&limits) + + if actualErr := cfg.Validate(limits); testData.expected != "" { + assert.EqualError(t, actualErr, testData.expected) + } else { + assert.NoError(t, actualErr) + } + }) + } +} + +func TestPartitionCompactor_SkipCompactionWhenCmkError(t *testing.T) { + t.Parallel() + userID := "user-1" + + ss := bucketindex.Status{Status: bucketindex.CustomerManagedKeyError, Version: bucketindex.SyncStatusFileVersion} + content, err := json.Marshal(ss) + require.NoError(t, err) + + // No user blocks stored in the bucket. 
+ bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("", []string{userID}, nil) + bucketClient.MockIter("__markers__", []string{}, nil) + bucketClient.MockIter(userID+"/", []string{}, nil) + bucketClient.MockIter(userID+"/markers/", nil, nil) + bucketClient.MockGet(userID+"/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload(userID+"/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete(userID+"/markers/cleaner-visit-marker.json", nil) + bucketClient.MockGet(userID+"/bucket-index-sync-status.json", string(content), nil) + bucketClient.MockGet(userID+"/bucket-index.json.gz", "", nil) + bucketClient.MockUpload(userID+"/bucket-index-sync-status.json", nil) + bucketClient.MockUpload(userID+"/bucket-index.json.gz", nil) + bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath(userID), false, nil) + bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath(userID), false, nil) + bucketClient.MockIter(userID+"/"+PartitionedGroupDirectory, nil, nil) + + cfg := prepareConfigForPartitioning() + c, _, _, logs, _ := prepareForPartitioning(t, cfg, bucketClient, nil, nil) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) + + // Wait until a run has completed. + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) + }) + + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) + assert.Contains(t, strings.Split(strings.TrimSpace(logs.String()), "\n"), `level=info component=compactor msg="skipping compactUser due CustomerManagedKeyError" user=user-1`) +} + +func TestPartitionCompactor_ShouldDoNothingOnNoUserBlocks(t *testing.T) { + t.Parallel() + + // No user blocks stored in the bucket. + bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("", []string{}, nil) + bucketClient.MockIter("__markers__", []string{}, nil) + cfg := prepareConfigForPartitioning() + c, _, _, logs, registry := prepareForPartitioning(t, cfg, bucketClient, nil, nil) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) + + // Wait until a run has completed. + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) + }) + + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) + + assert.Equal(t, prom_testutil.ToFloat64(c.CompactionRunInterval), cfg.CompactionInterval.Seconds()) + + assert.ElementsMatch(t, []string{ + `level=info component=compactor msg="compactor started"`, + `level=info component=compactor msg="discovering users from bucket"`, + `level=info component=compactor msg="discovered users from bucket" users=0`, + }, removeIgnoredLogs(strings.Split(strings.TrimSpace(logs.String()), "\n"))) + + assert.NoError(t, prom_testutil.GatherAndCompare(registry, strings.NewReader(` + # TYPE cortex_compactor_runs_started_total counter + # HELP cortex_compactor_runs_started_total Total number of compaction runs started. + cortex_compactor_runs_started_total 1 + + # TYPE cortex_compactor_runs_completed_total counter + # HELP cortex_compactor_runs_completed_total Total number of compaction runs successfully completed. + cortex_compactor_runs_completed_total 1 + + # TYPE cortex_compactor_runs_failed_total counter + # HELP cortex_compactor_runs_failed_total Total number of compaction runs failed. 
+ cortex_compactor_runs_failed_total 0 + + # TYPE cortex_compactor_block_cleanup_failures_total counter + # HELP cortex_compactor_block_cleanup_failures_total Total number of blocks failed to be deleted. + cortex_compactor_block_cleanup_failures_total 0 + + # HELP cortex_compactor_blocks_cleaned_total Total number of blocks deleted. + # TYPE cortex_compactor_blocks_cleaned_total counter + cortex_compactor_blocks_cleaned_total 0 + + # HELP cortex_compactor_blocks_marked_for_no_compaction_total Total number of blocks marked for no compact during a compaction run. + # TYPE cortex_compactor_blocks_marked_for_no_compaction_total counter + cortex_compactor_blocks_marked_for_no_compaction_total 0 + + # TYPE cortex_compactor_block_cleanup_started_total counter + # HELP cortex_compactor_block_cleanup_started_total Total number of blocks cleanup runs started. + cortex_compactor_block_cleanup_started_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_started_total{tenant_status="deleted"} 1 + + # TYPE cortex_compactor_block_cleanup_completed_total counter + # HELP cortex_compactor_block_cleanup_completed_total Total number of blocks cleanup runs successfully completed. + cortex_compactor_block_cleanup_completed_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_completed_total{tenant_status="deleted"} 1 + `), + "cortex_compactor_runs_started_total", + "cortex_compactor_runs_completed_total", + "cortex_compactor_runs_failed_total", + "cortex_compactor_garbage_collected_blocks_total", + "cortex_compactor_block_cleanup_failures_total", + "cortex_compactor_blocks_cleaned_total", + "cortex_compactor_blocks_marked_for_deletion_total", + "cortex_compactor_blocks_marked_for_no_compaction_total", + "cortex_compactor_block_cleanup_started_total", + "cortex_compactor_block_cleanup_completed_total", + "cortex_compactor_block_cleanup_failed_total", + )) +} + +func TestPartitionCompactor_ShouldRetryCompactionOnFailureWhileDiscoveringUsersFromBucket(t *testing.T) { + t.Parallel() + + // Fail to iterate over the bucket while discovering users. + bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("__markers__", nil, errors.New("failed to iterate the bucket")) + bucketClient.MockIter("", nil, errors.New("failed to iterate the bucket")) + + c, _, _, logs, registry := prepareForPartitioning(t, prepareConfigForPartitioning(), bucketClient, nil, nil) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) + + // Wait until all retry attempts have completed. + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsFailed) + }) + + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) + + // Ensure the bucket iteration has been retried the configured number of times. 
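The `1+3` expected below reads as one `Iter` call from the cleaner's startup scan plus the compactor's retried attempts at discovering users (apparently three with the test config's compaction retries). A minimal bounded-retry sketch in that spirit; it is illustrative only and not the actual `discoverUsersWithRetries` implementation, which uses the backoff utility:

```go
package main

import (
	"errors"
	"fmt"
)

// retry is an illustrative bounded-retry helper: it re-runs fn until it
// succeeds or the attempt budget is exhausted, returning the last error.
func retry(attempts int, fn func() error) error {
	var lastErr error
	for i := 0; i < attempts; i++ {
		if lastErr = fn(); lastErr == nil {
			return nil
		}
	}
	return lastErr
}

func main() {
	calls := 0
	err := retry(3, func() error {
		calls++
		return errors.New("failed to iterate the bucket")
	})
	fmt.Println(calls, err) // 3 calls before giving up, matching the retried Iter calls asserted in the test
}
```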
+ bucketClient.AssertNumberOfCalls(t, "Iter", 1+3) + + assert.ElementsMatch(t, []string{ + `level=error component=cleaner msg="failed to scan users on startup" err="failed to discover users from bucket: failed to iterate the bucket"`, + `level=info component=compactor msg="compactor started"`, + `level=info component=compactor msg="discovering users from bucket"`, + `level=error component=compactor msg="failed to discover users from bucket" err="failed to iterate the bucket"`, + }, removeIgnoredLogs(strings.Split(strings.TrimSpace(logs.String()), "\n"))) + + assert.NoError(t, prom_testutil.GatherAndCompare(registry, strings.NewReader(` + # TYPE cortex_compactor_runs_started_total counter + # HELP cortex_compactor_runs_started_total Total number of compaction runs started. + cortex_compactor_runs_started_total 1 + + # TYPE cortex_compactor_runs_completed_total counter + # HELP cortex_compactor_runs_completed_total Total number of compaction runs successfully completed. + cortex_compactor_runs_completed_total 0 + + # TYPE cortex_compactor_runs_failed_total counter + # HELP cortex_compactor_runs_failed_total Total number of compaction runs failed. + cortex_compactor_runs_failed_total 1 + + # TYPE cortex_compactor_block_cleanup_failures_total counter + # HELP cortex_compactor_block_cleanup_failures_total Total number of blocks failed to be deleted. + cortex_compactor_block_cleanup_failures_total 0 + + # TYPE cortex_compactor_block_cleanup_failed_total counter + # HELP cortex_compactor_block_cleanup_failed_total Total number of blocks cleanup runs failed. + cortex_compactor_block_cleanup_failed_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_failed_total{tenant_status="deleted"} 1 + + # HELP cortex_compactor_blocks_cleaned_total Total number of blocks deleted. + # TYPE cortex_compactor_blocks_cleaned_total counter + cortex_compactor_blocks_cleaned_total 0 + + # HELP cortex_compactor_blocks_marked_for_no_compaction_total Total number of blocks marked for no compact during a compaction run. 
+ # TYPE cortex_compactor_blocks_marked_for_no_compaction_total counter + cortex_compactor_blocks_marked_for_no_compaction_total 0 + `), + "cortex_compactor_runs_started_total", + "cortex_compactor_runs_completed_total", + "cortex_compactor_runs_failed_total", + "cortex_compactor_garbage_collected_blocks_total", + "cortex_compactor_block_cleanup_failures_total", + "cortex_compactor_blocks_cleaned_total", + "cortex_compactor_blocks_marked_for_deletion_total", + "cortex_compactor_blocks_marked_for_no_compaction_total", + "cortex_compactor_block_cleanup_started_total", + "cortex_compactor_block_cleanup_completed_total", + "cortex_compactor_block_cleanup_failed_total", + )) +} + +func TestPartitionCompactor_ShouldIncrementCompactionErrorIfFailedToCompactASingleTenant(t *testing.T) { + t.Parallel() + + userID := "test-user" + partitionedGroupID := getPartitionedGroupID(userID) + bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("", []string{userID}, nil) + bucketClient.MockIter("__markers__", []string{}, nil) + bucketClient.MockIter(userID+"/", []string{userID + "/01DTVP434PA9VFXSW2JKB3392D/meta.json", userID + "/01FN6CDF3PNEWWRY5MPGJPE3EX/meta.json"}, nil) + bucketClient.MockIter(userID+"/markers/", nil, nil) + bucketClient.MockGet(userID+"/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload(userID+"/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete(userID+"/markers/cleaner-visit-marker.json", nil) + bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath(userID), false, nil) + bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath(userID), false, nil) + bucketClient.MockGet(userID+"/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) + bucketClient.MockGet(userID+"/01DTVP434PA9VFXSW2JKB3392D/no-compact-mark.json", "", nil) + bucketClient.MockGet(userID+"/01DTVP434PA9VFXSW2JKB3392D/deletion-mark.json", "", nil) + bucketClient.MockGet(userID+"/01DTVP434PA9VFXSW2JKB3392D/partition-0-visit-mark.json", "", nil) + bucketClient.MockUpload(userID+"/01DTVP434PA9VFXSW2JKB3392D/partition-0-visit-mark.json", nil) + bucketClient.MockGet(userID+"/bucket-index-sync-status.json", "", nil) + bucketClient.MockGet(userID+"/01FN6CDF3PNEWWRY5MPGJPE3EX/meta.json", mockBlockMetaJSON("01FN6CDF3PNEWWRY5MPGJPE3EX"), nil) + bucketClient.MockGet(userID+"/01FN6CDF3PNEWWRY5MPGJPE3EX/no-compact-mark.json", "", nil) + bucketClient.MockGet(userID+"/01FN6CDF3PNEWWRY5MPGJPE3EX/deletion-mark.json", "", nil) + bucketClient.MockGet(userID+"/01FN6CDF3PNEWWRY5MPGJPE3EX/partition-0-visit-mark.json", "", nil) + bucketClient.MockUpload(userID+"/01FN6CDF3PNEWWRY5MPGJPE3EX/partition-0-visit-mark.json", nil) + bucketClient.MockGet(userID+"/bucket-index.json.gz", "", nil) + bucketClient.MockUpload(userID+"/bucket-index.json.gz", nil) + bucketClient.MockUpload(userID+"/bucket-index-sync-status.json", nil) + bucketClient.MockGet(userID+"/partitioned-groups/"+partitionedGroupID+".json", "", nil) + bucketClient.MockUpload(userID+"/partitioned-groups/"+partitionedGroupID+".json", nil) + bucketClient.MockIter(userID+"/"+PartitionedGroupDirectory, nil, nil) + + c, _, tsdbPlannerMock, _, registry := prepareForPartitioning(t, prepareConfigForPartitioning(), bucketClient, nil, nil) + tsdbPlannerMock.On("Plan", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]*metadata.Meta{}, errors.New("Failed to plan")) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) + + // Wait until all retry attempts have 
completed. + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsFailed) + }) + + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) + + assert.NoError(t, prom_testutil.GatherAndCompare(registry, strings.NewReader(` + # TYPE cortex_compactor_runs_started_total counter + # HELP cortex_compactor_runs_started_total Total number of compaction runs started. + cortex_compactor_runs_started_total 1 + + # TYPE cortex_compactor_runs_completed_total counter + # HELP cortex_compactor_runs_completed_total Total number of compaction runs successfully completed. + cortex_compactor_runs_completed_total 0 + + # TYPE cortex_compactor_runs_failed_total counter + # HELP cortex_compactor_runs_failed_total Total number of compaction runs failed. + cortex_compactor_runs_failed_total 1 + `), + "cortex_compactor_runs_started_total", + "cortex_compactor_runs_completed_total", + "cortex_compactor_runs_failed_total", + )) +} + +func TestPartitionCompactor_ShouldCompactAndRemoveUserFolder(t *testing.T) { + partitionedGroupID1 := getPartitionedGroupID("user-1") + bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("", []string{"user-1"}, nil) + bucketClient.MockIter("__markers__", []string{}, nil) + bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath("user-1"), false, nil) + bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath("user-1"), false, nil) + bucketClient.MockIter("user-1/", []string{"user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", "user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/meta.json"}, nil) + bucketClient.MockIter("user-1/markers/", nil, nil) + bucketClient.MockGet("user-1/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/deletion-mark.json", "", nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/no-compact-mark.json", "", nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/partition-0-visit-mark.json", "", nil) + bucketClient.MockGet("user-1/bucket-index-sync-status.json", "", nil) + bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/meta.json", mockBlockMetaJSON("01FN6CDF3PNEWWRY5MPGJPE3EX"), nil) + bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/deletion-mark.json", "", nil) + bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/no-compact-mark.json", "", nil) + bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/partition-0-visit-mark.json", "", nil) + bucketClient.MockGet("user-1/bucket-index.json.gz", "", nil) + bucketClient.MockGet("user-1/bucket-index-sync-status.json", "", nil) + bucketClient.MockIter("user-1/markers/", nil, nil) + bucketClient.MockUpload("user-1/bucket-index.json.gz", nil) + bucketClient.MockUpload("user-1/bucket-index-sync-status.json", nil) + bucketClient.MockGet("user-1/partitioned-groups/"+partitionedGroupID1+".json", "", nil) + bucketClient.MockUpload("user-1/partitioned-groups/"+partitionedGroupID1+".json", nil) + bucketClient.MockIter("user-1/"+PartitionedGroupDirectory, nil, nil) + + c, _, tsdbPlanner, _, _ := prepareForPartitioning(t, prepareConfigForPartitioning(), bucketClient, nil, nil) + + // Make sure the user folder is created and is being used + // This will be called during compaction + 
tsdbPlanner.On("Plan", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Run(func(args mock.Arguments) { + _, err := os.Stat(c.compactDirForUser("user-1")) + require.NoError(t, err) + }).Return([]*metadata.Meta{}, nil) + + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) + + // Wait until a run has completed. + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) + }) + + _, err := os.Stat(c.compactDirForUser("user-1")) + require.True(t, os.IsNotExist(err)) +} + +func TestPartitionCompactor_ShouldIterateOverUsersAndRunCompaction(t *testing.T) { + t.Parallel() + + partitionedGroupID1 := getPartitionedGroupID("user-1") + partitionedGroupID2 := getPartitionedGroupID("user-2") + + // Mock the bucket to contain two users, each one with one block. + bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("", []string{"user-1", "user-2"}, nil) + bucketClient.MockIter("__markers__", []string{}, nil) + bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath("user-1"), false, nil) + bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath("user-1"), false, nil) + bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath("user-2"), false, nil) + bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath("user-2"), false, nil) + bucketClient.MockIter("user-1/", []string{"user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", "user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/meta.json"}, nil) + bucketClient.MockIter("user-2/", []string{"user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json", "user-2/01FN3V83ABR9992RF8WRJZ76ZQ/meta.json"}, nil) + bucketClient.MockIter("user-1/markers/", nil, nil) + bucketClient.MockGet("user-1/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockIter("user-2/markers/", nil, nil) + bucketClient.MockGet("user-2/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-2/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-2/markers/cleaner-visit-marker.json", nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/deletion-mark.json", "", nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/no-compact-mark.json", "", nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/partition-0-visit-mark.json", "", nil) + bucketClient.MockGet("user-1/bucket-index-sync-status.json", "", nil) + bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/meta.json", mockBlockMetaJSON("01FN6CDF3PNEWWRY5MPGJPE3EX"), nil) + bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/deletion-mark.json", "", nil) + bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/no-compact-mark.json", "", nil) + bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/partition-0-visit-mark.json", "", nil) + bucketClient.MockGet("user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json", mockBlockMetaJSON("01DTW0ZCPDDNV4BV83Q2SV4QAZ"), nil) + bucketClient.MockGet("user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/deletion-mark.json", "", nil) + bucketClient.MockGet("user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/no-compact-mark.json", "", nil) + bucketClient.MockGet("user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/partition-0-visit-mark.json", "", nil) + bucketClient.MockGet("user-2/01FN3V83ABR9992RF8WRJZ76ZQ/meta.json", 
mockBlockMetaJSON("01FN3V83ABR9992RF8WRJZ76ZQ"), nil) + bucketClient.MockGet("user-2/01FN3V83ABR9992RF8WRJZ76ZQ/deletion-mark.json", "", nil) + bucketClient.MockGet("user-2/01FN3V83ABR9992RF8WRJZ76ZQ/no-compact-mark.json", "", nil) + bucketClient.MockGet("user-2/01FN3V83ABR9992RF8WRJZ76ZQ/partition-0-visit-mark.json", "", nil) + bucketClient.MockGet("user-1/bucket-index.json.gz", "", nil) + bucketClient.MockGet("user-2/bucket-index.json.gz", "", nil) + bucketClient.MockGet("user-1/bucket-index-sync-status.json", "", nil) + bucketClient.MockGet("user-2/bucket-index-sync-status.json", "", nil) + bucketClient.MockIter("user-1/markers/", nil, nil) + bucketClient.MockIter("user-2/markers/", nil, nil) + bucketClient.MockUpload("user-1/bucket-index.json.gz", nil) + bucketClient.MockUpload("user-2/bucket-index.json.gz", nil) + bucketClient.MockUpload("user-1/bucket-index-sync-status.json", nil) + bucketClient.MockUpload("user-2/bucket-index-sync-status.json", nil) + bucketClient.MockGet("user-1/partitioned-groups/"+partitionedGroupID1+".json", "", nil) + bucketClient.MockUpload("user-1/partitioned-groups/"+partitionedGroupID1+".json", nil) + bucketClient.MockGet("user-2/partitioned-groups/"+partitionedGroupID2+".json", "", nil) + bucketClient.MockUpload("user-2/partitioned-groups/"+partitionedGroupID2+".json", nil) + bucketClient.MockIter("user-1/"+PartitionedGroupDirectory, nil, nil) + bucketClient.MockIter("user-2/"+PartitionedGroupDirectory, nil, nil) + + c, _, tsdbPlanner, logs, registry := prepareForPartitioning(t, prepareConfigForPartitioning(), bucketClient, nil, nil) + + // Mock the planner as if there's no compaction to do, + // in order to simplify tests (all in all, we just want to + // test our logic and not TSDB compactor which we expect to + // be already tested). + tsdbPlanner.On("Plan", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]*metadata.Meta{}, nil) + + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) + + // Wait until a run has completed. + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) + }) + + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) + + // Ensure a plan has been executed for the blocks of each user. 
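The polling in these tests relies on `prom_testutil.ToFloat64`, which reads the current value of a collector that exposes exactly one series; the test spins until the run counter reaches 1. A tiny sketch of that read, assuming only the Prometheus client and its testutil package:

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	runsCompleted := prometheus.NewCounter(prometheus.CounterOpts{
		Name: "example_runs_completed_total",
		Help: "Total number of compaction runs successfully completed.",
	})
	// ToFloat64 returns the current value of a single-series collector,
	// which is exactly what the tests poll until a run has finished.
	fmt.Println(testutil.ToFloat64(runsCompleted)) // 0
	runsCompleted.Inc()
	fmt.Println(testutil.ToFloat64(runsCompleted)) // 1
}
```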
+ tsdbPlanner.AssertNumberOfCalls(t, "Plan", 2) + + assert.Len(t, tsdbPlanner.getNoCompactBlocks(), 0) + + assert.ElementsMatch(t, []string{ + `level=info component=compactor msg="compactor started"`, + `level=info component=compactor msg="discovering users from bucket"`, + `level=info component=compactor msg="discovered users from bucket" users=2`, + `level=info component=compactor msg="starting compaction of user blocks" user=user-2`, + `level=info component=compactor org_id=user-2 msg="start sync of metas"`, + `level=info component=compactor org_id=user-2 msg="start of GC"`, + `level=info component=compactor org_id=user-2 msg="start of compactions"`, + `level=info component=compactor org_id=user-2 msg="compaction iterations done"`, + `level=info component=compactor msg="successfully compacted user blocks" user=user-2`, + `level=info component=compactor msg="starting compaction of user blocks" user=user-1`, + `level=info component=compactor org_id=user-1 msg="start sync of metas"`, + `level=info component=compactor org_id=user-1 msg="start of GC"`, + `level=info component=compactor org_id=user-1 msg="start of compactions"`, + `level=info component=compactor org_id=user-1 msg="compaction iterations done"`, + `level=info component=compactor msg="successfully compacted user blocks" user=user-1`, + }, removeIgnoredLogs(strings.Split(strings.TrimSpace(logs.String()), "\n"))) + + // Instead of testing for shipper metrics, we only check our metrics here. + // Real shipper metrics are too variable to embed into a test. + testedMetrics := []string{ + "cortex_compactor_runs_started_total", "cortex_compactor_runs_completed_total", "cortex_compactor_runs_failed_total", + "cortex_compactor_blocks_cleaned_total", "cortex_compactor_block_cleanup_failures_total", "cortex_compactor_blocks_marked_for_deletion_total", + "cortex_compactor_block_cleanup_started_total", "cortex_compactor_block_cleanup_completed_total", "cortex_compactor_block_cleanup_failed_total", + "cortex_compactor_blocks_marked_for_no_compaction_total", + } + assert.NoError(t, prom_testutil.GatherAndCompare(registry, strings.NewReader(` + # TYPE cortex_compactor_runs_started_total counter + # HELP cortex_compactor_runs_started_total Total number of compaction runs started. + cortex_compactor_runs_started_total 1 + + # TYPE cortex_compactor_runs_completed_total counter + # HELP cortex_compactor_runs_completed_total Total number of compaction runs successfully completed. + cortex_compactor_runs_completed_total 1 + + # TYPE cortex_compactor_runs_failed_total counter + # HELP cortex_compactor_runs_failed_total Total number of compaction runs failed. + cortex_compactor_runs_failed_total 0 + + # TYPE cortex_compactor_block_cleanup_failures_total counter + # HELP cortex_compactor_block_cleanup_failures_total Total number of blocks failed to be deleted. + cortex_compactor_block_cleanup_failures_total 0 + + # HELP cortex_compactor_blocks_cleaned_total Total number of blocks deleted. + # TYPE cortex_compactor_blocks_cleaned_total counter + cortex_compactor_blocks_cleaned_total 0 + + # HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. 
+ # TYPE cortex_compactor_blocks_marked_for_deletion_total counter + cortex_compactor_blocks_marked_for_deletion_total{reason="compaction",user="user-1"} 0 + cortex_compactor_blocks_marked_for_deletion_total{reason="compaction",user="user-2"} 0 + cortex_compactor_blocks_marked_for_deletion_total{reason="retention",user="user-1"} 0 + cortex_compactor_blocks_marked_for_deletion_total{reason="retention",user="user-2"} 0 + + # TYPE cortex_compactor_block_cleanup_started_total counter + # HELP cortex_compactor_block_cleanup_started_total Total number of blocks cleanup runs started. + cortex_compactor_block_cleanup_started_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_started_total{tenant_status="deleted"} 1 + + # TYPE cortex_compactor_block_cleanup_completed_total counter + # HELP cortex_compactor_block_cleanup_completed_total Total number of blocks cleanup runs successfully completed. + cortex_compactor_block_cleanup_completed_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_completed_total{tenant_status="deleted"} 1 + + # HELP cortex_compactor_blocks_marked_for_no_compaction_total Total number of blocks marked for no compact during a compaction run. + # TYPE cortex_compactor_blocks_marked_for_no_compaction_total counter + cortex_compactor_blocks_marked_for_no_compaction_total 0 + `), testedMetrics...)) +} + +func TestPartitionCompactor_ShouldNotCompactBlocksMarkedForDeletion(t *testing.T) { + t.Parallel() + + cfg := prepareConfigForPartitioning() + cfg.DeletionDelay = 10 * time.Minute // Delete block after 10 minutes + + // Mock the bucket to contain two users, each one with one block. + bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("", []string{"user-1"}, nil) + bucketClient.MockIter("__markers__", []string{}, nil) + bucketClient.MockIter("user-1/", []string{"user-1/01DTVP434PA9VFXSW2JKB3392D", "user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ"}, nil) + bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath("user-1"), false, nil) + bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath("user-1"), false, nil) + + // Block that has just been marked for deletion. It will not be deleted just yet, and it also will not be compacted. + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/deletion-mark.json", mockDeletionMarkJSON("01DTVP434PA9VFXSW2JKB3392D", time.Now()), nil) + bucketClient.MockGet("user-1/markers/01DTVP434PA9VFXSW2JKB3392D-deletion-mark.json", mockDeletionMarkJSON("01DTVP434PA9VFXSW2JKB3392D", time.Now()), nil) + bucketClient.MockGet("user-1/bucket-index-sync-status.json", "", nil) + + // This block will be deleted by cleaner. 
+ bucketClient.MockGet("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json", mockBlockMetaJSON("01DTW0ZCPDDNV4BV83Q2SV4QAZ"), nil) + bucketClient.MockGet("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/deletion-mark.json", mockDeletionMarkJSON("01DTW0ZCPDDNV4BV83Q2SV4QAZ", time.Now().Add(-cfg.DeletionDelay)), nil) + bucketClient.MockGet("user-1/markers/01DTW0ZCPDDNV4BV83Q2SV4QAZ-deletion-mark.json", mockDeletionMarkJSON("01DTW0ZCPDDNV4BV83Q2SV4QAZ", time.Now().Add(-cfg.DeletionDelay)), nil) + + bucketClient.MockIter("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ", []string{ + "user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json", + "user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/deletion-mark.json", + }, nil) + + bucketClient.MockIter("user-1/markers/", []string{ + "user-1/markers/01DTVP434PA9VFXSW2JKB3392D-deletion-mark.json", + "user-1/markers/01DTW0ZCPDDNV4BV83Q2SV4QAZ-deletion-mark.json", + }, nil) + + bucketClient.MockGet("user-1/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-1/markers/cleaner-visit-marker.json", nil) + + bucketClient.MockDelete("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json", nil) + bucketClient.MockDelete("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/deletion-mark.json", nil) + bucketClient.MockDelete("user-1/markers/01DTW0ZCPDDNV4BV83Q2SV4QAZ-deletion-mark.json", nil) + bucketClient.MockDelete("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ", nil) + bucketClient.MockGet("user-1/bucket-index.json.gz", "", nil) + bucketClient.MockGet("user-1/bucket-index-sync-status.json", "", nil) + bucketClient.MockUpload("user-1/bucket-index.json.gz", nil) + bucketClient.MockUpload("user-1/bucket-index-sync-status.json", nil) + bucketClient.MockIter("user-1/"+PartitionedGroupDirectory, nil, nil) + + c, _, tsdbPlanner, logs, registry := prepareForPartitioning(t, cfg, bucketClient, nil, nil) + + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) + + // Wait until a run has completed. + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) + }) + + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) + + // Since both blocks are marked for deletion, none of them are going to be compacted. + tsdbPlanner.AssertNumberOfCalls(t, "Plan", 0) + + assert.ElementsMatch(t, []string{ + `level=info component=compactor msg="compactor started"`, + `level=info component=compactor msg="discovering users from bucket"`, + `level=info component=compactor msg="discovered users from bucket" users=1`, + `level=info component=compactor msg="starting compaction of user blocks" user=user-1`, + `level=info component=compactor org_id=user-1 msg="start sync of metas"`, + `level=info component=compactor org_id=user-1 msg="start of GC"`, + `level=info component=compactor org_id=user-1 msg="start of compactions"`, + `level=info component=compactor org_id=user-1 msg="compaction iterations done"`, + `level=info component=compactor msg="successfully compacted user blocks" user=user-1`, + }, removeIgnoredLogs(strings.Split(strings.TrimSpace(logs.String()), "\n"))) + + // Instead of testing for shipper metrics, we only check our metrics here. + // Real shipper metrics are too variable to embed into a test. 
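The metric assertions that follow use `GatherAndCompare`, which gathers from the registry and compares only the metric families whose names are passed in, so the narrow `testedMetrics` slice keeps everything else on the registry out of the comparison. A self-contained sketch of that behaviour; `example_runs_started_total` is an illustrative metric, not one of the compactor's:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	reg := prometheus.NewRegistry()
	runs := promauto.With(reg).NewCounter(prometheus.CounterOpts{
		Name: "example_runs_started_total",
		Help: "Total number of compaction runs started.",
	})
	runs.Inc()

	// Only the listed metric families are compared; anything else registered
	// on the registry is ignored, which is why a narrow name list is enough.
	expected := `# HELP example_runs_started_total Total number of compaction runs started.
# TYPE example_runs_started_total counter
example_runs_started_total 1
`
	err := testutil.GatherAndCompare(reg, strings.NewReader(expected), "example_runs_started_total")
	fmt.Println(err) // <nil>
}
```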
+ testedMetrics := []string{ + "cortex_compactor_runs_started_total", "cortex_compactor_runs_completed_total", "cortex_compactor_runs_failed_total", + "cortex_compactor_blocks_cleaned_total", "cortex_compactor_block_cleanup_failures_total", "cortex_compactor_blocks_marked_for_deletion_total", + "cortex_compactor_block_cleanup_started_total", "cortex_compactor_block_cleanup_completed_total", "cortex_compactor_block_cleanup_failed_total", + "cortex_compactor_blocks_marked_for_no_compaction_total", + } + assert.NoError(t, prom_testutil.GatherAndCompare(registry, strings.NewReader(` + # TYPE cortex_compactor_runs_started_total counter + # HELP cortex_compactor_runs_started_total Total number of compaction runs started. + cortex_compactor_runs_started_total 1 + + # TYPE cortex_compactor_runs_completed_total counter + # HELP cortex_compactor_runs_completed_total Total number of compaction runs successfully completed. + cortex_compactor_runs_completed_total 1 + + # TYPE cortex_compactor_runs_failed_total counter + # HELP cortex_compactor_runs_failed_total Total number of compaction runs failed. + cortex_compactor_runs_failed_total 0 + + # TYPE cortex_compactor_block_cleanup_failures_total counter + # HELP cortex_compactor_block_cleanup_failures_total Total number of blocks failed to be deleted. + cortex_compactor_block_cleanup_failures_total 0 + + # HELP cortex_compactor_blocks_cleaned_total Total number of blocks deleted. + # TYPE cortex_compactor_blocks_cleaned_total counter + cortex_compactor_blocks_cleaned_total 1 + + # HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. + # TYPE cortex_compactor_blocks_marked_for_deletion_total counter + cortex_compactor_blocks_marked_for_deletion_total{reason="compaction",user="user-1"} 0 + cortex_compactor_blocks_marked_for_deletion_total{reason="retention",user="user-1"} 0 + + # TYPE cortex_compactor_block_cleanup_started_total counter + # HELP cortex_compactor_block_cleanup_started_total Total number of blocks cleanup runs started. + cortex_compactor_block_cleanup_started_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_started_total{tenant_status="deleted"} 1 + + # TYPE cortex_compactor_block_cleanup_completed_total counter + # HELP cortex_compactor_block_cleanup_completed_total Total number of blocks cleanup runs successfully completed. + cortex_compactor_block_cleanup_completed_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_completed_total{tenant_status="deleted"} 1 + + # HELP cortex_compactor_blocks_marked_for_no_compaction_total Total number of blocks marked for no compact during a compaction run. + # TYPE cortex_compactor_blocks_marked_for_no_compaction_total counter + cortex_compactor_blocks_marked_for_no_compaction_total 0 + `), testedMetrics...)) +} + +func TestPartitionCompactor_ShouldNotCompactBlocksMarkedForSkipCompact(t *testing.T) { + t.Parallel() + + partitionedGroupID1 := getPartitionedGroupID("user-1") + partitionedGroupID2 := getPartitionedGroupID("user-2") + // Mock the bucket to contain two users, each one with one block. 
+ bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("", []string{"user-1", "user-2"}, nil) + bucketClient.MockIter("__markers__", []string{}, nil) + bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath("user-1"), false, nil) + bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath("user-1"), false, nil) + bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath("user-2"), false, nil) + bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath("user-2"), false, nil) + bucketClient.MockIter("user-1/", []string{"user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", "user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/meta.json"}, nil) + bucketClient.MockIter("user-2/", []string{"user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json", "user-2/01FN3V83ABR9992RF8WRJZ76ZQ/meta.json"}, nil) + bucketClient.MockIter("user-1/markers/", nil, nil) + bucketClient.MockGet("user-1/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockIter("user-2/markers/", nil, nil) + bucketClient.MockGet("user-2/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-2/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-2/markers/cleaner-visit-marker.json", nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/deletion-mark.json", "", nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/no-compact-mark.json", mockNoCompactBlockJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/partition-0-visit-mark.json", "", nil) + bucketClient.MockUpload("user-1/01DTVP434PA9VFXSW2JKB3392D/partition-0-visit-mark.json", nil) + bucketClient.MockGet("user-1/bucket-index-sync-status.json", "", nil) + bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/meta.json", mockBlockMetaJSON("01FN6CDF3PNEWWRY5MPGJPE3EX"), nil) + bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/deletion-mark.json", "", nil) + bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/no-compact-mark.json", mockNoCompactBlockJSON("01FN6CDF3PNEWWRY5MPGJPE3EX"), nil) + bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/partition-0-visit-mark.json", "", nil) + bucketClient.MockUpload("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/partition-0-visit-mark.json", nil) + + bucketClient.MockGet("user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json", mockBlockMetaJSON("01DTW0ZCPDDNV4BV83Q2SV4QAZ"), nil) + bucketClient.MockGet("user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/deletion-mark.json", "", nil) + bucketClient.MockGet("user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/no-compact-mark.json", "", nil) + bucketClient.MockGet("user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/partition-0-visit-mark.json", "", nil) + bucketClient.MockUpload("user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/partition-0-visit-mark.json", nil) + bucketClient.MockGet("user-2/01FN3V83ABR9992RF8WRJZ76ZQ/meta.json", mockBlockMetaJSON("01FN3V83ABR9992RF8WRJZ76ZQ"), nil) + bucketClient.MockGet("user-2/01FN3V83ABR9992RF8WRJZ76ZQ/deletion-mark.json", "", nil) + bucketClient.MockGet("user-2/01FN3V83ABR9992RF8WRJZ76ZQ/no-compact-mark.json", "", nil) + bucketClient.MockGet("user-2/01FN3V83ABR9992RF8WRJZ76ZQ/partition-0-visit-mark.json", "", nil) + bucketClient.MockUpload("user-2/01FN3V83ABR9992RF8WRJZ76ZQ/partition-0-visit-mark.json", nil) + + 
bucketClient.MockGet("user-1/bucket-index.json.gz", "", nil) + bucketClient.MockGet("user-2/bucket-index.json.gz", "", nil) + bucketClient.MockGet("user-1/bucket-index-sync-status.json", "", nil) + bucketClient.MockGet("user-2/bucket-index-sync-status.json", "", nil) + bucketClient.MockIter("user-1/markers/", nil, nil) + bucketClient.MockIter("user-2/markers/", nil, nil) + bucketClient.MockUpload("user-1/bucket-index.json.gz", nil) + bucketClient.MockUpload("user-2/bucket-index.json.gz", nil) + bucketClient.MockUpload("user-1/bucket-index-sync-status.json", nil) + bucketClient.MockUpload("user-2/bucket-index-sync-status.json", nil) + bucketClient.MockGet("user-1/partitioned-groups/"+partitionedGroupID1+".json", "", nil) + bucketClient.MockUpload("user-1/partitioned-groups/"+partitionedGroupID1+".json", nil) + bucketClient.MockGet("user-2/partitioned-groups/"+partitionedGroupID2+".json", "", nil) + bucketClient.MockUpload("user-2/partitioned-groups/"+partitionedGroupID2+".json", nil) + bucketClient.MockIter("user-1/"+PartitionedGroupDirectory, nil, nil) + bucketClient.MockIter("user-2/"+PartitionedGroupDirectory, nil, nil) + + c, _, tsdbPlanner, _, registry := prepareForPartitioning(t, prepareConfigForPartitioning(), bucketClient, nil, nil) + + tsdbPlanner.On("Plan", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]*metadata.Meta{}, nil) + + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) + + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) + }) + + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) + + // Planner still called for user with all blocks makred for skip compaction. + tsdbPlanner.AssertNumberOfCalls(t, "Plan", 2) + + assert.ElementsMatch(t, []string{"01DTVP434PA9VFXSW2JKB3392D", "01FN6CDF3PNEWWRY5MPGJPE3EX"}, tsdbPlanner.getNoCompactBlocks()) + + testedMetrics := []string{"cortex_compactor_blocks_marked_for_no_compaction_total"} + + assert.NoError(t, prom_testutil.GatherAndCompare(registry, strings.NewReader(` + # HELP cortex_compactor_blocks_marked_for_no_compaction_total Total number of blocks marked for no compact during a compaction run. + # TYPE cortex_compactor_blocks_marked_for_no_compaction_total counter + cortex_compactor_blocks_marked_for_no_compaction_total 0 + `), testedMetrics...)) +} + +func TestPartitionCompactor_ShouldNotCompactBlocksForUsersMarkedForDeletion(t *testing.T) { + t.Parallel() + + cfg := prepareConfigForPartitioning() + cfg.DeletionDelay = 10 * time.Minute // Delete block after 10 minutes + cfg.TenantCleanupDelay = 10 * time.Minute // To make sure it's not 0. + + partitionedGroupID1 := getPartitionedGroupID("user-1") + // Mock the bucket to contain two users, each one with one block. 
+ bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("", []string{"user-1"}, nil) + bucketClient.MockIter("__markers__", []string{"__markers__/user-1/"}, nil) + bucketClient.MockIter("user-1/", []string{"user-1/01DTVP434PA9VFXSW2JKB3392D"}, nil) + bucketClient.MockGet(cortex_tsdb.GetGlobalDeletionMarkPath("user-1"), `{"deletion_time": 1}`, nil) + bucketClient.MockUpload(cortex_tsdb.GetGlobalDeletionMarkPath("user-1"), nil) + + bucketClient.MockGet("user-1/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-1/markers/cleaner-visit-marker.json", nil) + + bucketClient.MockIter("user-1/01DTVP434PA9VFXSW2JKB3392D", []string{"user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", "user-1/01DTVP434PA9VFXSW2JKB3392D/index"}, nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/index", "some index content", nil) + bucketClient.MockGet("user-1/bucket-index-sync-status.json", "", nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/partition-0-visit-mark.json", "", nil) + bucketClient.MockUpload("user-1/01DTVP434PA9VFXSW2JKB3392D/partition-0-visit-mark.json", nil) + bucketClient.MockExists("user-1/01DTVP434PA9VFXSW2JKB3392D/deletion-mark.json", false, nil) + + bucketClient.MockDelete("user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", nil) + bucketClient.MockDelete("user-1/01DTVP434PA9VFXSW2JKB3392D/index", nil) + bucketClient.MockDelete("user-1/bucket-index.json.gz", nil) + bucketClient.MockDelete("user-1/bucket-index-sync-status.json", nil) + bucketClient.MockGet("user-1/partitioned-groups/"+partitionedGroupID1+".json", "", nil) + bucketClient.MockUpload("user-1/partitioned-groups/"+partitionedGroupID1+".json", nil) + bucketClient.MockIter("user-1/"+PartitionedGroupDirectory, nil, nil) + + c, _, tsdbPlanner, logs, registry := prepareForPartitioning(t, cfg, bucketClient, nil, nil) + + // Mock the planner as if there's no compaction to do, + // in order to simplify tests (all in all, we just want to + // test our logic and not TSDB compactor which we expect to + // be already tested). + tsdbPlanner.On("Plan", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]*metadata.Meta{}, nil) + + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) + + // Wait until a run has completed. + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) + }) + + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) + + // No user is compacted, single user we have is marked for deletion. + tsdbPlanner.AssertNumberOfCalls(t, "Plan", 0) + + assert.ElementsMatch(t, []string{ + `level=info component=compactor msg="compactor started"`, + `level=info component=compactor msg="discovering users from bucket"`, + `level=info component=compactor msg="discovered users from bucket" users=1`, + `level=debug component=compactor msg="skipping user because it is marked for deletion" user=user-1`, + }, removeIgnoredLogs(strings.Split(strings.TrimSpace(logs.String()), "\n"))) + + // Instead of testing for shipper metrics, we only check our metrics here. + // Real shipper metrics are too variable to embed into a test. 
+ testedMetrics := []string{ + "cortex_compactor_runs_started_total", "cortex_compactor_runs_completed_total", "cortex_compactor_runs_failed_total", + "cortex_compactor_blocks_cleaned_total", "cortex_compactor_block_cleanup_failures_total", "cortex_compactor_blocks_marked_for_deletion_total", + "cortex_compactor_block_cleanup_started_total", "cortex_compactor_block_cleanup_completed_total", "cortex_compactor_block_cleanup_failed_total", + "cortex_bucket_blocks_count", "cortex_bucket_blocks_marked_for_deletion_count", "cortex_bucket_index_last_successful_update_timestamp_seconds", + "cortex_compactor_blocks_marked_for_no_compaction_total", + } + assert.NoError(t, prom_testutil.GatherAndCompare(registry, strings.NewReader(` + # TYPE cortex_compactor_runs_started_total counter + # HELP cortex_compactor_runs_started_total Total number of compaction runs started. + cortex_compactor_runs_started_total 1 + + # TYPE cortex_compactor_runs_completed_total counter + # HELP cortex_compactor_runs_completed_total Total number of compaction runs successfully completed. + cortex_compactor_runs_completed_total 1 + + # TYPE cortex_compactor_runs_failed_total counter + # HELP cortex_compactor_runs_failed_total Total number of compaction runs failed. + cortex_compactor_runs_failed_total 0 + + # TYPE cortex_compactor_block_cleanup_failures_total counter + # HELP cortex_compactor_block_cleanup_failures_total Total number of blocks failed to be deleted. + cortex_compactor_block_cleanup_failures_total 0 + + # HELP cortex_compactor_blocks_cleaned_total Total number of blocks deleted. + # TYPE cortex_compactor_blocks_cleaned_total counter + cortex_compactor_blocks_cleaned_total 1 + + # TYPE cortex_compactor_block_cleanup_started_total counter + # HELP cortex_compactor_block_cleanup_started_total Total number of blocks cleanup runs started. + cortex_compactor_block_cleanup_started_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_started_total{tenant_status="deleted"} 1 + + # TYPE cortex_compactor_block_cleanup_completed_total counter + # HELP cortex_compactor_block_cleanup_completed_total Total number of blocks cleanup runs successfully completed. + cortex_compactor_block_cleanup_completed_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_completed_total{tenant_status="deleted"} 1 + + # HELP cortex_compactor_blocks_marked_for_no_compaction_total Total number of blocks marked for no compact during a compaction run. + # TYPE cortex_compactor_blocks_marked_for_no_compaction_total counter + cortex_compactor_blocks_marked_for_no_compaction_total 0 + `), testedMetrics...)) +} + +func TestPartitionCompactor_ShouldSkipOutOrOrderBlocks(t *testing.T) { + bucketClient, tmpDir := cortex_storage_testutil.PrepareFilesystemBucket(t) + bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient) + + b1 := createTSDBBlock(t, bucketClient, "user-1", 10, 20, map[string]string{"__name__": "Teste"}) + b2 := createTSDBBlock(t, bucketClient, "user-1", 20, 30, map[string]string{"__name__": "Teste"}) + + // Read bad index file. 
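+ // The fixture index below contains out-of-order chunks; copying it over the first block's index
+ // should cause the compactor to mark that block for no-compaction instead of compacting it.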
+ indexFile, err := os.Open("testdata/out_of_order_chunks/index") + require.NoError(t, err) + indexFileStat, err := indexFile.Stat() + require.NoError(t, err) + + dir := path.Join(tmpDir, "user-1", b1.String()) + outputFile, err := os.OpenFile(path.Join(dir, "index"), os.O_RDWR|os.O_TRUNC, 0755) + require.NoError(t, err) + + n, err := io.Copy(outputFile, indexFile) + require.NoError(t, err) + require.Equal(t, indexFileStat.Size(), n) + + cfg := prepareConfigForPartitioning() + cfg.SkipBlocksWithOutOfOrderChunksEnabled = true + c, tsdbCompac, tsdbPlanner, _, registry := prepareForPartitioning(t, cfg, bucketClient, nil, nil) + + tsdbCompac.On("CompactWithBlockPopulator", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(b1, nil) + + tsdbPlanner.On("Plan", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]*metadata.Meta{ + { + BlockMeta: tsdb.BlockMeta{ + ULID: b1, + MinTime: 10, + MaxTime: 20, + }, + }, + { + BlockMeta: tsdb.BlockMeta{ + ULID: b2, + MinTime: 20, + MaxTime: 30, + }, + }, + }, nil) + + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) + + defer services.StopAndAwaitTerminated(context.Background(), c) //nolint:errcheck + + // Wait until a run has completed. + cortex_testutil.Poll(t, 20*time.Second, true, func() interface{} { + if _, err := os.Stat(path.Join(dir, "no-compact-mark.json")); err == nil { + return true + } + return false + }) + + assert.NoError(t, prom_testutil.GatherAndCompare(registry, strings.NewReader(` + # HELP cortex_compactor_blocks_marked_for_no_compaction_total Total number of blocks marked for no compact during a compaction run. + # TYPE cortex_compactor_blocks_marked_for_no_compaction_total counter + cortex_compactor_blocks_marked_for_no_compaction_total 1 + `), "cortex_compactor_blocks_marked_for_no_compaction_total")) +} + +func TestPartitionCompactor_ShouldCompactAllUsersOnShardingEnabledButOnlyOneInstanceRunning(t *testing.T) { + t.Parallel() + + partitionedGroupID1 := getPartitionedGroupID("user-1") + partitionedGroupID2 := getPartitionedGroupID("user-2") + // Mock the bucket to contain two users, each one with one block. 
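+ // Only a single compactor instance joins the ring in this test, so it is expected to own and
+ // compact the blocks of both tenants.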
+ bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("", []string{"user-1", "user-2"}, nil) + bucketClient.MockIter("__markers__", []string{}, nil) + bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath("user-1"), false, nil) + bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath("user-1"), false, nil) + bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath("user-2"), false, nil) + bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath("user-2"), false, nil) + bucketClient.MockIter("user-1/", []string{"user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", "user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/meta.json"}, nil) + bucketClient.MockIter("user-2/", []string{"user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json", "user-2/01FN3V83ABR9992RF8WRJZ76ZQ/meta.json"}, nil) + bucketClient.MockIter("user-1/markers/", nil, nil) + bucketClient.MockGet("user-1/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockIter("user-2/markers/", nil, nil) + bucketClient.MockGet("user-2/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-2/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-2/markers/cleaner-visit-marker.json", nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/deletion-mark.json", "", nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/no-compact-mark.json", "", nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/partition-0-visit-mark.json", "", nil) + bucketClient.MockUpload("user-1/01DTVP434PA9VFXSW2JKB3392D/partition-0-visit-mark.json", nil) + bucketClient.MockGet("user-1/bucket-index-sync-status.json", "", nil) + bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/meta.json", mockBlockMetaJSON("01FN6CDF3PNEWWRY5MPGJPE3EX"), nil) + bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/deletion-mark.json", "", nil) + bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/no-compact-mark.json", "", nil) + bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/partition-0-visit-mark.json", "", nil) + bucketClient.MockUpload("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/partition-0-visit-mark.json", nil) + bucketClient.MockGet("user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json", mockBlockMetaJSON("01DTW0ZCPDDNV4BV83Q2SV4QAZ"), nil) + bucketClient.MockGet("user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/deletion-mark.json", "", nil) + bucketClient.MockGet("user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/no-compact-mark.json", "", nil) + bucketClient.MockGet("user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/partition-0-visit-mark.json", "", nil) + bucketClient.MockUpload("user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/partition-0-visit-mark.json", nil) + bucketClient.MockGet("user-2/01FN3V83ABR9992RF8WRJZ76ZQ/meta.json", mockBlockMetaJSON("01FN3V83ABR9992RF8WRJZ76ZQ"), nil) + bucketClient.MockGet("user-2/01FN3V83ABR9992RF8WRJZ76ZQ/deletion-mark.json", "", nil) + bucketClient.MockGet("user-2/01FN3V83ABR9992RF8WRJZ76ZQ/no-compact-mark.json", "", nil) + bucketClient.MockGet("user-2/01FN3V83ABR9992RF8WRJZ76ZQ/partition-0-visit-mark.json", "", nil) + bucketClient.MockUpload("user-2/01FN3V83ABR9992RF8WRJZ76ZQ/partition-0-visit-mark.json", nil) + bucketClient.MockGet("user-1/bucket-index.json.gz", "", nil) + bucketClient.MockGet("user-2/bucket-index.json.gz", "", nil) + 
bucketClient.MockGet("user-1/bucket-index-sync-status.json", "", nil) + bucketClient.MockGet("user-2/bucket-index-sync-status.json", "", nil) + bucketClient.MockUpload("user-1/bucket-index.json.gz", nil) + bucketClient.MockUpload("user-2/bucket-index.json.gz", nil) + bucketClient.MockUpload("user-1/bucket-index-sync-status.json", nil) + bucketClient.MockUpload("user-2/bucket-index-sync-status.json", nil) + bucketClient.MockGet("user-1/partitioned-groups/"+partitionedGroupID1+".json", "", nil) + bucketClient.MockUpload("user-1/partitioned-groups/"+partitionedGroupID1+".json", nil) + bucketClient.MockGet("user-2/partitioned-groups/"+partitionedGroupID2+".json", "", nil) + bucketClient.MockUpload("user-2/partitioned-groups/"+partitionedGroupID2+".json", nil) + bucketClient.MockIter("user-1/"+PartitionedGroupDirectory, nil, nil) + bucketClient.MockIter("user-2/"+PartitionedGroupDirectory, nil, nil) + + ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + cfg := prepareConfigForPartitioning() + cfg.ShardingEnabled = true + cfg.ShardingRing.InstanceID = "compactor-1" + cfg.ShardingRing.InstanceAddr = "1.2.3.4" + cfg.ShardingRing.KVStore.Mock = ringStore + + c, _, tsdbPlanner, logs, _ := prepareForPartitioning(t, cfg, bucketClient, nil, nil) + + // Mock the planner as if there's no compaction to do, + // in order to simplify tests (all in all, we just want to + // test our logic and not TSDB compactor which we expect to + // be already tested). + tsdbPlanner.On("Plan", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]*metadata.Meta{}, nil) + + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) + + // Wait until a run has completed. + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) + }) + + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) + + // Ensure a plan has been executed for the blocks of each user. 
+ tsdbPlanner.AssertNumberOfCalls(t, "Plan", 2) + + assert.ElementsMatch(t, []string{ + `level=info component=compactor msg="waiting until compactor is ACTIVE in the ring"`, + `level=info component=compactor msg="compactor is ACTIVE in the ring"`, + `level=info component=compactor msg="compactor started"`, + `level=info component=compactor msg="discovering users from bucket"`, + `level=info component=compactor msg="discovered users from bucket" users=2`, + `level=info component=compactor msg="starting compaction of user blocks" user=user-1`, + `level=info component=compactor org_id=user-1 msg="start sync of metas"`, + `level=info component=compactor org_id=user-1 msg="start of GC"`, + `level=info component=compactor org_id=user-1 msg="start of compactions"`, + `level=info component=compactor org_id=user-1 msg="compaction iterations done"`, + `level=info component=compactor msg="successfully compacted user blocks" user=user-1`, + `level=info component=compactor msg="starting compaction of user blocks" user=user-2`, + `level=info component=compactor org_id=user-2 msg="start sync of metas"`, + `level=info component=compactor org_id=user-2 msg="start of GC"`, + `level=info component=compactor org_id=user-2 msg="start of compactions"`, + `level=info component=compactor org_id=user-2 msg="compaction iterations done"`, + `level=info component=compactor msg="successfully compacted user blocks" user=user-2`, + }, removeIgnoredLogs(strings.Split(strings.TrimSpace(logs.String()), "\n"))) +} + +func TestPartitionCompactor_ShouldCompactOnlyUsersOwnedByTheInstanceOnShardingEnabledAndMultipleInstancesRunning(t *testing.T) { + + numUsers := 100 + + // Setup user IDs + userIDs := make([]string, 0, numUsers) + for i := 1; i <= numUsers; i++ { + userIDs = append(userIDs, fmt.Sprintf("user-%d", i)) + } + + // Mock the bucket to contain all users, each one with one block. 
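+ // Two compactor instances share the same ring below; each tenant is expected to be compacted
+ // by exactly one of them.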
+ bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("", userIDs, nil) + bucketClient.MockIter("__markers__", []string{}, nil) + for _, userID := range userIDs { + partitionedGroupID := getPartitionedGroupID(userID) + bucketClient.MockIter(userID+"/", []string{userID + "/01DTVP434PA9VFXSW2JKB3392D"}, nil) + bucketClient.MockIter(userID+"/markers/", nil, nil) + bucketClient.MockGet(userID+"/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload(userID+"/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete(userID+"/markers/cleaner-visit-marker.json", nil) + bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath(userID), false, nil) + bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath(userID), false, nil) + bucketClient.MockGet(userID+"/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) + bucketClient.MockGet(userID+"/01DTVP434PA9VFXSW2JKB3392D/deletion-mark.json", "", nil) + bucketClient.MockGet(userID+"/01DTVP434PA9VFXSW2JKB3392D/no-compact-mark.json", "", nil) + bucketClient.MockGet(userID+"/01DTVP434PA9VFXSW2JKB3392D/partition-0-visit-mark.json", "", nil) + bucketClient.MockGet(userID+"/bucket-index-sync-status.json", "", nil) + bucketClient.MockGet(userID+"/bucket-index.json.gz", "", nil) + bucketClient.MockUpload(userID+"/bucket-index.json.gz", nil) + bucketClient.MockUpload(userID+"/bucket-index-sync-status.json", nil) + bucketClient.MockGet(userID+"/partitioned-groups/"+partitionedGroupID+".json", "", nil) + bucketClient.MockUpload(userID+"/partitioned-groups/"+partitionedGroupID+".json", nil) + bucketClient.MockIter(userID+"/"+PartitionedGroupDirectory, nil, nil) + } + + // Create a shared KV Store + kvstore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + // Create two compactors + var compactors []*Compactor + var logs []*concurrency.SyncBuffer + + for i := 1; i <= 2; i++ { + cfg := prepareConfigForPartitioning() + cfg.ShardingEnabled = true + cfg.ShardingRing.InstanceID = fmt.Sprintf("compactor-%d", i) + cfg.ShardingRing.InstanceAddr = fmt.Sprintf("127.0.0.%d", i) + cfg.ShardingRing.WaitStabilityMinDuration = 3 * time.Second + cfg.ShardingRing.WaitStabilityMaxDuration = 10 * time.Second + cfg.ShardingRing.KVStore.Mock = kvstore + + c, _, tsdbPlanner, l, _ := prepareForPartitioning(t, cfg, bucketClient, nil, nil) + defer services.StopAndAwaitTerminated(context.Background(), c) //nolint:errcheck + + compactors = append(compactors, c) + logs = append(logs, l) + + // Mock the planner as if there's no compaction to do, + // in order to simplify tests (all in all, we just want to + // test our logic and not TSDB compactor which we expect to + // be already tested). 
+ tsdbPlanner.On("Plan", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]*metadata.Meta{}, nil) + } + + // Start all compactors + for _, c := range compactors { + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) + } + + // Wait until a run has been completed on each compactor + for _, c := range compactors { + cortex_testutil.Poll(t, 120*time.Second, true, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) > 1 + }) + } + + // Ensure that each user has been compacted by the correct instance + for _, userID := range userIDs { + _, l, err := findCompactorByUserID(compactors, logs, userID) + require.NoError(t, err) + assert.Contains(t, l.String(), fmt.Sprintf(`level=info component=compactor msg="successfully compacted user blocks" user=%s`, userID)) + } +} + +func TestPartitionCompactor_ShouldCompactOnlyShardsOwnedByTheInstanceOnShardingEnabledWithShuffleShardingAndMultipleInstancesRunning(t *testing.T) { + t.Parallel() + + numUsers := 3 + + // Setup user IDs + userIDs := make([]string, 0, numUsers) + for i := 1; i <= numUsers; i++ { + userIDs = append(userIDs, fmt.Sprintf("user-%d", i)) + } + + startTime := int64(1574776800000) + // Define blocks mapping block IDs to start and end times + blocks := map[string]map[string]int64{ + "01DTVP434PA9VFXSW2JKB3392D": { + "startTime": startTime, + "endTime": startTime + time.Hour.Milliseconds()*2, + }, + "01DTVP434PA9VFXSW2JKB3392E": { + "startTime": startTime, + "endTime": startTime + time.Hour.Milliseconds()*2, + }, + "01DTVP434PA9VFXSW2JKB3392F": { + "startTime": startTime + time.Hour.Milliseconds()*2, + "endTime": startTime + time.Hour.Milliseconds()*4, + }, + "01DTVP434PA9VFXSW2JKB3392G": { + "startTime": startTime + time.Hour.Milliseconds()*2, + "endTime": startTime + time.Hour.Milliseconds()*4, + }, + // Add another new block as the final block so that the previous groups will be planned for compaction + "01DTVP434PA9VFXSW2JKB3392H": { + "startTime": startTime + time.Hour.Milliseconds()*4, + "endTime": startTime + time.Hour.Milliseconds()*6, + }, + } + + // Mock the bucket to contain all users, each one with five blocks, 2 sets of overlapping blocks and 1 separate block. 
+ bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("", userIDs, nil) + bucketClient.MockIter("__markers__", []string{}, nil) + + // Keys with a value greater than 1 will be groups that should be compacted + groupHashes := make(map[uint32]int) + for _, userID := range userIDs { + blockFiles := []string{} + + for blockID, blockTimes := range blocks { + groupHash := HashGroup(userID, blockTimes["startTime"], blockTimes["endTime"]) + partitionVisitMarker := PartitionVisitMarker{ + CompactorID: "test-compactor", + VisitTime: time.Now().Unix(), + PartitionedGroupID: groupHash, + PartitionID: 0, + Status: Pending, + Version: PartitionVisitMarkerVersion1, + } + visitMarkerFileContent, _ := json.Marshal(partitionVisitMarker) + bucketClient.MockGet(userID+"/bucket-index-sync-status.json", "", nil) + bucketClient.MockGet(userID+"/"+blockID+"/meta.json", mockBlockMetaJSONWithTime(blockID, userID, blockTimes["startTime"], blockTimes["endTime"]), nil) + bucketClient.MockGet(userID+"/"+blockID+"/deletion-mark.json", "", nil) + bucketClient.MockGet(userID+"/"+blockID+"/no-compact-mark.json", "", nil) + bucketClient.MockGet(userID+"/"+blockID+"/partition-0-visit-mark.json", "", nil) + bucketClient.MockGet(userID+"/partitioned-groups/visit-marks/"+fmt.Sprint(groupHash)+"/partition-0-visit-mark.json", string(visitMarkerFileContent), nil) + bucketClient.MockGetRequireUpload(userID+"/partitioned-groups/visit-marks/"+fmt.Sprint(groupHash)+"/partition-0-visit-mark.json", string(visitMarkerFileContent), nil) + bucketClient.MockUpload(userID+"/partitioned-groups/visit-marks/"+fmt.Sprint(groupHash)+"/partition-0-visit-mark.json", nil) + // Iter with recursive so expected to get objects rather than directories. + blockFiles = append(blockFiles, path.Join(userID, blockID, block.MetaFilename)) + + // Get all of the unique group hashes so that they can be used to ensure all groups were compacted + groupHashes[groupHash]++ + bucketClient.MockGet(userID+"/partitioned-groups/"+fmt.Sprint(groupHash)+".json", "", nil) + bucketClient.MockUpload(userID+"/partitioned-groups/"+fmt.Sprint(groupHash)+".json", nil) + } + + bucketClient.MockIter(userID+"/", blockFiles, nil) + bucketClient.MockIter(userID+"/markers/", nil, nil) + bucketClient.MockGet(userID+"/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload(userID+"/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete(userID+"/markers/cleaner-visit-marker.json", nil) + bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath(userID), false, nil) + bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath(userID), false, nil) + bucketClient.MockGet(userID+"/bucket-index.json.gz", "", nil) + bucketClient.MockUpload(userID+"/bucket-index.json.gz", nil) + bucketClient.MockUpload(userID+"/bucket-index-sync-status.json", nil) + bucketClient.MockIter(userID+"/"+PartitionedGroupDirectory, nil, nil) + } + + // Create a shared KV Store + kvstore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + // Create four compactors + var compactors []*Compactor + var logs []*concurrency.SyncBuffer + + for i := 1; i <= 4; i++ { + cfg := prepareConfigForPartitioning() + cfg.ShardingEnabled = true + cfg.CompactionInterval = 15 * time.Second + cfg.ShardingStrategy = util.ShardingStrategyShuffle + cfg.ShardingRing.InstanceID = fmt.Sprintf("compactor-%d", i) + cfg.ShardingRing.InstanceAddr = fmt.Sprintf("127.0.0.%d", i) + cfg.ShardingRing.WaitStabilityMinDuration = 3 
* time.Second + cfg.ShardingRing.WaitStabilityMaxDuration = 10 * time.Second + cfg.ShardingRing.KVStore.Mock = kvstore + + limits := &validation.Limits{} + flagext.DefaultValues(limits) + limits.CompactorTenantShardSize = 3 + + c, _, tsdbPlanner, l, _ := prepareForPartitioning(t, cfg, bucketClient, limits, nil) + defer services.StopAndAwaitTerminated(context.Background(), c) //nolint:errcheck + + compactors = append(compactors, c) + logs = append(logs, l) + + // Mock the planner as if there's no compaction to do, + // in order to simplify tests (all in all, we just want to + // test our logic and not TSDB compactor which we expect to + // be already tested). + tsdbPlanner.On("Plan", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]*metadata.Meta{}, nil) + } + + // Start all compactors + for _, c := range compactors { + require.NoError(t, c.StartAsync(context.Background())) + } + // Wait for all the compactors to get into the Running state without errors. + // Cannot use StartAndAwaitRunning as this would cause the compactions to start before + // all the compactors are initialized + for _, c := range compactors { + require.NoError(t, c.AwaitRunning(context.Background())) + } + + // Wait until a run has been completed on each compactor + for _, c := range compactors { + cortex_testutil.Poll(t, 60*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) + }) + } + + // Ensure that each group was only compacted by exactly one compactor + for groupHash, blockCount := range groupHashes { + + l, found, err := checkLogsForPartitionCompaction(compactors, logs, groupHash) + require.NoError(t, err) + + // If the blockCount < 2 then the group shouldn't have been compacted, therefore not found in the logs + if blockCount < 2 { + assert.False(t, found) + } else { + assert.True(t, found) + assert.Contains(t, l.String(), fmt.Sprintf(`group_hash=%d msg="found compactable group for user"`, groupHash)) + } + } +} + +// checkLogsForPartitionCompaction checks the logs to see if a compaction has happened on the groupHash, +// if there has been a compaction it will return the logs of the compactor that handled the group +// and will return true. Otherwise this function will return a nil value for the logs and false +// as the group was not compacted +func checkLogsForPartitionCompaction(compactors []*Compactor, logs []*concurrency.SyncBuffer, groupHash uint32) (*concurrency.SyncBuffer, bool, error) { + var log *concurrency.SyncBuffer + + for _, l := range logs { + owned := strings.Contains(l.String(), fmt.Sprintf(`group_hash=%d msg="found compactable group for user"`, groupHash)) + if owned { + log = l + } + } + + // Return false if we've not been able to find it + if log == nil { + return nil, false, nil + } + + return log, true, nil +} + +func prepareConfigForPartitioning() Config { + compactorCfg := prepareConfig() + + compactorCfg.CompactionMode = util.CompactionModePartitioning + + return compactorCfg +} + +func prepareForPartitioning(t *testing.T, compactorCfg Config, bucketClient objstore.InstrumentedBucket, limits *validation.Limits, tsdbGrouper *tsdbGrouperMock) (*Compactor, *tsdbCompactorMock, *tsdbPlannerMock, *concurrency.SyncBuffer, prometheus.Gatherer) { + storageCfg := cortex_tsdb.BlocksStorageConfig{} + flagext.DefaultValues(&storageCfg) + storageCfg.BucketStore.BlockDiscoveryStrategy = string(cortex_tsdb.RecursiveDiscovery) + + // Create a temporary directory for compactor data. 
+ compactorCfg.DataDir = t.TempDir() + + tsdbCompactor := &tsdbCompactorMock{} + tsdbPlanner := &tsdbPlannerMock{ + noCompactMarkFilters: []*compact.GatherNoCompactionMarkFilter{}, + } + logs := &concurrency.SyncBuffer{} + logger := log.NewLogfmtLogger(logs) + registry := prometheus.NewRegistry() + + if limits == nil { + limits = &validation.Limits{} + flagext.DefaultValues(limits) + } + + overrides, err := validation.NewOverrides(*limits, nil) + require.NoError(t, err) + + bucketClientFactory := func(ctx context.Context) (objstore.InstrumentedBucket, error) { + return bucketClient, nil + } + + blocksCompactorFactory := func(ctx context.Context, cfg Config, logger log.Logger, reg prometheus.Registerer) (compact.Compactor, PlannerFactory, error) { + return tsdbCompactor, + func(ctx context.Context, bkt objstore.InstrumentedBucket, _ log.Logger, _ Config, noCompactMarkFilter *compact.GatherNoCompactionMarkFilter, ringLifecycle *ring.Lifecycler, _ string, _ prometheus.Counter, _ prometheus.Counter, _ prometheus.Counter, _ *compactorMetrics) compact.Planner { + tsdbPlanner.noCompactMarkFilters = append(tsdbPlanner.noCompactMarkFilters, noCompactMarkFilter) + return tsdbPlanner + }, + nil + } + + var blocksGrouperFactory BlocksGrouperFactory + if tsdbGrouper != nil { + blocksGrouperFactory = func(_ context.Context, _ Config, _ objstore.InstrumentedBucket, _ log.Logger, _ prometheus.Counter, _ prometheus.Counter, _ prometheus.Counter, _ prometheus.Counter, _ prometheus.Counter, _ *compact.SyncerMetrics, _ *compactorMetrics, _ *ring.Ring, _ *ring.Lifecycler, _ Limits, _ string, _ *compact.GatherNoCompactionMarkFilter) compact.Grouper { + return tsdbGrouper + } + } else { + if compactorCfg.ShardingStrategy == util.ShardingStrategyShuffle { + blocksGrouperFactory = ShuffleShardingGrouperFactory + } else { + blocksGrouperFactory = DefaultBlocksGrouperFactory + } + } + + var blockDeletableCheckerFactory BlockDeletableCheckerFactory + if compactorCfg.ShardingStrategy == util.ShardingStrategyShuffle { + blockDeletableCheckerFactory = PartitionCompactionBlockDeletableCheckerFactory + } else { + blockDeletableCheckerFactory = DefaultBlockDeletableCheckerFactory + } + + var compactionLifecycleCallbackFactory CompactionLifecycleCallbackFactory + if compactorCfg.ShardingStrategy == util.ShardingStrategyShuffle { + compactionLifecycleCallbackFactory = ShardedCompactionLifecycleCallbackFactory + } else { + compactionLifecycleCallbackFactory = DefaultCompactionLifecycleCallbackFactory + } + + c, err := newCompactor(compactorCfg, storageCfg, logger, registry, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, blockDeletableCheckerFactory, compactionLifecycleCallbackFactory, overrides) + require.NoError(t, err) + + return c, tsdbCompactor, tsdbPlanner, logs, registry +} + +type tsdbGrouperMock struct { + mock.Mock +} + +func (m *tsdbGrouperMock) Groups(blocks map[ulid.ULID]*metadata.Meta) (res []*compact.Group, err error) { + args := m.Called(blocks) + return args.Get(0).([]*compact.Group), args.Error(1) +} + +var ( + BlockMinTime = int64(1574776800000) + BlockMaxTime = int64(1574784000000) +) + +func getPartitionedGroupID(userID string) string { + return fmt.Sprint(HashGroup(userID, BlockMinTime, BlockMaxTime)) +} + +func mockBlockGroup(userID string, ids []string, bkt *bucket.ClientMock) *compact.Group { + dummyCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + group, _ := compact.NewGroup( + log.NewNopLogger(), + bkt, + getPartitionedGroupID(userID), + nil, + 0, + true, + true, + 
dummyCounter, + dummyCounter, + dummyCounter, + dummyCounter, + dummyCounter, + dummyCounter, + dummyCounter, + dummyCounter, + metadata.NoneFunc, + 1, + 1, + ) + for _, id := range ids { + meta := mockBlockMeta(id) + err := group.AppendMeta(&metadata.Meta{ + BlockMeta: meta, + }) + if err != nil { + continue + } + } + return group +} + +func TestPartitionCompactor_DeleteLocalSyncFiles(t *testing.T) { + numUsers := 10 + + // Setup user IDs + userIDs := make([]string, 0, numUsers) + for i := 1; i <= numUsers; i++ { + userIDs = append(userIDs, fmt.Sprintf("user-%d", i)) + } + + inmem := objstore.WithNoopInstr(objstore.NewInMemBucket()) + for _, userID := range userIDs { + id, err := ulid.New(ulid.Now(), rand.Reader) + require.NoError(t, err) + require.NoError(t, inmem.Upload(context.Background(), userID+"/"+id.String()+"/meta.json", strings.NewReader(mockBlockMetaJSON(id.String())))) + } + + // Create a shared KV Store + kvstore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + // Create two compactors + var compactors []*Compactor + + for i := 1; i <= 2; i++ { + cfg := prepareConfigForPartitioning() + + cfg.ShardingEnabled = true + cfg.ShardingRing.InstanceID = fmt.Sprintf("compactor-%d", i) + cfg.ShardingRing.InstanceAddr = fmt.Sprintf("127.0.0.%d", i) + cfg.ShardingRing.WaitStabilityMinDuration = 3 * time.Second + cfg.ShardingRing.WaitStabilityMaxDuration = 10 * time.Second + cfg.ShardingRing.KVStore.Mock = kvstore + + // Each compactor will get its own temp dir for storing local files. + c, _, tsdbPlanner, _, _ := prepareForPartitioning(t, cfg, inmem, nil, nil) + t.Cleanup(func() { + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) + }) + + compactors = append(compactors, c) + + // Mock the planner as if there's no compaction to do, + // in order to simplify tests (all in all, we just want to + // test our logic and not TSDB compactor which we expect to + // be already tested). + tsdbPlanner.On("Plan", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]*metadata.Meta{}, nil) + } + + require.Equal(t, 2, len(compactors)) + c1 := compactors[0] + c2 := compactors[1] + + // Start first compactor + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c1)) + + // Wait until a run has been completed on first compactor. This happens as soon as compactor starts. + cortex_testutil.Poll(t, 20*time.Second, true, func() interface{} { + return prom_testutil.ToFloat64(c1.CompactionRunsCompleted) > 1 + }) + + require.NoError(t, os.Mkdir(c1.metaSyncDirForUser("new-user"), 0600)) + + // Verify that first compactor has synced all the users, plus there is one extra we have just created. + require.Equal(t, numUsers+1, len(c1.listTenantsWithMetaSyncDirectories())) + + // Now start second compactor, and wait until it runs compaction. + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c2)) + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c2.CompactionRunsCompleted) + }) + + // Let's check how many users second compactor has. + c2Users := len(c2.listTenantsWithMetaSyncDirectories()) + require.NotZero(t, c2Users) + + // Force new compaction cycle on first compactor. It will run the cleanup of un-owned users at the end of compaction cycle. 
+ c1.compactUsers(context.Background()) + c1Users := len(c1.listTenantsWithMetaSyncDirectories()) + + // Now compactor 1 should have cleaned old sync files. + require.NotEqual(t, numUsers, c1Users) + require.Equal(t, numUsers, c1Users+c2Users) +} + +func TestPartitionCompactor_ShouldFailCompactionOnTimeout(t *testing.T) { + t.Parallel() + + // Mock the bucket + bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("", []string{}, nil) + bucketClient.MockIter("__markers__", []string{}, nil) + + ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + cfg := prepareConfigForPartitioning() + cfg.ShardingEnabled = true + cfg.ShardingRing.InstanceID = "compactor-1" + cfg.ShardingRing.InstanceAddr = "1.2.3.4" + cfg.ShardingRing.KVStore.Mock = ringStore + + // Set ObservePeriod to longer than the timeout period to mock a timeout while waiting on ring to become ACTIVE + cfg.ShardingRing.ObservePeriod = time.Second * 10 + + c, _, _, logs, _ := prepareForPartitioning(t, cfg, bucketClient, nil, nil) + + // Try to start the compactor with a bad consul kv-store. The + err := services.StartAndAwaitRunning(context.Background(), c) + + // Assert that the compactor timed out + assert.Equal(t, context.DeadlineExceeded, err) + + assert.ElementsMatch(t, []string{ + `level=info component=compactor msg="compactor started"`, + `level=info component=compactor msg="waiting until compactor is ACTIVE in the ring"`, + `level=error component=compactor msg="compactor failed to become ACTIVE in the ring" err="context deadline exceeded"`, + }, removeIgnoredLogs(strings.Split(strings.TrimSpace(logs.String()), "\n"))) +} + +func TestPartitionCompactor_ShouldNotHangIfPlannerReturnNothing(t *testing.T) { + t.Parallel() + + ss := bucketindex.Status{Status: bucketindex.CustomerManagedKeyError, Version: bucketindex.SyncStatusFileVersion} + content, err := json.Marshal(ss) + require.NoError(t, err) + + partitionedGroupID := getPartitionedGroupID("user-1") + bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("__markers__", []string{}, nil) + bucketClient.MockIter("", []string{"user-1"}, nil) + bucketClient.MockIter("user-1/", []string{"user-1/01DTVP434PA9VFXSW2JKB3392D", "user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ"}, nil) + bucketClient.MockIter("user-1/markers/", nil, nil) + bucketClient.MockGet("user-1/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath("user-1"), false, nil) + bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath("user-1"), false, nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/deletion-mark.json", "", nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/no-compact-mark.json", "", nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/partition-0-visit-mark.json", "", nil) + bucketClient.MockGet("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json", mockBlockMetaJSON("01DTW0ZCPDDNV4BV83Q2SV4QAZ"), nil) + bucketClient.MockGet("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/deletion-mark.json", "", nil) + bucketClient.MockGet("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/no-compact-mark.json", "", nil) + 
bucketClient.MockGet("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/partition-0-visit-mark.json", "", nil) + bucketClient.MockGet("user-1/bucket-index.json.gz", "", nil) + bucketClient.MockGet("user-1/bucket-index-sync-status.json", string(content), nil) + bucketClient.MockUpload("user-1/bucket-index.json.gz", nil) + bucketClient.MockUpload("user-1/bucket-index-sync-status.json", nil) + bucketClient.MockIter("user-1/"+PartitionedGroupDirectory, nil, nil) + bucketClient.MockGet("user-1/partitioned-groups/visit-marks/"+string(partitionedGroupID)+"/partition-0-visit-mark.json", "", nil) + + ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + cfg := prepareConfigForPartitioning() + cfg.ShardingEnabled = true + cfg.ShardingRing.InstanceID = "compactor-1" + cfg.ShardingRing.InstanceAddr = "1.2.3.4" + cfg.ShardingRing.KVStore.Mock = ringStore + + tsdbGrouper := tsdbGrouperMock{} + mockGroups := []*compact.Group{mockBlockGroup("user-1", []string{"01DTVP434PA9VFXSW2JKB3392D", "01DTW0ZCPDDNV4BV83Q2SV4QAZ"}, bucketClient)} + tsdbGrouper.On("Groups", mock.Anything).Return(mockGroups, nil) + + c, _, tsdbPlanner, _, _ := prepareForPartitioning(t, cfg, bucketClient, nil, &tsdbGrouper) + tsdbPlanner.On("Plan", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]*metadata.Meta{}, nil) + + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) + + // Wait until a run has completed. + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) + }) + + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) +} + +func TestPartitionCompactor_ShouldNotFailCompactionIfAccessDeniedErrDuringMetaSync(t *testing.T) { + t.Parallel() + + ss := bucketindex.Status{Status: bucketindex.Ok, Version: bucketindex.SyncStatusFileVersion} + content, err := json.Marshal(ss) + require.NoError(t, err) + + partitionedGroupID := getPartitionedGroupID("user-1") + bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("__markers__", []string{}, nil) + bucketClient.MockIter("", []string{"user-1"}, nil) + bucketClient.MockIter("user-1/", []string{"user-1/01DTVP434PA9VFXSW2JKB3392D", "user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ", "user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", "user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json"}, nil) + bucketClient.MockIter("user-1/markers/", nil, nil) + bucketClient.MockGet("user-1/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath("user-1"), false, nil) + bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath("user-1"), false, nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), bucket.ErrKeyPermissionDenied) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/deletion-mark.json", "", bucket.ErrKeyPermissionDenied) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/no-compact-mark.json", "", bucket.ErrKeyPermissionDenied) + bucketClient.MockGet("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json", mockBlockMetaJSON("01DTW0ZCPDDNV4BV83Q2SV4QAZ"), bucket.ErrKeyPermissionDenied) + bucketClient.MockGet("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/deletion-mark.json", "", bucket.ErrKeyPermissionDenied) + 
bucketClient.MockGet("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/no-compact-mark.json", "", bucket.ErrKeyPermissionDenied) + bucketClient.MockGet("user-1/bucket-index.json.gz", "", nil) + bucketClient.MockGet("user-1/bucket-index-sync-status.json", string(content), nil) + bucketClient.MockUpload("user-1/bucket-index.json.gz", nil) + bucketClient.MockUpload("user-1/bucket-index-sync-status.json", nil) + bucketClient.MockIter("user-1/"+PartitionedGroupDirectory, nil, nil) + bucketClient.MockGet("user-1/partitioned-groups/visit-marks/"+string(partitionedGroupID)+"/partition-0-visit-mark.json", "", nil) + + ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + cfg := prepareConfigForPartitioning() + cfg.ShardingEnabled = true + cfg.ShardingRing.InstanceID = "compactor-1" + cfg.ShardingRing.InstanceAddr = "1.2.3.4" + cfg.ShardingRing.KVStore.Mock = ringStore + + c, _, tsdbPlanner, _, _ := prepareForPartitioning(t, cfg, bucketClient, nil, nil) + tsdbPlanner.On("Plan", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]*metadata.Meta{}, nil) + + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) + + // Wait until a run has completed. + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) + }) + + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) +} + +func TestPartitionCompactor_ShouldNotFailCompactionIfAccessDeniedErrReturnedFromBucket(t *testing.T) { + t.Parallel() + + ss := bucketindex.Status{Status: bucketindex.Ok, Version: bucketindex.SyncStatusFileVersion} + content, err := json.Marshal(ss) + require.NoError(t, err) + + partitionedGroupID := getPartitionedGroupID("user-1") + bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("__markers__", []string{}, nil) + bucketClient.MockIter("", []string{"user-1"}, nil) + bucketClient.MockIter("user-1/", []string{"user-1/01DTVP434PA9VFXSW2JKB3392D", "user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ", "user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", "user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json"}, nil) + bucketClient.MockIter("user-1/markers/", nil, nil) + bucketClient.MockGet("user-1/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath("user-1"), false, nil) + bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath("user-1"), false, nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/deletion-mark.json", "", nil) + bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/no-compact-mark.json", "", nil) + bucketClient.MockGet("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json", mockBlockMetaJSON("01DTW0ZCPDDNV4BV83Q2SV4QAZ"), nil) + bucketClient.MockGet("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/deletion-mark.json", "", nil) + bucketClient.MockGet("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/no-compact-mark.json", "", nil) + bucketClient.MockGet("user-1/bucket-index.json.gz", "", nil) + bucketClient.MockGet("user-1/bucket-index-sync-status.json", string(content), nil) + bucketClient.MockUpload("user-1/bucket-index.json.gz", nil) + bucketClient.MockUpload("user-1/bucket-index-sync-status.json", nil) + 
bucketClient.MockIter("user-1/"+PartitionedGroupDirectory, nil, nil) + bucketClient.MockGet("user-1/partitioned-groups/visit-marks/"+string(partitionedGroupID)+"/partition-0-visit-mark.json", "", nil) + + ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + cfg := prepareConfigForPartitioning() + cfg.ShardingEnabled = true + cfg.ShardingRing.InstanceID = "compactor-1" + cfg.ShardingRing.InstanceAddr = "1.2.3.4" + cfg.ShardingRing.KVStore.Mock = ringStore + + c, _, tsdbPlanner, _, _ := prepareForPartitioning(t, cfg, bucketClient, nil, nil) + tsdbPlanner.On("Plan", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]*metadata.Meta{}, bucket.ErrKeyPermissionDenied) + + require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) + + // Wait until a run has completed. + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) + }) + + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) +} diff --git a/pkg/compactor/compactor_test.go b/pkg/compactor/compactor_test.go index a886a86249..fbbd344245 100644 --- a/pkg/compactor/compactor_test.go +++ b/pkg/compactor/compactor_test.go @@ -173,6 +173,9 @@ func TestCompactor_SkipCompactionWhenCmkError(t *testing.T) { bucketClient.MockIter("__markers__", []string{}, nil) bucketClient.MockIter(userID+"/", []string{}, nil) bucketClient.MockIter(userID+"/markers/", nil, nil) + bucketClient.MockGet(userID+"/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload(userID+"/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete(userID+"/markers/cleaner-visit-marker.json", nil) bucketClient.MockGet(userID+"/bucket-index-sync-status.json", string(content), nil) bucketClient.MockGet(userID+"/bucket-index.json.gz", "", nil) bucketClient.MockUpload(userID+"/bucket-index-sync-status.json", nil) @@ -185,8 +188,8 @@ func TestCompactor_SkipCompactionWhenCmkError(t *testing.T) { require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) // Wait until a run has completed. - cortex_testutil.Poll(t, time.Second, 1.0, func() interface{} { - return prom_testutil.ToFloat64(c.compactionRunsCompleted) + cortex_testutil.Poll(t, 10*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) }) require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) @@ -205,17 +208,15 @@ func TestCompactor_ShouldDoNothingOnNoUserBlocks(t *testing.T) { require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) // Wait until a run has completed. 
- cortex_testutil.Poll(t, time.Second, 1.0, func() interface{} { - return prom_testutil.ToFloat64(c.compactionRunsCompleted) + cortex_testutil.Poll(t, 10*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) }) require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) - assert.Equal(t, prom_testutil.ToFloat64(c.compactionRunInterval), cfg.CompactionInterval.Seconds()) + assert.Equal(t, prom_testutil.ToFloat64(c.CompactionRunInterval), cfg.CompactionInterval.Seconds()) assert.ElementsMatch(t, []string{ - `level=info component=cleaner msg="started blocks cleanup and maintenance"`, - `level=info component=cleaner msg="successfully completed blocks cleanup and maintenance"`, `level=info component=compactor msg="compactor started"`, `level=info component=compactor msg="discovering users from bucket"`, `level=info component=compactor msg="discovered users from bucket" users=0`, @@ -234,62 +235,14 @@ func TestCompactor_ShouldDoNothingOnNoUserBlocks(t *testing.T) { # HELP cortex_compactor_runs_failed_total Total number of compaction runs failed. cortex_compactor_runs_failed_total 0 - # HELP cortex_compactor_garbage_collected_blocks_total Total number of blocks marked for deletion by compactor. - # TYPE cortex_compactor_garbage_collected_blocks_total counter - cortex_compactor_garbage_collected_blocks_total 0 - - # HELP cortex_compactor_garbage_collection_duration_seconds Time it took to perform garbage collection iteration. - # TYPE cortex_compactor_garbage_collection_duration_seconds histogram - cortex_compactor_garbage_collection_duration_seconds_bucket{le="+Inf"} 0 - cortex_compactor_garbage_collection_duration_seconds_sum 0 - cortex_compactor_garbage_collection_duration_seconds_count 0 - - # HELP cortex_compactor_garbage_collection_failures_total Total number of failed garbage collection operations. - # TYPE cortex_compactor_garbage_collection_failures_total counter - cortex_compactor_garbage_collection_failures_total 0 - - # HELP cortex_compactor_garbage_collection_total Total number of garbage collection operations. - # TYPE cortex_compactor_garbage_collection_total counter - cortex_compactor_garbage_collection_total 0 + # HELP cortex_compactor_blocks_marked_for_no_compaction_total Total number of blocks marked for no compact during a compaction run. + # TYPE cortex_compactor_blocks_marked_for_no_compaction_total counter + cortex_compactor_blocks_marked_for_no_compaction_total 0 # HELP cortex_compactor_meta_sync_consistency_delay_seconds Configured consistency delay in seconds. # TYPE cortex_compactor_meta_sync_consistency_delay_seconds gauge cortex_compactor_meta_sync_consistency_delay_seconds 0 - # HELP cortex_compactor_meta_sync_duration_seconds Duration of the blocks metadata synchronization in seconds. - # TYPE cortex_compactor_meta_sync_duration_seconds histogram - cortex_compactor_meta_sync_duration_seconds_bucket{le="+Inf"} 0 - cortex_compactor_meta_sync_duration_seconds_sum 0 - cortex_compactor_meta_sync_duration_seconds_count 0 - - # HELP cortex_compactor_meta_sync_failures_total Total blocks metadata synchronization failures. - # TYPE cortex_compactor_meta_sync_failures_total counter - cortex_compactor_meta_sync_failures_total 0 - - # HELP cortex_compactor_meta_syncs_total Total blocks metadata synchronization attempts. 
- # TYPE cortex_compactor_meta_syncs_total counter - cortex_compactor_meta_syncs_total 0 - - # HELP cortex_compactor_group_compaction_runs_completed_total Total number of group completed compaction runs. This also includes compactor group runs that resulted with no compaction. - # TYPE cortex_compactor_group_compaction_runs_completed_total counter - cortex_compactor_group_compaction_runs_completed_total 0 - - # HELP cortex_compactor_group_compaction_runs_started_total Total number of group compaction attempts. - # TYPE cortex_compactor_group_compaction_runs_started_total counter - cortex_compactor_group_compaction_runs_started_total 0 - - # HELP cortex_compactor_group_compactions_failures_total Total number of failed group compactions. - # TYPE cortex_compactor_group_compactions_failures_total counter - cortex_compactor_group_compactions_failures_total 0 - - # HELP cortex_compactor_group_compactions_total Total number of group compaction attempts that resulted in a new block. - # TYPE cortex_compactor_group_compactions_total counter - cortex_compactor_group_compactions_total 0 - - # HELP cortex_compactor_group_vertical_compactions_total Total number of group compaction attempts that resulted in a new block based on overlapping blocks. - # TYPE cortex_compactor_group_vertical_compactions_total counter - cortex_compactor_group_vertical_compactions_total 0 - # TYPE cortex_compactor_block_cleanup_failures_total counter # HELP cortex_compactor_block_cleanup_failures_total Total number of blocks failed to be deleted. cortex_compactor_block_cleanup_failures_total 0 @@ -298,26 +251,15 @@ func TestCompactor_ShouldDoNothingOnNoUserBlocks(t *testing.T) { # TYPE cortex_compactor_blocks_cleaned_total counter cortex_compactor_blocks_cleaned_total 0 - # HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. - # TYPE cortex_compactor_blocks_marked_for_deletion_total counter - cortex_compactor_blocks_marked_for_deletion_total{reason="compaction"} 0 - cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 0 - - # HELP cortex_compactor_blocks_marked_for_no_compaction_total Total number of blocks marked for no compact during a compaction run. - # TYPE cortex_compactor_blocks_marked_for_no_compaction_total counter - cortex_compactor_blocks_marked_for_no_compaction_total 0 - # TYPE cortex_compactor_block_cleanup_started_total counter # HELP cortex_compactor_block_cleanup_started_total Total number of blocks cleanup runs started. - cortex_compactor_block_cleanup_started_total 1 + cortex_compactor_block_cleanup_started_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_started_total{tenant_status="deleted"} 1 # TYPE cortex_compactor_block_cleanup_completed_total counter # HELP cortex_compactor_block_cleanup_completed_total Total number of blocks cleanup runs successfully completed. - cortex_compactor_block_cleanup_completed_total 1 - - # TYPE cortex_compactor_block_cleanup_failed_total counter - # HELP cortex_compactor_block_cleanup_failed_total Total number of blocks cleanup runs failed. 
- cortex_compactor_block_cleanup_failed_total 0 + cortex_compactor_block_cleanup_completed_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_completed_total{tenant_status="deleted"} 1 `), "cortex_compactor_runs_started_total", "cortex_compactor_runs_completed_total", @@ -357,8 +299,8 @@ func TestCompactor_ShouldRetryCompactionOnFailureWhileDiscoveringUsersFromBucket require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) // Wait until all retry attempts have completed. - cortex_testutil.Poll(t, time.Second, 1.0, func() interface{} { - return prom_testutil.ToFloat64(c.compactionRunsFailed) + cortex_testutil.Poll(t, 10*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsFailed) }) require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) @@ -367,9 +309,8 @@ func TestCompactor_ShouldRetryCompactionOnFailureWhileDiscoveringUsersFromBucket bucketClient.AssertNumberOfCalls(t, "Iter", 1+3) assert.ElementsMatch(t, []string{ - `level=info component=cleaner msg="started blocks cleanup and maintenance"`, - `level=error component=cleaner msg="failed to run blocks cleanup and maintenance" err="failed to discover users from bucket: failed to iterate the bucket"`, `level=info component=compactor msg="compactor started"`, + `level=error component=cleaner msg="failed to scan users on startup" err="failed to discover users from bucket: failed to iterate the bucket"`, `level=info component=compactor msg="discovering users from bucket"`, `level=error component=compactor msg="failed to discover users from bucket" err="failed to iterate the bucket"`, }, removeIgnoredLogs(strings.Split(strings.TrimSpace(logs.String()), "\n"))) @@ -387,62 +328,10 @@ func TestCompactor_ShouldRetryCompactionOnFailureWhileDiscoveringUsersFromBucket # HELP cortex_compactor_runs_failed_total Total number of compaction runs failed. cortex_compactor_runs_failed_total 1 - # HELP cortex_compactor_garbage_collected_blocks_total Total number of blocks marked for deletion by compactor. - # TYPE cortex_compactor_garbage_collected_blocks_total counter - cortex_compactor_garbage_collected_blocks_total 0 - - # HELP cortex_compactor_garbage_collection_duration_seconds Time it took to perform garbage collection iteration. - # TYPE cortex_compactor_garbage_collection_duration_seconds histogram - cortex_compactor_garbage_collection_duration_seconds_bucket{le="+Inf"} 0 - cortex_compactor_garbage_collection_duration_seconds_sum 0 - cortex_compactor_garbage_collection_duration_seconds_count 0 - - # HELP cortex_compactor_garbage_collection_failures_total Total number of failed garbage collection operations. - # TYPE cortex_compactor_garbage_collection_failures_total counter - cortex_compactor_garbage_collection_failures_total 0 - - # HELP cortex_compactor_garbage_collection_total Total number of garbage collection operations. - # TYPE cortex_compactor_garbage_collection_total counter - cortex_compactor_garbage_collection_total 0 - # HELP cortex_compactor_meta_sync_consistency_delay_seconds Configured consistency delay in seconds. # TYPE cortex_compactor_meta_sync_consistency_delay_seconds gauge cortex_compactor_meta_sync_consistency_delay_seconds 0 - # HELP cortex_compactor_meta_sync_duration_seconds Duration of the blocks metadata synchronization in seconds. 
- # TYPE cortex_compactor_meta_sync_duration_seconds histogram - cortex_compactor_meta_sync_duration_seconds_bucket{le="+Inf"} 0 - cortex_compactor_meta_sync_duration_seconds_sum 0 - cortex_compactor_meta_sync_duration_seconds_count 0 - - # HELP cortex_compactor_meta_sync_failures_total Total blocks metadata synchronization failures. - # TYPE cortex_compactor_meta_sync_failures_total counter - cortex_compactor_meta_sync_failures_total 0 - - # HELP cortex_compactor_meta_syncs_total Total blocks metadata synchronization attempts. - # TYPE cortex_compactor_meta_syncs_total counter - cortex_compactor_meta_syncs_total 0 - - # HELP cortex_compactor_group_compaction_runs_completed_total Total number of group completed compaction runs. This also includes compactor group runs that resulted with no compaction. - # TYPE cortex_compactor_group_compaction_runs_completed_total counter - cortex_compactor_group_compaction_runs_completed_total 0 - - # HELP cortex_compactor_group_compaction_runs_started_total Total number of group compaction attempts. - # TYPE cortex_compactor_group_compaction_runs_started_total counter - cortex_compactor_group_compaction_runs_started_total 0 - - # HELP cortex_compactor_group_compactions_failures_total Total number of failed group compactions. - # TYPE cortex_compactor_group_compactions_failures_total counter - cortex_compactor_group_compactions_failures_total 0 - - # HELP cortex_compactor_group_compactions_total Total number of group compaction attempts that resulted in a new block. - # TYPE cortex_compactor_group_compactions_total counter - cortex_compactor_group_compactions_total 0 - - # HELP cortex_compactor_group_vertical_compactions_total Total number of group compaction attempts that resulted in a new block based on overlapping blocks. - # TYPE cortex_compactor_group_vertical_compactions_total counter - cortex_compactor_group_vertical_compactions_total 0 - # TYPE cortex_compactor_block_cleanup_failures_total counter # HELP cortex_compactor_block_cleanup_failures_total Total number of blocks failed to be deleted. cortex_compactor_block_cleanup_failures_total 0 @@ -451,26 +340,14 @@ func TestCompactor_ShouldRetryCompactionOnFailureWhileDiscoveringUsersFromBucket # TYPE cortex_compactor_blocks_cleaned_total counter cortex_compactor_blocks_cleaned_total 0 - # HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. - # TYPE cortex_compactor_blocks_marked_for_deletion_total counter - cortex_compactor_blocks_marked_for_deletion_total{reason="compaction"} 0 - cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 0 - - # TYPE cortex_compactor_block_cleanup_started_total counter - # HELP cortex_compactor_block_cleanup_started_total Total number of blocks cleanup runs started. - cortex_compactor_block_cleanup_started_total 1 - # HELP cortex_compactor_blocks_marked_for_no_compaction_total Total number of blocks marked for no compact during a compaction run. # TYPE cortex_compactor_blocks_marked_for_no_compaction_total counter cortex_compactor_blocks_marked_for_no_compaction_total 0 - # TYPE cortex_compactor_block_cleanup_completed_total counter - # HELP cortex_compactor_block_cleanup_completed_total Total number of blocks cleanup runs successfully completed. - cortex_compactor_block_cleanup_completed_total 0 - - # TYPE cortex_compactor_block_cleanup_failed_total counter # HELP cortex_compactor_block_cleanup_failed_total Total number of blocks cleanup runs failed. 
- cortex_compactor_block_cleanup_failed_total 1 + # TYPE cortex_compactor_block_cleanup_failed_total counter + cortex_compactor_block_cleanup_failed_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_failed_total{tenant_status="deleted"} 1 `), "cortex_compactor_runs_started_total", "cortex_compactor_runs_completed_total", @@ -507,6 +384,9 @@ func TestCompactor_ShouldIncrementCompactionErrorIfFailedToCompactASingleTenant( bucketClient.MockIter("__markers__", []string{}, nil) bucketClient.MockIter(userID+"/", []string{userID + "/01DTVP434PA9VFXSW2JKB3392D/meta.json", userID + "/01FN6CDF3PNEWWRY5MPGJPE3EX/meta.json"}, nil) bucketClient.MockIter(userID+"/markers/", nil, nil) + bucketClient.MockGet(userID+"/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload(userID+"/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete(userID+"/markers/cleaner-visit-marker.json", nil) bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath(userID), false, nil) bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath(userID), false, nil) bucketClient.MockGet(userID+"/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) @@ -529,8 +409,8 @@ func TestCompactor_ShouldIncrementCompactionErrorIfFailedToCompactASingleTenant( require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) // Wait until all retry attempts have completed. - cortex_testutil.Poll(t, time.Second, 1.0, func() interface{} { - return prom_testutil.ToFloat64(c.compactionRunsFailed) + cortex_testutil.Poll(t, 10*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsFailed) }) require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) @@ -562,6 +442,9 @@ func TestCompactor_ShouldCompactAndRemoveUserFolder(t *testing.T) { bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath("user-1"), false, nil) bucketClient.MockIter("user-1/", []string{"user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", "user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/meta.json"}, nil) bucketClient.MockIter("user-1/markers/", nil, nil) + bucketClient.MockGet("user-1/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-1/markers/cleaner-visit-marker.json", nil) bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/deletion-mark.json", "", nil) bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/no-compact-mark.json", "", nil) @@ -573,7 +456,6 @@ func TestCompactor_ShouldCompactAndRemoveUserFolder(t *testing.T) { bucketClient.MockGet("user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/visit-mark.json", "", nil) bucketClient.MockGet("user-1/bucket-index.json.gz", "", nil) bucketClient.MockGet("user-1/bucket-index-sync-status.json", "", nil) - bucketClient.MockIter("user-1/markers/", nil, nil) bucketClient.MockUpload("user-1/bucket-index.json.gz", nil) bucketClient.MockUpload("user-1/bucket-index-sync-status.json", nil) @@ -589,8 +471,8 @@ func TestCompactor_ShouldCompactAndRemoveUserFolder(t *testing.T) { require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) // Wait until a run has completed. 
- cortex_testutil.Poll(t, time.Second, 1.0, func() interface{} { - return prom_testutil.ToFloat64(c.compactionRunsCompleted) + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) }) _, err := os.Stat(c.compactDirForUser("user-1")) @@ -611,7 +493,13 @@ func TestCompactor_ShouldIterateOverUsersAndRunCompaction(t *testing.T) { bucketClient.MockIter("user-1/", []string{"user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", "user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/meta.json"}, nil) bucketClient.MockIter("user-2/", []string{"user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json", "user-2/01FN3V83ABR9992RF8WRJZ76ZQ/meta.json"}, nil) bucketClient.MockIter("user-1/markers/", nil, nil) + bucketClient.MockGet("user-1/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-1/markers/cleaner-visit-marker.json", nil) bucketClient.MockIter("user-2/markers/", nil, nil) + bucketClient.MockGet("user-2/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-2/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-2/markers/cleaner-visit-marker.json", nil) bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/deletion-mark.json", "", nil) bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/no-compact-mark.json", "", nil) @@ -633,8 +521,6 @@ func TestCompactor_ShouldIterateOverUsersAndRunCompaction(t *testing.T) { bucketClient.MockGet("user-2/bucket-index.json.gz", "", nil) bucketClient.MockGet("user-1/bucket-index-sync-status.json", "", nil) bucketClient.MockGet("user-2/bucket-index-sync-status.json", "", nil) - bucketClient.MockIter("user-1/markers/", nil, nil) - bucketClient.MockIter("user-2/markers/", nil, nil) bucketClient.MockUpload("user-1/bucket-index.json.gz", nil) bucketClient.MockUpload("user-2/bucket-index.json.gz", nil) bucketClient.MockUpload("user-1/bucket-index-sync-status.json", nil) @@ -651,8 +537,8 @@ func TestCompactor_ShouldIterateOverUsersAndRunCompaction(t *testing.T) { require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) // Wait until a run has completed. 
- cortex_testutil.Poll(t, time.Second, 1.0, func() interface{} { - return prom_testutil.ToFloat64(c.compactionRunsCompleted) + cortex_testutil.Poll(t, 10*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) }) require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) @@ -663,12 +549,6 @@ func TestCompactor_ShouldIterateOverUsersAndRunCompaction(t *testing.T) { assert.Len(t, tsdbPlanner.getNoCompactBlocks(), 0) assert.ElementsMatch(t, []string{ - `level=info component=cleaner msg="started blocks cleanup and maintenance"`, - `level=info component=cleaner org_id=user-1 msg="started blocks cleanup and maintenance"`, - `level=info component=cleaner org_id=user-1 msg="completed blocks cleanup and maintenance"`, - `level=info component=cleaner org_id=user-2 msg="started blocks cleanup and maintenance"`, - `level=info component=cleaner org_id=user-2 msg="completed blocks cleanup and maintenance"`, - `level=info component=cleaner msg="successfully completed blocks cleanup and maintenance"`, `level=info component=compactor msg="compactor started"`, `level=info component=compactor msg="discovering users from bucket"`, `level=info component=compactor msg="discovered users from bucket" users=2`, @@ -717,20 +597,20 @@ func TestCompactor_ShouldIterateOverUsersAndRunCompaction(t *testing.T) { # HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. # TYPE cortex_compactor_blocks_marked_for_deletion_total counter - cortex_compactor_blocks_marked_for_deletion_total{reason="compaction"} 0 - cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 0 + cortex_compactor_blocks_marked_for_deletion_total{reason="compaction",user="user-1"} 0 + cortex_compactor_blocks_marked_for_deletion_total{reason="compaction",user="user-2"} 0 + cortex_compactor_blocks_marked_for_deletion_total{reason="retention",user="user-1"} 0 + cortex_compactor_blocks_marked_for_deletion_total{reason="retention",user="user-2"} 0 # TYPE cortex_compactor_block_cleanup_started_total counter # HELP cortex_compactor_block_cleanup_started_total Total number of blocks cleanup runs started. - cortex_compactor_block_cleanup_started_total 1 + cortex_compactor_block_cleanup_started_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_started_total{tenant_status="deleted"} 1 # TYPE cortex_compactor_block_cleanup_completed_total counter # HELP cortex_compactor_block_cleanup_completed_total Total number of blocks cleanup runs successfully completed. - cortex_compactor_block_cleanup_completed_total 1 - - # TYPE cortex_compactor_block_cleanup_failed_total counter - # HELP cortex_compactor_block_cleanup_failed_total Total number of blocks cleanup runs failed. - cortex_compactor_block_cleanup_failed_total 0 + cortex_compactor_block_cleanup_completed_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_completed_total{tenant_status="deleted"} 1 # HELP cortex_compactor_blocks_marked_for_no_compaction_total Total number of blocks marked for no compact during a compaction run. 
# TYPE cortex_compactor_blocks_marked_for_no_compaction_total counter @@ -772,6 +652,9 @@ func TestCompactor_ShouldNotCompactBlocksMarkedForDeletion(t *testing.T) { "user-1/markers/01DTVP434PA9VFXSW2JKB3392D-deletion-mark.json", "user-1/markers/01DTW0ZCPDDNV4BV83Q2SV4QAZ-deletion-mark.json", }, nil) + bucketClient.MockGet("user-1/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-1/markers/cleaner-visit-marker.json", nil) bucketClient.MockDelete("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json", nil) bucketClient.MockDelete("user-1/01DTW0ZCPDDNV4BV83Q2SV4QAZ/deletion-mark.json", nil) @@ -787,8 +670,8 @@ func TestCompactor_ShouldNotCompactBlocksMarkedForDeletion(t *testing.T) { require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) // Wait until a run has completed. - cortex_testutil.Poll(t, time.Second, 1.0, func() interface{} { - return prom_testutil.ToFloat64(c.compactionRunsCompleted) + cortex_testutil.Poll(t, 10*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) }) require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) @@ -797,13 +680,6 @@ func TestCompactor_ShouldNotCompactBlocksMarkedForDeletion(t *testing.T) { tsdbPlanner.AssertNumberOfCalls(t, "Plan", 0) assert.ElementsMatch(t, []string{ - `level=info component=cleaner msg="started blocks cleanup and maintenance"`, - `level=info component=cleaner org_id=user-1 msg="started blocks cleanup and maintenance"`, - `level=debug component=cleaner org_id=user-1 msg="deleted file" file=01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json bucket=mock`, - `level=debug component=cleaner org_id=user-1 msg="deleted file" file=01DTW0ZCPDDNV4BV83Q2SV4QAZ/deletion-mark.json bucket=mock`, - `level=info component=cleaner org_id=user-1 msg="deleted block marked for deletion" block=01DTW0ZCPDDNV4BV83Q2SV4QAZ`, - `level=info component=cleaner org_id=user-1 msg="completed blocks cleanup and maintenance"`, - `level=info component=cleaner msg="successfully completed blocks cleanup and maintenance"`, `level=info component=compactor msg="compactor started"`, `level=info component=compactor msg="discovering users from bucket"`, `level=info component=compactor msg="discovered users from bucket" users=1`, @@ -846,20 +722,18 @@ func TestCompactor_ShouldNotCompactBlocksMarkedForDeletion(t *testing.T) { # HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. # TYPE cortex_compactor_blocks_marked_for_deletion_total counter - cortex_compactor_blocks_marked_for_deletion_total{reason="compaction"} 0 - cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 0 + cortex_compactor_blocks_marked_for_deletion_total{reason="compaction",user="user-1"} 0 + cortex_compactor_blocks_marked_for_deletion_total{reason="retention",user="user-1"} 0 # TYPE cortex_compactor_block_cleanup_started_total counter # HELP cortex_compactor_block_cleanup_started_total Total number of blocks cleanup runs started. - cortex_compactor_block_cleanup_started_total 1 + cortex_compactor_block_cleanup_started_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_started_total{tenant_status="deleted"} 1 # TYPE cortex_compactor_block_cleanup_completed_total counter # HELP cortex_compactor_block_cleanup_completed_total Total number of blocks cleanup runs successfully completed. 
- cortex_compactor_block_cleanup_completed_total 1 - - # TYPE cortex_compactor_block_cleanup_failed_total counter - # HELP cortex_compactor_block_cleanup_failed_total Total number of blocks cleanup runs failed. - cortex_compactor_block_cleanup_failed_total 0 + cortex_compactor_block_cleanup_completed_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_completed_total{tenant_status="deleted"} 1 # HELP cortex_compactor_blocks_marked_for_no_compaction_total Total number of blocks marked for no compact during a compaction run. # TYPE cortex_compactor_blocks_marked_for_no_compaction_total counter @@ -881,7 +755,13 @@ func TestCompactor_ShouldNotCompactBlocksMarkedForSkipCompact(t *testing.T) { bucketClient.MockIter("user-1/", []string{"user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", "user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/meta.json"}, nil) bucketClient.MockIter("user-2/", []string{"user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json", "user-2/01FN3V83ABR9992RF8WRJZ76ZQ/meta.json"}, nil) bucketClient.MockIter("user-1/markers/", nil, nil) + bucketClient.MockGet("user-1/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-1/markers/cleaner-visit-marker.json", nil) bucketClient.MockIter("user-2/markers/", nil, nil) + bucketClient.MockGet("user-2/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-2/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-2/markers/cleaner-visit-marker.json", nil) bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/deletion-mark.json", "", nil) bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/no-compact-mark.json", mockNoCompactBlockJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) @@ -909,8 +789,6 @@ func TestCompactor_ShouldNotCompactBlocksMarkedForSkipCompact(t *testing.T) { bucketClient.MockGet("user-2/bucket-index.json.gz", "", nil) bucketClient.MockGet("user-1/bucket-index-sync-status.json", "", nil) bucketClient.MockGet("user-2/bucket-index-sync-status.json", "", nil) - bucketClient.MockIter("user-1/markers/", nil, nil) - bucketClient.MockIter("user-2/markers/", nil, nil) bucketClient.MockUpload("user-1/bucket-index.json.gz", nil) bucketClient.MockUpload("user-2/bucket-index.json.gz", nil) bucketClient.MockUpload("user-1/bucket-index-sync-status.json", nil) @@ -922,8 +800,8 @@ func TestCompactor_ShouldNotCompactBlocksMarkedForSkipCompact(t *testing.T) { require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) - cortex_testutil.Poll(t, time.Second, 1.0, func() interface{} { - return prom_testutil.ToFloat64(c.compactionRunsCompleted) + cortex_testutil.Poll(t, 10*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) }) require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) @@ -969,6 +847,9 @@ func TestCompactor_ShouldNotCompactBlocksForUsersMarkedForDeletion(t *testing.T) bucketClient.MockDelete("user-1/01DTVP434PA9VFXSW2JKB3392D/index", nil) bucketClient.MockDelete("user-1/bucket-index.json.gz", nil) bucketClient.MockDelete("user-1/bucket-index-sync-status.json", nil) + bucketClient.MockGet("user-1/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-1/markers/cleaner-visit-marker.json", nil) c, _, tsdbPlanner, logs, 
registry := prepare(t, cfg, bucketClient, nil) @@ -981,8 +862,8 @@ func TestCompactor_ShouldNotCompactBlocksForUsersMarkedForDeletion(t *testing.T) require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) // Wait until a run has completed. - cortex_testutil.Poll(t, time.Second, 1.0, func() interface{} { - return prom_testutil.ToFloat64(c.compactionRunsCompleted) + cortex_testutil.Poll(t, 10*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) }) require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) @@ -991,14 +872,6 @@ func TestCompactor_ShouldNotCompactBlocksForUsersMarkedForDeletion(t *testing.T) tsdbPlanner.AssertNumberOfCalls(t, "Plan", 0) assert.ElementsMatch(t, []string{ - `level=info component=cleaner msg="started blocks cleanup and maintenance"`, - `level=info component=cleaner org_id=user-1 msg="deleting blocks for tenant marked for deletion"`, - `level=debug component=cleaner org_id=user-1 msg="deleted file" file=01DTVP434PA9VFXSW2JKB3392D/meta.json bucket=mock`, - `level=debug component=cleaner org_id=user-1 msg="deleted file" file=01DTVP434PA9VFXSW2JKB3392D/index bucket=mock`, - `level=info component=cleaner org_id=user-1 msg="deleted block" block=01DTVP434PA9VFXSW2JKB3392D`, - `level=info component=cleaner org_id=user-1 msg="deleted blocks for tenant marked for deletion" deletedBlocks=1`, - `level=info component=cleaner org_id=user-1 msg="updating finished time in tenant deletion mark"`, - `level=info component=cleaner msg="successfully completed blocks cleanup and maintenance"`, `level=info component=compactor msg="compactor started"`, `level=info component=compactor msg="discovering users from bucket"`, `level=info component=compactor msg="discovered users from bucket" users=1`, @@ -1035,22 +908,15 @@ func TestCompactor_ShouldNotCompactBlocksForUsersMarkedForDeletion(t *testing.T) # TYPE cortex_compactor_blocks_cleaned_total counter cortex_compactor_blocks_cleaned_total 1 - # HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. - # TYPE cortex_compactor_blocks_marked_for_deletion_total counter - cortex_compactor_blocks_marked_for_deletion_total{reason="compaction"} 0 - cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 0 - # TYPE cortex_compactor_block_cleanup_started_total counter # HELP cortex_compactor_block_cleanup_started_total Total number of blocks cleanup runs started. - cortex_compactor_block_cleanup_started_total 1 + cortex_compactor_block_cleanup_started_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_started_total{tenant_status="deleted"} 1 # TYPE cortex_compactor_block_cleanup_completed_total counter # HELP cortex_compactor_block_cleanup_completed_total Total number of blocks cleanup runs successfully completed. - cortex_compactor_block_cleanup_completed_total 1 - - # TYPE cortex_compactor_block_cleanup_failed_total counter - # HELP cortex_compactor_block_cleanup_failed_total Total number of blocks cleanup runs failed. - cortex_compactor_block_cleanup_failed_total 0 + cortex_compactor_block_cleanup_completed_total{tenant_status="active"} 1 + cortex_compactor_block_cleanup_completed_total{tenant_status="deleted"} 1 # HELP cortex_compactor_blocks_marked_for_no_compaction_total Total number of blocks marked for no compact during a compaction run. 
# TYPE cortex_compactor_blocks_marked_for_no_compaction_total counter @@ -1083,7 +949,7 @@ func TestCompactor_ShouldSkipOutOrOrderBlocks(t *testing.T) { cfg.SkipBlocksWithOutOfOrderChunksEnabled = true c, tsdbCompac, tsdbPlanner, _, registry := prepare(t, cfg, bucketClient, nil) - tsdbCompac.On("CompactWithBlockPopulator", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(b1, nil) + tsdbCompac.On("CompactWithBlockPopulator", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]ulid.ULID{b1}, nil) tsdbPlanner.On("Plan", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]*metadata.Meta{ { @@ -1107,7 +973,7 @@ func TestCompactor_ShouldSkipOutOrOrderBlocks(t *testing.T) { defer services.StopAndAwaitTerminated(context.Background(), c) //nolint:errcheck // Wait until a run has completed. - cortex_testutil.Poll(t, 5*time.Second, true, func() interface{} { + cortex_testutil.Poll(t, 10*time.Second, true, func() interface{} { if _, err := os.Stat(path.Join(dir, "no-compact-mark.json")); err == nil { return true } @@ -1135,7 +1001,13 @@ func TestCompactor_ShouldCompactAllUsersOnShardingEnabledButOnlyOneInstanceRunni bucketClient.MockIter("user-1/", []string{"user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", "user-1/01FN6CDF3PNEWWRY5MPGJPE3EX/meta.json"}, nil) bucketClient.MockIter("user-2/", []string{"user-2/01DTW0ZCPDDNV4BV83Q2SV4QAZ/meta.json", "user-2/01FN3V83ABR9992RF8WRJZ76ZQ/meta.json"}, nil) bucketClient.MockIter("user-1/markers/", nil, nil) + bucketClient.MockGet("user-1/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-1/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-1/markers/cleaner-visit-marker.json", nil) bucketClient.MockIter("user-2/markers/", nil, nil) + bucketClient.MockGet("user-2/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload("user-2/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete("user-2/markers/cleaner-visit-marker.json", nil) bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/deletion-mark.json", "", nil) bucketClient.MockGet("user-1/01DTVP434PA9VFXSW2JKB3392D/no-compact-mark.json", "", nil) @@ -1186,8 +1058,8 @@ func TestCompactor_ShouldCompactAllUsersOnShardingEnabledButOnlyOneInstanceRunni require.NoError(t, services.StartAndAwaitRunning(context.Background(), c)) // Wait until a run has completed. 
- cortex_testutil.Poll(t, 5*time.Second, 1.0, func() interface{} { - return prom_testutil.ToFloat64(c.compactionRunsCompleted) + cortex_testutil.Poll(t, 10*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) }) require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) @@ -1198,12 +1070,6 @@ func TestCompactor_ShouldCompactAllUsersOnShardingEnabledButOnlyOneInstanceRunni assert.ElementsMatch(t, []string{ `level=info component=compactor msg="waiting until compactor is ACTIVE in the ring"`, `level=info component=compactor msg="compactor is ACTIVE in the ring"`, - `level=info component=cleaner msg="started blocks cleanup and maintenance"`, - `level=info component=cleaner org_id=user-1 msg="started blocks cleanup and maintenance"`, - `level=info component=cleaner org_id=user-1 msg="completed blocks cleanup and maintenance"`, - `level=info component=cleaner org_id=user-2 msg="started blocks cleanup and maintenance"`, - `level=info component=cleaner org_id=user-2 msg="completed blocks cleanup and maintenance"`, - `level=info component=cleaner msg="successfully completed blocks cleanup and maintenance"`, `level=info component=compactor msg="compactor started"`, `level=info component=compactor msg="discovering users from bucket"`, `level=info component=compactor msg="discovered users from bucket" users=2`, @@ -1240,6 +1106,9 @@ func TestCompactor_ShouldCompactOnlyUsersOwnedByTheInstanceOnShardingEnabledAndM for _, userID := range userIDs { bucketClient.MockIter(userID+"/", []string{userID + "/01DTVP434PA9VFXSW2JKB3392D"}, nil) bucketClient.MockIter(userID+"/markers/", nil, nil) + bucketClient.MockGet(userID+"/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload(userID+"/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete(userID+"/markers/cleaner-visit-marker.json", nil) bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath(userID), false, nil) bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath(userID), false, nil) bucketClient.MockGet(userID+"/01DTVP434PA9VFXSW2JKB3392D/meta.json", mockBlockMetaJSON("01DTVP434PA9VFXSW2JKB3392D"), nil) @@ -1290,8 +1159,8 @@ func TestCompactor_ShouldCompactOnlyUsersOwnedByTheInstanceOnShardingEnabledAndM // Wait until a run has been completed on each compactor for _, c := range compactors { - cortex_testutil.Poll(t, 10*time.Second, 1.0, func() interface{} { - return prom_testutil.ToFloat64(c.compactionRunsCompleted) + cortex_testutil.Poll(t, 90*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) }) } @@ -1368,12 +1237,15 @@ func TestCompactor_ShouldCompactOnlyShardsOwnedByTheInstanceOnShardingEnabledWit blockFiles = append(blockFiles, path.Join(userID, blockID, block.MetaFilename)) // Get all of the unique group hashes so that they can be used to ensure all groups were compacted - groupHash := hashGroup(userID, blockTimes["startTime"], blockTimes["endTime"]) + groupHash := HashGroup(userID, blockTimes["startTime"], blockTimes["endTime"]) groupHashes[groupHash]++ } bucketClient.MockIter(userID+"/", blockFiles, nil) bucketClient.MockIter(userID+"/markers/", nil, nil) + bucketClient.MockGet(userID+"/markers/cleaner-visit-marker.json", "", nil) + bucketClient.MockUpload(userID+"/markers/cleaner-visit-marker.json", nil) + bucketClient.MockDelete(userID+"/markers/cleaner-visit-marker.json", nil) bucketClient.MockExists(cortex_tsdb.GetGlobalDeletionMarkPath(userID), false, nil) 
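[Editorial aside, not part of the patch: the new per-tenant cleaner visit marker means every test touching the cleaner now stubs three object-store calls for `<user>/markers/cleaner-visit-marker.json`, as the hunks above and below do. A minimal sketch of that recurring pattern follows; it assumes the bucket mock type these tests already construct (the helper name mockCleanerVisitMarker is hypothetical), and the MockGet/MockUpload/MockDelete calls are the same ones the patch uses.]

// Hypothetical helper (not part of the patch); assumes the bucket.ClientMock
// from github.com/cortexproject/cortex/pkg/storage/bucket that these tests use.
// It bundles the three stubs the cleaner needs for its per-tenant visit marker.
func mockCleanerVisitMarker(bucketClient *bucket.ClientMock, userID string) {
	marker := userID + "/markers/cleaner-visit-marker.json"
	bucketClient.MockGet(marker, "", nil) // no marker written yet
	bucketClient.MockUpload(marker, nil)  // cleaner writes/heartbeats the marker
	bucketClient.MockDelete(marker, nil)  // cleaner removes the marker when done
}

[End of aside; the patch continues below.]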
bucketClient.MockExists(cortex_tsdb.GetLocalDeletionMarkPath(userID), false, nil) bucketClient.MockGet(userID+"/bucket-index.json.gz", "", nil) @@ -1391,8 +1263,8 @@ func TestCompactor_ShouldCompactOnlyShardsOwnedByTheInstanceOnShardingEnabledWit for i := 1; i <= 4; i++ { cfg := prepareConfig() + cfg.CompactionInterval = 30 * time.Second cfg.ShardingEnabled = true - cfg.CompactionInterval = 15 * time.Second cfg.ShardingStrategy = util.ShardingStrategyShuffle cfg.ShardingRing.InstanceID = fmt.Sprintf("compactor-%d", i) cfg.ShardingRing.InstanceAddr = fmt.Sprintf("127.0.0.%d", i) @@ -1431,7 +1303,7 @@ func TestCompactor_ShouldCompactOnlyShardsOwnedByTheInstanceOnShardingEnabledWit // Wait until a run has been completed on each compactor for _, c := range compactors { cortex_testutil.Poll(t, 60*time.Second, 2.0, func() interface{} { - return prom_testutil.ToFloat64(c.compactionRunsCompleted) + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) }) } @@ -1572,14 +1444,6 @@ func createNoCompactionMark(t *testing.T, bkt objstore.Bucket, userID string, bl require.NoError(t, bkt.Upload(context.Background(), markPath, strings.NewReader(content))) } -func createBlockVisitMarker(t *testing.T, bkt objstore.Bucket, userID string, blockID ulid.ULID) { - content := mockBlockVisitMarker() - blockPath := path.Join(userID, blockID.String()) - markPath := path.Join(blockPath, BlockVisitMarkerFile) - - require.NoError(t, bkt.Upload(context.Background(), markPath, strings.NewReader(content))) -} - func findCompactorByUserID(compactors []*Compactor, logs []*concurrency.SyncBuffer, userID string) (*Compactor, *concurrency.SyncBuffer, error) { var compactor *Compactor var log *concurrency.SyncBuffer @@ -1627,15 +1491,24 @@ func removeIgnoredLogs(input []string) []string { `level=info component=compactor msg="compactor stopped"`: {}, } + ignoredLogStringsRegexList := []*regexp.Regexp{ + regexp.MustCompile(`^level=(info|debug|warn) component=cleaner .+$`), + } + out := make([]string, 0, len(input)) + executionIDRe := regexp.MustCompile(`\s?execution_id=\S+`) durationRe := regexp.MustCompile(`\s?duration(_ms)?=\S+`) +main: for i := 0; i < len(input); i++ { log := input[i] // Remove any duration from logs. log = durationRe.ReplaceAllString(log, "") + // Remove any execution_id from logs. + log = executionIDRe.ReplaceAllString(log, "") + if strings.Contains(log, "block.MetaFetcher") || strings.Contains(log, "block.BaseFetcher") { continue } @@ -1644,6 +1517,12 @@ func removeIgnoredLogs(input []string) []string { continue } + for _, ignoreRegex := range ignoredLogStringsRegexList { + if ignoreRegex.MatchString(log) { + continue main + } + } + out = append(out, log) } @@ -1657,6 +1536,9 @@ func prepareConfig() Config { compactorCfg.retryMinBackoff = 0 compactorCfg.retryMaxBackoff = 0 + //Avoid jitter in startup + compactorCfg.CompactionInterval = 10 * time.Second + // The migration is tested in a dedicated test. 
compactorCfg.BlockDeletionMarksMigrationEnabled = false @@ -1700,7 +1582,7 @@ func prepare(t *testing.T, compactorCfg Config, bucketClient objstore.Instrument blocksCompactorFactory := func(ctx context.Context, cfg Config, logger log.Logger, reg prometheus.Registerer) (compact.Compactor, PlannerFactory, error) { return tsdbCompactor, - func(ctx context.Context, bkt objstore.InstrumentedBucket, _ log.Logger, _ Config, noCompactMarkFilter *compact.GatherNoCompactionMarkFilter, ringLifecycle *ring.Lifecycler, _ prometheus.Counter, _ prometheus.Counter) compact.Planner { + func(ctx context.Context, bkt objstore.InstrumentedBucket, _ log.Logger, _ Config, noCompactMarkFilter *compact.GatherNoCompactionMarkFilter, ringLifecycle *ring.Lifecycler, _ string, _ prometheus.Counter, _ prometheus.Counter, _ prometheus.Counter, _ *compactorMetrics) compact.Planner { tsdbPlanner.noCompactMarkFilters = append(tsdbPlanner.noCompactMarkFilters, noCompactMarkFilter) return tsdbPlanner }, @@ -1714,7 +1596,7 @@ func prepare(t *testing.T, compactorCfg Config, bucketClient objstore.Instrument blocksGrouperFactory = DefaultBlocksGrouperFactory } - c, err := newCompactor(compactorCfg, storageCfg, logger, registry, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, overrides) + c, err := newCompactor(compactorCfg, storageCfg, logger, registry, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, DefaultBlockDeletableCheckerFactory, DefaultCompactionLifecycleCallbackFactory, overrides) require.NoError(t, err) return c, tsdbCompactor, tsdbPlanner, logs, registry @@ -1762,17 +1644,21 @@ func (m *tsdbPlannerMock) getNoCompactBlocks() []string { return result } -func mockBlockMetaJSON(id string) string { - meta := tsdb.BlockMeta{ +func mockBlockMeta(id string) tsdb.BlockMeta { + return tsdb.BlockMeta{ Version: 1, ULID: ulid.MustParse(id), - MinTime: 1574776800000, - MaxTime: 1574784000000, + MinTime: BlockMinTime, + MaxTime: BlockMaxTime, Compaction: tsdb.BlockMetaCompaction{ Level: 1, Sources: []ulid.ULID{ulid.MustParse(id)}, }, } +} + +func mockBlockMetaJSON(id string) string { + meta := mockBlockMeta(id) content, err := json.Marshal(meta) if err != nil { @@ -1880,7 +1766,6 @@ func TestCompactor_DeleteLocalSyncFiles(t *testing.T) { for i := 1; i <= 2; i++ { cfg := prepareConfig() - cfg.CompactionInterval = 10 * time.Minute // We will only call compaction manually. cfg.ShardingEnabled = true cfg.ShardingRing.InstanceID = fmt.Sprintf("compactor-%d", i) @@ -1912,8 +1797,8 @@ func TestCompactor_DeleteLocalSyncFiles(t *testing.T) { require.NoError(t, services.StartAndAwaitRunning(context.Background(), c1)) // Wait until a run has been completed on first compactor. This happens as soon as compactor starts. - cortex_testutil.Poll(t, 10*time.Second, 1.0, func() interface{} { - return prom_testutil.ToFloat64(c1.compactionRunsCompleted) + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c1.CompactionRunsCompleted) }) require.NoError(t, os.Mkdir(c1.metaSyncDirForUser("new-user"), 0600)) @@ -1923,8 +1808,8 @@ func TestCompactor_DeleteLocalSyncFiles(t *testing.T) { // Now start second compactor, and wait until it runs compaction. 
require.NoError(t, services.StartAndAwaitRunning(context.Background(), c2)) - cortex_testutil.Poll(t, 10*time.Second, 1.0, func() interface{} { - return prom_testutil.ToFloat64(c2.compactionRunsCompleted) + cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c2.CompactionRunsCompleted) }) // Let's check how many users second compactor has. @@ -1986,7 +1871,7 @@ func TestCompactor_ShouldNotTreatInterruptionsAsErrors(t *testing.T) { c, tsdbCompactor, tsdbPlanner, logs, registry := prepare(t, prepareConfig(), bucketClient, nil) ctx, cancel := context.WithCancel(context.Background()) - tsdbCompactor.On("CompactWithBlockPopulator", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(ulid.ULID{}, context.Canceled).Run(func(args mock.Arguments) { + tsdbCompactor.On("CompactWithBlockPopulator", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]ulid.ULID{}, context.Canceled).Run(func(args mock.Arguments) { cancel() }) tsdbPlanner.On("Plan", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]*metadata.Meta{ @@ -2007,8 +1892,8 @@ func TestCompactor_ShouldNotTreatInterruptionsAsErrors(t *testing.T) { }, nil) require.NoError(t, services.StartAndAwaitRunning(ctx, c)) - cortex_testutil.Poll(t, 1*time.Second, 1.0, func() interface{} { - return prom_testutil.ToFloat64(c.compactionRunsInterrupted) + cortex_testutil.Poll(t, 10*time.Second, 1.0, func() interface{} { + return prom_testutil.ToFloat64(c.CompactionRunsInterrupted) }) require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) @@ -2080,7 +1965,7 @@ func TestCompactor_ShouldNotFailCompactionIfAccessDeniedErrDuringMetaSync(t *tes // Wait until a run has completed. cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { - return prom_testutil.ToFloat64(c.compactionRunsCompleted) + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) }) require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) @@ -2130,7 +2015,7 @@ func TestCompactor_ShouldNotFailCompactionIfAccessDeniedErrReturnedFromBucket(t // Wait until a run has completed. 
cortex_testutil.Poll(t, 20*time.Second, 1.0, func() interface{} { - return prom_testutil.ToFloat64(c.compactionRunsCompleted) + return prom_testutil.ToFloat64(c.CompactionRunsCompleted) }) require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c)) diff --git a/pkg/compactor/partition_compaction_complete_checker.go b/pkg/compactor/partition_compaction_complete_checker.go new file mode 100644 index 0000000000..3bb6b37b05 --- /dev/null +++ b/pkg/compactor/partition_compaction_complete_checker.go @@ -0,0 +1,16 @@ +package compactor + +import ( + "github.com/oklog/ulid" + "github.com/thanos-io/thanos/pkg/compact" +) + +type PartitionCompactionBlockDeletableChecker struct{} + +func NewPartitionCompactionBlockDeletableChecker() *PartitionCompactionBlockDeletableChecker { + return &PartitionCompactionBlockDeletableChecker{} +} + +func (p *PartitionCompactionBlockDeletableChecker) CanDelete(_ *compact.Group, _ ulid.ULID) bool { + return false +} diff --git a/pkg/compactor/partition_compaction_grouper.go b/pkg/compactor/partition_compaction_grouper.go new file mode 100644 index 0000000000..1230a0a397 --- /dev/null +++ b/pkg/compactor/partition_compaction_grouper.go @@ -0,0 +1,931 @@ +package compactor + +import ( + "context" + "fmt" + "math" + "math/rand" + "sort" + "strings" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/model/labels" + "github.com/thanos-io/objstore" + thanosblock "github.com/thanos-io/thanos/pkg/block" + "github.com/thanos-io/thanos/pkg/block/metadata" + "github.com/thanos-io/thanos/pkg/compact" + + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/storage/tsdb" +) + +var ( + DUMMY_BLOCK_ID = ulid.ULID{} +) + +type PartitionCompactionGrouper struct { + ctx context.Context + logger log.Logger + bkt objstore.InstrumentedBucket + acceptMalformedIndex bool + enableVerticalCompaction bool + blocksMarkedForNoCompact prometheus.Counter + hashFunc metadata.HashFunc + syncerMetrics *compact.SyncerMetrics + compactorMetrics *compactorMetrics + compactorCfg Config + limits Limits + userID string + blockFilesConcurrency int + blocksFetchConcurrency int + compactionConcurrency int + + doRandomPick bool + + ring ring.ReadRing + ringLifecyclerAddr string + ringLifecyclerID string + + noCompBlocksFunc func() map[ulid.ULID]*metadata.NoCompactMark + partitionVisitMarkerTimeout time.Duration + partitionVisitMarkerReadFailed prometheus.Counter + partitionVisitMarkerWriteFailed prometheus.Counter + partitionedGroupInfoReadFailed prometheus.Counter + partitionedGroupInfoWriteFailed prometheus.Counter +} + +func NewPartitionCompactionGrouper( + ctx context.Context, + logger log.Logger, + bkt objstore.InstrumentedBucket, + acceptMalformedIndex bool, + enableVerticalCompaction bool, + blocksMarkedForNoCompact prometheus.Counter, + syncerMetrics *compact.SyncerMetrics, + compactorMetrics *compactorMetrics, + hashFunc metadata.HashFunc, + compactorCfg Config, + ring ring.ReadRing, + ringLifecyclerAddr string, + ringLifecyclerID string, + limits Limits, + userID string, + blockFilesConcurrency int, + blocksFetchConcurrency int, + compactionConcurrency int, + doRandomPick bool, + partitionVisitMarkerTimeout time.Duration, + partitionVisitMarkerReadFailed prometheus.Counter, + partitionVisitMarkerWriteFailed prometheus.Counter, + partitionedGroupInfoReadFailed prometheus.Counter, + 
partitionedGroupInfoWriteFailed prometheus.Counter, + noCompBlocksFunc func() map[ulid.ULID]*metadata.NoCompactMark, +) *PartitionCompactionGrouper { + if logger == nil { + logger = log.NewNopLogger() + } + + return &PartitionCompactionGrouper{ + ctx: ctx, + logger: logger, + bkt: bkt, + acceptMalformedIndex: acceptMalformedIndex, + enableVerticalCompaction: enableVerticalCompaction, + blocksMarkedForNoCompact: blocksMarkedForNoCompact, + hashFunc: hashFunc, + syncerMetrics: syncerMetrics, + compactorMetrics: compactorMetrics, + compactorCfg: compactorCfg, + ring: ring, + ringLifecyclerAddr: ringLifecyclerAddr, + ringLifecyclerID: ringLifecyclerID, + limits: limits, + userID: userID, + blockFilesConcurrency: blockFilesConcurrency, + blocksFetchConcurrency: blocksFetchConcurrency, + compactionConcurrency: compactionConcurrency, + doRandomPick: doRandomPick, + partitionVisitMarkerTimeout: partitionVisitMarkerTimeout, + partitionVisitMarkerReadFailed: partitionVisitMarkerReadFailed, + partitionVisitMarkerWriteFailed: partitionVisitMarkerWriteFailed, + partitionedGroupInfoReadFailed: partitionedGroupInfoReadFailed, + partitionedGroupInfoWriteFailed: partitionedGroupInfoWriteFailed, + noCompBlocksFunc: noCompBlocksFunc, + } +} + +// Groups function modified from https://github.com/cortexproject/cortex/pull/2616 +func (g *PartitionCompactionGrouper) Groups(blocks map[ulid.ULID]*metadata.Meta) (res []*compact.Group, err error) { + // Check if this compactor is on the subring. + // If the compactor is not on the subring when using the userID as a identifier + // no plans generated below will be owned by the compactor so we can just return an empty array + // as there will be no planned groups + onSubring, err := g.checkSubringForCompactor() + if err != nil { + return nil, errors.Wrap(err, "unable to check sub-ring for compactor ownership") + } + if !onSubring { + level.Debug(g.logger).Log("msg", "compactor is not on the current sub-ring skipping user", "user", g.userID) + return nil, nil + } + + // Filter out no compact blocks + noCompactMarked := g.noCompBlocksFunc() + for id, b := range blocks { + if _, excluded := noCompactMarked[b.ULID]; excluded { + delete(blocks, id) + } + } + + partitionCompactionJobs, err := g.generateCompactionJobs(blocks) + if err != nil { + return nil, errors.Wrap(err, "unable to generate compaction jobs") + } + + pickedPartitionCompactionJobs := g.pickPartitionCompactionJob(partitionCompactionJobs) + + return pickedPartitionCompactionJobs, nil +} + +// Check whether this compactor exists on the subring based on user ID +func (g *PartitionCompactionGrouper) checkSubringForCompactor() (bool, error) { + subRing := g.ring.ShuffleShard(g.userID, g.limits.CompactorTenantShardSize(g.userID)) + + rs, err := subRing.GetAllHealthy(RingOp) + if err != nil { + return false, err + } + + return rs.Includes(g.ringLifecyclerAddr), nil +} + +func (g *PartitionCompactionGrouper) generateCompactionJobs(blocks map[ulid.ULID]*metadata.Meta) ([]*blocksGroupWithPartition, error) { + timeRanges := g.compactorCfg.BlockRanges.ToMilliseconds() + + groups := g.groupBlocks(blocks, timeRanges) + + existingPartitionedGroups, err := g.loadExistingPartitionedGroups() + if err != nil { + return nil, err + } + for _, p := range existingPartitionedGroups { + var blockIDs []string + for _, b := range p.getAllBlocks() { + blockIDs = append(blockIDs, b.String()) + } + level.Info(g.logger).Log("msg", "existing partitioned group", "partitioned_group_id", p.PartitionedGroupID, "partition_count", 
p.PartitionCount, "rangeStart", p.rangeStartTime().String(), "rangeEnd", p.rangeEndTime().String(), "blocks", strings.Join(blockIDs, ",")) + } + + allPartitionedGroup, err := g.generatePartitionedGroups(blocks, groups, existingPartitionedGroups, timeRanges) + if err != nil { + return nil, err + } + g.sortPartitionedGroups(allPartitionedGroup) + for _, p := range allPartitionedGroup { + var blockIDs []string + for _, b := range p.getAllBlocks() { + blockIDs = append(blockIDs, b.String()) + } + level.Info(g.logger).Log("msg", "partitioned group ready for compaction", "partitioned_group_id", p.PartitionedGroupID, "partition_count", p.PartitionCount, "rangeStart", p.rangeStartTime().String(), "rangeEnd", p.rangeEndTime().String(), "blocks", strings.Join(blockIDs, ",")) + } + + partitionCompactionJobs := g.generatePartitionCompactionJobs(blocks, allPartitionedGroup, g.doRandomPick) + for _, p := range partitionCompactionJobs { + var blockIDs []string + for _, b := range p.blocks { + blockIDs = append(blockIDs, b.ULID.String()) + } + level.Info(g.logger).Log("msg", "partitioned compaction job", "partitioned_group_id", p.partitionedGroupInfo.PartitionedGroupID, "partition_id", p.partition.PartitionID, "partition_count", p.partitionedGroupInfo.PartitionCount, "rangeStart", p.rangeStartTime().String(), "rangeEnd", p.rangeEndTime().String(), "blocks", strings.Join(blockIDs, ",")) + } + return partitionCompactionJobs, nil +} + +func (g *PartitionCompactionGrouper) loadExistingPartitionedGroups() (map[uint32]*PartitionedGroupInfo, error) { + partitionedGroups := make(map[uint32]*PartitionedGroupInfo) + err := g.bkt.Iter(g.ctx, PartitionedGroupDirectory, func(file string) error { + if !strings.Contains(file, PartitionVisitMarkerDirectory) { + partitionedGroup, err := ReadPartitionedGroupInfoFile(g.ctx, g.bkt, g.logger, file, g.partitionedGroupInfoReadFailed) + if err != nil { + return err + } + partitionedGroups[partitionedGroup.PartitionedGroupID] = partitionedGroup + } + return nil + }) + if err != nil { + return nil, errors.Wrap(err, "unable to load existing partitioned groups") + } + return partitionedGroups, nil +} + +func (g *PartitionCompactionGrouper) groupBlocks(blocks map[ulid.ULID]*metadata.Meta, timeRanges []int64) []blocksGroupWithPartition { + // First of all we have to group blocks using the Thanos default + // grouping (based on downsample resolution + external labels). + mainGroups := map[string][]*metadata.Meta{} + for _, b := range blocks { + key := b.Thanos.GroupKey() + mainGroups[key] = append(mainGroups[key], b) + } + + var groups []blocksGroupWithPartition + for _, mainBlocks := range mainGroups { + groups = append(groups, g.groupBlocksByCompactableRanges(mainBlocks, timeRanges)...) + } + + g.sortBlockGroups(groups) + + return groups +} + +func (g *PartitionCompactionGrouper) groupBlocksByCompactableRanges(blocks []*metadata.Meta, timeRanges []int64) []blocksGroupWithPartition { + if len(blocks) == 0 { + return nil + } + + // Sort blocks by min time. + sortMetasByMinTime(blocks) + + var groups []blocksGroupWithPartition + + for _, tr := range timeRanges { + groups = append(groups, g.groupBlocksByRange(blocks, tr)...) 
+ } + + return groups +} + +func (g *PartitionCompactionGrouper) groupBlocksByRange(blocks []*metadata.Meta, tr int64) []blocksGroupWithPartition { + var ret []blocksGroupWithPartition + + for i := 0; i < len(blocks); { + var ( + group blocksGroupWithPartition + m = blocks[i] + ) + + group.rangeStart = getRangeStart(m, tr) + group.rangeEnd = group.rangeStart + tr + + // Skip blocks that don't fall into the range. This can happen via mis-alignment or + // by being the multiple of the intended range. + if m.MaxTime > group.rangeEnd { + i++ + continue + } + + // Add all blocks to the current group that are within [t0, t0+tr]. + for ; i < len(blocks); i++ { + // If the block does not start within this group, then we should break the iteration + // and move it to the next group. + if blocks[i].MinTime >= group.rangeEnd { + break + } + + // If the block doesn't fall into this group, but it started within this group then it + // means it spans across multiple ranges and we should skip it. + if blocks[i].MaxTime > group.rangeEnd { + continue + } + + group.blocks = append(group.blocks, blocks[i]) + } + + if len(group.blocks) > 1 { + ret = append(ret, group) + } + } + + return ret +} + +func (g *PartitionCompactionGrouper) sortBlockGroups(groups []blocksGroupWithPartition) { + // Ensure groups are sorted by smallest range, oldest min time first. The rationale + // is that we wanna favor smaller ranges first (ie. to deduplicate samples sooner + // than later) and older ones are more likely to be "complete" (no missing block still + // to be uploaded). + sort.SliceStable(groups, func(i, j int) bool { + iGroup := groups[i] + jGroup := groups[j] + iRangeStart := iGroup.rangeStart + iRangeEnd := iGroup.rangeEnd + jRangeStart := jGroup.rangeStart + jRangeEnd := jGroup.rangeEnd + iLength := iRangeEnd - iRangeStart + jLength := jRangeEnd - jRangeStart + + if iLength != jLength { + return iLength < jLength + } + if iRangeStart != jRangeStart { + return iRangeStart < jRangeStart + } + + iGroupHash := HashGroup(g.userID, iRangeStart, iRangeEnd) + iGroupKey := createGroupKeyWithPartition(iGroupHash, iGroup) + jGroupHash := HashGroup(g.userID, jRangeStart, jRangeEnd) + jGroupKey := createGroupKeyWithPartition(jGroupHash, jGroup) + // Guarantee stable sort for tests. 
+ return iGroupKey < jGroupKey + }) +} + +func (g *PartitionCompactionGrouper) generatePartitionedGroups(blocks map[ulid.ULID]*metadata.Meta, groups []blocksGroupWithPartition, existingPartitionedGroups map[uint32]*PartitionedGroupInfo, timeRanges []int64) ([]*PartitionedGroupInfo, error) { + var allPartitionedGroup []*PartitionedGroupInfo + for _, partitionedGroup := range existingPartitionedGroups { + status := partitionedGroup.getPartitionedGroupStatus(g.ctx, g.bkt, g.partitionVisitMarkerTimeout, g.logger, g.partitionVisitMarkerReadFailed, g.partitionVisitMarkerWriteFailed) + if !status.IsCompleted { + allPartitionedGroup = append(allPartitionedGroup, partitionedGroup) + } + } + + timeRangeChecker := NewCompletenessChecker(blocks, groups, timeRanges) + for _, startTimeMap := range timeRangeChecker.TimeRangesStatus { + for _, status := range startTimeMap { + if !status.canTakeCompaction { + level.Info(g.logger).Log("msg", "incomplete time range", "rangeStart", status.rangeStartTime().String(), "rangeEnd", status.rangeEndTime().String(), + "timeRange", status.timeRangeDuration().String(), "previousTimeRange", status.previousTimeRangeDuration().String()) + } + } + } + + for _, group := range groups { + groupHash := HashGroup(g.userID, group.rangeStart, group.rangeEnd) + logger := log.With(g.logger, "partitioned_group_id", groupHash, "rangeStart", group.rangeStartTime().String(), "rangeEnd", group.rangeEndTime().String()) + + var blockIDs []string + for _, b := range group.blocks { + blockIDs = append(blockIDs, b.ULID.String()) + } + level.Info(logger).Log("msg", "block group", "blocks", strings.Join(blockIDs, ",")) + + level.Info(logger).Log("msg", "start generating partitioned group") + if g.shouldSkipGroup(logger, group, groupHash, existingPartitionedGroups, timeRangeChecker) { + level.Info(logger).Log("msg", "skip generating partitioned group") + continue + } + partitionedGroup, err := g.generatePartitionBlockGroup(group, groupHash) + if err != nil { + return nil, errors.Wrapf(err, "unable to generate partitioned group: %d", groupHash) + } + level.Info(logger).Log("msg", "generated partitioned group") + allPartitionedGroup = append(allPartitionedGroup, partitionedGroup) + } + return allPartitionedGroup, nil +} + +func (g *PartitionCompactionGrouper) shouldSkipGroup(logger log.Logger, group blocksGroupWithPartition, partitionedGroupID uint32, existingPartitionedGroups map[uint32]*PartitionedGroupInfo, timeRangeChecker TimeRangeChecker) bool { + if _, ok := existingPartitionedGroups[partitionedGroupID]; ok { + level.Info(logger).Log("msg", "skip group", "reason", "partitioned group already exists") + return true + } + tr := group.rangeEnd - group.rangeStart + if status, ok := timeRangeChecker.TimeRangesStatus[tr][group.rangeStart]; !ok { + level.Info(logger).Log("msg", "skip group", "reason", "unable to get time range status") + return true + } else if !status.canTakeCompaction { + level.Info(logger).Log("msg", "skip group", "reason", "time range cannot take compaction job") + return true + } + + // Check if all blocks in group having same partitioned group id as destination partitionedGroupID + for _, b := range group.blocks { + partitionInfo, err := tsdb.GetPartitionInfo(*b) + if err != nil || partitionInfo == nil || partitionInfo.PartitionedGroupID != partitionedGroupID { + return false + } + } + level.Info(logger).Log("msg", "skip group", "reason", "all blocks in the group have partitioned group id equals to new group partitioned_group_id") + return true +} + +func (g 
*PartitionCompactionGrouper) generatePartitionBlockGroup(group blocksGroupWithPartition, groupHash uint32) (*PartitionedGroupInfo, error) { + partitionedGroupInfo, err := g.partitionBlockGroup(group, groupHash) + if err != nil { + return nil, err + } + updatedPartitionedGroupInfo, err := UpdatePartitionedGroupInfo(g.ctx, g.bkt, g.logger, *partitionedGroupInfo, g.partitionedGroupInfoReadFailed, g.partitionedGroupInfoWriteFailed) + if err != nil { + return nil, err + } + return updatedPartitionedGroupInfo, nil +} + +func (g *PartitionCompactionGrouper) partitionBlockGroup(group blocksGroupWithPartition, groupHash uint32) (*PartitionedGroupInfo, error) { + partitionCount := g.calculatePartitionCount(group, groupHash) + blocksByMinTime := g.groupBlocksByMinTime(group) + partitionedGroups, err := g.partitionBlocksGroup(partitionCount, blocksByMinTime, group.rangeStart, group.rangeEnd) + if err != nil { + return nil, err + } + + var partitions []Partition + for partitionID := 0; partitionID < partitionCount; partitionID++ { + partitionedGroup := partitionedGroups[partitionID] + var blockIDs []ulid.ULID + for _, m := range partitionedGroup.blocks { + blockIDs = append(blockIDs, m.ULID) + } + partitions = append(partitions, Partition{ + PartitionID: partitionID, + Blocks: blockIDs, + }) + } + partitionedGroupInfo := PartitionedGroupInfo{ + PartitionedGroupID: groupHash, + PartitionCount: partitionCount, + Partitions: partitions, + RangeStart: group.rangeStart, + RangeEnd: group.rangeEnd, + Version: PartitionedGroupInfoVersion1, + } + return &partitionedGroupInfo, nil +} + +func (g *PartitionCompactionGrouper) calculatePartitionCount(group blocksGroupWithPartition, groupHash uint32) int { + indexSizeLimit := g.limits.CompactorPartitionIndexSizeLimitInBytes(g.userID) + seriesCountLimit := g.limits.CompactorPartitionSeriesCountLimit(g.userID) + smallestRange := g.compactorCfg.BlockRanges.ToMilliseconds()[0] + groupRange := group.rangeLength() + if smallestRange >= groupRange { + level.Info(g.logger).Log("msg", "use level 1 block limits", "partitioned_group_id", groupHash, "smallestRange", smallestRange, "groupRange", groupRange) + indexSizeLimit = g.limits.CompactorPartitionLevel1IndexSizeLimitInBytes(g.userID) + seriesCountLimit = g.limits.CompactorPartitionLevel1SeriesCountLimit(g.userID) + } + + totalIndexSizeInBytes := int64(0) + totalSeriesCount := int64(0) + for _, block := range group.blocks { + blockFiles := block.Thanos.Files + totalSeriesCount += int64(block.Stats.NumSeries) + var indexFile *metadata.File + for _, file := range blockFiles { + if file.RelPath == thanosblock.IndexFilename { + indexFile = &file + } + } + if indexFile == nil { + level.Debug(g.logger).Log("msg", "unable to find index file in metadata", "block", block.ULID) + break + } + indexSize := indexFile.SizeBytes + totalIndexSizeInBytes += indexSize + } + partitionNumberBasedOnIndex := 1 + if indexSizeLimit > 0 && totalIndexSizeInBytes > indexSizeLimit { + partitionNumberBasedOnIndex = g.findNearestPartitionNumber(float64(totalIndexSizeInBytes), float64(indexSizeLimit)) + } + partitionNumberBasedOnSeries := 1 + if seriesCountLimit > 0 && totalSeriesCount > seriesCountLimit { + partitionNumberBasedOnSeries = g.findNearestPartitionNumber(float64(totalSeriesCount), float64(seriesCountLimit)) + } + partitionNumber := partitionNumberBasedOnIndex + if partitionNumberBasedOnSeries > partitionNumberBasedOnIndex { + partitionNumber = partitionNumberBasedOnSeries + } + level.Info(g.logger).Log("msg", "calculated partition number for 
+func (g *PartitionCompactionGrouper) findNearestPartitionNumber(size float64, limit float64) int {
+	return int(math.Pow(2, math.Ceil(math.Log2(size/limit))))
+}
+
+func (g *PartitionCompactionGrouper) groupBlocksByMinTime(group blocksGroupWithPartition) map[int64][]*metadata.Meta {
+	blocksByMinTime := make(map[int64][]*metadata.Meta)
+	for _, block := range group.blocks {
+		blockRange := block.MaxTime - block.MinTime
+		minTime := block.MinTime
+		for _, tr := range g.compactorCfg.BlockRanges.ToMilliseconds() {
+			if blockRange <= tr {
+				minTime = tr * (block.MinTime / tr)
+				break
+			}
+		}
+		blocksByMinTime[minTime] = append(blocksByMinTime[minTime], block)
+	}
+	return blocksByMinTime
+}
+
+func (g *PartitionCompactionGrouper) partitionBlocksGroup(partitionCount int, blocksByMinTime map[int64][]*metadata.Meta, rangeStart int64, rangeEnd int64) (map[int]blocksGroupWithPartition, error) {
+	partitionedGroups := make(map[int]blocksGroupWithPartition)
+	addToPartitionedGroups := func(blocks []*metadata.Meta, partitionID int) {
+		if _, ok := partitionedGroups[partitionID]; !ok {
+			partitionedGroups[partitionID] = blocksGroupWithPartition{
+				rangeStart: rangeStart,
+				rangeEnd: rangeEnd,
+				blocks: []*metadata.Meta{},
+			}
+		}
+		partitionedGroup := partitionedGroups[partitionID]
+		partitionedGroup.blocks = append(partitionedGroup.blocks, blocks...)
+		partitionedGroups[partitionID] = partitionedGroup
+	}
+
+	for _, blocksInSameTimeInterval := range blocksByMinTime {
+		for _, block := range blocksInSameTimeInterval {
+			partitionInfo, err := tsdb.GetPartitionInfo(*block)
+			if err != nil {
+				return nil, err
+			}
+			if partitionInfo == nil || partitionInfo.PartitionCount < 1 {
+				// For legacy blocks with level > 1, treat PartitionID as always 0,
+				// so the block can be included in every partition.
+				defaultPartitionInfo := tsdb.DefaultPartitionInfo
+				partitionInfo = &defaultPartitionInfo
+			}
+			// A block produced with fewer partitions than the target count is fanned out to every
+			// target partition congruent to its PartitionID modulo its own PartitionCount; a block
+			// produced with more partitions is folded into PartitionID % partitionCount.
+			if partitionInfo.PartitionCount < partitionCount {
+				for partitionID := partitionInfo.PartitionID; partitionID < partitionCount; partitionID += partitionInfo.PartitionCount {
+					addToPartitionedGroups([]*metadata.Meta{block}, partitionID)
+				}
+			} else if partitionInfo.PartitionCount == partitionCount {
+				addToPartitionedGroups([]*metadata.Meta{block}, partitionInfo.PartitionID)
+			} else {
+				addToPartitionedGroups([]*metadata.Meta{block}, partitionInfo.PartitionID%partitionCount)
+			}
+		}
+	}
+	return partitionedGroups, nil
+}
+
+func (g *PartitionCompactionGrouper) sortPartitionedGroups(partitionedGroups []*PartitionedGroupInfo) {
+	// Ensure groups are sorted by smallest range, oldest min time first. The rationale
+	// is that we want to favor smaller ranges first (i.e. to deduplicate samples sooner
+	// rather than later) and older ones are more likely to be "complete" (no missing block
+	// still to be uploaded).
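+	// Ties on range length and range start fall back to PartitionedGroupID, so the
+	// resulting order is fully deterministic.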
+	sort.SliceStable(partitionedGroups, func(i, j int) bool {
+		iGroup := partitionedGroups[i]
+		jGroup := partitionedGroups[j]
+		iRangeStart := iGroup.RangeStart
+		iRangeEnd := iGroup.RangeEnd
+		jRangeStart := jGroup.RangeStart
+		jRangeEnd := jGroup.RangeEnd
+		iLength := iRangeEnd - iRangeStart
+		jLength := jRangeEnd - jRangeStart
+
+		if iLength != jLength {
+			return iLength < jLength
+		}
+		if iRangeStart != jRangeStart {
+			return iRangeStart < jRangeStart
+		}
+		// Guarantee stable sort for tests.
+		return iGroup.PartitionedGroupID < jGroup.PartitionedGroupID
+	})
+}
+
+func (g *PartitionCompactionGrouper) generatePartitionCompactionJobs(blocks map[ulid.ULID]*metadata.Meta, partitionedGroups []*PartitionedGroupInfo, doRandomPick bool) []*blocksGroupWithPartition {
+	var partitionedBlockGroups []*blocksGroupWithPartition
+	for _, partitionedGroupInfo := range partitionedGroups {
+		partitionedGroupID := partitionedGroupInfo.PartitionedGroupID
+		partitionAdded := 0
+		var partitionIDs []int
+		if doRandomPick {
+			// Randomly pick partitions from the partitioned group to avoid all compactors
+			// trying to get the same partition at the same time.
+			r := rand.New(rand.NewSource(time.Now().UnixMicro() + int64(hashString(g.ringLifecyclerID))))
+			partitionIDs = r.Perm(len(partitionedGroupInfo.Partitions))
+		} else {
+			for i := 0; i < partitionedGroupInfo.PartitionCount; i++ {
+				partitionIDs = append(partitionIDs, i)
+			}
+		}
+		for _, i := range partitionIDs {
+			partition := partitionedGroupInfo.Partitions[i]
+			if len(partition.Blocks) == 1 {
+				partition.Blocks = append(partition.Blocks, DUMMY_BLOCK_ID)
+				level.Info(g.logger).Log("msg", "handled single block in partition", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID, "partition_count", partitionedGroupInfo.PartitionCount, "partition_id", partition.PartitionID)
+			} else if len(partition.Blocks) < 1 {
+				if err := g.handleEmptyPartition(partitionedGroupInfo, partition); err != nil {
+					level.Warn(g.logger).Log("msg", "failed to handle empty partition", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID, "partition_count", partitionedGroupInfo.PartitionCount, "partition_id", partition.PartitionID, "err", err)
+				}
+				continue
+			}
+			partitionedGroup, err := createBlocksGroup(blocks, partition.Blocks, partitionedGroupInfo.RangeStart, partitionedGroupInfo.RangeEnd)
+			if err != nil {
+				continue
+			}
+			partitionedGroup.groupHash = partitionedGroupID
+			partitionedGroup.partitionedGroupInfo = partitionedGroupInfo
+			partitionedGroup.partition = partition
+			partitionedBlockGroups = append(partitionedBlockGroups, partitionedGroup)
+			partitionAdded++
+		}
+	}
+	return partitionedBlockGroups
+}
+
+func (g *PartitionCompactionGrouper) handleEmptyPartition(partitionedGroupInfo *PartitionedGroupInfo, partition Partition) error {
+	if len(partition.Blocks) > 0 {
+		return nil
+	}
+
+	level.Info(g.logger).Log("msg", "handling empty block partition", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID, "partition_count", partitionedGroupInfo.PartitionCount, "partition_id", partition.PartitionID)
+	partitionVisitMarker := &PartitionVisitMarker{
+		PartitionedGroupID: partitionedGroupInfo.PartitionedGroupID,
+		PartitionID: partition.PartitionID,
+		Version: PartitionVisitMarkerVersion1,
+	}
+	visitMarkerManager := NewVisitMarkerManager(g.bkt, g.logger, g.ringLifecyclerID, partitionVisitMarker, g.partitionVisitMarkerReadFailed, g.partitionVisitMarkerWriteFailed)
+	visitMarkerManager.MarkCompleted(g.ctx)
+
+	level.Info(g.logger).Log("msg", "handled 
empty block in partition", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID, "partition_count", partitionedGroupInfo.PartitionCount, "partition_id", partition.PartitionID) + return nil +} + +func (g *PartitionCompactionGrouper) pickPartitionCompactionJob(partitionCompactionJobs []*blocksGroupWithPartition) []*compact.Group { + var outGroups []*compact.Group + for _, partitionedGroup := range partitionCompactionJobs { + groupHash := partitionedGroup.groupHash + partitionedGroupID := partitionedGroup.partitionedGroupInfo.PartitionedGroupID + partitionCount := partitionedGroup.partitionedGroupInfo.PartitionCount + partitionID := partitionedGroup.partition.PartitionID + partitionedGroupLogger := log.With(g.logger, "rangeStart", partitionedGroup.rangeStartTime().String(), "rangeEnd", partitionedGroup.rangeEndTime().String(), "rangeDuration", partitionedGroup.rangeDuration().String(), "partitioned_group_id", partitionedGroupID, "partition_id", partitionID, "partition_count", partitionCount, "group_hash", groupHash) + partitionVisitMarker := NewPartitionVisitMarker(g.ringLifecyclerID, partitionedGroupID, partitionID) + visitMarkerManager := NewVisitMarkerManager(g.bkt, g.logger, g.ringLifecyclerID, partitionVisitMarker, g.partitionVisitMarkerReadFailed, g.partitionVisitMarkerWriteFailed) + if isVisited, err := g.isGroupVisited(partitionID, visitMarkerManager); err != nil { + level.Warn(partitionedGroupLogger).Log("msg", "unable to check if partition is visited", "err", err, "group", partitionedGroup.String()) + continue + } else if isVisited { + level.Info(partitionedGroupLogger).Log("msg", "skipping group because partition is visited") + continue + } + partitionedGroupKey := createGroupKeyWithPartitionID(groupHash, partitionID, *partitionedGroup) + + level.Info(partitionedGroupLogger).Log("msg", "found compactable group for user", "group", partitionedGroup.String()) + begin := time.Now() + + visitMarkerManager.MarkPending(g.ctx) + level.Info(partitionedGroupLogger).Log("msg", "marked partition visited in group", "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds(), "group", partitionedGroup.String()) + + resolution := partitionedGroup.blocks[0].Thanos.Downsample.Resolution + externalLabels := labels.FromMap(partitionedGroup.blocks[0].Thanos.Labels) + timeRange := partitionedGroup.rangeEnd - partitionedGroup.rangeStart + metricLabelValues := []string{ + g.userID, + fmt.Sprintf("%d", timeRange), + } + g.compactorMetrics.initMetricWithCompactionLabelValues(metricLabelValues...) + g.compactorMetrics.partitionCount.WithLabelValues(metricLabelValues...).Set(float64(partitionCount)) + thanosGroup, err := compact.NewGroup( + log.With(partitionedGroupLogger, "groupKey", partitionedGroupKey, "externalLabels", externalLabels, "downsampleResolution", resolution), + g.bkt, + partitionedGroupKey, + externalLabels, + resolution, + g.acceptMalformedIndex, + true, // Enable vertical compaction. 
+ g.compactorMetrics.compactions.WithLabelValues(metricLabelValues...), + g.compactorMetrics.compactionRunsStarted.WithLabelValues(metricLabelValues...), + g.compactorMetrics.compactionRunsCompleted.WithLabelValues(metricLabelValues...), + g.compactorMetrics.compactionFailures.WithLabelValues(metricLabelValues...), + g.compactorMetrics.verticalCompactions.WithLabelValues(metricLabelValues...), + g.syncerMetrics.GarbageCollectedBlocks, + g.syncerMetrics.BlocksMarkedForDeletion, + g.blocksMarkedForNoCompact, + g.hashFunc, + g.blockFilesConcurrency, + g.blocksFetchConcurrency, + ) + if err != nil { + level.Error(partitionedGroupLogger).Log("msg", "failed to create partitioned group", "blocks", partitionedGroup.partition.Blocks) + } + + for _, m := range partitionedGroup.blocks { + if err := thanosGroup.AppendMeta(m); err != nil { + level.Error(partitionedGroupLogger).Log("msg", "failed to add block to partitioned group", "block", m.ULID, "err", err) + } + } + thanosGroup.SetExtensions(&tsdb.CortexMetaExtensions{ + PartitionInfo: &tsdb.PartitionInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: partitionCount, + PartitionID: partitionID, + PartitionedGroupCreationTime: partitionedGroup.partitionedGroupInfo.CreationTime, + }, + TimeRange: timeRange, + }) + + outGroups = append(outGroups, thanosGroup) + level.Debug(partitionedGroupLogger).Log("msg", "added partition to compaction groups") + if len(outGroups) >= g.compactionConcurrency { + break + } + } + + level.Info(g.logger).Log("msg", fmt.Sprintf("total groups for compaction: %d", len(outGroups))) + + for _, p := range outGroups { + partitionInfo, err := tsdb.ConvertToPartitionInfo(p.Extensions()) + if err == nil && partitionInfo != nil { + level.Info(g.logger).Log("msg", "picked compaction job", "partitioned_group_id", partitionInfo.PartitionedGroupID, "partition_count", partitionInfo.PartitionCount) + } + } + return outGroups +} + +func (g *PartitionCompactionGrouper) isGroupVisited(partitionID int, visitMarkerManager *VisitMarkerManager) (bool, error) { + partitionVisitMarker := &PartitionVisitMarker{} + err := visitMarkerManager.ReadVisitMarker(g.ctx, partitionVisitMarker) + if err != nil { + if errors.Is(err, ErrorVisitMarkerNotFound) { + level.Warn(g.logger).Log("msg", "no visit marker file for partition", "partition_visit_marker_file", visitMarkerManager.visitMarker.GetVisitMarkerFilePath()) + return false, nil + } + level.Error(g.logger).Log("msg", "unable to read partition visit marker file", "partition_visit_marker_file", visitMarkerManager.visitMarker.GetVisitMarkerFilePath(), "err", err) + return true, err + } + if partitionVisitMarker.IsCompleted() { + level.Info(g.logger).Log("msg", "partition visit marker with partition ID is completed", partitionVisitMarker.LogInfo()) + return true, nil + } + if partitionVisitMarker.IsVisited(g.partitionVisitMarkerTimeout, partitionID) { + level.Info(g.logger).Log("msg", "visited partition with partition ID", partitionVisitMarker.LogInfo()) + return true, nil + } + return false, nil +} + +type TimeRangeChecker struct { + // This is a map of timeRange to a map of rangeStart to timeRangeStatus + TimeRangesStatus map[int64]map[int64]*timeRangeStatus +} + +func NewCompletenessChecker(blocks map[ulid.ULID]*metadata.Meta, groups []blocksGroupWithPartition, timeRanges []int64) TimeRangeChecker { + timeRangeToBlockMap := make(map[int64][]*metadata.Meta) + for _, b := range blocks { + timeRange := int64(0) + if b.Compaction.Level > 1 { + ext, err := 
tsdb.GetCortexMetaExtensionsFromMeta(*b) + if err == nil && ext != nil && ext.TimeRange > 0 { + timeRange = ext.TimeRange + } else { + // fallback logic to guess block time range based + // on MaxTime and MinTime + blockRange := b.MaxTime - b.MinTime + for _, tr := range timeRanges { + rangeStart := getRangeStart(b, tr) + rangeEnd := rangeStart + tr + if tr >= blockRange && rangeEnd >= b.MaxTime { + timeRange = tr + break + } + } + } + } + timeRangeToBlockMap[timeRange] = append(timeRangeToBlockMap[timeRange], b) + } + timeRangesStatus := make(map[int64]map[int64]*timeRangeStatus) + for _, g := range groups { + tr := g.rangeEnd - g.rangeStart + if _, ok := timeRangesStatus[tr]; !ok { + timeRangesStatus[tr] = make(map[int64]*timeRangeStatus) + } + timeRangesStatus[tr][g.rangeStart] = &timeRangeStatus{ + timeRange: tr, + rangeStart: g.rangeStart, + rangeEnd: g.rangeEnd, + numActiveBlocks: 0, + canTakeCompaction: false, + } + } + for tr, blks := range timeRangeToBlockMap { + if _, ok := timeRangesStatus[tr]; !ok { + timeRangesStatus[tr] = make(map[int64]*timeRangeStatus) + } + for _, b := range blks { + actualTr := tr + if tr == 0 { + actualTr = timeRanges[0] + } + rangeStart := getRangeStart(b, actualTr) + if _, ok := timeRangesStatus[tr][rangeStart]; !ok { + timeRangesStatus[tr][rangeStart] = &timeRangeStatus{ + timeRange: tr, + rangeStart: rangeStart, + rangeEnd: rangeStart + actualTr, + numActiveBlocks: 0, + canTakeCompaction: false, + } + } + timeRangesStatus[tr][rangeStart].addBlock(1) + } + } + previousTimeRanges := []int64{0} + for _, tr := range timeRanges { + timeRangeLoop: + for rangeStart, status := range timeRangesStatus[tr] { + previousTrBlocks := 0 + for _, previousTr := range previousTimeRanges { + allPreviousTimeRanges := getAllPreviousTimeRanges(tr, rangeStart, previousTr, timeRanges[0]) + for _, previousRangeStart := range allPreviousTimeRanges { + if previousTrStatus, ok := timeRangesStatus[previousTr][previousRangeStart]; ok { + if previousTrStatus.canTakeCompaction { + status.canTakeCompaction = false + continue timeRangeLoop + } + previousTrBlocks += previousTrStatus.numActiveBlocks + } + } + } + status.canTakeCompaction = !(previousTrBlocks == 0 || (previousTrBlocks == 1 && status.numActiveBlocks == 0)) + } + previousTimeRanges = append(previousTimeRanges, tr) + } + return TimeRangeChecker{TimeRangesStatus: timeRangesStatus} +} + +// getAllPreviousTimeRanges returns a list of rangeStart time for previous time range that +// falls within current time range and start time +func getAllPreviousTimeRanges(currentTr int64, rangeStart int64, previousTr int64, smallestTr int64) []int64 { + var result []int64 + if previousTr == 0 { + previousTr = smallestTr + } + previousRangeStart := rangeStart + for ; previousRangeStart+previousTr <= rangeStart+currentTr; previousRangeStart += previousTr { + result = append(result, previousRangeStart) + } + return result +} + +type timeRangeStatus struct { + timeRange int64 + rangeStart int64 + rangeEnd int64 + numActiveBlocks int + canTakeCompaction bool + previousTimeRange int64 +} + +func (t *timeRangeStatus) addBlock(num int) { + t.numActiveBlocks += num +} + +func (t *timeRangeStatus) rangeStartTime() time.Time { + return time.Unix(0, t.rangeStart*int64(time.Millisecond)).UTC() +} + +func (t *timeRangeStatus) rangeEndTime() time.Time { + return time.Unix(0, t.rangeEnd*int64(time.Millisecond)).UTC() +} + +func (t *timeRangeStatus) timeRangeDuration() time.Duration { + return time.Duration(t.timeRange) * time.Millisecond +} + +func (t 
*timeRangeStatus) previousTimeRangeDuration() time.Duration { + return time.Duration(t.previousTimeRange) * time.Millisecond +} + +type blocksGroupWithPartition struct { + blocksGroup + rangeStart int64 // Included. + rangeEnd int64 // Excluded. + blocks []*metadata.Meta + groupHash uint32 + partitionedGroupInfo *PartitionedGroupInfo + partition Partition +} + +func (g blocksGroupWithPartition) rangeDuration() time.Duration { + return g.rangeEndTime().Sub(g.rangeStartTime()) +} + +func createGroupKeyWithPartition(groupHash uint32, group blocksGroupWithPartition) string { + return fmt.Sprintf("%v%s", groupHash, group.blocks[0].Thanos.GroupKey()) +} + +func createGroupKeyWithPartitionID(groupHash uint32, partitionID int, group blocksGroupWithPartition) string { + return fmt.Sprintf("%v%d%s", groupHash, partitionID, group.blocks[0].Thanos.GroupKey()) +} + +func createBlocksGroup(blocks map[ulid.ULID]*metadata.Meta, blockIDs []ulid.ULID, rangeStart int64, rangeEnd int64) (*blocksGroupWithPartition, error) { + var group blocksGroupWithPartition + group.rangeStart = rangeStart + group.rangeEnd = rangeEnd + var nonDummyBlock *metadata.Meta + for _, blockID := range blockIDs { + if blockID == DUMMY_BLOCK_ID { + continue + } + m, ok := blocks[blockID] + if !ok { + return nil, fmt.Errorf("block not found: %s", blockID) + } + nonDummyBlock = m + group.blocks = append(group.blocks, m) + } + for _, blockID := range blockIDs { + if blockID == DUMMY_BLOCK_ID { + dummyMeta := *nonDummyBlock + dummyMeta.ULID = DUMMY_BLOCK_ID + group.blocks = append(group.blocks, &dummyMeta) + } + } + return &group, nil +} diff --git a/pkg/compactor/partition_compaction_grouper_test.go b/pkg/compactor/partition_compaction_grouper_test.go new file mode 100644 index 0000000000..2a7554383a --- /dev/null +++ b/pkg/compactor/partition_compaction_grouper_test.go @@ -0,0 +1,2147 @@ +package compactor + +import ( + "context" + "encoding/json" + "fmt" + "path" + "testing" + "time" + + "github.com/oklog/ulid" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/tsdb" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" + thanosblock "github.com/thanos-io/thanos/pkg/block" + "github.com/thanos-io/thanos/pkg/block/metadata" + + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/storage/bucket" + cortextsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" + "github.com/cortexproject/cortex/pkg/util/validation" +) + +var ( + M = time.Minute.Milliseconds() + H = time.Hour.Milliseconds() +) + +func TestPartitionCompactionGrouper_GenerateCompactionJobs(t *testing.T) { + block1 := ulid.MustNew(1, nil) + block2 := ulid.MustNew(2, nil) + block3 := ulid.MustNew(3, nil) + block4 := ulid.MustNew(4, nil) + block5 := ulid.MustNew(5, nil) + block6 := ulid.MustNew(6, nil) + block7 := ulid.MustNew(7, nil) + + testCompactorID := "test-compactor" + //otherCompactorID := "other-compactor" + + userID := "test-user" + partitionedGroupID_0_2 := HashGroup(userID, 0*H, 2*H) + partitionedGroupID_0_12 := HashGroup(userID, 0*H, 12*H) + partitionedGroupID_0_24 := HashGroup(userID, 0*H, 24*H) + + tests := map[string]generateCompactionJobsTestCase{ + "only level 1 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, 
+ timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + {blocks: []ulid.ULID{block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 2 * H, rangeEnd: 4 * H}, + }, + }, + "only level 1 blocks, there is existing partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 2 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + {blocks: []ulid.ULID{block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 2 * H, rangeEnd: 4 * H}, + }, + }, + "only level 1 blocks, there are existing partitioned group files for all blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: 
tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 2 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2}}, + }}, + {rangeStart: 2 * H, rangeEnd: 4 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block3, block4}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + {blocks: []ulid.ULID{block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 2 * H, rangeEnd: 4 * H}, + }, + }, + "only level 2 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "only level 2 blocks, there is existing partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + 
timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2, block3, block4}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "only level 2 blocks from same time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "mix level 1 and level 2 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, 
+ timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 10 * H, rangeEnd: 12 * H}, + }, + }, + "mix level 1 and level 2 blocks, there is partitioned group file for level 1 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 10 * H, rangeEnd: 12 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block4, block5}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block4, block5}, 
partitionCount: 1, partitionID: 0, rangeStart: 10 * H, rangeEnd: 12 * H}, + }, + }, + "mix level 1 and level 2 blocks in different time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 12 * H, rangeEnd: 14 * H}, + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "mix level 1 and level 2 blocks in different time range, there are partitioned group files for all groups": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + 
BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2, block3}}, + }}, + {rangeStart: 12 * H, rangeEnd: 14 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block4, block5}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 12 * H, rangeEnd: 14 * H}, + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "level 2 blocks along with level 3 blocks from some of partitions, level 1 blocks in different time range, there are partitioned group files for all groups": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 2}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 22 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 22 * H, MaxTime: 24 * H, 
Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 4, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block3}}, + {PartitionID: 1, Blocks: []ulid.ULID{block2, block3}}, + {PartitionID: 2, Blocks: []ulid.ULID{block1, block3}}, + {PartitionID: 3, Blocks: []ulid.ULID{block2, block3}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 0: {partitionID: 0, compactorID: testCompactorID, isExpired: true, status: Completed}, + 2: {partitionID: 2, compactorID: testCompactorID, isExpired: false, status: Completed}, + }}, + {rangeStart: 22 * H, rangeEnd: 24 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block6, block7}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block6, block7}, partitionCount: 1, partitionID: 0, rangeStart: 22 * H, rangeEnd: 24 * H}, + {blocks: []ulid.ULID{block1, block3}, partitionCount: 4, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block2, block3}, partitionCount: 4, partitionID: 1, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block1, block3}, partitionCount: 4, partitionID: 2, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block2, block3}, partitionCount: 4, partitionID: 3, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "level 2 blocks in first 12h are all complete, level 2 blocks in second 12h have not started compaction, there is no partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 6 * H, MaxTime: 8 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, 
PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block6, block7}, partitionCount: 1, partitionID: 0, rangeStart: 12 * H, rangeEnd: 14 * H}, + {blocks: []ulid.ULID{block1, block2, block3, block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "level 2 blocks are all complete, there is no partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 14 * H, MaxTime: 16 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: 
cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block6, block7}, partitionCount: 1, partitionID: 0, rangeStart: 12 * H, rangeEnd: 24 * H}, + }, + }, + "level 2 blocks are complete only in second half of 12h, there is existing partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 2, partitions: []Partition{ + {PartitionID: 0, Blocks: 
[]ulid.ULID{block1, block2, block3, block4}}, + {PartitionID: 1, Blocks: []ulid.ULID{block1, block2, block3, block4}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 1: {partitionID: 1, compactorID: testCompactorID, isExpired: true, status: Completed}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 2, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 2, partitionID: 1, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "level 3 blocks are complete, there are some level 2 blocks not deleted, there is existing partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, 
partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2, block3, block4}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 0: {partitionID: 0, compactorID: testCompactorID, isExpired: true, status: Completed}, + }}, + }, + expected: []expectedCompactionJob{ + // nothing should be grouped. cleaner should mark all level 2 blocks for deletion + // and delete partitioned group file since level 2 to level 3 compaction is complete + }, + }, + "recompact one level 1 block with level 2 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact one level 1 block with level 2 blocks in same and different time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact one level 1 block with 
level 2 blocks all in different time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 1 block with level 2 blocks and level 3 block": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + 
hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact two level 1 block with level 2 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact one level 1 block with one level 3 block": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 1 block with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, 
Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact two level 1 block in same time range with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact two level 1 block in different time range with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + 
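// The cases below cover re-compaction of stray lower level blocks that show up next to already + // compacted higher level blocks: in each case the grouper is expected to emit a single + // non-partitioned job (partitionCount 1, partitionID 0) covering the affected range. +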
"recompact one level 1 block with one level 4 block": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 22 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact one level 1 block with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact two level 1 blocks in different time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 22 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, 
PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact one level 2 block with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact two level 2 blocks from different time range with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, 
rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact two level 2 blocks from same time range with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 2 block with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact two level 2 blocks from different time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: 
map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact two level 2 blocks from same time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one 
level 3 block with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact two level 3 blocks from different time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact two level 3 blocks from same time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: 
tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "blocks with partition info should be assigned to correct partition": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 1}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 2}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: 
cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 3}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block3, block5, block7}, partitionCount: 2, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block2, block4, block6, block7}, partitionCount: 2, partitionID: 1, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "one of the partitions got only one block": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}, Stats: tsdb.BlockStats{NumSeries: 2}}, + Thanos: metadata.Thanos{Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: 
[]expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 2, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + {blocks: []ulid.ULID{block3, DUMMY_BLOCK_ID}, partitionCount: 2, partitionID: 1, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "not all level 2 blocks are in bucket index": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 2 * H, partitionCount: 2, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{ulid.MustNew(99, nil), ulid.MustNew(98, nil)}}, + {PartitionID: 1, Blocks: []ulid.ULID{ulid.MustNew(99, nil), ulid.MustNew(98, nil)}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 0: {partitionID: 0, compactorID: testCompactorID, isExpired: true, status: Completed}, + 1: {partitionID: 1, compactorID: testCompactorID, isExpired: true, status: Completed}, + }}, + }, + expected: []expectedCompactionJob{}, + }, + "not all level 2 blocks are in bucket index and there are late level 1 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 2 * H, partitionCount: 2, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{ulid.MustNew(99, nil), ulid.MustNew(98, nil)}}, + {PartitionID: 1, Blocks: []ulid.ULID{ulid.MustNew(99, nil), ulid.MustNew(98, nil)}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 0: {partitionID: 0, compactorID: testCompactorID, isExpired: true, status: Completed}, + 1: {partitionID: 1, compactorID: testCompactorID, isExpired: true, status: Completed}, + }}, + }, + expected: []expectedCompactionJob{}, + }, + "level 2 blocks all have same partitioned group id as destination group": { + ranges: []time.Duration{2 * time.Hour, 12 * 
time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_12, PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_12, PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{}, + }, + "level 3 blocks all have same partitioned group id as destination group": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_24, PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_24, PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{}, + }, + "level 2 blocks not all have same partitioned group id as destination group": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_12, PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_12, PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_2, PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * 
time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 1 block with level 4 blocks with data only in part of time range across smaller time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10*H + 49*M, MaxTime: 11*H + 47*M, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 10*H + 49*M, MaxTime: 16 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact one level 1 block with level 4 blocks with time range in meta and data only in part of time range in same smaller time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10*H + 49*M, MaxTime: 11*H + 47*M, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 1 * H, MaxTime: 10 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{TimeRange: 24 * H, PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact one level 1 block with level 4 blocks with no time range in meta and data only in part of time range in same smaller time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10*H + 49*M, MaxTime: 11*H + 47*M, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 1 * H, MaxTime: 10 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + } + + for testName, testCase := range tests { + t.Run(testName, func(t *testing.T) { + compactorCfg := &Config{ + BlockRanges: testCase.ranges, + } + + limits := &validation.Limits{ + 
CompactorPartitionSeriesCountLimit: 4, + CompactorPartitionLevel1SeriesCountLimit: 4, + } + overrides, err := validation.NewOverrides(*limits, nil) + require.NoError(t, err) + + // Setup mocking of the ring so that the grouper will own all the shards + rs := ring.ReplicationSet{ + Instances: []ring.InstanceDesc{ + {Addr: "test-addr"}, + }, + } + subring := &RingMock{} + subring.On("GetAllHealthy", mock.Anything).Return(rs, nil) + subring.On("Get", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(rs, nil) + + ring := &RingMock{} + ring.On("ShuffleShard", mock.Anything, mock.Anything).Return(subring, nil) + + registerer := prometheus.NewPedanticRegistry() + visitMarkerReadFailed := prometheus.NewCounter(prometheus.CounterOpts{}) + visitMarkerWriteFailed := prometheus.NewCounter(prometheus.CounterOpts{}) + partitionedGroupInfoReadFailed := prometheus.NewCounter(prometheus.CounterOpts{}) + partitionedGroupInfoWriteFailed := prometheus.NewCounter(prometheus.CounterOpts{}) + + metrics := newCompactorMetrics(registerer) + + noCompactFilter := testCase.getNoCompactFilter() + + bkt := &bucket.ClientMock{} + visitMarkerTimeout := 5 * time.Minute + testCase.setupBucketStore(t, bkt, userID, visitMarkerTimeout) + bkt.MockUpload(mock.Anything, nil) + bkt.MockGet(mock.Anything, "", nil) + bkt.MockIter(mock.Anything, nil, nil) + + for _, b := range testCase.blocks { + b.fixPartitionInfo(t, userID) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + g := NewPartitionCompactionGrouper( + ctx, + nil, + objstore.WithNoopInstr(bkt), + false, // Do not accept malformed indexes + true, // Enable vertical compaction + nil, + metrics.getSyncerMetrics(userID), + metrics, + metadata.NoneFunc, + *compactorCfg, + ring, + "test-addr", + testCompactorID, + overrides, + userID, + 10, + 3, + 1, + false, + visitMarkerTimeout, + visitMarkerReadFailed, + visitMarkerWriteFailed, + partitionedGroupInfoReadFailed, + partitionedGroupInfoWriteFailed, + noCompactFilter, + ) + actual, err := g.generateCompactionJobs(testCase.getBlocks()) + require.NoError(t, err) + require.Len(t, actual, len(testCase.expected)) + + for idx, expectedGroup := range testCase.expected { + actualGroup := actual[idx] + actualBlocks := actualGroup.blocks + require.Equal(t, expectedGroup.rangeStart, actualGroup.partitionedGroupInfo.RangeStart) + require.Equal(t, expectedGroup.rangeEnd, actualGroup.partitionedGroupInfo.RangeEnd) + require.Equal(t, expectedGroup.partitionCount, actualGroup.partitionedGroupInfo.PartitionCount) + require.Equal(t, expectedGroup.partitionID, actualGroup.partition.PartitionID) + require.Len(t, actualBlocks, len(expectedGroup.blocks)) + for _, b := range actualBlocks { + require.Contains(t, expectedGroup.blocks, b.ULID) + } + } + }) + } +} + +type generateCompactionJobsTestCase struct { + ranges []time.Duration + blocks map[ulid.ULID]mockBlock + existingPartitionedGroups []mockExistingPartitionedGroup + expected []expectedCompactionJob +} + +func (g *generateCompactionJobsTestCase) setupBucketStore(t *testing.T, bkt *bucket.ClientMock, userID string, visitMarkerTimeout time.Duration) { + var existingPartitionedGroupFiles []string + for _, existingPartitionedGroup := range g.existingPartitionedGroups { + partitionedGroupFilePath := existingPartitionedGroup.setupBucketStore(t, bkt, userID, visitMarkerTimeout) + existingPartitionedGroupFiles = append(existingPartitionedGroupFiles, partitionedGroupFilePath) + } + bkt.MockIter(PartitionedGroupDirectory, 
existingPartitionedGroupFiles, nil) +} + +func (g *generateCompactionJobsTestCase) getNoCompactFilter() func() map[ulid.ULID]*metadata.NoCompactMark { + noCompactBlocks := make(map[ulid.ULID]*metadata.NoCompactMark) + for id, b := range g.blocks { + if b.hasNoCompactMark { + noCompactBlocks[id] = &metadata.NoCompactMark{ + ID: id, + NoCompactTime: time.Now().Add(-1 * time.Hour).Unix(), + } + } + } + return func() map[ulid.ULID]*metadata.NoCompactMark { + return noCompactBlocks + } +} + +func (g *generateCompactionJobsTestCase) getBlocks() map[ulid.ULID]*metadata.Meta { + blocks := make(map[ulid.ULID]*metadata.Meta) + for id, b := range g.blocks { + blocks[id] = b.meta + } + return blocks +} + +type mockExistingPartitionedGroup struct { + partitionedGroupID uint32 + rangeStart int64 + rangeEnd int64 + partitionCount int + partitions []Partition + partitionVisitMarkers map[int]mockPartitionVisitMarker +} + +func (p *mockExistingPartitionedGroup) updatePartitionedGroupID(userID string) { + p.partitionedGroupID = HashGroup(userID, p.rangeStart, p.rangeEnd) +} + +func (p *mockExistingPartitionedGroup) setupBucketStore(t *testing.T, bkt *bucket.ClientMock, userID string, visitMarkerTimeout time.Duration) string { + p.updatePartitionedGroupID(userID) + partitionedGroupFilePath := path.Join(PartitionedGroupDirectory, fmt.Sprintf("%d.json", p.partitionedGroupID)) + for _, partition := range p.partitions { + partitionID := partition.PartitionID + if _, ok := p.partitionVisitMarkers[partitionID]; !ok { + continue + } + visitMarker := p.partitionVisitMarkers[partitionID] + partitionVisitMarkerFilePath := path.Join(PartitionedGroupDirectory, PartitionVisitMarkerDirectory, + fmt.Sprintf("%d/%s%d-%s", p.partitionedGroupID, PartitionVisitMarkerFilePrefix, partitionID, PartitionVisitMarkerFileSuffix)) + visitTime := time.Now() + if visitMarker.isExpired { + visitTime = time.Now().Add(-2 * visitMarkerTimeout) + } + partitionVisitMarker := PartitionVisitMarker{ + CompactorID: visitMarker.compactorID, + Status: visitMarker.status, + PartitionedGroupID: p.partitionedGroupID, + PartitionID: partitionID, + VisitTime: visitTime.UnixMilli(), + Version: PartitionVisitMarkerVersion1, + } + partitionVisitMarkerContent, err := json.Marshal(partitionVisitMarker) + require.NoError(t, err) + bkt.MockGet(partitionVisitMarkerFilePath, string(partitionVisitMarkerContent), nil) + } + partitionedGroup := PartitionedGroupInfo{ + PartitionedGroupID: p.partitionedGroupID, + PartitionCount: p.partitionCount, + Partitions: p.partitions, + RangeStart: p.rangeStart, + RangeEnd: p.rangeEnd, + CreationTime: time.Now().Add(-1 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + } + partitionedGroupContent, err := json.Marshal(partitionedGroup) + require.NoError(t, err) + bkt.MockGet(partitionedGroupFilePath, string(partitionedGroupContent), nil) + return partitionedGroupFilePath +} + +type mockBlock struct { + meta *metadata.Meta + timeRange time.Duration + hasNoCompactMark bool +} + +func (b *mockBlock) fixPartitionInfo(t *testing.T, userID string) { + extensions, err := cortextsdb.GetCortexMetaExtensionsFromMeta(*b.meta) + require.NoError(t, err) + if extensions != nil { + rangeStart := getRangeStart(b.meta, b.timeRange.Milliseconds()) + rangeEnd := rangeStart + b.timeRange.Milliseconds() + if extensions.PartitionInfo.PartitionedGroupID == 0 { + extensions.PartitionInfo.PartitionedGroupID = HashGroup(userID, rangeStart, rangeEnd) + } + b.meta.Thanos.Extensions = extensions + } +} + +type mockPartitionVisitMarker struct { 
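+ // Visit marker state that setupBucketStore serializes into the mocked bucket for a partition: + // the owning compactor, the marker status, and whether the visit time should already be expired.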
+ partitionID int + compactorID string + isExpired bool + status VisitStatus +} + +type expectedCompactionJob struct { + blocks []ulid.ULID + partitionCount int + partitionID int + rangeStart int64 + rangeEnd int64 +} diff --git a/pkg/compactor/partition_compaction_planner.go b/pkg/compactor/partition_compaction_planner.go new file mode 100644 index 0000000000..5e2036ed26 --- /dev/null +++ b/pkg/compactor/partition_compaction_planner.go @@ -0,0 +1,157 @@ +package compactor + +import ( + "context" + "fmt" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/thanos-io/objstore" + "github.com/thanos-io/thanos/pkg/block/metadata" + + "github.com/cortexproject/cortex/pkg/storage/tsdb" +) + +var ( + PlannerCompletedPartitionError = errors.New("got completed partition") + PlannerVisitedPartitionError = errors.New("got partition visited by other compactor") +) + +type PartitionCompactionPlanner struct { + ctx context.Context + bkt objstore.InstrumentedBucket + logger log.Logger + ranges []int64 + noCompBlocksFunc func() map[ulid.ULID]*metadata.NoCompactMark + ringLifecyclerID string + userID string + plannerDelay time.Duration + partitionVisitMarkerTimeout time.Duration + partitionVisitMarkerFileUpdateInterval time.Duration + partitionVisitMarkerReadFailed prometheus.Counter + partitionVisitMarkerWriteFailed prometheus.Counter + partitionedGroupInfoReadFailed prometheus.Counter + compactorMetrics *compactorMetrics +} + +func NewPartitionCompactionPlanner( + ctx context.Context, + bkt objstore.InstrumentedBucket, + logger log.Logger, + ranges []int64, + noCompBlocksFunc func() map[ulid.ULID]*metadata.NoCompactMark, + ringLifecyclerID string, + userID string, + plannerDelay time.Duration, + partitionVisitMarkerTimeout time.Duration, + partitionVisitMarkerFileUpdateInterval time.Duration, + partitionVisitMarkerReadFailed prometheus.Counter, + partitionVisitMarkerWriteFailed prometheus.Counter, + partitionedGroupInfoReadFailed prometheus.Counter, + compactorMetrics *compactorMetrics, +) *PartitionCompactionPlanner { + return &PartitionCompactionPlanner{ + ctx: ctx, + bkt: bkt, + logger: logger, + ranges: ranges, + noCompBlocksFunc: noCompBlocksFunc, + ringLifecyclerID: ringLifecyclerID, + userID: userID, + plannerDelay: plannerDelay, + partitionVisitMarkerTimeout: partitionVisitMarkerTimeout, + partitionVisitMarkerFileUpdateInterval: partitionVisitMarkerFileUpdateInterval, + partitionVisitMarkerReadFailed: partitionVisitMarkerReadFailed, + partitionVisitMarkerWriteFailed: partitionVisitMarkerWriteFailed, + partitionedGroupInfoReadFailed: partitionedGroupInfoReadFailed, + compactorMetrics: compactorMetrics, + } +} + +func (p *PartitionCompactionPlanner) Plan(ctx context.Context, metasByMinTime []*metadata.Meta, errChan chan error, extensions any) ([]*metadata.Meta, error) { + cortexMetaExtensions, err := tsdb.ConvertToCortexMetaExtensions(extensions) + if err != nil { + return nil, err + } + if cortexMetaExtensions == nil { + return nil, fmt.Errorf("cortexMetaExtensions cannot be nil") + } + return p.PlanWithPartition(ctx, metasByMinTime, cortexMetaExtensions, errChan) +} + +func (p *PartitionCompactionPlanner) PlanWithPartition(_ context.Context, metasByMinTime []*metadata.Meta, cortexMetaExtensions *tsdb.CortexMetaExtensions, errChan chan error) ([]*metadata.Meta, error) { + partitionInfo := cortexMetaExtensions.PartitionInfo + if partitionInfo == nil { + return nil, 
fmt.Errorf("partitionInfo cannot be nil") + } + partitionID := partitionInfo.PartitionID + partitionedGroupID := partitionInfo.PartitionedGroupID + + // This delay would prevent double compaction when two compactors + // claimed same partition in grouper at same time. + time.Sleep(p.plannerDelay) + + partitionVisitMarker := NewPartitionVisitMarker(p.ringLifecyclerID, partitionedGroupID, partitionID) + visitMarkerManager := NewVisitMarkerManager(p.bkt, p.logger, p.ringLifecyclerID, partitionVisitMarker, p.partitionVisitMarkerReadFailed, p.partitionVisitMarkerWriteFailed) + existingPartitionVisitMarker := &PartitionVisitMarker{} + err := visitMarkerManager.ReadVisitMarker(p.ctx, existingPartitionVisitMarker) + visitMarkerExists := true + if err != nil { + if errors.Is(err, ErrorVisitMarkerNotFound) { + visitMarkerExists = false + } else { + return nil, fmt.Errorf("unable to get visit marker file for partition with partition ID %d, partitioned group ID %d: %s", partitionID, partitionedGroupID, err.Error()) + } + } + if visitMarkerExists { + if existingPartitionVisitMarker.IsCompleted() { + p.compactorMetrics.compactionsNotPlanned.WithLabelValues(p.userID, cortexMetaExtensions.TimeRangeStr()).Inc() + level.Warn(p.logger).Log("msg", "partition is in completed status", "partitioned_group_id", partitionedGroupID, "partition_id", partitionID, "compactor_id", p.ringLifecyclerID, existingPartitionVisitMarker.LogInfo()) + return nil, PlannerCompletedPartitionError + } + if !existingPartitionVisitMarker.IsPendingByCompactor(p.partitionVisitMarkerTimeout, partitionID, p.ringLifecyclerID) { + p.compactorMetrics.compactionsNotPlanned.WithLabelValues(p.userID, cortexMetaExtensions.TimeRangeStr()).Inc() + level.Warn(p.logger).Log("msg", "partition is not visited by current compactor", "partitioned_group_id", partitionedGroupID, "partition_id", partitionID, "compactor_id", p.ringLifecyclerID, existingPartitionVisitMarker.LogInfo()) + return nil, PlannerVisitedPartitionError + } + } + + // Ensure all blocks fits within the largest range. This is a double check + // to ensure there's no bug in the previous blocks grouping, given this Plan() + // is just a pass-through. 
+ // Modified from https://github.com/cortexproject/cortex/pull/2616/files#diff-e3051fc530c48bb276ba958dd8fadc684e546bd7964e6bc75cef9a86ef8df344R28-R63 + largestRange := p.ranges[len(p.ranges)-1] + rangeStart := getRangeStart(metasByMinTime[0], largestRange) + rangeEnd := rangeStart + largestRange + noCompactMarked := p.noCompBlocksFunc() + resultMetas := make([]*metadata.Meta, 0, len(metasByMinTime)) + + for _, b := range metasByMinTime { + if b.ULID == DUMMY_BLOCK_ID { + continue + } + blockID := b.ULID.String() + if _, excluded := noCompactMarked[b.ULID]; excluded { + continue + } + + if b.MinTime < rangeStart || b.MaxTime > rangeEnd { + return nil, fmt.Errorf("block %s with time range %d:%d is outside the largest expected range %d:%d", blockID, b.MinTime, b.MaxTime, rangeStart, rangeEnd) + } + + resultMetas = append(resultMetas, b) + } + + if len(resultMetas) < 1 { + level.Info(p.logger).Log("msg", "result meta size is empty", "partitioned_group_id", partitionedGroupID, "partition_id", partitionID, "size", len(resultMetas)) + return nil, nil + } + + go visitMarkerManager.HeartBeat(p.ctx, errChan, p.partitionVisitMarkerFileUpdateInterval, false) + + return resultMetas, nil +} diff --git a/pkg/compactor/partition_compaction_planner_test.go b/pkg/compactor/partition_compaction_planner_test.go new file mode 100644 index 0000000000..b64b861f0b --- /dev/null +++ b/pkg/compactor/partition_compaction_planner_test.go @@ -0,0 +1,354 @@ +package compactor + +import ( + "context" + "encoding/json" + "fmt" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/oklog/ulid" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/prometheus/tsdb" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" + "github.com/thanos-io/thanos/pkg/block/metadata" + + "github.com/cortexproject/cortex/pkg/storage/bucket" + cortextsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" + "github.com/cortexproject/cortex/pkg/util/concurrency" +) + +func TestPartitionCompactionPlanner_Plan(t *testing.T) { + type VisitedPartition struct { + isExpired bool + compactorID string + } + + currentCompactor := "test-compactor" + otherCompactor := "other-compactor" + + block1ulid := ulid.MustNew(1, nil) + block2ulid := ulid.MustNew(2, nil) + block3ulid := ulid.MustNew(3, nil) + + tests := map[string]struct { + ranges []int64 + noCompactBlocks map[ulid.ULID]*metadata.NoCompactMark + blocks []*metadata.Meta + expected []*metadata.Meta + expectedErr error + visitedPartition VisitedPartition + }{ + "test basic plan": { + ranges: []int64{2 * time.Hour.Milliseconds()}, + blocks: []*metadata.Meta{ + { + BlockMeta: tsdb.BlockMeta{ + ULID: block1ulid, + MinTime: 1 * time.Hour.Milliseconds(), + MaxTime: 2 * time.Hour.Milliseconds(), + }, + }, + { + BlockMeta: tsdb.BlockMeta{ + ULID: block2ulid, + MinTime: 1 * time.Hour.Milliseconds(), + MaxTime: 2 * time.Hour.Milliseconds(), + }, + }, + }, + visitedPartition: VisitedPartition{ + isExpired: false, + compactorID: currentCompactor, + }, + expected: []*metadata.Meta{ + { + BlockMeta: tsdb.BlockMeta{ + ULID: block1ulid, + MinTime: 1 * time.Hour.Milliseconds(), + MaxTime: 2 * time.Hour.Milliseconds(), + }, + }, + { + BlockMeta: tsdb.BlockMeta{ + ULID: block2ulid, + MinTime: 1 * time.Hour.Milliseconds(), + MaxTime: 2 * time.Hour.Milliseconds(), + }, + }, + }, + }, + "test blocks outside largest range smaller min 
time after": { + ranges: []int64{2 * time.Hour.Milliseconds()}, + blocks: []*metadata.Meta{ + { + BlockMeta: tsdb.BlockMeta{ + ULID: block1ulid, + MinTime: 2 * time.Hour.Milliseconds(), + MaxTime: 4 * time.Hour.Milliseconds(), + }, + }, + { + BlockMeta: tsdb.BlockMeta{ + ULID: block2ulid, + MinTime: 0 * time.Hour.Milliseconds(), + MaxTime: 2 * time.Hour.Milliseconds(), + }, + }, + }, + visitedPartition: VisitedPartition{ + isExpired: false, + compactorID: currentCompactor, + }, + expectedErr: fmt.Errorf("block %s with time range %d:%d is outside the largest expected range %d:%d", block2ulid.String(), 0*time.Hour.Milliseconds(), 2*time.Hour.Milliseconds(), 2*time.Hour.Milliseconds(), 4*time.Hour.Milliseconds()), + }, + "test blocks outside largest range 1": { + ranges: []int64{2 * time.Hour.Milliseconds()}, + blocks: []*metadata.Meta{ + { + BlockMeta: tsdb.BlockMeta{ + ULID: block1ulid, + MinTime: 0 * time.Hour.Milliseconds(), + MaxTime: 4 * time.Hour.Milliseconds(), + }, + }, + { + BlockMeta: tsdb.BlockMeta{ + ULID: block2ulid, + MinTime: 0 * time.Hour.Milliseconds(), + MaxTime: 4 * time.Hour.Milliseconds(), + }, + }, + }, + visitedPartition: VisitedPartition{ + isExpired: false, + compactorID: currentCompactor, + }, + expectedErr: fmt.Errorf("block %s with time range %d:%d is outside the largest expected range %d:%d", block1ulid.String(), 0*time.Hour.Milliseconds(), 4*time.Hour.Milliseconds(), 0*time.Hour.Milliseconds(), 2*time.Hour.Milliseconds()), + }, + "test blocks outside largest range 2": { + ranges: []int64{2 * time.Hour.Milliseconds()}, + blocks: []*metadata.Meta{ + { + BlockMeta: tsdb.BlockMeta{ + ULID: block1ulid, + MinTime: 0 * time.Hour.Milliseconds(), + MaxTime: 2 * time.Hour.Milliseconds(), + }, + }, + { + BlockMeta: tsdb.BlockMeta{ + ULID: block2ulid, + MinTime: 0 * time.Hour.Milliseconds(), + MaxTime: 4 * time.Hour.Milliseconds(), + }, + }, + }, + visitedPartition: VisitedPartition{ + isExpired: false, + compactorID: currentCompactor, + }, + expectedErr: fmt.Errorf("block %s with time range %d:%d is outside the largest expected range %d:%d", block2ulid.String(), 0*time.Hour.Milliseconds(), 4*time.Hour.Milliseconds(), 0*time.Hour.Milliseconds(), 2*time.Hour.Milliseconds()), + }, + "test should skip blocks marked for no compact": { + ranges: []int64{2 * time.Hour.Milliseconds()}, + noCompactBlocks: map[ulid.ULID]*metadata.NoCompactMark{block1ulid: {}}, + blocks: []*metadata.Meta{ + { + BlockMeta: tsdb.BlockMeta{ + ULID: block1ulid, + MinTime: 1 * time.Hour.Milliseconds(), + MaxTime: 2 * time.Hour.Milliseconds(), + }, + }, + { + BlockMeta: tsdb.BlockMeta{ + ULID: block2ulid, + MinTime: 1 * time.Hour.Milliseconds(), + MaxTime: 2 * time.Hour.Milliseconds(), + }, + }, + { + BlockMeta: tsdb.BlockMeta{ + ULID: block3ulid, + MinTime: 1 * time.Hour.Milliseconds(), + MaxTime: 2 * time.Hour.Milliseconds(), + }, + }, + }, + visitedPartition: VisitedPartition{ + isExpired: false, + compactorID: currentCompactor, + }, + expected: []*metadata.Meta{ + { + BlockMeta: tsdb.BlockMeta{ + ULID: block2ulid, + MinTime: 1 * time.Hour.Milliseconds(), + MaxTime: 2 * time.Hour.Milliseconds(), + }, + }, + { + BlockMeta: tsdb.BlockMeta{ + ULID: block3ulid, + MinTime: 1 * time.Hour.Milliseconds(), + MaxTime: 2 * time.Hour.Milliseconds(), + }, + }, + }, + }, + "test should not compact if there is no compactable block": { + ranges: []int64{2 * time.Hour.Milliseconds()}, + noCompactBlocks: map[ulid.ULID]*metadata.NoCompactMark{block1ulid: {}}, + blocks: []*metadata.Meta{ + { + BlockMeta: tsdb.BlockMeta{ + 
ULID: block1ulid, + MinTime: 1 * time.Hour.Milliseconds(), + MaxTime: 2 * time.Hour.Milliseconds(), + }, + }, + }, + visitedPartition: VisitedPartition{ + isExpired: false, + compactorID: currentCompactor, + }, + expected: []*metadata.Meta{}, + }, + "test should not compact if visit marker file is not expired and visited by other compactor": { + ranges: []int64{2 * time.Hour.Milliseconds()}, + blocks: []*metadata.Meta{ + { + BlockMeta: tsdb.BlockMeta{ + ULID: block1ulid, + MinTime: 1 * time.Hour.Milliseconds(), + MaxTime: 2 * time.Hour.Milliseconds(), + }, + }, + { + BlockMeta: tsdb.BlockMeta{ + ULID: block2ulid, + MinTime: 1 * time.Hour.Milliseconds(), + MaxTime: 2 * time.Hour.Milliseconds(), + }, + }, + }, + visitedPartition: VisitedPartition{ + isExpired: false, + compactorID: otherCompactor, + }, + expectedErr: PlannerVisitedPartitionError, + }, + "test should not compact if visit marker file is expired": { + ranges: []int64{2 * time.Hour.Milliseconds()}, + blocks: []*metadata.Meta{ + { + BlockMeta: tsdb.BlockMeta{ + ULID: block1ulid, + MinTime: 1 * time.Hour.Milliseconds(), + MaxTime: 2 * time.Hour.Milliseconds(), + }, + }, + { + BlockMeta: tsdb.BlockMeta{ + ULID: block2ulid, + MinTime: 1 * time.Hour.Milliseconds(), + MaxTime: 2 * time.Hour.Milliseconds(), + }, + }, + }, + visitedPartition: VisitedPartition{ + isExpired: true, + compactorID: currentCompactor, + }, + expectedErr: PlannerVisitedPartitionError, + }, + } + + visitMarkerTimeout := 5 * time.Minute + partitionedGroupID := uint32(1) + partitionID := 0 + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + bkt := &bucket.ClientMock{} + visitMarkerFile := GetPartitionVisitMarkerFilePath(partitionedGroupID, partitionID) + expireTime := time.Now() + if testData.visitedPartition.isExpired { + expireTime = expireTime.Add(-1 * visitMarkerTimeout) + } + visitMarker := PartitionVisitMarker{ + CompactorID: testData.visitedPartition.compactorID, + PartitionedGroupID: partitionedGroupID, + PartitionID: partitionID, + VisitTime: expireTime.Unix(), + Status: Pending, + Version: PartitionVisitMarkerVersion1, + } + visitMarkerFileContent, _ := json.Marshal(visitMarker) + bkt.MockGet(visitMarkerFile, string(visitMarkerFileContent), nil) + bkt.MockUpload(mock.Anything, nil) + bkt.MockGet(mock.Anything, "", nil) + + registerer := prometheus.NewPedanticRegistry() + visitMarkerReadFailed := promauto.With(registerer).NewCounter(prometheus.CounterOpts{ + Name: "cortex_compactor_block_visit_marker_read_failed", + Help: "Number of block visit marker file failed to be read.", + }) + visitMarkerWriteFailed := promauto.With(registerer).NewCounter(prometheus.CounterOpts{ + Name: "cortex_compactor_block_visit_marker_write_failed", + Help: "Number of block visit marker file failed to be written.", + }) + partitionedGroupInfoReadFailed := promauto.With(registerer).NewCounter(prometheus.CounterOpts{ + Name: "cortex_compactor_partitioned_group_info_read_failed", + Help: "Number of partitioned group info file failed to be read.", + }) + + metrics := newCompactorMetrics(registerer) + + logs := &concurrency.SyncBuffer{} + logger := log.NewLogfmtLogger(logs) + p := NewPartitionCompactionPlanner( + context.Background(), + objstore.WithNoopInstr(bkt), + logger, + testData.ranges, + func() map[ulid.ULID]*metadata.NoCompactMark { + return testData.noCompactBlocks + }, + currentCompactor, + "test-user", + 10*time.Millisecond, + visitMarkerTimeout, + time.Minute, + visitMarkerReadFailed, + visitMarkerWriteFailed, + 
partitionedGroupInfoReadFailed, + metrics, + ) + actual, err := p.Plan(context.Background(), testData.blocks, nil, &cortextsdb.CortexMetaExtensions{ + PartitionInfo: &cortextsdb.PartitionInfo{ + PartitionCount: 1, + PartitionID: partitionID, + PartitionedGroupID: partitionedGroupID, + }, + }) + + if testData.expectedErr != nil { + assert.Equal(t, err, testData.expectedErr) + } else { + require.NoError(t, err) + } + + require.Len(t, actual, len(testData.expected)) + + for idx, expectedMeta := range testData.expected { + assert.Equal(t, expectedMeta.ULID, actual[idx].ULID) + } + }) + } +} diff --git a/pkg/compactor/partition_visit_marker.go b/pkg/compactor/partition_visit_marker.go new file mode 100644 index 0000000000..b3598fd9f4 --- /dev/null +++ b/pkg/compactor/partition_visit_marker.go @@ -0,0 +1,131 @@ +package compactor + +import ( + "fmt" + "path" + "strings" + "time" + + "github.com/pkg/errors" +) + +const ( + // PartitionVisitMarkerDirectory is the name of directory where all visit markers are saved. + PartitionVisitMarkerDirectory = "visit-marks" + // PartitionVisitMarkerFileSuffix is the known suffix of json filename for representing the most recent compactor visit. + PartitionVisitMarkerFileSuffix = "visit-mark.json" + // PartitionVisitMarkerFilePrefix is the known prefix of json filename for representing the most recent compactor visit. + PartitionVisitMarkerFilePrefix = "partition-" + // PartitionVisitMarkerVersion1 is the current supported version of visit-mark file. + PartitionVisitMarkerVersion1 = 1 +) + +var ( + ErrorNotPartitionVisitMarker = errors.New("file is not partition visit marker") +) + +type PartitionVisitMarker struct { + CompactorID string `json:"compactorID"` + Status VisitStatus `json:"status"` + PartitionedGroupID uint32 `json:"partitionedGroupID"` + PartitionID int `json:"partitionID"` + // VisitTime is a unix timestamp of when the partition was visited (mark updated). + VisitTime int64 `json:"visitTime"` + // Version of the file. 
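+	// An illustrative marker as stored in the bucket, assuming partitioned
+	// group 12345, partition 0 and a compactor named "compactor-1"; the status
+	// string follows the VisitStatus values defined elsewhere in this package
+	// and the version is PartitionVisitMarkerVersion1:
+	//
+	//	{
+	//	  "compactorID": "compactor-1",
+	//	  "status": "pending",
+	//	  "partitionedGroupID": 12345,
+	//	  "partitionID": 0,
+	//	  "visitTime": 1700000000,
+	//	  "version": 1
+	//	}
+	//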
+ Version int `json:"version"` +} + +func NewPartitionVisitMarker(compactorID string, partitionedGroupID uint32, partitionID int) *PartitionVisitMarker { + return &PartitionVisitMarker{ + CompactorID: compactorID, + PartitionedGroupID: partitionedGroupID, + PartitionID: partitionID, + } +} + +func (b *PartitionVisitMarker) IsExpired(partitionVisitMarkerTimeout time.Duration) bool { + return !time.Now().Before(time.Unix(b.VisitTime, 0).Add(partitionVisitMarkerTimeout)) +} + +func (b *PartitionVisitMarker) IsVisited(partitionVisitMarkerTimeout time.Duration, partitionID int) bool { + return b.IsCompleted() || (partitionID == b.PartitionID && !b.IsExpired(partitionVisitMarkerTimeout)) +} + +func (b *PartitionVisitMarker) IsPendingByCompactor(partitionVisitMarkerTimeout time.Duration, partitionID int, compactorID string) bool { + return b.CompactorID == compactorID && partitionID == b.PartitionID && b.IsPending() && !b.IsExpired(partitionVisitMarkerTimeout) +} + +func (b *PartitionVisitMarker) IsCompleted() bool { + return b.Status == Completed +} + +func (b *PartitionVisitMarker) IsFailed() bool { + return b.Status == Failed +} + +func (b *PartitionVisitMarker) IsPending() bool { + return b.Status == Pending +} + +func (b *PartitionVisitMarker) IsInProgress() bool { + return b.Status == InProgress +} + +func (b *PartitionVisitMarker) GetVisitMarkerFilePath() string { + return GetPartitionVisitMarkerFilePath(b.PartitionedGroupID, b.PartitionID) +} + +func (b *PartitionVisitMarker) MarkInProgress(ownerIdentifier string) { + b.CompactorID = ownerIdentifier + b.Status = InProgress + b.VisitTime = time.Now().Unix() +} + +func (b *PartitionVisitMarker) MarkPending(ownerIdentifier string) { + b.CompactorID = ownerIdentifier + b.Status = Pending + b.VisitTime = time.Now().Unix() +} + +func (b *PartitionVisitMarker) MarkCompleted(ownerIdentifier string) { + b.CompactorID = ownerIdentifier + b.Status = Completed + b.VisitTime = time.Now().Unix() +} + +func (b *PartitionVisitMarker) MarkFailed(ownerIdentifier string) { + b.CompactorID = ownerIdentifier + b.Status = Failed + b.VisitTime = time.Now().Unix() +} + +func (b *PartitionVisitMarker) LogInfo() []string { + return []string{ + "visit_marker_partitioned_group_id", + fmt.Sprintf("%d", b.PartitionedGroupID), + "visit_marker_partition_id", + fmt.Sprintf("%d", b.PartitionID), + "visit_marker_compactor_id", + b.CompactorID, + "visit_marker_status", + string(b.Status), + "visit_marker_visit_time", + time.Unix(b.VisitTime, 0).String(), + } +} + +func GetPartitionVisitMarkerFilePath(partitionedGroupID uint32, partitionID int) string { + return path.Join(GetPartitionVisitMarkerDirectoryPath(partitionedGroupID), fmt.Sprintf("%s%d-%s", PartitionVisitMarkerFilePrefix, partitionID, PartitionVisitMarkerFileSuffix)) +} + +func GetPartitionVisitMarkerDirectoryPath(partitionedGroupID uint32) string { + return path.Join(PartitionedGroupDirectory, PartitionVisitMarkerDirectory, fmt.Sprintf("%d", partitionedGroupID)) +} + +func IsPartitionVisitMarker(path string) bool { + return strings.HasSuffix(path, PartitionVisitMarkerFileSuffix) +} + +func IsNotPartitionVisitMarkerError(err error) bool { + return errors.Is(err, ErrorNotPartitionVisitMarker) +} diff --git a/pkg/compactor/partitioned_group_info.go b/pkg/compactor/partitioned_group_info.go new file mode 100644 index 0000000000..269c7c582a --- /dev/null +++ b/pkg/compactor/partitioned_group_info.go @@ -0,0 +1,330 @@ +package compactor + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "path" + 
"strings" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/thanos-io/objstore" + "github.com/thanos-io/thanos/pkg/block" + "github.com/thanos-io/thanos/pkg/block/metadata" + + "github.com/cortexproject/cortex/pkg/util/runutil" +) + +const ( + PartitionedGroupDirectory = "partitioned-groups" + PartitionedGroupInfoVersion1 = 1 +) + +var ( + ErrorPartitionedGroupInfoNotFound = errors.New("partitioned group info not found") + ErrorUnmarshalPartitionedGroupInfo = errors.New("unmarshal partitioned group info JSON") +) + +type Partition struct { + PartitionID int `json:"partitionID"` + Blocks []ulid.ULID `json:"blocks"` +} + +type PartitionedGroupStatus struct { + PartitionedGroupID uint32 + CanDelete bool + IsCompleted bool + DeleteVisitMarker bool + PendingPartitions int + InProgressPartitions int + PendingOrFailedPartitions []Partition +} + +func (s PartitionedGroupStatus) String() string { + var partitions []string + for _, p := range s.PendingOrFailedPartitions { + partitions = append(partitions, fmt.Sprintf("%d", p.PartitionID)) + } + return fmt.Sprintf(`{"partitioned_group_id": %d, "can_delete": %t, "is_complete": %t, "delete_visit_marker": %t, "pending_partitions": %d, "in_progress_partitions": %d, "pending_or_failed_partitions": [%s]}`, + s.PartitionedGroupID, s.CanDelete, s.IsCompleted, s.DeleteVisitMarker, s.PendingPartitions, s.InProgressPartitions, strings.Join(partitions, ",")) +} + +type PartitionedGroupInfo struct { + PartitionedGroupID uint32 `json:"partitionedGroupID"` + PartitionCount int `json:"partitionCount"` + Partitions []Partition `json:"partitions"` + RangeStart int64 `json:"rangeStart"` + RangeEnd int64 `json:"rangeEnd"` + CreationTime int64 `json:"creationTime"` + // Version of the file. 
+ Version int `json:"version"` +} + +func (p *PartitionedGroupInfo) rangeStartTime() time.Time { + return time.Unix(0, p.RangeStart*int64(time.Millisecond)).UTC() +} + +func (p *PartitionedGroupInfo) rangeEndTime() time.Time { + return time.Unix(0, p.RangeEnd*int64(time.Millisecond)).UTC() +} + +func (p *PartitionedGroupInfo) getPartitionIDsByBlock(blockID ulid.ULID) []int { + var partitionIDs []int +partitionLoop: + for _, partition := range p.Partitions { + for _, block := range partition.Blocks { + if block == blockID { + partitionIDs = append(partitionIDs, partition.PartitionID) + continue partitionLoop + } + } + } + return partitionIDs +} + +func (p *PartitionedGroupInfo) getAllBlocks() []ulid.ULID { + uniqueBlocks := make(map[ulid.ULID]struct{}) + for _, partition := range p.Partitions { + for _, block := range partition.Blocks { + uniqueBlocks[block] = struct{}{} + } + } + blocks := make([]ulid.ULID, len(uniqueBlocks)) + i := 0 + for block := range uniqueBlocks { + blocks[i] = block + i++ + } + return blocks +} + +func (p *PartitionedGroupInfo) getPartitionedGroupStatus( + ctx context.Context, + userBucket objstore.InstrumentedBucket, + partitionVisitMarkerTimeout time.Duration, + userLogger log.Logger, + partitionVisitMarkerReadFailed prometheus.Counter, + partitionVisitMarkerWriteFailed prometheus.Counter, +) PartitionedGroupStatus { + status := PartitionedGroupStatus{ + PartitionedGroupID: p.PartitionedGroupID, + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingPartitions: 0, + InProgressPartitions: 0, + PendingOrFailedPartitions: []Partition{}, + } + allPartitionCompleted := true + hasInProgressPartitions := false + for _, partition := range p.Partitions { + partitionVisitMarker := &PartitionVisitMarker{ + PartitionedGroupID: p.PartitionedGroupID, + PartitionID: partition.PartitionID, + } + visitMarkerManager := NewVisitMarkerManager(userBucket, userLogger, "PartitionedGroupInfo.getPartitionedGroupStatus", partitionVisitMarker, partitionVisitMarkerReadFailed, partitionVisitMarkerWriteFailed) + partitionVisitMarkerExists := true + if err := visitMarkerManager.ReadVisitMarker(ctx, partitionVisitMarker); err != nil { + if errors.Is(err, ErrorVisitMarkerNotFound) { + partitionVisitMarkerExists = false + } else { + level.Warn(userLogger).Log("msg", "unable to read partition visit marker", "path", partitionVisitMarker.GetVisitMarkerFilePath(), "err", err) + return status + } + } + + if !partitionVisitMarkerExists { + status.PendingPartitions++ + allPartitionCompleted = false + status.PendingOrFailedPartitions = append(status.PendingOrFailedPartitions, partition) + continue + } + + if partitionVisitMarker.VisitTime < p.CreationTime { + status.DeleteVisitMarker = true + allPartitionCompleted = false + continue + } + + if partitionVisitMarker.IsCompleted() { + continue + } + + if partitionVisitMarkerExists && (partitionVisitMarker.IsPending() || partitionVisitMarker.IsInProgress()) && !partitionVisitMarker.IsExpired(partitionVisitMarkerTimeout) { + status.InProgressPartitions++ + hasInProgressPartitions = true + allPartitionCompleted = false + continue + } + + if partitionVisitMarker.IsFailed() { + status.PendingPartitions++ + } else { + status.PendingPartitions++ + } + allPartitionCompleted = false + status.PendingOrFailedPartitions = append(status.PendingOrFailedPartitions, partition) + } + + if hasInProgressPartitions { + return status + } + + status.IsCompleted = allPartitionCompleted + + if allPartitionCompleted { + status.CanDelete = true + 
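+		// All partitions completed successfully, so the per-partition visit
+		// markers can be removed together with the partitioned group file.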
status.DeleteVisitMarker = true + return status + } + + checkedBlocks := make(map[ulid.ULID]struct{}) + for _, partition := range status.PendingOrFailedPartitions { + for _, blockID := range partition.Blocks { + if _, ok := checkedBlocks[blockID]; ok { + continue + } + if !p.doesBlockExist(ctx, userBucket, userLogger, blockID) { + level.Info(userLogger).Log("msg", "delete partitioned group", "reason", "block is physically deleted", "block", blockID) + status.CanDelete = true + status.DeleteVisitMarker = true + return status + } + if p.isBlockDeleted(ctx, userBucket, userLogger, blockID) { + level.Info(userLogger).Log("msg", "delete partitioned group", "reason", "block is marked for deletion", "block", blockID) + status.CanDelete = true + status.DeleteVisitMarker = true + return status + } + if p.isBlockNoCompact(ctx, userBucket, userLogger, blockID) { + level.Info(userLogger).Log("msg", "delete partitioned group", "reason", "block is marked for no compact", "block", blockID) + status.CanDelete = true + status.DeleteVisitMarker = true + return status + } + checkedBlocks[blockID] = struct{}{} + } + } + return status +} + +func (p *PartitionedGroupInfo) doesBlockExist(ctx context.Context, userBucket objstore.InstrumentedBucket, userLogger log.Logger, blockID ulid.ULID) bool { + metaExists, err := userBucket.Exists(ctx, path.Join(blockID.String(), metadata.MetaFilename)) + if err != nil { + level.Warn(userLogger).Log("msg", "unable to get stats of meta.json for block", "partitioned_group_id", p.PartitionedGroupID, "block", blockID.String()) + return true + } + return metaExists +} + +func (p *PartitionedGroupInfo) isBlockDeleted(ctx context.Context, userBucket objstore.InstrumentedBucket, userLogger log.Logger, blockID ulid.ULID) bool { + deletionMarkerExists, err := userBucket.Exists(ctx, path.Join(blockID.String(), metadata.DeletionMarkFilename)) + if err != nil { + level.Warn(userLogger).Log("msg", "unable to get stats of deletion-mark.json for block", "partitioned_group_id", p.PartitionedGroupID, "block", blockID.String()) + return false + } + return deletionMarkerExists +} + +func (p *PartitionedGroupInfo) isBlockNoCompact(ctx context.Context, userBucket objstore.InstrumentedBucket, userLogger log.Logger, blockID ulid.ULID) bool { + noCompactMarkerExists, err := userBucket.Exists(ctx, path.Join(blockID.String(), metadata.NoCompactMarkFilename)) + if err != nil { + level.Warn(userLogger).Log("msg", "unable to get stats of no-compact-mark.json for block", "partitioned_group_id", p.PartitionedGroupID, "block", blockID.String()) + return false + } + return noCompactMarkerExists +} + +func (p *PartitionedGroupInfo) markAllBlocksForDeletion(ctx context.Context, userBucket objstore.InstrumentedBucket, userLogger log.Logger, blocksMarkedForDeletion *prometheus.CounterVec, userID string) error { + blocks := p.getAllBlocks() + for _, blockID := range blocks { + if p.doesBlockExist(ctx, userBucket, userLogger, blockID) && !p.isBlockDeleted(ctx, userBucket, userLogger, blockID) { + if err := block.MarkForDeletion(ctx, userLogger, userBucket, blockID, "delete block during partitioned group completion check", blocksMarkedForDeletion.WithLabelValues(userID, reasonLabelValue)); err != nil { + level.Warn(userLogger).Log("msg", "unable to mark block for deletion", "partitioned_group_id", p.PartitionedGroupID, "block", blockID.String()) + return err + } + level.Info(userLogger).Log("msg", "marked block for deletion during partitioned group info clean up", "partitioned_group_id", p.PartitionedGroupID, 
"block", blockID.String()) + } + } + return nil +} + +func (p *PartitionedGroupInfo) String() string { + var partitions []string + for _, partition := range p.Partitions { + partitions = append(partitions, fmt.Sprintf("(PartitionID: %d, Blocks: %s)", partition.PartitionID, partition.Blocks)) + } + return fmt.Sprintf("{PartitionedGroupID: %d, PartitionCount: %d, Partitions: %s}", p.PartitionedGroupID, p.PartitionCount, strings.Join(partitions, ", ")) +} + +func GetPartitionedGroupFile(partitionedGroupID uint32) string { + return path.Join(PartitionedGroupDirectory, fmt.Sprintf("%d.json", partitionedGroupID)) +} + +func ReadPartitionedGroupInfo(ctx context.Context, bkt objstore.InstrumentedBucketReader, logger log.Logger, partitionedGroupID uint32, partitionedGroupInfoReadFailed prometheus.Counter) (*PartitionedGroupInfo, error) { + return ReadPartitionedGroupInfoFile(ctx, bkt, logger, GetPartitionedGroupFile(partitionedGroupID), partitionedGroupInfoReadFailed) +} + +func ReadPartitionedGroupInfoFile(ctx context.Context, bkt objstore.InstrumentedBucketReader, logger log.Logger, partitionedGroupFile string, partitionedGroupInfoReadFailed prometheus.Counter) (*PartitionedGroupInfo, error) { + partitionedGroupReader, err := bkt.ReaderWithExpectedErrs(bkt.IsObjNotFoundErr).Get(ctx, partitionedGroupFile) + if err != nil { + if bkt.IsObjNotFoundErr(err) { + return nil, errors.Wrapf(ErrorPartitionedGroupInfoNotFound, "partitioned group file: %s", partitionedGroupReader) + } + partitionedGroupInfoReadFailed.Inc() + return nil, errors.Wrapf(err, "get partitioned group file: %s", partitionedGroupReader) + } + defer runutil.CloseWithLogOnErr(logger, partitionedGroupReader, "close partitioned group reader") + p, err := io.ReadAll(partitionedGroupReader) + if err != nil { + partitionedGroupInfoReadFailed.Inc() + return nil, errors.Wrapf(err, "read partitioned group file: %s", partitionedGroupFile) + } + partitionedGroupInfo := PartitionedGroupInfo{} + if err = json.Unmarshal(p, &partitionedGroupInfo); err != nil { + partitionedGroupInfoReadFailed.Inc() + return nil, errors.Wrapf(ErrorUnmarshalPartitionedGroupInfo, "partitioned group file: %s, error: %v", partitionedGroupFile, err.Error()) + } + if partitionedGroupInfo.Version != VisitMarkerVersion1 { + partitionedGroupInfoReadFailed.Inc() + return nil, errors.Errorf("unexpected partitioned group file version %d, expected %d", partitionedGroupInfo.Version, VisitMarkerVersion1) + } + if partitionedGroupInfo.CreationTime <= 0 { + objAttr, err := bkt.Attributes(ctx, partitionedGroupFile) + if err != nil { + return nil, errors.Errorf("unable to get partitioned group file attributes: %s, error: %v", partitionedGroupFile, err.Error()) + } + partitionedGroupInfo.CreationTime = objAttr.LastModified.Unix() + } + return &partitionedGroupInfo, nil +} + +func UpdatePartitionedGroupInfo(ctx context.Context, bkt objstore.InstrumentedBucket, logger log.Logger, partitionedGroupInfo PartitionedGroupInfo, partitionedGroupInfoReadFailed prometheus.Counter, partitionedGroupInfoWriteFailed prometheus.Counter) (*PartitionedGroupInfo, error) { + existingPartitionedGroup, _ := ReadPartitionedGroupInfo(ctx, bkt, logger, partitionedGroupInfo.PartitionedGroupID, partitionedGroupInfoReadFailed) + if existingPartitionedGroup != nil { + level.Warn(logger).Log("msg", "partitioned group info already exists", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID) + return existingPartitionedGroup, nil + } + if partitionedGroupInfo.CreationTime <= 0 { + 
partitionedGroupInfo.CreationTime = time.Now().Unix() + } + partitionedGroupFile := GetPartitionedGroupFile(partitionedGroupInfo.PartitionedGroupID) + partitionedGroupInfoContent, err := json.Marshal(partitionedGroupInfo) + if err != nil { + partitionedGroupInfoWriteFailed.Inc() + return nil, err + } + reader := bytes.NewReader(partitionedGroupInfoContent) + if err := bkt.Upload(ctx, partitionedGroupFile, reader); err != nil { + return nil, err + } + level.Info(logger).Log("msg", "created new partitioned group info", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID) + return &partitionedGroupInfo, nil +} diff --git a/pkg/compactor/partitioned_group_info_test.go b/pkg/compactor/partitioned_group_info_test.go new file mode 100644 index 0000000000..862ac3e88e --- /dev/null +++ b/pkg/compactor/partitioned_group_info_test.go @@ -0,0 +1,886 @@ +package compactor + +import ( + "context" + "encoding/json" + "path" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/oklog/ulid" + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" + "github.com/thanos-io/thanos/pkg/block/metadata" + + "github.com/cortexproject/cortex/pkg/storage/bucket" + cortex_testutil "github.com/cortexproject/cortex/pkg/storage/tsdb/testutil" +) + +func TestPartitionedGroupInfo(t *testing.T) { + ulid0 := ulid.MustNew(0, nil) + ulid1 := ulid.MustNew(1, nil) + ulid2 := ulid.MustNew(2, nil) + rangeStart := (1 * time.Hour).Milliseconds() + rangeEnd := (2 * time.Hour).Milliseconds() + partitionedGroupID := uint32(12345) + for _, tcase := range []struct { + name string + partitionedGroupInfo PartitionedGroupInfo + }{ + { + name: "write partitioned group info 1", + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: rangeStart, + RangeEnd: rangeEnd, + Version: PartitionedGroupInfoVersion1, + }, + }, + { + name: "write partitioned group info 2", + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 3, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid1, + }, + }, + { + PartitionID: 2, + Blocks: []ulid.ULID{ + ulid2, + }, + }, + }, + RangeStart: rangeStart, + RangeEnd: rangeEnd, + Version: PartitionedGroupInfoVersion1, + }, + }, + } { + t.Run(tcase.name, func(t *testing.T) { + ctx := context.Background() + dummyReadCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + dummyWriteCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + testBkt, _ := cortex_testutil.PrepareFilesystemBucket(t) + bkt := objstore.WithNoopInstr(testBkt) + logger := log.NewNopLogger() + writeRes, err := UpdatePartitionedGroupInfo(ctx, bkt, logger, tcase.partitionedGroupInfo, dummyReadCounter, dummyWriteCounter) + tcase.partitionedGroupInfo.CreationTime = writeRes.CreationTime + require.NoError(t, err) + require.Equal(t, tcase.partitionedGroupInfo, *writeRes) + readRes, err := ReadPartitionedGroupInfo(ctx, bkt, logger, tcase.partitionedGroupInfo.PartitionedGroupID, dummyReadCounter) + require.NoError(t, err) + require.Equal(t, tcase.partitionedGroupInfo, *readRes) + }) + } +} + +func TestGetPartitionIDsByBlock(t *testing.T) { + ulid0 := 
ulid.MustNew(0, nil) + ulid1 := ulid.MustNew(1, nil) + ulid2 := ulid.MustNew(2, nil) + ulid3 := ulid.MustNew(3, nil) + partitionedGroupInfo := PartitionedGroupInfo{ + PartitionedGroupID: uint32(12345), + PartitionCount: 3, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + { + PartitionID: 2, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + ulid2, + ulid3, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + Version: PartitionedGroupInfoVersion1, + } + + res0 := partitionedGroupInfo.getPartitionIDsByBlock(ulid0) + require.Equal(t, 3, len(res0)) + require.Contains(t, res0, 0) + require.Contains(t, res0, 1) + require.Contains(t, res0, 2) + + res1 := partitionedGroupInfo.getPartitionIDsByBlock(ulid1) + require.Equal(t, 2, len(res1)) + require.Contains(t, res1, 0) + require.Contains(t, res1, 2) + + res2 := partitionedGroupInfo.getPartitionIDsByBlock(ulid2) + require.Equal(t, 2, len(res2)) + require.Contains(t, res2, 1) + require.Contains(t, res2, 2) + + res3 := partitionedGroupInfo.getPartitionIDsByBlock(ulid3) + require.Equal(t, 1, len(res3)) + require.Contains(t, res3, 2) +} + +func TestGetPartitionedGroupStatus(t *testing.T) { + ulid0 := ulid.MustNew(0, nil) + ulid1 := ulid.MustNew(1, nil) + ulid2 := ulid.MustNew(2, nil) + partitionedGroupID := uint32(1234) + for _, tcase := range []struct { + name string + expectedResult PartitionedGroupStatus + partitionedGroupInfo PartitionedGroupInfo + partitionVisitMarkers []PartitionVisitMarker + deletedBlock map[ulid.ULID]bool + noCompactBlock map[ulid.ULID]struct{} + }{ + { + name: "test one partition is not visited and contains block marked for deletion", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid0: true, + }, + }, + { + name: "test one partition is pending and contains block marked for deletion", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * 
time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Pending, + VisitTime: time.Now().Add(-5 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid0: true, + }, + }, + { + name: "test one partition is completed and one partition is under visiting", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{}, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Pending, + VisitTime: time.Now().Add(time.Second).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid0: false, + }, + }, + { + name: "test one partition is pending expired", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Pending, + VisitTime: time.Now().Add(-5 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{}, + }, + { + name: "test one partition is complete with one block deleted and one partition is not visited with no blocks deleted", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * 
time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid1: true, + }, + }, + { + name: "test one partition is complete and one partition is failed with no blocks deleted", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Failed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{}, + }, + { + name: "test one partition is complete and one partition is failed one block deleted", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Failed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid2: true, + }, + }, + { + name: "test all partitions are complete", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: true, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{}, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 
* time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid2: true, + }, + }, + { + name: "test partitioned group created after visit marker", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{}, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(1 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{}, + }, + { + name: "test one partition is in progress not expired and contains block marked for deletion", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{}, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: InProgress, + VisitTime: time.Now().Add(time.Second).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid0: true, + }, + }, + { + name: "test one partition is not visited and contains block with no compact mark", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: 
[]ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + noCompactBlock: map[ulid.ULID]struct{}{ + ulid0: {}, + }, + }, + { + name: "test one partition is expired and contains block with no compact mark", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []PartitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: InProgress, + VisitTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + noCompactBlock: map[ulid.ULID]struct{}{ + ulid0: {}, + }, + }, + } { + t.Run(tcase.name, func(t *testing.T) { + bucketClient := &bucket.ClientMock{} + for _, partitionVisitMarker := range tcase.partitionVisitMarkers { + content, _ := json.Marshal(partitionVisitMarker) + bucketClient.MockGet(partitionVisitMarker.GetVisitMarkerFilePath(), string(content), nil) + } + + for _, partition := range tcase.partitionedGroupInfo.Partitions { + for _, blockID := range partition.Blocks { + metaPath := path.Join(blockID.String(), metadata.MetaFilename) + noCompactPath := path.Join(blockID.String(), metadata.NoCompactMarkFilename) + deletionMarkerPath := path.Join(blockID.String(), metadata.DeletionMarkFilename) + if hasDeletionMarker, ok := tcase.deletedBlock[blockID]; ok { + if hasDeletionMarker { + bucketClient.MockExists(metaPath, true, nil) + bucketClient.MockExists(deletionMarkerPath, true, nil) + } else { + bucketClient.MockExists(metaPath, false, nil) + } + } else { + bucketClient.MockExists(metaPath, true, nil) + bucketClient.MockExists(deletionMarkerPath, false, nil) + } + if _, ok := tcase.noCompactBlock[blockID]; ok { + bucketClient.MockExists(noCompactPath, true, nil) + } else { + bucketClient.MockExists(noCompactPath, false, nil) + } + } + } + bucketClient.MockGet(mock.Anything, "", nil) + + ctx := context.Background() + dummyCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + logger := log.NewNopLogger() + result := tcase.partitionedGroupInfo.getPartitionedGroupStatus(ctx, bucketClient, 60*time.Second, logger, dummyCounter, dummyCounter) + require.Equal(t, tcase.expectedResult.CanDelete, result.CanDelete) + require.Equal(t, tcase.expectedResult.IsCompleted, 
result.IsCompleted) + require.Equal(t, len(tcase.expectedResult.PendingOrFailedPartitions), len(result.PendingOrFailedPartitions)) + for _, partition := range result.PendingOrFailedPartitions { + require.Contains(t, tcase.expectedResult.PendingOrFailedPartitions, partition) + } + }) + } +} diff --git a/pkg/compactor/sharded_block_populator.go b/pkg/compactor/sharded_block_populator.go new file mode 100644 index 0000000000..721498344b --- /dev/null +++ b/pkg/compactor/sharded_block_populator.go @@ -0,0 +1,217 @@ +package compactor + +import ( + "context" + "io" + "maps" + "sync" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/pkg/errors" + "golang.org/x/exp/slices" + "golang.org/x/sync/errgroup" + + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb" + "github.com/prometheus/prometheus/tsdb/chunkenc" + "github.com/prometheus/prometheus/tsdb/chunks" + tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" +) + +type ShardedBlockPopulator struct { + partitionCount int + partitionID int + logger log.Logger +} + +// PopulateBlock fills the index and chunk writers with new data gathered as the union +// of the provided blocks. It returns meta information for the new block. +// It expects sorted blocks input by mint. +// The main logic is copied from tsdb.DefaultPopulateBlockFunc +func (c ShardedBlockPopulator) PopulateBlock(ctx context.Context, metrics *tsdb.CompactorMetrics, _ log.Logger, chunkPool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc, blocks []tsdb.BlockReader, meta *tsdb.BlockMeta, indexw tsdb.IndexWriter, chunkw tsdb.ChunkWriter, postingsFunc tsdb.IndexReaderPostingsFunc) (err error) { + if len(blocks) == 0 { + return errors.New("cannot populate block from no readers") + } + + var ( + sets []storage.ChunkSeriesSet + setsMtx sync.Mutex + symbols map[string]struct{} + closers []io.Closer + overlapping bool + ) + symbols = make(map[string]struct{}) + defer func() { + errs := tsdb_errors.NewMulti(err) + if cerr := tsdb_errors.CloseAll(closers); cerr != nil { + errs.Add(errors.Wrap(cerr, "close")) + } + err = errs.Err() + metrics.PopulatingBlocks.Set(0) + }() + metrics.PopulatingBlocks.Set(1) + + globalMaxt := blocks[0].Meta().MaxTime + g, _ := errgroup.WithContext(ctx) + g.SetLimit(8) + for i, b := range blocks { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + if !overlapping { + if i > 0 && b.Meta().MinTime < globalMaxt { + metrics.OverlappingBlocks.Inc() + overlapping = true + level.Info(c.logger).Log("msg", "Found overlapping blocks during compaction", "ulid", meta.ULID) + } + if b.Meta().MaxTime > globalMaxt { + globalMaxt = b.Meta().MaxTime + } + } + + indexr, err := b.Index() + if err != nil { + return errors.Wrapf(err, "open index reader for block %+v", b.Meta()) + } + closers = append(closers, indexr) + + chunkr, err := b.Chunks() + if err != nil { + return errors.Wrapf(err, "open chunk reader for block %+v", b.Meta()) + } + closers = append(closers, chunkr) + + tombsr, err := b.Tombstones() + if err != nil { + return errors.Wrapf(err, "open tombstone reader for block %+v", b.Meta()) + } + closers = append(closers, tombsr) + + all := postingsFunc(ctx, indexr) + g.Go(func() error { + shardStart := time.Now() + shardedPosting, syms, err := NewShardedPosting(all, uint64(c.partitionCount), uint64(c.partitionID), indexr.Series) + if err != nil { + return err + } + level.Debug(c.logger).Log("msg", "finished sharding", "duration", time.Since(shardStart)) + // Blocks meta is 
half open: [min, max), so subtract 1 to ensure we don't hold samples with exact meta.MaxTime timestamp. + setsMtx.Lock() + sets = append(sets, tsdb.NewBlockChunkSeriesSet(meta.ULID, indexr, chunkr, tombsr, shardedPosting, meta.MinTime, meta.MaxTime-1, false)) + maps.Copy(symbols, syms) + setsMtx.Unlock() + return nil + }) + } + if err := g.Wait(); err != nil { + return err + } + + symbolsList := make([]string, len(symbols)) + symbolIdx := 0 + for symbol := range symbols { + symbolsList[symbolIdx] = symbol + symbolIdx++ + } + slices.Sort(symbolsList) + for _, symbol := range symbolsList { + if err := indexw.AddSymbol(symbol); err != nil { + return errors.Wrap(err, "add symbol") + } + } + + var ( + ref = storage.SeriesRef(0) + ch = make(chan func() error, 1000) + ) + + set := sets[0] + if len(sets) > 1 { + iCtx, cancel := context.WithCancel(ctx) + // Merge series using specified chunk series merger. + // The default one is the compacting series merger. + set = NewBackgroundChunkSeriesSet(iCtx, storage.NewMergeChunkSeriesSet(sets, mergeFunc)) + defer cancel() + } + + go func() { + // Iterate over all sorted chunk series. + for set.Next() { + select { + case <-ctx.Done(): + ch <- func() error { return ctx.Err() } + default: + } + s := set.At() + curChksIter := s.Iterator(nil) + + var chks []chunks.Meta + var wg sync.WaitGroup + r := ref + wg.Add(1) + go func() { + for curChksIter.Next() { + // We are not iterating in streaming way over chunk as + // it's more efficient to do bulk write for index and + // chunk file purposes. + chks = append(chks, curChksIter.At()) + } + wg.Done() + }() + + ch <- func() error { + wg.Wait() + if curChksIter.Err() != nil { + return errors.Wrap(curChksIter.Err(), "chunk iter") + } + + // Skip the series with all deleted chunks. 
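+				// This can happen when tombstones deleted every chunk of the
+				// series within the block's time range.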
+ if len(chks) == 0 { + return nil + } + + if err := chunkw.WriteChunks(chks...); err != nil { + return errors.Wrap(err, "write chunks") + } + if err := indexw.AddSeries(r, s.Labels(), chks...); err != nil { + return errors.Wrap(err, "add series") + } + + meta.Stats.NumChunks += uint64(len(chks)) + meta.Stats.NumSeries++ + for _, chk := range chks { + meta.Stats.NumSamples += uint64(chk.Chunk.NumSamples()) + } + + for _, chk := range chks { + if err := chunkPool.Put(chk.Chunk); err != nil { + return errors.Wrap(err, "put chunk") + } + } + + return nil + } + + ref++ + } + close(ch) + }() + + for callback := range ch { + err := callback() + if err != nil { + return err + } + } + + if set.Err() != nil { + return errors.Wrap(set.Err(), "iterate compaction set") + } + + return nil +} diff --git a/pkg/compactor/sharded_compaction_lifecycle_callback.go b/pkg/compactor/sharded_compaction_lifecycle_callback.go new file mode 100644 index 0000000000..cc509a664c --- /dev/null +++ b/pkg/compactor/sharded_compaction_lifecycle_callback.go @@ -0,0 +1,112 @@ +package compactor + +import ( + "context" + "path/filepath" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/oklog/ulid" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/tsdb" + "github.com/thanos-io/objstore" + "github.com/thanos-io/thanos/pkg/block/metadata" + "github.com/thanos-io/thanos/pkg/compact" + "github.com/thanos-io/thanos/pkg/runutil" + + cortextsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" +) + +type ShardedCompactionLifecycleCallback struct { + ctx context.Context + userBucket objstore.InstrumentedBucket + logger log.Logger + metaSyncConcurrency int + compactDir string + userID string + partitionedGroupInfoReadFailed prometheus.Counter + compactorMetrics *compactorMetrics + + startTime time.Time +} + +func NewShardedCompactionLifecycleCallback( + ctx context.Context, + userBucket objstore.InstrumentedBucket, + logger log.Logger, + metaSyncConcurrency int, + compactDir string, + userID string, + partitionedGroupInfoReadFailed prometheus.Counter, + compactorMetrics *compactorMetrics, +) *ShardedCompactionLifecycleCallback { + return &ShardedCompactionLifecycleCallback{ + ctx: ctx, + userBucket: userBucket, + logger: logger, + metaSyncConcurrency: metaSyncConcurrency, + compactDir: compactDir, + userID: userID, + partitionedGroupInfoReadFailed: partitionedGroupInfoReadFailed, + compactorMetrics: compactorMetrics, + } +} + +func (c *ShardedCompactionLifecycleCallback) PreCompactionCallback(_ context.Context, logger log.Logger, g *compact.Group, meta []*metadata.Meta) error { + c.startTime = time.Now() + + metaExt, err := cortextsdb.ConvertToCortexMetaExtensions(g.Extensions()) + if err != nil { + level.Warn(logger).Log("msg", "unable to get cortex meta extensions", "err", err) + } else if metaExt != nil { + c.compactorMetrics.compactionPlanned.WithLabelValues(c.userID, metaExt.TimeRangeStr()).Inc() + } + + // Delete local files other than current group + var ignoreDirs []string + for _, m := range meta { + ignoreDirs = append(ignoreDirs, filepath.Join(g.Key(), m.ULID.String())) + } + if err := runutil.DeleteAll(c.compactDir, ignoreDirs...); err != nil { + level.Warn(logger).Log("msg", "failed deleting non-current compaction group files, disk space usage might have leaked.", "err", err, "dir", c.compactDir) + } + return nil +} + +func (c *ShardedCompactionLifecycleCallback) PostCompactionCallback(_ context.Context, logger log.Logger, cg *compact.Group, _ 
ulid.ULID) error { + metaExt, err := cortextsdb.ConvertToCortexMetaExtensions(cg.Extensions()) + if err != nil { + level.Warn(logger).Log("msg", "unable to get cortex meta extensions", "err", err) + } else if metaExt != nil { + c.compactorMetrics.compactionDuration.WithLabelValues(c.userID, metaExt.TimeRangeStr()).Set(time.Since(c.startTime).Seconds()) + } + return nil +} + +func (c *ShardedCompactionLifecycleCallback) GetBlockPopulator(_ context.Context, logger log.Logger, cg *compact.Group) (tsdb.BlockPopulator, error) { + partitionInfo, err := cortextsdb.ConvertToPartitionInfo(cg.Extensions()) + if err != nil { + return nil, err + } + if partitionInfo == nil { + return tsdb.DefaultBlockPopulator{}, nil + } + if partitionInfo.PartitionCount <= 0 { + partitionInfo = &cortextsdb.PartitionInfo{ + PartitionCount: 1, + PartitionID: partitionInfo.PartitionID, + PartitionedGroupID: partitionInfo.PartitionedGroupID, + PartitionedGroupCreationTime: partitionInfo.PartitionedGroupCreationTime, + } + cg.SetExtensions(&cortextsdb.CortexMetaExtensions{ + PartitionInfo: partitionInfo, + }) + } + populateBlockFunc := ShardedBlockPopulator{ + partitionCount: partitionInfo.PartitionCount, + partitionID: partitionInfo.PartitionID, + logger: logger, + } + return populateBlockFunc, nil +} diff --git a/pkg/compactor/sharded_compaction_lifecycle_callback_test.go b/pkg/compactor/sharded_compaction_lifecycle_callback_test.go new file mode 100644 index 0000000000..09157c895b --- /dev/null +++ b/pkg/compactor/sharded_compaction_lifecycle_callback_test.go @@ -0,0 +1,96 @@ +package compactor + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/oklog/ulid" + "github.com/prometheus/prometheus/tsdb" + "github.com/stretchr/testify/require" + "github.com/thanos-io/thanos/pkg/block/metadata" + "github.com/thanos-io/thanos/pkg/compact" +) + +func TestPreCompactionCallback(t *testing.T) { + compactDir, err := os.MkdirTemp(os.TempDir(), "compact") + require.NoError(t, err) + + t.Cleanup(func() { + require.NoError(t, os.RemoveAll(compactDir)) + }) + + lifecycleCallback := ShardedCompactionLifecycleCallback{ + compactDir: compactDir, + } + + block1 := ulid.MustNew(1, nil) + block2 := ulid.MustNew(2, nil) + block3 := ulid.MustNew(3, nil) + meta := []*metadata.Meta{ + { + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 1 * time.Hour.Milliseconds(), MaxTime: 2 * time.Hour.Milliseconds()}, + }, + { + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 1 * time.Hour.Milliseconds(), MaxTime: 2 * time.Hour.Milliseconds()}, + }, + { + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * time.Hour.Milliseconds(), MaxTime: 3 * time.Hour.Milliseconds()}, + }, + } + testGroupKey := "test_group_key" + testGroup, _ := compact.NewGroup( + log.NewNopLogger(), + nil, + testGroupKey, + nil, + 0, + true, + true, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + metadata.NoneFunc, + 1, + 1, + ) + for _, m := range meta { + err := testGroup.AppendMeta(m) + require.NoError(t, err) + } + + dummyGroupID1 := "dummy_dir_1" + dummyGroupID2 := "dummy_dir_2" + err = os.MkdirAll(filepath.Join(compactDir, testGroupKey), 0750) + require.NoError(t, err) + err = os.MkdirAll(filepath.Join(compactDir, testGroupKey, block1.String()), 0750) + require.NoError(t, err) + err = os.MkdirAll(filepath.Join(compactDir, dummyGroupID1), 0750) + require.NoError(t, err) + err = os.MkdirAll(filepath.Join(compactDir, dummyGroupID2), 0750) + require.NoError(t, err) + + err = 
lifecycleCallback.PreCompactionCallback(context.Background(), log.NewNopLogger(), testGroup, meta) + require.NoError(t, err) + + info, err := os.Stat(filepath.Join(compactDir, testGroupKey)) + require.NoError(t, err) + require.True(t, info.IsDir()) + info, err = os.Stat(filepath.Join(compactDir, testGroupKey, block1.String())) + require.NoError(t, err) + require.True(t, info.IsDir()) + _, err = os.Stat(filepath.Join(compactDir, dummyGroupID1)) + require.Error(t, err) + require.True(t, os.IsNotExist(err)) + _, err = os.Stat(filepath.Join(compactDir, dummyGroupID2)) + require.Error(t, err) + require.True(t, os.IsNotExist(err)) +} diff --git a/pkg/compactor/sharded_posting.go b/pkg/compactor/sharded_posting.go new file mode 100644 index 0000000000..aca18a804d --- /dev/null +++ b/pkg/compactor/sharded_posting.go @@ -0,0 +1,30 @@ +package compactor + +import ( + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb/chunks" + "github.com/prometheus/prometheus/tsdb/index" +) + +func NewShardedPosting(postings index.Postings, partitionCount uint64, partitionID uint64, labelsFn func(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta) error) (index.Postings, map[string]struct{}, error) { + bufChks := make([]chunks.Meta, 0) + series := make([]storage.SeriesRef, 0) + symbols := make(map[string]struct{}) + var builder labels.ScratchBuilder + for postings.Next() { + err := labelsFn(postings.At(), &builder, &bufChks) + if err != nil { + return nil, nil, err + } + if builder.Labels().Hash()%partitionCount == partitionID { + posting := postings.At() + series = append(series, posting) + for _, label := range builder.Labels() { + symbols[label.Name] = struct{}{} + symbols[label.Value] = struct{}{} + } + } + } + return index.NewListPostings(series), symbols, nil +} diff --git a/pkg/compactor/sharded_posting_test.go b/pkg/compactor/sharded_posting_test.go new file mode 100644 index 0000000000..5077d740ed --- /dev/null +++ b/pkg/compactor/sharded_posting_test.go @@ -0,0 +1,109 @@ +package compactor + +import ( + "context" + "io" + "math/rand" + "os" + "path/filepath" + "strconv" + "testing" + "time" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/tsdb/chunks" + "github.com/prometheus/prometheus/tsdb/index" + "github.com/stretchr/testify/require" + "github.com/thanos-io/thanos/pkg/block/metadata" + "github.com/thanos-io/thanos/pkg/testutil/e2eutil" +) + +const ( + MetricLabelName = "__name__" + MetricName = "test_metric" + TestLabelName = "test_label" + ConstLabelName = "const_label" + ConstLabelValue = "const_value" +) + +func TestShardPostingAndSymbolBasedOnPartitionID(t *testing.T) { + partitionCount := 8 + + tmpdir, err := os.MkdirTemp("", "sharded_posting_test") + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, os.RemoveAll(tmpdir)) + }) + + r := rand.New(rand.NewSource(0)) + var series []labels.Labels + expectedSymbols := make(map[string]bool) + metricName := labels.Label{Name: MetricLabelName, Value: MetricName} + expectedSymbols[MetricLabelName] = false + expectedSymbols[MetricName] = false + expectedSymbols[ConstLabelName] = false + expectedSymbols[ConstLabelValue] = false + expectedSeriesCount := 10 + for i := 0; i < expectedSeriesCount; i++ { + labelValue := strconv.Itoa(r.Int()) + series = append(series, labels.Labels{ + metricName, + {Name: ConstLabelName, Value: ConstLabelValue}, + {Name: TestLabelName, Value: labelValue}, + }) + 
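// --- Reviewer sketch (not part of the patch) --------------------------------
// A minimal, standalone illustration of the partition-assignment rule used by
// NewShardedPosting above: a series belongs to partition
// labels.Hash() % partitionCount, so the sharded postings of all partitions
// together cover every series exactly once. The package name, label names and
// values below are made up for illustration only.
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/model/labels"
)

func main() {
	const partitionCount = 8
	lset := labels.FromStrings("__name__", "test_metric", "test_label", "value-1")
	// This is the same condition NewShardedPosting checks for each posting.
	partitionID := lset.Hash() % partitionCount
	fmt.Printf("series %s -> partition %d of %d\n", lset, partitionID, partitionCount)
}
// -----------------------------------------------------------------------------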
expectedSymbols[TestLabelName] = false + expectedSymbols[labelValue] = false + } + blockID, err := e2eutil.CreateBlock(context.Background(), tmpdir, series, 10, time.Now().Add(-10*time.Minute).UnixMilli(), time.Now().UnixMilli(), nil, 0, metadata.NoneFunc) + require.NoError(t, err) + + var closers []io.Closer + defer func() { + for _, c := range closers { + c.Close() + } + }() + seriesCount := 0 + for partitionID := 0; partitionID < partitionCount; partitionID++ { + ir, err := index.NewFileReader(filepath.Join(tmpdir, blockID.String(), "index")) + closers = append(closers, ir) + require.NoError(t, err) + k, v := index.AllPostingsKey() + postings, err := ir.Postings(context.Background(), k, v) + require.NoError(t, err) + postings = ir.SortedPostings(postings) + shardedPostings, syms, err := NewShardedPosting(postings, uint64(partitionCount), uint64(partitionID), ir.Series) + require.NoError(t, err) + bufChks := make([]chunks.Meta, 0) + expectedShardedSymbols := make(map[string]struct{}) + for shardedPostings.Next() { + var builder labels.ScratchBuilder + err = ir.Series(shardedPostings.At(), &builder, &bufChks) + require.NoError(t, err) + require.Equal(t, uint64(partitionID), builder.Labels().Hash()%uint64(partitionCount)) + seriesCount++ + for _, label := range builder.Labels() { + expectedShardedSymbols[label.Name] = struct{}{} + expectedShardedSymbols[label.Value] = struct{}{} + } + } + err = ir.Close() + if err == nil { + closers = closers[0 : len(closers)-1] + } + symbolsCount := 0 + for s := range syms { + symbolsCount++ + _, ok := expectedSymbols[s] + require.True(t, ok) + expectedSymbols[s] = true + _, ok = expectedShardedSymbols[s] + require.True(t, ok) + } + require.Equal(t, len(expectedShardedSymbols), symbolsCount) + } + require.Equal(t, expectedSeriesCount, seriesCount) + for _, visited := range expectedSymbols { + require.True(t, visited) + } +} diff --git a/pkg/compactor/shuffle_sharding_grouper.go b/pkg/compactor/shuffle_sharding_grouper.go index 892ab05398..303646e33f 100644 --- a/pkg/compactor/shuffle_sharding_grouper.go +++ b/pkg/compactor/shuffle_sharding_grouper.go @@ -13,7 +13,6 @@ import ( "github.com/oklog/ulid" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/prometheus/model/labels" "github.com/thanos-io/objstore" "github.com/thanos-io/thanos/pkg/block/metadata" @@ -23,28 +22,21 @@ import ( ) type ShuffleShardingGrouper struct { - ctx context.Context - logger log.Logger - bkt objstore.InstrumentedBucket - acceptMalformedIndex bool - enableVerticalCompaction bool - reg prometheus.Registerer - blocksMarkedForDeletion prometheus.Counter - blocksMarkedForNoCompact prometheus.Counter - garbageCollectedBlocks prometheus.Counter - remainingPlannedCompactions prometheus.Gauge - hashFunc metadata.HashFunc - compactions *prometheus.CounterVec - compactionRunsStarted *prometheus.CounterVec - compactionRunsCompleted *prometheus.CounterVec - compactionFailures *prometheus.CounterVec - verticalCompactions *prometheus.CounterVec - compactorCfg Config - limits Limits - userID string - blockFilesConcurrency int - blocksFetchConcurrency int - compactionConcurrency int + ctx context.Context + logger log.Logger + bkt objstore.InstrumentedBucket + acceptMalformedIndex bool + enableVerticalCompaction bool + blocksMarkedForNoCompact prometheus.Counter + syncerMetrics *compact.SyncerMetrics + compactorMetrics *compactorMetrics + hashFunc metadata.HashFunc + compactorCfg Config + 
limits Limits + userID string + blockFilesConcurrency int + blocksFetchConcurrency int + compactionConcurrency int ring ring.ReadRing ringLifecyclerAddr string @@ -63,12 +55,10 @@ func NewShuffleShardingGrouper( bkt objstore.InstrumentedBucket, acceptMalformedIndex bool, enableVerticalCompaction bool, - reg prometheus.Registerer, - blocksMarkedForDeletion prometheus.Counter, blocksMarkedForNoCompact prometheus.Counter, - garbageCollectedBlocks prometheus.Counter, - remainingPlannedCompactions prometheus.Gauge, hashFunc metadata.HashFunc, + syncerMetrics *compact.SyncerMetrics, + compactorMetrics *compactorMetrics, compactorCfg Config, ring ring.ReadRing, ringLifecyclerAddr string, @@ -93,33 +83,10 @@ func NewShuffleShardingGrouper( bkt: bkt, acceptMalformedIndex: acceptMalformedIndex, enableVerticalCompaction: enableVerticalCompaction, - reg: reg, - blocksMarkedForDeletion: blocksMarkedForDeletion, blocksMarkedForNoCompact: blocksMarkedForNoCompact, - garbageCollectedBlocks: garbageCollectedBlocks, - remainingPlannedCompactions: remainingPlannedCompactions, hashFunc: hashFunc, - // Metrics are copied from Thanos DefaultGrouper constructor - compactions: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_compact_group_compactions_total", - Help: "Total number of group compaction attempts that resulted in a new block.", - }, []string{"group"}), - compactionRunsStarted: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_compact_group_compaction_runs_started_total", - Help: "Total number of group compaction attempts.", - }, []string{"group"}), - compactionRunsCompleted: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_compact_group_compaction_runs_completed_total", - Help: "Total number of group completed compaction runs. This also includes compactor group runs that resulted with no compaction.", - }, []string{"group"}), - compactionFailures: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_compact_group_compactions_failures_total", - Help: "Total number of failed group compactions.", - }, []string{"group"}), - verticalCompactions: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_compact_group_vertical_compactions_total", - Help: "Total number of group compaction attempts that resulted in a new block based on overlapping blocks.", - }, []string{"group"}), + syncerMetrics: syncerMetrics, + compactorMetrics: compactorMetrics, compactorCfg: compactorCfg, ring: ring, ringLifecyclerAddr: ringLifecyclerAddr, @@ -167,7 +134,9 @@ func (g *ShuffleShardingGrouper) Groups(blocks map[ulid.ULID]*metadata.Meta) (re } // Metrics for the remaining planned compactions var remainingCompactions = 0. 
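// --- Reviewer sketch (not part of the patch) --------------------------------
// Standalone illustration of the group identity this file keys on: the
// now-exported HashGroup (renamed further down in this diff) FNV-1a hashes
// userID + rangeStart + rangeEnd into a stable per-(tenant, time range) ID.
// The helper and values below are illustrative only.
package main

import (
	"fmt"
	"hash/fnv"
)

func groupHash(userID string, rangeStart, rangeEnd int64) uint32 {
	h := fnv.New32a()
	// Hash writes never return an error.
	_, _ = h.Write([]byte(fmt.Sprintf("%v%v%v", userID, rangeStart, rangeEnd)))
	return h.Sum32()
}

func main() {
	twoHours := int64(2 * 3600 * 1000)
	// Adjacent 2h ranges of the same tenant map to different, stable group IDs.
	fmt.Println(groupHash("test-user", 0, twoHours))
	fmt.Println(groupHash("test-user", twoHours, 2*twoHours))
}
// -----------------------------------------------------------------------------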
- defer func() { g.remainingPlannedCompactions.Set(remainingCompactions) }() + defer func() { + g.compactorMetrics.remainingPlannedCompactions.WithLabelValues(g.userID).Set(remainingCompactions) + }() var groups []blocksGroup for _, mainBlocks := range mainGroups { @@ -195,9 +164,9 @@ func (g *ShuffleShardingGrouper) Groups(blocks map[ulid.ULID]*metadata.Meta) (re return iMinTime < jMinTime } - iGroupHash := hashGroup(g.userID, iGroup.rangeStart, iGroup.rangeEnd) + iGroupHash := HashGroup(g.userID, iGroup.rangeStart, iGroup.rangeEnd) iGroupKey := createGroupKey(iGroupHash, iGroup) - jGroupHash := hashGroup(g.userID, jGroup.rangeStart, jGroup.rangeEnd) + jGroupHash := HashGroup(g.userID, jGroup.rangeStart, jGroup.rangeEnd) jGroupKey := createGroupKey(jGroupHash, jGroup) // Guarantee stable sort for tests. return iGroupKey < jGroupKey @@ -217,7 +186,7 @@ mainLoop: continue } - groupHash := hashGroup(g.userID, group.rangeStart, group.rangeEnd) + groupHash := HashGroup(g.userID, group.rangeStart, group.rangeEnd) if isVisited, err := g.isGroupVisited(group.blocks, g.ringLifecyclerID); err != nil { level.Warn(g.logger).Log("msg", "unable to check if blocks in group are visited", "group hash", groupHash, "err", err, "group", group.String()) @@ -242,7 +211,11 @@ mainLoop: // resolution and external labels. resolution := group.blocks[0].Thanos.Downsample.Resolution externalLabels := labels.FromMap(group.blocks[0].Thanos.Labels) - + timeRange := group.rangeEnd - group.rangeStart + metricLabelValues := []string{ + g.userID, + fmt.Sprintf("%d", timeRange), + } thanosGroup, err := compact.NewGroup( log.With(g.logger, "groupKey", groupKey, "rangeStart", group.rangeStartTime().String(), "rangeEnd", group.rangeEndTime().String(), "externalLabels", externalLabels, "downsampleResolution", resolution), g.bkt, @@ -251,13 +224,13 @@ mainLoop: resolution, g.acceptMalformedIndex, true, // Enable vertical compaction. - g.compactions.WithLabelValues(groupKey), - g.compactionRunsStarted.WithLabelValues(groupKey), - g.compactionRunsCompleted.WithLabelValues(groupKey), - g.compactionFailures.WithLabelValues(groupKey), - g.verticalCompactions.WithLabelValues(groupKey), - g.garbageCollectedBlocks, - g.blocksMarkedForDeletion, + g.compactorMetrics.compactions.WithLabelValues(metricLabelValues...), + g.compactorMetrics.compactionRunsStarted.WithLabelValues(metricLabelValues...), + g.compactorMetrics.compactionRunsCompleted.WithLabelValues(metricLabelValues...), + g.compactorMetrics.compactionFailures.WithLabelValues(metricLabelValues...), + g.compactorMetrics.verticalCompactions.WithLabelValues(metricLabelValues...), + g.syncerMetrics.GarbageCollectedBlocks, + g.syncerMetrics.BlocksMarkedForDeletion, g.blocksMarkedForNoCompact, g.hashFunc, g.blockFilesConcurrency, @@ -316,15 +289,20 @@ func (g *ShuffleShardingGrouper) checkSubringForCompactor() (bool, error) { return rs.Includes(g.ringLifecyclerAddr), nil } -// Get the hash of a group based on the UserID, and the starting and ending time of the group's range. -func hashGroup(userID string, rangeStart int64, rangeEnd int64) uint32 { +// HashGroup Get the hash of a group based on the UserID, and the starting and ending time of the group's range. +func HashGroup(userID string, rangeStart int64, rangeEnd int64) uint32 { groupString := fmt.Sprintf("%v%v%v", userID, rangeStart, rangeEnd) - groupHasher := fnv.New32a() + + return hashString(groupString) +} + +func hashString(s string) uint32 { + hasher := fnv.New32a() // Hasher never returns err. 
- _, _ = groupHasher.Write([]byte(groupString)) - groupHash := groupHasher.Sum32() + _, _ = hasher.Write([]byte(s)) + result := hasher.Sum32() - return groupHash + return result } func createGroupKey(groupHash uint32, group blocksGroup) string { diff --git a/pkg/compactor/shuffle_sharding_grouper_test.go b/pkg/compactor/shuffle_sharding_grouper_test.go index ab19ece87b..df10d3043f 100644 --- a/pkg/compactor/shuffle_sharding_grouper_test.go +++ b/pkg/compactor/shuffle_sharding_grouper_test.go @@ -140,9 +140,9 @@ func TestShuffleShardingGrouper_Groups(t *testing.T) { {block1hto2hExt1Ulid, block0hto1hExt1Ulid}, {block3hto4hExt1Ulid, block2hto3hExt1Ulid}, }, - metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. + metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. Only available with shuffle-sharding strategy # TYPE cortex_compactor_remaining_planned_compactions gauge - cortex_compactor_remaining_planned_compactions 3 + cortex_compactor_remaining_planned_compactions{user="test-user"} 3 `, }, "test no compaction": { @@ -150,9 +150,9 @@ func TestShuffleShardingGrouper_Groups(t *testing.T) { ranges: []time.Duration{2 * time.Hour, 4 * time.Hour}, blocks: map[ulid.ULID]*metadata.Meta{block0hto1hExt1Ulid: blocks[block0hto1hExt1Ulid], block0hto1hExt2Ulid: blocks[block0hto1hExt2Ulid], block0to1hExt3Ulid: blocks[block0to1hExt3Ulid]}, expected: [][]ulid.ULID{}, - metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. + metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. Only available with shuffle-sharding strategy # TYPE cortex_compactor_remaining_planned_compactions gauge - cortex_compactor_remaining_planned_compactions 0 + cortex_compactor_remaining_planned_compactions{user="test-user"} 0 `, }, "test smallest range first": { @@ -164,9 +164,9 @@ func TestShuffleShardingGrouper_Groups(t *testing.T) { {block3hto4hExt1Ulid, block2hto3hExt1Ulid}, {block4hto6hExt2Ulid, block6hto8hExt2Ulid}, }, - metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. + metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. Only available with shuffle-sharding strategy # TYPE cortex_compactor_remaining_planned_compactions gauge - cortex_compactor_remaining_planned_compactions 3 + cortex_compactor_remaining_planned_compactions{user="test-user"} 3 `, }, "test oldest min time first": { @@ -177,9 +177,9 @@ func TestShuffleShardingGrouper_Groups(t *testing.T) { {block1hto2hExt1Ulid, block0hto1hExt1Ulid, block1hto2hExt1UlidCopy}, {block3hto4hExt1Ulid, block2hto3hExt1Ulid}, }, - metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. + metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. 
Only available with shuffle-sharding strategy # TYPE cortex_compactor_remaining_planned_compactions gauge - cortex_compactor_remaining_planned_compactions 2 + cortex_compactor_remaining_planned_compactions{user="test-user"} 2 `, }, "test overlapping blocks": { @@ -189,9 +189,9 @@ func TestShuffleShardingGrouper_Groups(t *testing.T) { expected: [][]ulid.ULID{ {block21hto40hExt1Ulid, block21hto40hExt1UlidCopy}, }, - metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. + metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. Only available with shuffle-sharding strategy # TYPE cortex_compactor_remaining_planned_compactions gauge - cortex_compactor_remaining_planned_compactions 1 + cortex_compactor_remaining_planned_compactions{user="test-user"} 1 `, }, "test imperfect maxTime blocks": { @@ -201,9 +201,9 @@ func TestShuffleShardingGrouper_Groups(t *testing.T) { expected: [][]ulid.ULID{ {block0hto45mExt1Ulid, block0hto1h30mExt1Ulid}, }, - metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. + metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. Only available with shuffle-sharding strategy # TYPE cortex_compactor_remaining_planned_compactions gauge - cortex_compactor_remaining_planned_compactions 1 + cortex_compactor_remaining_planned_compactions{user="test-user"} 1 `, }, "test prematurely created blocks": { @@ -211,9 +211,9 @@ func TestShuffleShardingGrouper_Groups(t *testing.T) { ranges: []time.Duration{2 * time.Hour}, blocks: map[ulid.ULID]*metadata.Meta{blocklast1hExt1UlidCopy: blocks[blocklast1hExt1UlidCopy], blocklast1hExt1Ulid: blocks[blocklast1hExt1Ulid]}, expected: [][]ulid.ULID{}, - metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. + metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. Only available with shuffle-sharding strategy # TYPE cortex_compactor_remaining_planned_compactions gauge - cortex_compactor_remaining_planned_compactions 0 + cortex_compactor_remaining_planned_compactions{user="test-user"} 0 `, }, "test group with all blocks visited": { @@ -231,9 +231,9 @@ func TestShuffleShardingGrouper_Groups(t *testing.T) { {id: block1hto2hExt2Ulid, compactorID: otherCompactorID, isExpired: false}, {id: block0hto1hExt2Ulid, compactorID: otherCompactorID, isExpired: false}, }, - metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. + metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. Only available with shuffle-sharding strategy # TYPE cortex_compactor_remaining_planned_compactions gauge - cortex_compactor_remaining_planned_compactions 1 + cortex_compactor_remaining_planned_compactions{user="test-user"} 1 `, }, "test group with one block visited": { @@ -250,9 +250,9 @@ func TestShuffleShardingGrouper_Groups(t *testing.T) { }{ {id: block1hto2hExt2Ulid, compactorID: otherCompactorID, isExpired: false}, }, - metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. + metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. 
Only available with shuffle-sharding strategy # TYPE cortex_compactor_remaining_planned_compactions gauge - cortex_compactor_remaining_planned_compactions 1 + cortex_compactor_remaining_planned_compactions{user="test-user"} 1 `, }, "test group block visit marker file expired": { @@ -270,9 +270,9 @@ func TestShuffleShardingGrouper_Groups(t *testing.T) { {id: block1hto2hExt2Ulid, compactorID: otherCompactorID, isExpired: true}, {id: block0hto1hExt2Ulid, compactorID: otherCompactorID, isExpired: true}, }, - metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. + metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. Only available with shuffle-sharding strategy # TYPE cortex_compactor_remaining_planned_compactions gauge - cortex_compactor_remaining_planned_compactions 1 + cortex_compactor_remaining_planned_compactions{user="test-user"} 1 `, }, "test group with one block visited by current compactor": { @@ -289,9 +289,9 @@ func TestShuffleShardingGrouper_Groups(t *testing.T) { }{ {id: block1hto2hExt2Ulid, compactorID: testCompactorID, isExpired: false}, }, - metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. + metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. Only available with shuffle-sharding strategy # TYPE cortex_compactor_remaining_planned_compactions gauge - cortex_compactor_remaining_planned_compactions 1 + cortex_compactor_remaining_planned_compactions{user="test-user"} 1 `, }, "test basic grouping with concurrency 2": { @@ -302,9 +302,9 @@ func TestShuffleShardingGrouper_Groups(t *testing.T) { {block1hto2hExt2Ulid, block0hto1hExt2Ulid}, {block1hto2hExt1Ulid, block0hto1hExt1Ulid}, }, - metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. + metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. Only available with shuffle-sharding strategy # TYPE cortex_compactor_remaining_planned_compactions gauge - cortex_compactor_remaining_planned_compactions 2 + cortex_compactor_remaining_planned_compactions{user="test-user"} 2 `, }, "test should skip block with no compact marker": { @@ -315,9 +315,9 @@ func TestShuffleShardingGrouper_Groups(t *testing.T) { {block1hto2hExt2Ulid, block0hto1hExt2Ulid}, {block1hto2hExt1Ulid, block0hto1hExt1Ulid}, }, - metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. + metrics: `# HELP cortex_compactor_remaining_planned_compactions Total number of plans that remain to be compacted. 
Only available with shuffle-sharding strategy # TYPE cortex_compactor_remaining_planned_compactions gauge - cortex_compactor_remaining_planned_compactions 2 + cortex_compactor_remaining_planned_compactions{user="test-user"} 2 `, noCompactBlocks: map[ulid.ULID]*metadata.NoCompactMark{block2hto3hExt1Ulid: {}}, }, @@ -347,10 +347,6 @@ func TestShuffleShardingGrouper_Groups(t *testing.T) { ring.On("ShuffleShard", mock.Anything, mock.Anything).Return(subring, nil) registerer := prometheus.NewPedanticRegistry() - remainingPlannedCompactions := promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ - Name: "cortex_compactor_remaining_planned_compactions", - Help: "Total number of plans that remain to be compacted.", - }) blockVisitMarkerReadFailed := promauto.With(registerer).NewCounter(prometheus.CounterOpts{ Name: "cortex_compactor_block_visit_marker_read_failed", Help: "Number of block visit marker file failed to be read.", @@ -379,6 +375,8 @@ func TestShuffleShardingGrouper_Groups(t *testing.T) { bkt.MockUpload(mock.Anything, nil) bkt.MockGet(mock.Anything, "", nil) + metrics := newCompactorMetrics(registerer) + noCompactFilter := func() map[ulid.ULID]*metadata.NoCompactMark { return testData.noCompactBlocks } @@ -391,18 +389,16 @@ func TestShuffleShardingGrouper_Groups(t *testing.T) { objstore.WithNoopInstr(bkt), false, // Do not accept malformed indexes true, // Enable vertical compaction - registerer, - nil, - nil, nil, - remainingPlannedCompactions, metadata.NoneFunc, + metrics.getSyncerMetrics("test-user"), + metrics, *compactorCfg, ring, "test-addr", testCompactorID, overrides, - "", + "test-user", 10, 3, testData.concurrency, diff --git a/pkg/compactor/syncer_metrics.go b/pkg/compactor/syncer_metrics.go deleted file mode 100644 index c171779270..0000000000 --- a/pkg/compactor/syncer_metrics.go +++ /dev/null @@ -1,124 +0,0 @@ -package compactor - -import ( - "github.com/go-kit/log/level" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - - "github.com/cortexproject/cortex/pkg/util" - util_log "github.com/cortexproject/cortex/pkg/util/log" -) - -// Copied from Thanos, pkg/compact/compact.go. -// Here we aggregate metrics from all finished syncers. -type syncerMetrics struct { - metaSync prometheus.Counter - metaSyncFailures prometheus.Counter - metaSyncDuration *util.HistogramDataCollector // was prometheus.Histogram before - metaSyncConsistencyDelay prometheus.Gauge - garbageCollections prometheus.Counter - garbageCollectionFailures prometheus.Counter - garbageCollectionDuration *util.HistogramDataCollector // was prometheus.Histogram before - compactions prometheus.Counter - compactionRunsStarted prometheus.Counter - compactionRunsCompleted prometheus.Counter - compactionFailures prometheus.Counter - verticalCompactions prometheus.Counter -} - -// Copied (and modified with Cortex prefix) from Thanos, pkg/compact/compact.go -// We also ignore "group" label, since we only use a single group. 
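// --- Reviewer sketch (not part of the patch) --------------------------------
// The test changes above swap the unlabeled
// cortex_compactor_remaining_planned_compactions gauge for a per-user one
// created inside newCompactorMetrics (whose definition is not part of this
// hunk). Below is a minimal stand-in showing only the metric shape the updated
// expectations assert on; the registry and values are illustrative.
package main

import (
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

func main() {
	reg := prometheus.NewRegistry()
	remaining := promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
		Name: "cortex_compactor_remaining_planned_compactions",
		Help: "Total number of plans that remain to be compacted. Only available with shuffle-sharding strategy",
	}, []string{"user"})
	remaining.WithLabelValues("test-user").Set(3)
}
// -----------------------------------------------------------------------------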
-func newSyncerMetrics(reg prometheus.Registerer) *syncerMetrics { - var m syncerMetrics - - m.metaSync = promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "cortex_compactor_meta_syncs_total", - Help: "Total blocks metadata synchronization attempts.", - }) - m.metaSyncFailures = promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "cortex_compactor_meta_sync_failures_total", - Help: "Total blocks metadata synchronization failures.", - }) - m.metaSyncDuration = util.NewHistogramDataCollector(prometheus.NewDesc( - "cortex_compactor_meta_sync_duration_seconds", - "Duration of the blocks metadata synchronization in seconds.", - nil, nil)) - m.metaSyncConsistencyDelay = promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Name: "cortex_compactor_meta_sync_consistency_delay_seconds", - Help: "Configured consistency delay in seconds.", - }) - - m.garbageCollections = promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "cortex_compactor_garbage_collection_total", - Help: "Total number of garbage collection operations.", - }) - m.garbageCollectionFailures = promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "cortex_compactor_garbage_collection_failures_total", - Help: "Total number of failed garbage collection operations.", - }) - m.garbageCollectionDuration = util.NewHistogramDataCollector(prometheus.NewDesc( - "cortex_compactor_garbage_collection_duration_seconds", - "Time it took to perform garbage collection iteration.", - nil, nil)) - - m.compactions = promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "cortex_compactor_group_compactions_total", - Help: "Total number of group compaction attempts that resulted in a new block.", - }) - m.compactionRunsStarted = promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "cortex_compactor_group_compaction_runs_started_total", - Help: "Total number of group compaction attempts.", - }) - m.compactionRunsCompleted = promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "cortex_compactor_group_compaction_runs_completed_total", - Help: "Total number of group completed compaction runs. 
This also includes compactor group runs that resulted with no compaction.", - }) - m.compactionFailures = promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "cortex_compactor_group_compactions_failures_total", - Help: "Total number of failed group compactions.", - }) - m.verticalCompactions = promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "cortex_compactor_group_vertical_compactions_total", - Help: "Total number of group compaction attempts that resulted in a new block based on overlapping blocks.", - }) - - if reg != nil { - reg.MustRegister(m.metaSyncDuration, m.garbageCollectionDuration) - } - - return &m -} - -func (m *syncerMetrics) gatherThanosSyncerMetrics(reg *prometheus.Registry) { - if m == nil { - return - } - - mf, err := reg.Gather() - if err != nil { - level.Warn(util_log.Logger).Log("msg", "failed to gather metrics from syncer registry after compaction", "err", err) - return - } - - mfm, err := util.NewMetricFamilyMap(mf) - if err != nil { - level.Warn(util_log.Logger).Log("msg", "failed to gather metrics from syncer registry after compaction", "err", err) - return - } - - m.metaSync.Add(mfm.SumCounters("blocks_meta_syncs_total")) - m.metaSyncFailures.Add(mfm.SumCounters("blocks_meta_sync_failures_total")) - m.metaSyncDuration.Add(mfm.SumHistograms("blocks_meta_sync_duration_seconds")) - m.metaSyncConsistencyDelay.Set(mfm.MaxGauges("consistency_delay_seconds")) - - m.garbageCollections.Add(mfm.SumCounters("thanos_compact_garbage_collection_total")) - m.garbageCollectionFailures.Add(mfm.SumCounters("thanos_compact_garbage_collection_failures_total")) - m.garbageCollectionDuration.Add(mfm.SumHistograms("thanos_compact_garbage_collection_duration_seconds")) - - // These metrics have "group" label, but we sum them all together. - m.compactions.Add(mfm.SumCounters("thanos_compact_group_compactions_total")) - m.compactionRunsStarted.Add(mfm.SumCounters("thanos_compact_group_compaction_runs_started_total")) - m.compactionRunsCompleted.Add(mfm.SumCounters("thanos_compact_group_compaction_runs_completed_total")) - m.compactionFailures.Add(mfm.SumCounters("thanos_compact_group_compactions_failures_total")) - m.verticalCompactions.Add(mfm.SumCounters("thanos_compact_group_vertical_compactions_total")) -} diff --git a/pkg/compactor/syncer_metrics_test.go b/pkg/compactor/syncer_metrics_test.go deleted file mode 100644 index 7a21955ebf..0000000000 --- a/pkg/compactor/syncer_metrics_test.go +++ /dev/null @@ -1,232 +0,0 @@ -package compactor - -import ( - "bytes" - "testing" - - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/testutil" - "github.com/stretchr/testify/require" -) - -func TestSyncerMetrics(t *testing.T) { - reg := prometheus.NewPedanticRegistry() - - sm := newSyncerMetrics(reg) - sm.gatherThanosSyncerMetrics(generateTestData(12345)) - sm.gatherThanosSyncerMetrics(generateTestData(76543)) - sm.gatherThanosSyncerMetrics(generateTestData(22222)) - // total base = 111110 - - err := testutil.GatherAndCompare(reg, bytes.NewBufferString(` - # HELP cortex_compactor_meta_sync_consistency_delay_seconds Configured consistency delay in seconds. - # TYPE cortex_compactor_meta_sync_consistency_delay_seconds gauge - cortex_compactor_meta_sync_consistency_delay_seconds 300 - - # HELP cortex_compactor_meta_syncs_total Total blocks metadata synchronization attempts. 
- # TYPE cortex_compactor_meta_syncs_total counter - cortex_compactor_meta_syncs_total 111110 - - # HELP cortex_compactor_meta_sync_failures_total Total blocks metadata synchronization failures. - # TYPE cortex_compactor_meta_sync_failures_total counter - cortex_compactor_meta_sync_failures_total 222220 - - # HELP cortex_compactor_meta_sync_duration_seconds Duration of the blocks metadata synchronization in seconds. - # TYPE cortex_compactor_meta_sync_duration_seconds histogram - # Observed values: 3.7035, 22.9629, 6.6666 (seconds) - cortex_compactor_meta_sync_duration_seconds_bucket{le="0.01"} 0 - cortex_compactor_meta_sync_duration_seconds_bucket{le="0.1"} 0 - cortex_compactor_meta_sync_duration_seconds_bucket{le="0.3"} 0 - cortex_compactor_meta_sync_duration_seconds_bucket{le="0.6"} 0 - cortex_compactor_meta_sync_duration_seconds_bucket{le="1"} 0 - cortex_compactor_meta_sync_duration_seconds_bucket{le="3"} 0 - cortex_compactor_meta_sync_duration_seconds_bucket{le="6"} 1 - cortex_compactor_meta_sync_duration_seconds_bucket{le="9"} 2 - cortex_compactor_meta_sync_duration_seconds_bucket{le="20"} 2 - cortex_compactor_meta_sync_duration_seconds_bucket{le="30"} 3 - cortex_compactor_meta_sync_duration_seconds_bucket{le="60"} 3 - cortex_compactor_meta_sync_duration_seconds_bucket{le="90"} 3 - cortex_compactor_meta_sync_duration_seconds_bucket{le="120"} 3 - cortex_compactor_meta_sync_duration_seconds_bucket{le="240"} 3 - cortex_compactor_meta_sync_duration_seconds_bucket{le="360"} 3 - cortex_compactor_meta_sync_duration_seconds_bucket{le="720"} 3 - cortex_compactor_meta_sync_duration_seconds_bucket{le="+Inf"} 3 - # rounding error - cortex_compactor_meta_sync_duration_seconds_sum 33.333000000000006 - cortex_compactor_meta_sync_duration_seconds_count 3 - - # HELP cortex_compactor_garbage_collection_total Total number of garbage collection operations. - # TYPE cortex_compactor_garbage_collection_total counter - cortex_compactor_garbage_collection_total 555550 - - # HELP cortex_compactor_garbage_collection_failures_total Total number of failed garbage collection operations. - # TYPE cortex_compactor_garbage_collection_failures_total counter - cortex_compactor_garbage_collection_failures_total 666660 - - # HELP cortex_compactor_garbage_collection_duration_seconds Time it took to perform garbage collection iteration. 
- # TYPE cortex_compactor_garbage_collection_duration_seconds histogram - # Observed values: 8.6415, 53.5801, 15.5554 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="0.01"} 0 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="0.1"} 0 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="0.3"} 0 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="0.6"} 0 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="1"} 0 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="3"} 0 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="6"} 0 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="9"} 1 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="20"} 2 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="30"} 2 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="60"} 3 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="90"} 3 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="120"} 3 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="240"} 3 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="360"} 3 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="720"} 3 - cortex_compactor_garbage_collection_duration_seconds_bucket{le="+Inf"} 3 - cortex_compactor_garbage_collection_duration_seconds_sum 77.777 - cortex_compactor_garbage_collection_duration_seconds_count 3 - - # HELP cortex_compactor_group_compactions_total Total number of group compaction attempts that resulted in a new block. - # TYPE cortex_compactor_group_compactions_total counter - # Sum across all groups - cortex_compactor_group_compactions_total 2999970 - - # HELP cortex_compactor_group_compaction_runs_started_total Total number of group compaction attempts. - # TYPE cortex_compactor_group_compaction_runs_started_total counter - # Sum across all groups - cortex_compactor_group_compaction_runs_started_total 3999960 - - # HELP cortex_compactor_group_compaction_runs_completed_total Total number of group completed compaction runs. This also includes compactor group runs that resulted with no compaction. - # TYPE cortex_compactor_group_compaction_runs_completed_total counter - # Sum across all groups - cortex_compactor_group_compaction_runs_completed_total 4999950 - - # HELP cortex_compactor_group_compactions_failures_total Total number of failed group compactions. - # TYPE cortex_compactor_group_compactions_failures_total counter - cortex_compactor_group_compactions_failures_total 5999940 - - # HELP cortex_compactor_group_vertical_compactions_total Total number of group compaction attempts that resulted in a new block based on overlapping blocks. 
- # TYPE cortex_compactor_group_vertical_compactions_total counter - cortex_compactor_group_vertical_compactions_total 6999930 - `)) - require.NoError(t, err) -} - -func generateTestData(base float64) *prometheus.Registry { - r := prometheus.NewRegistry() - m := newTestSyncerMetrics(r) - m.metaSync.Add(1 * base) - m.metaSyncFailures.Add(2 * base) - m.metaSyncDuration.Observe(3 * base / 10000) - m.metaSyncConsistencyDelay.Set(300) - m.garbageCollections.Add(5 * base) - m.garbageCollectionFailures.Add(6 * base) - m.garbageCollectionDuration.Observe(7 * base / 10000) - m.compactions.WithLabelValues("aaa").Add(8 * base) - m.compactions.WithLabelValues("bbb").Add(9 * base) - m.compactions.WithLabelValues("ccc").Add(10 * base) - m.compactionRunsStarted.WithLabelValues("aaa").Add(11 * base) - m.compactionRunsStarted.WithLabelValues("bbb").Add(12 * base) - m.compactionRunsStarted.WithLabelValues("ccc").Add(13 * base) - m.compactionRunsCompleted.WithLabelValues("aaa").Add(14 * base) - m.compactionRunsCompleted.WithLabelValues("bbb").Add(15 * base) - m.compactionRunsCompleted.WithLabelValues("ccc").Add(16 * base) - m.compactionFailures.WithLabelValues("aaa").Add(17 * base) - m.compactionFailures.WithLabelValues("bbb").Add(18 * base) - m.compactionFailures.WithLabelValues("ccc").Add(19 * base) - m.verticalCompactions.WithLabelValues("aaa").Add(20 * base) - m.verticalCompactions.WithLabelValues("bbb").Add(21 * base) - m.verticalCompactions.WithLabelValues("ccc").Add(22 * base) - return r -} - -// directly copied from Thanos (and renamed syncerMetrics to testSyncerMetrics to avoid conflict) -type testSyncerMetrics struct { - metaSync prometheus.Counter - metaSyncFailures prometheus.Counter - metaSyncDuration prometheus.Histogram - metaSyncConsistencyDelay prometheus.Gauge - garbageCollections prometheus.Counter - garbageCollectionFailures prometheus.Counter - garbageCollectionDuration prometheus.Histogram - compactions *prometheus.CounterVec - compactionRunsStarted *prometheus.CounterVec - compactionRunsCompleted *prometheus.CounterVec - compactionFailures *prometheus.CounterVec - verticalCompactions *prometheus.CounterVec -} - -func newTestSyncerMetrics(reg prometheus.Registerer) *testSyncerMetrics { - var m testSyncerMetrics - - m.metaSync = prometheus.NewCounter(prometheus.CounterOpts{ - Name: "blocks_meta_syncs_total", - Help: "Total blocks metadata synchronization attempts.", - }) - m.metaSyncFailures = prometheus.NewCounter(prometheus.CounterOpts{ - Name: "blocks_meta_sync_failures_total", - Help: "Total blocks metadata synchronization failures.", - }) - m.metaSyncDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ - Name: "blocks_meta_sync_duration_seconds", - Help: "Duration of the blocks metadata synchronization in seconds.", - Buckets: []float64{0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120, 240, 360, 720}, - }) - m.metaSyncConsistencyDelay = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "consistency_delay_seconds", - Help: "Configured consistency delay in seconds.", - }) - - m.garbageCollections = prometheus.NewCounter(prometheus.CounterOpts{ - Name: "thanos_compact_garbage_collection_total", - Help: "Total number of garbage collection operations.", - }) - m.garbageCollectionFailures = prometheus.NewCounter(prometheus.CounterOpts{ - Name: "thanos_compact_garbage_collection_failures_total", - Help: "Total number of failed garbage collection operations.", - }) - m.garbageCollectionDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ - Name: 
"thanos_compact_garbage_collection_duration_seconds", - Help: "Time it took to perform garbage collection iteration.", - Buckets: []float64{0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120, 240, 360, 720}, - }) - - m.compactions = prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_compact_group_compactions_total", - Help: "Total number of group compaction attempts that resulted in a new block.", - }, []string{"group"}) - m.compactionRunsStarted = prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_compact_group_compaction_runs_started_total", - Help: "Total number of group compaction attempts.", - }, []string{"group"}) - m.compactionRunsCompleted = prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_compact_group_compaction_runs_completed_total", - Help: "Total number of group completed compaction runs. This also includes compactor group runs that resulted with no compaction.", - }, []string{"group"}) - m.compactionFailures = prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_compact_group_compactions_failures_total", - Help: "Total number of failed group compactions.", - }, []string{"group"}) - m.verticalCompactions = prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_compact_group_vertical_compactions_total", - Help: "Total number of group compaction attempts that resulted in a new block based on overlapping blocks.", - }, []string{"group"}) - - if reg != nil { - reg.MustRegister( - m.metaSync, - m.metaSyncFailures, - m.metaSyncDuration, - m.metaSyncConsistencyDelay, - m.garbageCollections, - m.garbageCollectionFailures, - m.garbageCollectionDuration, - m.compactions, - m.compactionRunsStarted, - m.compactionRunsCompleted, - m.compactionFailures, - m.verticalCompactions, - ) - } - return &m -} diff --git a/pkg/compactor/visit_marker.go b/pkg/compactor/visit_marker.go new file mode 100644 index 0000000000..8b363998e8 --- /dev/null +++ b/pkg/compactor/visit_marker.go @@ -0,0 +1,220 @@ +package compactor + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "sync" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/thanos-io/objstore" + "github.com/thanos-io/thanos/pkg/compact" + + "github.com/cortexproject/cortex/pkg/util/runutil" +) + +var ( + ErrorVisitMarkerNotFound = errors.New("visit marker not found") + ErrorUnmarshalVisitMarker = errors.New("unmarshal visit marker JSON") +) + +type VisitStatus string + +const ( + Pending VisitStatus = "pending" + InProgress VisitStatus = "inProgress" + Completed VisitStatus = "completed" + Failed VisitStatus = "failed" +) + +type VisitMarker interface { + GetVisitMarkerFilePath() string + MarkInProgress(ownerIdentifier string) + MarkPending(ownerIdentifier string) + MarkCompleted(ownerIdentifier string) + MarkFailed(ownerIdentifier string) + LogInfo() []string + IsExpired(visitMarkerTimeout time.Duration) bool + IsCompleted() bool + IsFailed() bool + IsInProgress() bool + IsPending() bool +} + +type VisitMarkerManager struct { + bkt objstore.InstrumentedBucket + logger log.Logger + ownerIdentifier string + visitMarker VisitMarker + visitMarkerReadFailed prometheus.Counter + visitMarkerWriteFailed prometheus.Counter + + mutex sync.Mutex +} + +func NewVisitMarkerManager( + bkt objstore.InstrumentedBucket, + logger log.Logger, + ownerIdentifier string, + visitMarker VisitMarker, + visitMarkerReadFailed prometheus.Counter, + visitMarkerWriteFailed 
prometheus.Counter, +) *VisitMarkerManager { + return &VisitMarkerManager{ + bkt: bkt, + logger: log.With(logger, "type", fmt.Sprintf("%T", visitMarker), visitMarker.LogInfo()), + ownerIdentifier: ownerIdentifier, + visitMarker: visitMarker, + visitMarkerReadFailed: visitMarkerReadFailed, + visitMarkerWriteFailed: visitMarkerWriteFailed, + } +} + +func (v *VisitMarkerManager) HeartBeat(ctx context.Context, errChan <-chan error, visitMarkerFileUpdateInterval time.Duration, deleteOnExit bool) { + level.Info(v.getLogger()).Log("msg", "start visit marker heart beat") + ticker := time.NewTicker(visitMarkerFileUpdateInterval) + defer ticker.Stop() +heartBeat: + for { + v.MarkInProgress(ctx) + + select { + case <-ctx.Done(): + level.Warn(v.getLogger()).Log("msg", "visit marker heart beat got cancelled") + v.MarkPending(context.Background()) + break heartBeat + case <-ticker.C: + continue + case err := <-errChan: + if err == nil { + level.Info(v.getLogger()).Log("msg", "update visit marker to completed status") + v.MarkCompleted(ctx) + } else { + level.Warn(v.getLogger()).Log("msg", "stop visit marker heart beat due to error", "err", err) + if compact.IsHaltError(err) { + level.Info(v.getLogger()).Log("msg", "update visit marker to failed status", "err", err) + v.MarkFailed(ctx) + } else { + level.Info(v.getLogger()).Log("msg", "update visit marker to pending status", "err", err) + v.MarkPending(ctx) + } + } + break heartBeat + } + } + level.Info(v.getLogger()).Log("msg", "stop visit marker heart beat") + if deleteOnExit { + level.Info(v.getLogger()).Log("msg", "delete visit marker when exiting heart beat") + v.DeleteVisitMarker(context.Background()) + } +} + +func (v *VisitMarkerManager) MarkInProgress(ctx context.Context) { + v.visitMarker.MarkInProgress(v.ownerIdentifier) + if err := v.updateVisitMarker(ctx); err != nil { + level.Error(v.getLogger()).Log("msg", "unable to upsert visit marker file content", "err", err) + return + } + level.Debug(v.getLogger()).Log("msg", "marked in progress") +} + +func (v *VisitMarkerManager) MarkPending(ctx context.Context) { + v.visitMarker.MarkPending(v.ownerIdentifier) + if err := v.updateVisitMarker(ctx); err != nil { + level.Error(v.getLogger()).Log("msg", "unable to upsert visit marker file content", "err", err) + return + } + level.Debug(v.getLogger()).Log("msg", "marked pending") +} + +func (v *VisitMarkerManager) MarkCompleted(ctx context.Context) { + v.visitMarker.MarkCompleted(v.ownerIdentifier) + if err := v.updateVisitMarker(ctx); err != nil { + level.Error(v.getLogger()).Log("msg", "unable to upsert visit marker file content", "err", err) + return + } + level.Debug(v.getLogger()).Log("msg", "marked completed") +} + +func (v *VisitMarkerManager) MarkFailed(ctx context.Context) { + v.visitMarker.MarkFailed(v.ownerIdentifier) + if err := v.updateVisitMarker(ctx); err != nil { + level.Error(v.getLogger()).Log("msg", "unable to upsert visit marker file content", "err", err) + return + } + level.Debug(v.getLogger()).Log("msg", "marked failed") +} + +func (v *VisitMarkerManager) DeleteVisitMarker(ctx context.Context) { + if err := v.bkt.Delete(ctx, v.visitMarker.GetVisitMarkerFilePath()); err != nil { + level.Error(v.getLogger()).Log("msg", "failed to delete visit marker", "err", err) + return + } + level.Debug(v.getLogger()).Log("msg", "visit marker deleted") +} + +func (v *VisitMarkerManager) ReloadVisitMarker(ctx context.Context) error { + if err := v.ReadVisitMarker(ctx, v.visitMarker); err != nil { + return err + } + 
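// --- Reviewer sketch (not part of the patch) --------------------------------
// Standalone illustration of the HeartBeat control flow defined above: refresh
// the visit marker on every tick, and exit on either cancellation or a final
// result delivered on errChan. The names, interval and messages below are
// illustrative and carry none of the marker/bucket plumbing.
package main

import (
	"context"
	"fmt"
	"time"
)

func heartBeat(ctx context.Context, errChan <-chan error, interval time.Duration, refresh func()) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		refresh() // corresponds to MarkInProgress: re-assert ownership in object storage
		select {
		case <-ctx.Done():
			fmt.Println("cancelled: marker would be reset to pending")
			return
		case <-ticker.C:
			continue // keep the marker fresh so other compactors treat it as visited
		case err := <-errChan:
			if err == nil {
				fmt.Println("done: marker would be marked completed")
			} else {
				fmt.Println("failed:", err)
			}
			return
		}
	}
}

func main() {
	errChan := make(chan error, 1)
	go func() {
		time.Sleep(250 * time.Millisecond)
		errChan <- nil // compaction finished successfully
	}()
	heartBeat(context.Background(), errChan, 100*time.Millisecond, func() {})
}
// -----------------------------------------------------------------------------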
v.setLogger(log.With(v.getLogger(), v.visitMarker.LogInfo())) + level.Debug(v.getLogger()).Log("msg", "visit marker reloaded") + return nil +} + +func (v *VisitMarkerManager) ReadVisitMarker(ctx context.Context, visitMarker any) error { + visitMarkerFile := v.visitMarker.GetVisitMarkerFilePath() + visitMarkerFileReader, err := v.bkt.ReaderWithExpectedErrs(v.bkt.IsObjNotFoundErr).Get(ctx, visitMarkerFile) + if err != nil { + if v.bkt.IsObjNotFoundErr(err) { + return errors.Wrapf(ErrorVisitMarkerNotFound, "visit marker file: %s", visitMarkerFile) + } + v.visitMarkerReadFailed.Inc() + return errors.Wrapf(err, "get visit marker file: %s", visitMarkerFile) + } + defer runutil.CloseWithLogOnErr(v.getLogger(), visitMarkerFileReader, "close visit marker reader") + b, err := io.ReadAll(visitMarkerFileReader) + if err != nil { + v.visitMarkerReadFailed.Inc() + return errors.Wrapf(err, "read visit marker file: %s", visitMarkerFile) + } + if err = json.Unmarshal(b, visitMarker); err != nil { + v.visitMarkerReadFailed.Inc() + return errors.Wrapf(ErrorUnmarshalVisitMarker, "visit marker file: %s, content: %s, error: %v", visitMarkerFile, string(b), err.Error()) + } + level.Debug(v.getLogger()).Log("msg", "visit marker read from file", "visit_marker_file", visitMarkerFile) + return nil +} + +func (v *VisitMarkerManager) updateVisitMarker(ctx context.Context) error { + visitMarkerFileContent, err := json.Marshal(v.visitMarker) + if err != nil { + v.visitMarkerWriteFailed.Inc() + return err + } + + reader := bytes.NewReader(visitMarkerFileContent) + if err := v.bkt.Upload(ctx, v.visitMarker.GetVisitMarkerFilePath(), reader); err != nil { + v.visitMarkerWriteFailed.Inc() + return err + } + return nil +} + +func (v *VisitMarkerManager) getLogger() log.Logger { + v.mutex.Lock() + defer v.mutex.Unlock() + return v.logger +} + +func (v *VisitMarkerManager) setLogger(logger log.Logger) { + v.mutex.Lock() + defer v.mutex.Unlock() + v.logger = logger +} diff --git a/pkg/compactor/visit_marker_test.go b/pkg/compactor/visit_marker_test.go new file mode 100644 index 0000000000..184ce81163 --- /dev/null +++ b/pkg/compactor/visit_marker_test.go @@ -0,0 +1,294 @@ +package compactor + +import ( + "bytes" + "context" + "crypto/rand" + "encoding/json" + "fmt" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/oklog/ulid" + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" + "github.com/thanos-io/thanos/pkg/compact" + + cortex_testutil "github.com/cortexproject/cortex/pkg/storage/tsdb/testutil" +) + +func TestMarkPending(t *testing.T) { + ctx := context.Background() + dummyCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + bkt, _ := cortex_testutil.PrepareFilesystemBucket(t) + logger := log.NewNopLogger() + + ownerIdentifier := "test-owner" + testVisitMarker := NewTestVisitMarker(ownerIdentifier) + + visitMarkerManager := NewVisitMarkerManager(objstore.WithNoopInstr(bkt), logger, ownerIdentifier, testVisitMarker, dummyCounter, dummyCounter) + visitMarkerManager.MarkPending(ctx) + + require.Equal(t, Pending, testVisitMarker.Status) + + visitMarkerFromFile := &TestVisitMarker{} + err := visitMarkerManager.ReadVisitMarker(ctx, visitMarkerFromFile) + require.NoError(t, err) + require.Equal(t, Pending, visitMarkerFromFile.Status) +} + +func TestMarkInProgress(t *testing.T) { + ctx := context.Background() + dummyCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + bkt, _ := cortex_testutil.PrepareFilesystemBucket(t) + logger := 
log.NewNopLogger() + + ownerIdentifier := "test-owner" + testVisitMarker := NewTestVisitMarker(ownerIdentifier) + + visitMarkerManager := NewVisitMarkerManager(objstore.WithNoopInstr(bkt), logger, ownerIdentifier, testVisitMarker, dummyCounter, dummyCounter) + visitMarkerManager.MarkInProgress(ctx) + + require.Equal(t, InProgress, testVisitMarker.Status) + + visitMarkerFromFile := &TestVisitMarker{} + err := visitMarkerManager.ReadVisitMarker(ctx, visitMarkerFromFile) + require.NoError(t, err) + require.Equal(t, InProgress, visitMarkerFromFile.Status) +} + +func TestMarkCompleted(t *testing.T) { + ctx := context.Background() + dummyCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + bkt, _ := cortex_testutil.PrepareFilesystemBucket(t) + logger := log.NewNopLogger() + + ownerIdentifier := "test-owner" + testVisitMarker := NewTestVisitMarker(ownerIdentifier) + + visitMarkerManager := NewVisitMarkerManager(objstore.WithNoopInstr(bkt), logger, ownerIdentifier, testVisitMarker, dummyCounter, dummyCounter) + visitMarkerManager.MarkCompleted(ctx) + + require.Equal(t, Completed, testVisitMarker.Status) + + visitMarkerFromFile := &TestVisitMarker{} + err := visitMarkerManager.ReadVisitMarker(ctx, visitMarkerFromFile) + require.NoError(t, err) + require.Equal(t, Completed, visitMarkerFromFile.Status) +} + +func TestReloadVisitMarker(t *testing.T) { + ctx := context.Background() + dummyCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + bkt, _ := cortex_testutil.PrepareFilesystemBucket(t) + logger := log.NewNopLogger() + + ownerIdentifier := "test-owner" + testVisitMarker := NewTestVisitMarker(ownerIdentifier) + + visitMarkerManager := NewVisitMarkerManager(objstore.WithNoopInstr(bkt), logger, ownerIdentifier, testVisitMarker, dummyCounter, dummyCounter) + + newValue := "updated stored value" + updatedVisitMarker := TestVisitMarker{ + OwnerIdentifier: ownerIdentifier, + Status: Completed, + StoredValue: newValue, + } + visitMarkerFileContent, err := json.Marshal(updatedVisitMarker) + require.NoError(t, err) + + reader := bytes.NewReader(visitMarkerFileContent) + err = bkt.Upload(ctx, testVisitMarker.GetVisitMarkerFilePath(), reader) + require.NoError(t, err) + + err = visitMarkerManager.ReloadVisitMarker(ctx) + require.NoError(t, err) + require.Equal(t, ownerIdentifier, testVisitMarker.OwnerIdentifier) + require.Equal(t, Completed, testVisitMarker.Status) + require.Equal(t, newValue, testVisitMarker.StoredValue) +} + +func TestUpdateExistingVisitMarker(t *testing.T) { + ctx := context.Background() + dummyCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + bkt, _ := cortex_testutil.PrepareFilesystemBucket(t) + logger := log.NewNopLogger() + + ownerIdentifier1 := "test-owner-1" + testVisitMarker1 := NewTestVisitMarker(ownerIdentifier1) + visitMarkerManager1 := NewVisitMarkerManager(objstore.WithNoopInstr(bkt), logger, ownerIdentifier1, testVisitMarker1, dummyCounter, dummyCounter) + visitMarkerManager1.MarkInProgress(ctx) + + ownerIdentifier2 := "test-owner-2" + testVisitMarker2 := &TestVisitMarker{ + OwnerIdentifier: ownerIdentifier2, + markerID: testVisitMarker1.markerID, + StoredValue: testVisitMarker1.StoredValue, + } + visitMarkerManager2 := NewVisitMarkerManager(objstore.WithNoopInstr(bkt), logger, ownerIdentifier2, testVisitMarker2, dummyCounter, dummyCounter) + visitMarkerManager2.MarkCompleted(ctx) + + visitMarkerFromFile := &TestVisitMarker{} + err := visitMarkerManager2.ReadVisitMarker(ctx, visitMarkerFromFile) + require.NoError(t, err) + require.Equal(t, 
ownerIdentifier2, visitMarkerFromFile.OwnerIdentifier) + require.Equal(t, Completed, visitMarkerFromFile.Status) +} + +func TestHeartBeat(t *testing.T) { + for _, tcase := range []struct { + name string + isCancelled bool + callerErr error + expectedStatus VisitStatus + deleteOnExit bool + }{ + { + name: "heart beat got cancelled", + isCancelled: true, + callerErr: nil, + expectedStatus: Pending, + deleteOnExit: false, + }, + { + name: "heart beat complete without error", + isCancelled: false, + callerErr: nil, + expectedStatus: Completed, + deleteOnExit: false, + }, + { + name: "heart beat stopped due to halt error", + isCancelled: false, + callerErr: compact.HaltError{}, + expectedStatus: Failed, + deleteOnExit: false, + }, + { + name: "heart beat stopped due to non halt error", + isCancelled: false, + callerErr: fmt.Errorf("some error"), + expectedStatus: Pending, + deleteOnExit: false, + }, + { + name: "heart beat got cancelled and delete visit marker on exit", + isCancelled: true, + callerErr: nil, + expectedStatus: Pending, + deleteOnExit: true, + }, + { + name: "heart beat complete without error and delete visit marker on exit", + isCancelled: false, + callerErr: nil, + expectedStatus: Completed, + deleteOnExit: true, + }, + { + name: "heart beat stopped due to caller error and delete visit marker on exit", + isCancelled: false, + callerErr: fmt.Errorf("some error"), + expectedStatus: Failed, + deleteOnExit: true, + }, + } { + t.Run(tcase.name, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + dummyCounter := prometheus.NewCounter(prometheus.CounterOpts{}) + bkt, _ := cortex_testutil.PrepareFilesystemBucket(t) + logger := log.NewNopLogger() + errChan := make(chan error, 1) + + ownerIdentifier := "test-owner" + testVisitMarker := NewTestVisitMarker(ownerIdentifier) + visitMarkerManager := NewVisitMarkerManager(objstore.WithNoopInstr(bkt), logger, ownerIdentifier, testVisitMarker, dummyCounter, dummyCounter) + go visitMarkerManager.HeartBeat(ctx, errChan, time.Second, tcase.deleteOnExit) + + time.Sleep(2 * time.Second) + if tcase.isCancelled { + cancel() + } else { + errChan <- tcase.callerErr + defer cancel() + } + time.Sleep(2 * time.Second) + + if tcase.deleteOnExit { + exists, err := bkt.Exists(context.Background(), testVisitMarker.GetVisitMarkerFilePath()) + require.NoError(t, err) + require.False(t, exists) + } else { + err := visitMarkerManager.ReloadVisitMarker(context.Background()) + require.NoError(t, err) + require.Equal(t, tcase.expectedStatus, testVisitMarker.Status) + } + }) + } +} + +type TestVisitMarker struct { + OwnerIdentifier string `json:"ownerIdentifier"` + Status VisitStatus `json:"status"` + StoredValue string `json:"storedValue"` + + markerID ulid.ULID +} + +func (t *TestVisitMarker) IsExpired(visitMarkerTimeout time.Duration) bool { + return true +} + +func (t *TestVisitMarker) IsCompleted() bool { + return t.Status == Completed +} + +func (t *TestVisitMarker) IsFailed() bool { + return t.Status == Failed +} + +func (t *TestVisitMarker) IsPending() bool { + return t.Status == Pending +} + +func (t *TestVisitMarker) IsInProgress() bool { + return t.Status == InProgress +} + +func NewTestVisitMarker(ownerIdentifier string) *TestVisitMarker { + return &TestVisitMarker{ + OwnerIdentifier: ownerIdentifier, + markerID: ulid.MustNew(uint64(time.Now().UnixMilli()), rand.Reader), + StoredValue: "initial value", + } +} + +func (t *TestVisitMarker) GetVisitMarkerFilePath() string { + return fmt.Sprintf("test-visit-marker-%s.json", 
t.markerID.String()) +} + +func (t *TestVisitMarker) MarkInProgress(ownerIdentifier string) { + t.OwnerIdentifier = ownerIdentifier + t.Status = InProgress +} + +func (t *TestVisitMarker) MarkPending(ownerIdentifier string) { + t.OwnerIdentifier = ownerIdentifier + t.Status = Pending +} + +func (t *TestVisitMarker) MarkCompleted(ownerIdentifier string) { + t.OwnerIdentifier = ownerIdentifier + t.Status = Completed +} + +func (t *TestVisitMarker) MarkFailed(ownerIdentifier string) { + t.OwnerIdentifier = ownerIdentifier + t.Status = Failed +} + +func (t *TestVisitMarker) LogInfo() []string { + return []string{"id", t.markerID.String(), "ownerIdentifier", t.OwnerIdentifier, "status", string(t.Status), "storedValue", t.StoredValue} +} diff --git a/pkg/storage/tsdb/bucketindex/index.go b/pkg/storage/tsdb/bucketindex/index.go index 8ef1501550..bd593cd377 100644 --- a/pkg/storage/tsdb/bucketindex/index.go +++ b/pkg/storage/tsdb/bucketindex/index.go @@ -64,6 +64,20 @@ func (idx *Index) RemoveBlock(id ulid.ULID) { } } +func (idx *Index) FindActiveBlocksByTimeRange(rangeStart int64, rangeEnd int64) Blocks { + var result []*Block + deletedBlocks := idx.BlockDeletionMarks.GetULIDSet() + for _, b := range idx.Blocks { + if _, ok := deletedBlocks[b.ID]; ok { + continue + } + if b.MinTime >= rangeStart && b.MaxTime <= rangeEnd { + result = append(result, b) + } + } + return result +} + // Block holds the information about a block in the index. type Block struct { // Block ID. @@ -236,6 +250,14 @@ func (s BlockDeletionMarks) GetULIDs() []ulid.ULID { return ids } +func (s BlockDeletionMarks) GetULIDSet() map[ulid.ULID]struct{} { + res := make(map[ulid.ULID]struct{}) + for _, m := range s { + res[m.ID] = struct{}{} + } + return res +} + func (s BlockDeletionMarks) Clone() BlockDeletionMarks { clone := make(BlockDeletionMarks, len(s)) for i, m := range s { diff --git a/pkg/storage/tsdb/bucketindex/updater.go b/pkg/storage/tsdb/bucketindex/updater.go index cee3e6e3bf..da00519fe0 100644 --- a/pkg/storage/tsdb/bucketindex/updater.go +++ b/pkg/storage/tsdb/bucketindex/updater.go @@ -79,17 +79,22 @@ func (w *Updater) updateBlocks(ctx context.Context, old []*Block, deletedBlocks partials = map[ulid.ULID]error{} // Find all blocks in the storage. + begin := time.Now() + count := 0 err := w.bkt.Iter(ctx, "", func(name string) error { if id, ok := block.IsBlockDir(name); ok { discovered[id] = struct{}{} } + count++ return nil }) if err != nil { return nil, nil, errors.Wrap(err, "list blocks") } + level.Info(w.logger).Log("msg", "finish iterating blocks", "iteration_count", count, "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) // Since blocks are immutable, all blocks already existing in the index can just be copied. + begin = time.Now() for _, b := range old { if _, ok := discovered[b.ID]; ok { delete(discovered, b.ID) @@ -101,10 +106,12 @@ func (w *Updater) updateBlocks(ctx context.Context, old []*Block, deletedBlocks blocks = append(blocks, b) } } + level.Info(w.logger).Log("msg", "finish adding blocks", "old_blocks_count", len(old), "new_blocks_count", len(blocks), "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) // Remaining blocks are new ones and we have to fetch the meta.json for each of them, in order // to find out if their upload has been completed (meta.json is uploaded last) and get the block // information to store in the bucket index. 
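+ // Time how long it takes to fetch meta.json for the newly discovered blocks; the duration is logged below.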
+ begin = time.Now() for id := range discovered { b, err := w.updateBlockIndexEntry(ctx, id) if err == nil { @@ -129,6 +136,7 @@ func (w *Updater) updateBlocks(ctx context.Context, old []*Block, deletedBlocks } return nil, nil, err } + level.Info(w.logger).Log("msg", "finish updating block entries", "discovered_blocks_count", len(discovered), "new_blocks_count", len(blocks), "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) return blocks, partials, nil } @@ -187,6 +195,8 @@ func (w *Updater) updateBlockMarks(ctx context.Context, old []*BlockDeletionMark totalBlocksBlocksMarkedForNoCompaction := int64(0) // Find all markers in the storage. + begin := time.Now() + count := 0 err := w.bkt.Iter(ctx, MarkersPathname+"/", func(name string) error { if blockID, ok := IsBlockDeletionMarkFilename(path.Base(name)); ok { discovered[blockID] = struct{}{} @@ -196,13 +206,16 @@ func (w *Updater) updateBlockMarks(ctx context.Context, old []*BlockDeletionMark totalBlocksBlocksMarkedForNoCompaction++ } + count++ return nil }) if err != nil { return nil, nil, totalBlocksBlocksMarkedForNoCompaction, errors.Wrap(err, "list block deletion marks") } + level.Info(w.logger).Log("msg", "finish iterating markers", "iteration_count", count, "discovered_blocks_count", len(discovered), "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) // Since deletion marks are immutable, all markers already existing in the index can just be copied. + begin = time.Now() for _, m := range old { if _, ok := discovered[m.ID]; ok { out = append(out, m) @@ -211,8 +224,10 @@ func (w *Updater) updateBlockMarks(ctx context.Context, old []*BlockDeletionMark deletedBlocks[m.ID] = struct{}{} } } + level.Info(w.logger).Log("msg", "finish getting deleted blocks", "old_blocks_count", len(old), "discovered_blocks_count", len(discovered), "deletion_markers_count", len(out), "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) // Remaining markers are new ones and we have to fetch them. 
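+ // Time how long it takes to fetch the newly discovered block deletion marks; the duration is logged below.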
+ begin = time.Now() for id := range discovered { m, err := w.updateBlockDeletionMarkIndexEntry(ctx, id) if errors.Is(err, ErrBlockDeletionMarkNotFound) { @@ -230,6 +245,7 @@ func (w *Updater) updateBlockMarks(ctx context.Context, old []*BlockDeletionMark out = append(out, m) } + level.Info(w.logger).Log("msg", "finish getting new deletion markers", "discovered_blocks_count", len(discovered), "deletion_markers_count", len(out), "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) return out, deletedBlocks, totalBlocksBlocksMarkedForNoCompaction, nil } diff --git a/pkg/storage/tsdb/meta_extensions.go b/pkg/storage/tsdb/meta_extensions.go new file mode 100644 index 0000000000..b6b8a7acf0 --- /dev/null +++ b/pkg/storage/tsdb/meta_extensions.go @@ -0,0 +1,71 @@ +package tsdb + +import ( + "fmt" + "strconv" + + "github.com/thanos-io/thanos/pkg/block/metadata" +) + +type CortexMetaExtensions struct { + PartitionInfo *PartitionInfo `json:"partition_info,omitempty"` + TimeRange int64 `json:"time_range,omitempty"` +} + +type PartitionInfo struct { + PartitionedGroupID uint32 `json:"partitioned_group_id"` + PartitionCount int `json:"partition_count"` + PartitionID int `json:"partition_id"` + PartitionedGroupCreationTime int64 `json:"partitioned_group_creation_time"` +} + +var ( + DefaultPartitionInfo = PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 1, + PartitionedGroupCreationTime: 0, + } +) + +func (c *CortexMetaExtensions) TimeRangeStr() string { + return strconv.FormatInt(c.TimeRange, 10) +} + +func ConvertToCortexMetaExtensions(extensions any) (*CortexMetaExtensions, error) { + defaultPartitionInfo := DefaultPartitionInfo + cortexExtensions, err := metadata.ConvertExtensions(extensions, &CortexMetaExtensions{ + PartitionInfo: &defaultPartitionInfo, + }) + + if err != nil { + return nil, err + } + if cortexExtensions == nil { + return nil, nil + } + converted, ok := cortexExtensions.(*CortexMetaExtensions) + if !ok { + return nil, fmt.Errorf("unable to convert extensions to CortexMetaExtensions") + } + return converted, nil +} + +func ConvertToPartitionInfo(extensions any) (*PartitionInfo, error) { + cortexExtensions, err := ConvertToCortexMetaExtensions(extensions) + if err != nil { + return nil, err + } + if cortexExtensions == nil { + return nil, nil + } + return cortexExtensions.PartitionInfo, nil +} + +func GetCortexMetaExtensionsFromMeta(meta metadata.Meta) (*CortexMetaExtensions, error) { + return ConvertToCortexMetaExtensions(meta.Thanos.Extensions) +} + +func GetPartitionInfo(meta metadata.Meta) (*PartitionInfo, error) { + return ConvertToPartitionInfo(meta.Thanos.Extensions) +} diff --git a/pkg/storage/tsdb/meta_extensions_test.go b/pkg/storage/tsdb/meta_extensions_test.go new file mode 100644 index 0000000000..6f296eb461 --- /dev/null +++ b/pkg/storage/tsdb/meta_extensions_test.go @@ -0,0 +1,182 @@ +package tsdb + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/thanos-io/thanos/pkg/block/metadata" +) + +func TestGetPartitionedInfo(t *testing.T) { + for _, tcase := range []struct { + name string + meta metadata.Meta + expected *PartitionInfo + }{ + { + name: "partition info with all information provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{ + PartitionedGroupID: 123, + PartitionID: 8, + PartitionCount: 32, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 123, + PartitionID: 8, + PartitionCount: 32, 
+ }, + }, + { + name: "partition info with only PartitionedGroupID provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{ + PartitionedGroupID: 123, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 123, + PartitionID: 0, + PartitionCount: 0, + }, + }, + { + name: "partition info with only PartitionID provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{ + PartitionID: 5, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 5, + PartitionCount: 0, + }, + }, + { + name: "partition info with only PartitionCount provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{ + PartitionCount: 4, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 4, + }, + }, + { + name: "meta with empty partition info provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{}, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 0, + }, + }, + { + name: "meta with nil partition info provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: nil, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 1, + }, + }, + { + name: "meta with non CortexMetaExtensions provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: struct { + dummy string + }{ + dummy: "test_dummy", + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 1, + }, + }, + { + name: "meta with invalid CortexMetaExtensions provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: struct { + PartitionInfo struct { + PartitionedGroupID uint32 `json:"partitionedGroupId"` + PartitionCount int `json:"partitionCount"` + PartitionID int `json:"partitionId"` + } `json:"partition_info,omitempty"` + }{ + PartitionInfo: struct { + PartitionedGroupID uint32 `json:"partitionedGroupId"` + PartitionCount int `json:"partitionCount"` + PartitionID int `json:"partitionId"` + }{ + PartitionedGroupID: 123, + PartitionID: 8, + PartitionCount: 32, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 1, + }, + }, + { + name: "meta does not have any extensions", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: nil, + }, + }, + expected: nil, + }, + } { + t.Run(tcase.name, func(t *testing.T) { + result, err := GetPartitionInfo(tcase.meta) + assert.NoError(t, err) + if tcase.expected == nil { + assert.Nil(t, result) + } else { + assert.Equal(t, *tcase.expected, *result) + } + }) + } +} diff --git a/pkg/util/log/wrappers.go b/pkg/util/log/wrappers.go index 24266bdedf..2341048e74 100644 --- a/pkg/util/log/wrappers.go +++ b/pkg/util/log/wrappers.go @@ -18,6 +18,12 @@ func WithUserID(userID string, l log.Logger) log.Logger { return log.With(l, "org_id", userID) } +// WithExecutionID returns a Logger that has information about the execution id in +// its details. +func WithExecutionID(executionID string, l log.Logger) log.Logger { + return log.With(l, "execution_id", executionID) +} + // WithTraceID returns a Logger that has information about the traceID in // its details. 
func WithTraceID(traceID string, l log.Logger) log.Logger { diff --git a/pkg/util/shard.go b/pkg/util/shard.go index 82392b3a1a..1ffe234ee9 100644 --- a/pkg/util/shard.go +++ b/pkg/util/shard.go @@ -10,6 +10,10 @@ const ( // Sharding strategies. ShardingStrategyDefault = "default" ShardingStrategyShuffle = "shuffle-sharding" + + // Compaction mode + CompactionModeDefault = "default" + CompactionModePartitioning = "partitioning" ) var ( diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index 4586a24622..24006e9d23 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -182,8 +182,12 @@ type Limits struct { MaxDownloadedBytesPerRequest int `yaml:"max_downloaded_bytes_per_request" json:"max_downloaded_bytes_per_request"` // Compactor. - CompactorBlocksRetentionPeriod model.Duration `yaml:"compactor_blocks_retention_period" json:"compactor_blocks_retention_period"` - CompactorTenantShardSize int `yaml:"compactor_tenant_shard_size" json:"compactor_tenant_shard_size"` + CompactorBlocksRetentionPeriod model.Duration `yaml:"compactor_blocks_retention_period" json:"compactor_blocks_retention_period"` + CompactorTenantShardSize int `yaml:"compactor_tenant_shard_size" json:"compactor_tenant_shard_size"` + CompactorPartitionIndexSizeLimitInBytes int64 `yaml:"compactor_partition_index_size_limit_in_bytes" json:"compactor_partition_index_size_limit_in_bytes"` + CompactorPartitionSeriesCountLimit int64 `yaml:"compactor_partition_series_count_limit" json:"compactor_partition_series_count_limit"` + CompactorPartitionLevel1IndexSizeLimitInBytes int64 `yaml:"compactor_partition_level1_index_size_limit_in_bytes" json:"compactor_partition_level1_index_size_limit_in_bytes"` + CompactorPartitionLevel1SeriesCountLimit int64 `yaml:"compactor_partition_level1_series_count_limit" json:"compactor_partition_level1_series_count_limit"` // This config doesn't have a CLI flag registered here because they're registered in // their own original config struct. @@ -268,6 +272,10 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { f.Var(&l.CompactorBlocksRetentionPeriod, "compactor.blocks-retention-period", "Delete blocks containing samples older than the specified retention period. 0 to disable.") f.IntVar(&l.CompactorTenantShardSize, "compactor.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used by the compactor. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.") + f.Int64Var(&l.CompactorPartitionIndexSizeLimitInBytes, "compactor.partition-index-size-limit-in-bytes", 0, "Index size limit in bytes for each compaction partition. 0 means no limit") + f.Int64Var(&l.CompactorPartitionSeriesCountLimit, "compactor.partition-series-count-limit", 0, "Time series count limit for each compaction partition. 0 means no limit") + f.Int64Var(&l.CompactorPartitionLevel1IndexSizeLimitInBytes, "compactor.partition-level1-index-size-limit-in-bytes", 0, "Index size limit in bytes for each level 1 compaction partition. 0 means no limit") + f.Int64Var(&l.CompactorPartitionLevel1SeriesCountLimit, "compactor.partition-level1-series-count-limit", 0, "Time series count limit for each level 1 compaction partition. 0 means no limit") // Store-gateway. f.Float64Var(&l.StoreGatewayTenantShardSize, "store-gateway.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used. 
Must be set when the store-gateway sharding is enabled with the shuffle-sharding strategy. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant. If the value is < 1 the shard size will be a percentage of the total store-gateways.") @@ -762,6 +770,26 @@ func (o *Overrides) CompactorTenantShardSize(userID string) int { return o.GetOverridesForUser(userID).CompactorTenantShardSize } +// CompactorPartitionIndexSizeLimitInBytes returns the index size limit in bytes for each compaction partition of this tenant. 0 means no limit. +func (o *Overrides) CompactorPartitionIndexSizeLimitInBytes(userID string) int64 { + return o.GetOverridesForUser(userID).CompactorPartitionIndexSizeLimitInBytes +} + +// CompactorPartitionSeriesCountLimit returns the time series count limit for each compaction partition of this tenant. 0 means no limit. +func (o *Overrides) CompactorPartitionSeriesCountLimit(userID string) int64 { + return o.GetOverridesForUser(userID).CompactorPartitionSeriesCountLimit +} + +// CompactorPartitionLevel1IndexSizeLimitInBytes returns the index size limit in bytes for each level 1 compaction partition of this tenant. 0 means no limit. +func (o *Overrides) CompactorPartitionLevel1IndexSizeLimitInBytes(userID string) int64 { + return o.GetOverridesForUser(userID).CompactorPartitionLevel1IndexSizeLimitInBytes +} + +// CompactorPartitionLevel1SeriesCountLimit returns the time series count limit for each level 1 compaction partition of this tenant. 0 means no limit. +func (o *Overrides) CompactorPartitionLevel1SeriesCountLimit(userID string) int64 { + return o.GetOverridesForUser(userID).CompactorPartitionLevel1SeriesCountLimit +} + // MetricRelabelConfigs returns the metric relabel configs for a given user. func (o *Overrides) MetricRelabelConfigs(userID string) []*relabel.Config { return o.GetOverridesForUser(userID).MetricRelabelConfigs diff --git a/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/copy.go b/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/copy.go new file mode 100644 index 0000000000..6464cd02ba --- /dev/null +++ b/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/copy.go @@ -0,0 +1,55 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. 
+ +package e2eutil + +import ( + "io" + "os" + "path/filepath" + "testing" + + "github.com/efficientgo/core/testutil" + "github.com/pkg/errors" + "github.com/thanos-io/thanos/pkg/runutil" +) + +func Copy(t testing.TB, src, dst string) { + testutil.Ok(t, copyRecursive(src, dst)) +} + +func copyRecursive(src, dst string) error { + return filepath.Walk(src, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + relPath, err := filepath.Rel(src, path) + if err != nil { + return err + } + + if info.IsDir() { + return os.MkdirAll(filepath.Join(dst, relPath), os.ModePerm) + } + + if !info.Mode().IsRegular() { + return errors.Errorf("%s is not a regular file", path) + } + + source, err := os.Open(filepath.Clean(path)) + if err != nil { + return err + } + defer runutil.CloseWithErrCapture(&err, source, "close file") + + destination, err := os.Create(filepath.Join(dst, relPath)) + if err != nil { + return err + } + defer runutil.CloseWithErrCapture(&err, destination, "close file") + + _, err = io.Copy(destination, source) + return err + }) +} diff --git a/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/port.go b/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/port.go new file mode 100644 index 0000000000..986f1c7d7f --- /dev/null +++ b/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/port.go @@ -0,0 +1,20 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package e2eutil + +import "net" + +// FreePort returns port that is free now. +func FreePort() (int, error) { + addr, err := net.ResolveTCPAddr("tcp", ":0") + if err != nil { + return 0, err + } + + l, err := net.ListenTCP("tcp", addr) + if err != nil { + return 0, err + } + return l.Addr().(*net.TCPAddr).Port, l.Close() +} diff --git a/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/prometheus.go b/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/prometheus.go new file mode 100644 index 0000000000..39aab94eab --- /dev/null +++ b/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/prometheus.go @@ -0,0 +1,818 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package e2eutil + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "math" + "math/rand" + "net/http" + "os" + "os/exec" + "path" + "path/filepath" + "runtime" + "strings" + "sync" + "syscall" + "testing" + "time" + + "github.com/efficientgo/core/testutil" + "github.com/go-kit/log" + "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb" + "github.com/prometheus/prometheus/tsdb/chunkenc" + "github.com/prometheus/prometheus/tsdb/index" + "go.uber.org/atomic" + "golang.org/x/sync/errgroup" + + "github.com/thanos-io/thanos/pkg/block/metadata" + "github.com/thanos-io/thanos/pkg/runutil" +) + +const ( + defaultPrometheusVersion = "v0.37.0" + defaultAlertmanagerVersion = "v0.20.0" + defaultMinioVersion = "RELEASE.2022-07-30T05-21-40Z" + + // Space delimited list of versions. + promPathsEnvVar = "THANOS_TEST_PROMETHEUS_PATHS" + alertmanagerBinEnvVar = "THANOS_TEST_ALERTMANAGER_PATH" + minioBinEnvVar = "THANOS_TEST_MINIO_PATH" + + // A placeholder for actual Prometheus instance address in the scrape config. 
+ PromAddrPlaceHolder = "PROMETHEUS_ADDRESS" +) + +var ( + histogramSample = histogram.Histogram{ + Schema: 0, + Count: 20, + Sum: -3.1415, + ZeroCount: 12, + ZeroThreshold: 0.001, + NegativeSpans: []histogram.Span{ + {Offset: 0, Length: 4}, + {Offset: 1, Length: 1}, + }, + NegativeBuckets: []int64{1, 2, -2, 1, -1}, + } + + floatHistogramSample = histogram.FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: 5.5, + Count: 15, + Sum: 11.5, + PositiveSpans: []histogram.Span{ + {Offset: -2, Length: 2}, + {Offset: 1, Length: 3}, + }, + PositiveBuckets: []float64{0.5, 0, 1.5, 2, 3.5}, + NegativeSpans: []histogram.Span{ + {Offset: 3, Length: 2}, + {Offset: 3, Length: 2}, + }, + NegativeBuckets: []float64{1.5, 0.5, 2.5, 3}, + } +) + +func PrometheusBinary() string { + return "prometheus-" + defaultPrometheusVersion +} + +func AlertmanagerBinary() string { + b := os.Getenv(alertmanagerBinEnvVar) + if b == "" { + return fmt.Sprintf("alertmanager-%s", defaultAlertmanagerVersion) + } + return b +} + +func MinioBinary() string { + b := os.Getenv(minioBinEnvVar) + if b == "" { + return fmt.Sprintf("minio-%s", defaultMinioVersion) + } + return b +} + +// Prometheus represents a test instance for integration testing. +// It can be populated with data before being started. +type Prometheus struct { + dir string + db *tsdb.DB + prefix string + binPath string + + running bool + cmd *exec.Cmd + disabledCompaction bool + addr string + + config string + + stdout, stderr bytes.Buffer +} + +func NewTSDB() (*tsdb.DB, error) { + dir, err := os.MkdirTemp("", "prometheus-test") + if err != nil { + return nil, err + } + opts := tsdb.DefaultOptions() + opts.RetentionDuration = math.MaxInt64 + return tsdb.Open(dir, nil, nil, opts, nil) +} + +func ForeachPrometheus(t *testing.T, testFn func(t testing.TB, p *Prometheus)) { + paths := os.Getenv(promPathsEnvVar) + if paths == "" { + paths = PrometheusBinary() + } + + for _, path := range strings.Split(paths, " ") { + if ok := t.Run(path, func(t *testing.T) { + p, err := newPrometheus(path, "") + testutil.Ok(t, err) + + testFn(t, p) + testutil.Ok(t, p.Stop()) + }); !ok { + return + } + } +} + +// NewPrometheus creates a new test Prometheus instance that will listen on local address. +// Use ForeachPrometheus if you want to test against set of Prometheus versions. +// TODO(bwplotka): Improve it with https://github.com/thanos-io/thanos/issues/758. +func NewPrometheus() (*Prometheus, error) { + return newPrometheus("", "") +} + +// NewPrometheusOnPath creates a new test Prometheus instance that will listen on local address and given prefix path. +func NewPrometheusOnPath(prefix string) (*Prometheus, error) { + return newPrometheus("", prefix) +} + +func newPrometheus(binPath, prefix string) (*Prometheus, error) { + if binPath == "" { + binPath = PrometheusBinary() + } + + db, err := NewTSDB() + if err != nil { + return nil, err + } + + f, err := os.Create(filepath.Join(db.Dir(), "prometheus.yml")) + if err != nil { + return nil, err + } + defer f.Close() + + // Some well-known external labels so that we can test label resorting + if _, err = io.WriteString(f, "global:\n external_labels:\n region: eu-west"); err != nil { + return nil, err + } + + return &Prometheus{ + dir: db.Dir(), + db: db, + prefix: prefix, + binPath: binPath, + addr: "<prometheus-not-started>", + }, nil +} + +// Start running the Prometheus instance and return. 
+func (p *Prometheus) Start(ctx context.Context, l log.Logger) error { + if p.running { + return errors.New("Already started") + } + + if err := p.db.Close(); err != nil { + return err + } + if err := p.start(); err != nil { + return err + } + if err := p.waitPrometheusUp(ctx, l, p.prefix); err != nil { + return err + } + return nil +} + +func (p *Prometheus) start() error { + port, err := FreePort() + if err != nil { + return err + } + + var extra []string + if p.disabledCompaction { + extra = append(extra, + "--storage.tsdb.min-block-duration=2h", + "--storage.tsdb.max-block-duration=2h", + ) + } + p.addr = fmt.Sprintf("localhost:%d", port) + // Write the final config to the config file. + // The address placeholder will be replaced with the actual address. + if err := p.writeConfig(strings.ReplaceAll(p.config, PromAddrPlaceHolder, p.addr)); err != nil { + return err + } + args := append([]string{ + "--storage.tsdb.retention=2d", // Pass retention cause prometheus since 2.8.0 don't show default value for that flags in web/api: https://github.com/prometheus/prometheus/pull/5433. + "--storage.tsdb.path=" + p.db.Dir(), + "--web.listen-address=" + p.addr, + "--web.route-prefix=" + p.prefix, + "--web.enable-admin-api", + "--config.file=" + filepath.Join(p.db.Dir(), "prometheus.yml"), + }, extra...) + + p.cmd = exec.Command(p.binPath, args...) + p.cmd.SysProcAttr = SysProcAttr() + + p.stderr.Reset() + p.stdout.Reset() + + p.cmd.Stdout = &p.stdout + p.cmd.Stderr = &p.stderr + + if err := p.cmd.Start(); err != nil { + return fmt.Errorf("starting Prometheus failed: %w", err) + } + + p.running = true + return nil +} + +func (p *Prometheus) waitPrometheusUp(ctx context.Context, logger log.Logger, prefix string) error { + if !p.running { + return errors.New("method Start was not invoked.") + } + return runutil.RetryWithLog(logger, time.Second, ctx.Done(), func() error { + r, err := http.Get(fmt.Sprintf("http://%s%s/-/ready", p.addr, prefix)) + if err != nil { + return err + } + defer runutil.ExhaustCloseWithLogOnErr(logger, r.Body, "failed to exhaust and close body") + + if r.StatusCode != 200 { + return errors.Errorf("Got non 200 response: %v", r.StatusCode) + } + return nil + }) +} + +func (p *Prometheus) Restart(ctx context.Context, l log.Logger) error { + if err := p.cmd.Process.Signal(syscall.SIGTERM); err != nil { + return errors.Wrap(err, "failed to kill Prometheus. Kill it manually") + } + _ = p.cmd.Wait() + if err := p.start(); err != nil { + return err + } + return p.waitPrometheusUp(ctx, l, p.prefix) +} + +// Dir returns TSDB dir. +func (p *Prometheus) Dir() string { + return p.dir +} + +// Addr returns correct address after Start method. +func (p *Prometheus) Addr() string { + return p.addr + p.prefix +} + +func (p *Prometheus) DisableCompaction() { + p.disabledCompaction = true +} + +// SetConfig updates the contents of the config. +func (p *Prometheus) SetConfig(s string) { + p.config = s +} + +// writeConfig writes the Prometheus config to the config file. +func (p *Prometheus) writeConfig(config string) (err error) { + f, err := os.Create(filepath.Join(p.dir, "prometheus.yml")) + if err != nil { + return err + } + defer runutil.CloseWithErrCapture(&err, f, "prometheus config") + _, err = f.Write([]byte(config)) + return err +} + +// Stop terminates Prometheus and clean up its data directory. 
+func (p *Prometheus) Stop() (rerr error) { + if !p.running { + return nil + } + + if p.cmd.Process != nil { + if err := p.cmd.Process.Signal(syscall.SIGTERM); err != nil { + return errors.Wrapf(err, "failed to Prometheus. Kill it manually and clean %s dir", p.db.Dir()) + } + + err := p.cmd.Wait() + if err != nil { + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + if exitErr.ExitCode() != -1 { + fmt.Fprintln(os.Stderr, "Prometheus exited with", exitErr.ExitCode()) + fmt.Fprintln(os.Stderr, "stdout:\n", p.stdout.String(), "\nstderr:\n", p.stderr.String()) + } else { + err = nil + } + } + } + + if err != nil { + return fmt.Errorf("waiting for Prometheus to exit: %w", err) + } + } + + return p.cleanup() +} + +func (p *Prometheus) cleanup() error { + p.running = false + return os.RemoveAll(p.db.Dir()) +} + +// Appender returns a new appender to populate the Prometheus instance with data. +// All appenders must be closed before Start is called and no new ones must be opened +// afterwards. +func (p *Prometheus) Appender() storage.Appender { + if p.running { + panic("Appender must not be called after start") + } + return p.db.Appender(context.Background()) +} + +// CreateEmptyBlock produces empty block like it was the case before fix: https://github.com/prometheus/tsdb/pull/374. +// (Prometheus pre v2.7.0). +func CreateEmptyBlock(dir string, mint, maxt int64, extLset labels.Labels, resolution int64) (ulid.ULID, error) { + entropy := rand.New(rand.NewSource(time.Now().UnixNano())) + uid := ulid.MustNew(ulid.Now(), entropy) + + if err := os.Mkdir(path.Join(dir, uid.String()), os.ModePerm); err != nil { + return ulid.ULID{}, errors.Wrap(err, "close index") + } + + if err := os.Mkdir(path.Join(dir, uid.String(), "chunks"), os.ModePerm); err != nil { + return ulid.ULID{}, errors.Wrap(err, "close index") + } + + w, err := index.NewWriter(context.Background(), path.Join(dir, uid.String(), "index")) + if err != nil { + return ulid.ULID{}, errors.Wrap(err, "new index") + } + + if err := w.Close(); err != nil { + return ulid.ULID{}, errors.Wrap(err, "close index") + } + + m := tsdb.BlockMeta{ + Version: 1, + ULID: uid, + MinTime: mint, + MaxTime: maxt, + Compaction: tsdb.BlockMetaCompaction{ + Level: 1, + Sources: []ulid.ULID{uid}, + }, + } + b, err := json.Marshal(&m) + if err != nil { + return ulid.ULID{}, err + } + + if err := os.WriteFile(path.Join(dir, uid.String(), "meta.json"), b, os.ModePerm); err != nil { + return ulid.ULID{}, errors.Wrap(err, "saving meta.json") + } + + if _, err = metadata.InjectThanos(log.NewNopLogger(), filepath.Join(dir, uid.String()), metadata.Thanos{ + Labels: extLset.Map(), + Downsample: metadata.ThanosDownsample{Resolution: resolution}, + Source: metadata.TestSource, + }, nil); err != nil { + return ulid.ULID{}, errors.Wrap(err, "finalize block") + } + + return uid, nil +} + +// CreateBlock writes a block with the given series and numSamples samples each. +// Samples will be in the time range [mint, maxt). +func CreateBlock( + ctx context.Context, + dir string, + series []labels.Labels, + numSamples int, + mint, maxt int64, + extLset labels.Labels, + resolution int64, + hashFunc metadata.HashFunc, +) (id ulid.ULID, err error) { + return createBlock(ctx, dir, series, numSamples, mint, maxt, extLset, resolution, false, hashFunc, chunkenc.ValFloat) +} + +// CreateBlockWithTombstone is same as CreateBlock but leaves tombstones which mimics the Prometheus local block. 
+func CreateBlockWithTombstone( + ctx context.Context, + dir string, + series []labels.Labels, + numSamples int, + mint, maxt int64, + extLset labels.Labels, + resolution int64, + hashFunc metadata.HashFunc, +) (id ulid.ULID, err error) { + return createBlock(ctx, dir, series, numSamples, mint, maxt, extLset, resolution, true, hashFunc, chunkenc.ValFloat) +} + +// CreateBlockWithBlockDelay writes a block with the given series and numSamples samples each. +// Samples will be in the time range [mint, maxt) +// Block ID will be created with a delay of time duration blockDelay. +func CreateBlockWithBlockDelay( + ctx context.Context, + dir string, + series []labels.Labels, + numSamples int, + mint, maxt int64, + blockDelay time.Duration, + extLset labels.Labels, + resolution int64, + hashFunc metadata.HashFunc, +) (ulid.ULID, error) { + return createBlockWithDelay(ctx, dir, series, numSamples, mint, maxt, blockDelay, extLset, resolution, hashFunc, chunkenc.ValFloat) +} + +// CreateHistogramBlockWithDelay writes a block with the given native histogram series and numSamples samples each. +// Samples will be in the time range [mint, maxt). +func CreateHistogramBlockWithDelay( + ctx context.Context, + dir string, + series []labels.Labels, + numSamples int, + mint, maxt int64, + blockDelay time.Duration, + extLset labels.Labels, + resolution int64, + hashFunc metadata.HashFunc, +) (id ulid.ULID, err error) { + return createBlockWithDelay(ctx, dir, series, numSamples, mint, maxt, blockDelay, extLset, resolution, hashFunc, chunkenc.ValHistogram) +} + +// CreateFloatHistogramBlockWithDelay writes a block with the given float native histogram series and numSamples samples each. +// Samples will be in the time range [mint, maxt). +func CreateFloatHistogramBlockWithDelay( + ctx context.Context, + dir string, + series []labels.Labels, + numSamples int, + mint, maxt int64, + blockDelay time.Duration, + extLset labels.Labels, + resolution int64, + hashFunc metadata.HashFunc, +) (id ulid.ULID, err error) { + return createBlockWithDelay(ctx, dir, series, numSamples, mint, maxt, blockDelay, extLset, resolution, hashFunc, chunkenc.ValFloatHistogram) +} + +func createBlockWithDelay(ctx context.Context, dir string, series []labels.Labels, numSamples int, mint int64, maxt int64, blockDelay time.Duration, extLset labels.Labels, resolution int64, hashFunc metadata.HashFunc, samplesType chunkenc.ValueType) (ulid.ULID, error) { + blockID, err := createBlock(ctx, dir, series, numSamples, mint, maxt, extLset, resolution, false, hashFunc, samplesType) + if err != nil { + return ulid.ULID{}, errors.Wrap(err, "block creation") + } + + id, err := ulid.New(uint64(timestamp.FromTime(timestamp.Time(int64(blockID.Time())).Add(-blockDelay))), bytes.NewReader(blockID.Entropy())) + if err != nil { + return ulid.ULID{}, errors.Wrap(err, "create block id") + } + + bdir := path.Join(dir, blockID.String()) + m, err := metadata.ReadFromDir(bdir) + if err != nil { + return ulid.ULID{}, errors.Wrap(err, "open meta file") + } + + logger := log.NewNopLogger() + m.ULID = id + m.Compaction.Sources = []ulid.ULID{id} + if err := m.WriteToDir(logger, path.Join(dir, blockID.String())); err != nil { + return ulid.ULID{}, errors.Wrap(err, "write meta.json file") + } + + return id, os.Rename(path.Join(dir, blockID.String()), path.Join(dir, id.String())) +} + +func createBlock( + ctx context.Context, + dir string, + series []labels.Labels, + numSamples int, + mint, maxt int64, + extLset labels.Labels, + resolution int64, + tombstones bool, + hashFunc 
metadata.HashFunc, + sampleType chunkenc.ValueType, +) (id ulid.ULID, err error) { + headOpts := tsdb.DefaultHeadOptions() + headOpts.ChunkDirRoot = filepath.Join(dir, "chunks") + headOpts.ChunkRange = 10000000000 + headOpts.EnableNativeHistograms = *atomic.NewBool(true) + h, err := tsdb.NewHead(nil, nil, nil, nil, headOpts, nil) + if err != nil { + return id, errors.Wrap(err, "create head block") + } + defer func() { + runutil.CloseWithErrCapture(&err, h, "TSDB Head") + if e := os.RemoveAll(headOpts.ChunkDirRoot); e != nil { + err = errors.Wrap(e, "delete chunks dir") + } + }() + + var g errgroup.Group + var timeStepSize = (maxt - mint) / int64(numSamples+1) + var batchSize = len(series) / runtime.GOMAXPROCS(0) + r := rand.New(rand.NewSource(int64(numSamples))) + var randMutex sync.Mutex + + for len(series) > 0 { + l := batchSize + if len(series) < 1000 { + l = len(series) + } + batch := series[:l] + series = series[l:] + + g.Go(func() error { + t := mint + + for i := 0; i < numSamples; i++ { + app := h.Appender(ctx) + + for _, lset := range batch { + var err error + if sampleType == chunkenc.ValFloat { + randMutex.Lock() + _, err = app.Append(0, lset, t, r.Float64()) + randMutex.Unlock() + } else if sampleType == chunkenc.ValHistogram { + _, err = app.AppendHistogram(0, lset, t, &histogramSample, nil) + } else if sampleType == chunkenc.ValFloatHistogram { + _, err = app.AppendHistogram(0, lset, t, nil, &floatHistogramSample) + } + if err != nil { + if rerr := app.Rollback(); rerr != nil { + err = errors.Wrapf(err, "rollback failed: %v", rerr) + } + + return errors.Wrap(err, "add sample") + } + } + if err := app.Commit(); err != nil { + return errors.Wrap(err, "commit") + } + t += timeStepSize + } + return nil + }) + } + if err := g.Wait(); err != nil { + return id, err + } + c, err := tsdb.NewLeveledCompactor(ctx, nil, log.NewNopLogger(), []int64{maxt - mint}, nil, nil) + if err != nil { + return id, errors.Wrap(err, "create compactor") + } + + ids, err := c.Write(dir, h, mint, maxt, nil) + if err != nil { + return id, errors.Wrap(err, "write block") + } + if len(ids) == 0 { + return id, errors.Errorf("nothing to write, asked for %d samples", numSamples) + } + id = ids[0] + + blockDir := filepath.Join(dir, id.String()) + logger := log.NewNopLogger() + seriesSize, err := gatherMaxSeriesSize(ctx, filepath.Join(blockDir, "index")) + if err != nil { + return id, errors.Wrap(err, "gather max series size") + } + + files := []metadata.File{} + if hashFunc != metadata.NoneFunc { + paths := []string{} + if err := filepath.Walk(blockDir, func(path string, info os.FileInfo, err error) error { + if info.IsDir() { + return nil + } + paths = append(paths, path) + return nil + }); err != nil { + return id, errors.Wrapf(err, "walking %s", dir) + } + + for _, p := range paths { + pHash, err := metadata.CalculateHash(p, metadata.SHA256Func, log.NewNopLogger()) + if err != nil { + return id, errors.Wrapf(err, "calculating hash of %s", blockDir+p) + } + files = append(files, metadata.File{ + RelPath: strings.TrimPrefix(p, blockDir+"/"), + Hash: &pHash, + }) + } + } + + if _, err = metadata.InjectThanos(logger, blockDir, metadata.Thanos{ + Labels: extLset.Map(), + Downsample: metadata.ThanosDownsample{Resolution: resolution}, + Source: metadata.TestSource, + Files: files, + IndexStats: metadata.IndexStats{SeriesMaxSize: seriesSize}, + }, nil); err != nil { + return id, errors.Wrap(err, "finalize block") + } + + if !tombstones { + if err = os.Remove(filepath.Join(dir, id.String(), "tombstones")); err != nil { 
+ return id, errors.Wrap(err, "remove tombstones") + } + } + + return id, nil +} + +func gatherMaxSeriesSize(ctx context.Context, fn string) (int64, error) { + r, err := index.NewFileReader(fn) + if err != nil { + return 0, errors.Wrap(err, "open index file") + } + defer runutil.CloseWithErrCapture(&err, r, "gather index issue file reader") + + key, value := index.AllPostingsKey() + p, err := r.Postings(ctx, key, value) + if err != nil { + return 0, errors.Wrap(err, "get all postings") + } + + // As of version two all series entries are 16 byte padded. All references + // we get have to account for that to get the correct offset. + offsetMultiplier := 1 + version := r.Version() + if version >= 2 { + offsetMultiplier = 16 + } + + // Per series. + var ( + prevId storage.SeriesRef + maxSeriesSize int64 + ) + for p.Next() { + id := p.At() + if prevId != 0 { + // Approximate size. + seriesSize := int64(id-prevId) * int64(offsetMultiplier) + if seriesSize > maxSeriesSize { + maxSeriesSize = seriesSize + } + } + prevId = id + } + if p.Err() != nil { + return 0, errors.Wrap(err, "walk postings") + } + + return maxSeriesSize, nil +} + +// CreateBlockWithChurn writes a block with the given series. Start time of each series +// will be randomized in the given time window to create churn. Only float chunk is supported right now. +func CreateBlockWithChurn( + ctx context.Context, + rnd *rand.Rand, + dir string, + series []labels.Labels, + numSamples int, + mint, maxt int64, + extLset labels.Labels, + resolution int64, + scrapeInterval int64, + seriesSize int64, +) (id ulid.ULID, err error) { + headOpts := tsdb.DefaultHeadOptions() + headOpts.ChunkDirRoot = filepath.Join(dir, "chunks") + headOpts.ChunkRange = 10000000000 + h, err := tsdb.NewHead(nil, nil, nil, nil, headOpts, nil) + if err != nil { + return id, errors.Wrap(err, "create head block") + } + defer func() { + runutil.CloseWithErrCapture(&err, h, "TSDB Head") + if e := os.RemoveAll(headOpts.ChunkDirRoot); e != nil { + err = errors.Wrap(e, "delete chunks dir") + } + }() + + app := h.Appender(ctx) + for i := 0; i < len(series); i++ { + + var ref storage.SeriesRef + start := RandRange(rnd, mint, maxt) + for j := 0; j < numSamples; j++ { + if ref == 0 { + ref, err = app.Append(0, series[i], start, float64(i+j)) + } else { + ref, err = app.Append(ref, series[i], start, float64(i+j)) + } + if err != nil { + if rerr := app.Rollback(); rerr != nil { + err = errors.Wrapf(err, "rollback failed: %v", rerr) + } + return id, errors.Wrap(err, "add sample") + } + start += scrapeInterval + if start > maxt { + break + } + } + } + if err := app.Commit(); err != nil { + return id, errors.Wrap(err, "commit") + } + + c, err := tsdb.NewLeveledCompactor(ctx, nil, log.NewNopLogger(), []int64{maxt - mint}, nil, nil) + if err != nil { + return id, errors.Wrap(err, "create compactor") + } + + ids, err := c.Write(dir, h, mint, maxt, nil) + if err != nil { + return id, errors.Wrap(err, "write block") + } + + if len(ids) == 0 { + return id, errors.Errorf("nothing to write, asked for %d samples", numSamples) + } + id = ids[0] + + blockDir := filepath.Join(dir, id.String()) + logger := log.NewNopLogger() + + if _, err = metadata.InjectThanos(logger, blockDir, metadata.Thanos{ + Labels: extLset.Map(), + Downsample: metadata.ThanosDownsample{Resolution: resolution}, + Source: metadata.TestSource, + IndexStats: metadata.IndexStats{SeriesMaxSize: seriesSize}, + }, nil); err != nil { + return id, errors.Wrap(err, "finalize block") + } + + return id, nil +} + +// AddDelay rewrites 
a given block with delay. +func AddDelay(blockID ulid.ULID, dir string, blockDelay time.Duration) (ulid.ULID, error) { + id, err := ulid.New(uint64(timestamp.FromTime(timestamp.Time(int64(blockID.Time())).Add(-blockDelay))), bytes.NewReader(blockID.Entropy())) + if err != nil { + return ulid.ULID{}, errors.Wrap(err, "create block id") + } + + bdir := path.Join(dir, blockID.String()) + m, err := metadata.ReadFromDir(bdir) + if err != nil { + return ulid.ULID{}, errors.Wrap(err, "open meta file") + } + + logger := log.NewNopLogger() + m.ULID = id + m.Compaction.Sources = []ulid.ULID{id} + if err := m.WriteToDir(logger, path.Join(dir, blockID.String())); err != nil { + return ulid.ULID{}, errors.Wrap(err, "write meta.json file") + } + + return id, os.Rename(path.Join(dir, blockID.String()), path.Join(dir, id.String())) +} diff --git a/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/rand.go b/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/rand.go new file mode 100644 index 0000000000..5cac2d6f07 --- /dev/null +++ b/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/rand.go @@ -0,0 +1,11 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package e2eutil + +import "math/rand" + +// RandRange returns a random int64 from [min, max]. +func RandRange(rnd *rand.Rand, min, max int64) int64 { + return rnd.Int63n(max-min) + min +} diff --git a/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/sysprocattr.go b/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/sysprocattr.go new file mode 100644 index 0000000000..53aaa7039f --- /dev/null +++ b/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/sysprocattr.go @@ -0,0 +1,13 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +//go:build !linux +// +build !linux + +package e2eutil + +import "syscall" + +func SysProcAttr() *syscall.SysProcAttr { + return &syscall.SysProcAttr{} +} diff --git a/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/sysprocattr_linux.go b/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/sysprocattr_linux.go new file mode 100644 index 0000000000..dd77ed32a1 --- /dev/null +++ b/vendor/github.com/thanos-io/thanos/pkg/testutil/e2eutil/sysprocattr_linux.go @@ -0,0 +1,13 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package e2eutil + +import "syscall" + +func SysProcAttr() *syscall.SysProcAttr { + return &syscall.SysProcAttr{ + // For linux only, kill this if the go test process dies before the cleanup. + Pdeathsig: syscall.SIGKILL, + } +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 912998c41c..584f605696 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1009,6 +1009,7 @@ github.com/thanos-io/thanos/pkg/store/storepb/prompb github.com/thanos-io/thanos/pkg/strutil github.com/thanos-io/thanos/pkg/targets/targetspb github.com/thanos-io/thanos/pkg/tenancy +github.com/thanos-io/thanos/pkg/testutil/e2eutil github.com/thanos-io/thanos/pkg/tls github.com/thanos-io/thanos/pkg/tracing github.com/thanos-io/thanos/pkg/tracing/interceptors
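Reviewer note (not part of the diff): a minimal usage sketch, assuming only the APIs exercised in visit_marker_test.go above (NewVisitMarkerManager, HeartBeat, and a VisitMarker implementation such as TestVisitMarker). The runCompactionWithVisitMarker helper and the doWork callback are hypothetical names used only for illustration.

package compactor

import (
	"context"
	"time"

	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/thanos-io/objstore"
)

// runCompactionWithVisitMarker is a hypothetical sketch, not part of this change. It shows the
// intended ownership flow: create a visit marker for the work item, keep the marker fresh with
// HeartBeat while doWork runs, and report the outcome through errChan so the marker ends up
// Completed, Failed or Pending, mirroring the cases covered by TestHeartBeat.
func runCompactionWithVisitMarker(ctx context.Context, bkt objstore.InstrumentedBucket, logger log.Logger,
	readFailed, writeFailed prometheus.Counter, doWork func(context.Context) error) error {
	marker := NewTestVisitMarker("compactor-1") // any VisitMarker implementation works here
	manager := NewVisitMarkerManager(bkt, logger, "compactor-1", marker, readFailed, writeFailed)

	errChan := make(chan error, 1)
	go manager.HeartBeat(ctx, errChan, 1*time.Minute, false /* deleteOnExit */)

	err := doWork(ctx)
	errChan <- err // stops the heart beat and finalizes the visit marker status
	return err
}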