Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ClusterManagerTaskThrottler Improvements #15508

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Optimise snapshot deletion to speed up snapshot deletion and creation ([#15568](https://github.com/opensearch-project/OpenSearch/pull/15568))
- [Remote Publication] Added checksum validation for cluster state behind a cluster setting ([#15218](https://github.com/opensearch-project/OpenSearch/pull/15218))
- Add canRemain method to TargetPoolAllocationDecider to move shards from local to remote pool for hot to warm tiering ([#15010](https://github.com/opensearch-project/OpenSearch/pull/15010))
- ClusterManagerTaskThrottler Improvements ([#15508](https://github.com/opensearch-project/OpenSearch/pull/15508))

### Dependencies
- Bump `netty` from 4.1.111.Final to 4.1.112.Final ([#15081](https://github.com/opensearch-project/OpenSearch/pull/15081))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
* <p>
* Set a specific setting to configure the throttling threshold of a particular task type.
* e.g : Set "cluster_manager.throttling.thresholds.put_mapping" to set throttling limit of "put mapping" tasks,
* Set it to default value(-1) to disable the throttling for this task type.
* Set it to default value(-1) to disable the throttling for this task type.
*/
public class ClusterManagerTaskThrottler implements TaskBatcherListener {
private static final Logger logger = LogManager.getLogger(ClusterManagerTaskThrottler.class);
Expand Down Expand Up @@ -69,7 +69,7 @@ public class ClusterManagerTaskThrottler implements TaskBatcherListener {
private final int MIN_THRESHOLD_VALUE = -1; // Disabled throttling
private final ClusterManagerTaskThrottlerListener clusterManagerTaskThrottlerListener;

private final ConcurrentMap<String, Long> tasksCount;
final ConcurrentMap<String, Long> tasksCount;
private final ConcurrentMap<String, Long> tasksThreshold;
private final Supplier<Version> minNodeVersionSupplier;

Expand Down Expand Up @@ -209,30 +209,59 @@ Long getThrottlingLimit(final String taskKey) {
return tasksThreshold.get(taskKey);
}

/**
 * Aborts a submission by throwing when the throttling limit for a task type would be breached.
 * <p>
 * Nothing happens when {@code throttlingEnabledWithThreshold} is {@code false}; in that case
 * {@code threshold} is never inspected, so it may be {@code null}.
 *
 * @param throttlingEnabledWithThreshold whether the throttling check should be evaluated at all
 * @param threshold                      limit handed to {@code shouldThrottle}
 * @param existingTaskCount              number of tasks already counted for this key
 * @param incomingTaskCount              number of tasks the caller is trying to add
 * @param taskThrottlingKey              key of the task type, appended to the exception message
 * @throws ClusterManagerThrottlingException if the check is enabled and {@code shouldThrottle} returns {@code true}
 */
private void failFastWhenThrottlingThresholdsAreAlreadyBreached(
    final boolean throttlingEnabledWithThreshold,
    final Long threshold,
    final long existingTaskCount,
    final int incomingTaskCount,
    final String taskThrottlingKey
) {
    // Guard clause: with the check disabled there is nothing to evaluate.
    if (!throttlingEnabledWithThreshold) {
        return;
    }
    final boolean limitBreached = shouldThrottle(threshold, existingTaskCount, incomingTaskCount);
    if (limitBreached) {
        throw new ClusterManagerThrottlingException("Throttling Exception : Limit exceeded for " + taskThrottlingKey);
    }
}

@Override
public void onBeginSubmit(List<? extends TaskBatcher.BatchedTask> tasks) {
ThrottlingKey clusterManagerThrottlingKey = ((ClusterStateTaskExecutor<Object>) tasks.get(0).batchingKey)
final ThrottlingKey clusterManagerThrottlingKey = ((ClusterStateTaskExecutor<Object>) tasks.get(0).batchingKey)
.getClusterManagerThrottlingKey();
tasksCount.putIfAbsent(clusterManagerThrottlingKey.getTaskThrottlingKey(), 0L);
tasksCount.computeIfPresent(clusterManagerThrottlingKey.getTaskThrottlingKey(), (key, count) -> {
int size = tasks.size();
if (clusterManagerThrottlingKey.isThrottlingEnabled()) {
Long threshold = tasksThreshold.get(clusterManagerThrottlingKey.getTaskThrottlingKey());
if (threshold != null && shouldThrottle(threshold, count, size)) {
clusterManagerTaskThrottlerListener.onThrottle(clusterManagerThrottlingKey.getTaskThrottlingKey(), size);
logger.warn(
"Throwing Throttling Exception for [{}]. Trying to add [{}] tasks to queue, limit is set to [{}]",
clusterManagerThrottlingKey.getTaskThrottlingKey(),
tasks.size(),
threshold
);
throw new ClusterManagerThrottlingException(
"Throttling Exception : Limit exceeded for " + clusterManagerThrottlingKey.getTaskThrottlingKey()
);
}
}
return count + size;
});
final String taskThrottlingKey = clusterManagerThrottlingKey.getTaskThrottlingKey();
final Long threshold = getThrottlingLimit(taskThrottlingKey);
final boolean isThrottlingEnabledWithThreshold = clusterManagerThrottlingKey.isThrottlingEnabled() && threshold != null;
int incomingTaskCount = tasks.size();

sumitasr marked this conversation as resolved.
Show resolved Hide resolved
try {
tasksCount.putIfAbsent(taskThrottlingKey, 0L);
// Perform shallow check before acquiring lock to avoid blocking of network threads
// if throttling is ongoing for a specific task
failFastWhenThrottlingThresholdsAreAlreadyBreached(
isThrottlingEnabledWithThreshold,
threshold,
tasksCount.get(taskThrottlingKey),
incomingTaskCount,
taskThrottlingKey
);

tasksCount.computeIfPresent(taskThrottlingKey, (key, existingTaskCount) -> {
failFastWhenThrottlingThresholdsAreAlreadyBreached(
isThrottlingEnabledWithThreshold,
threshold,
existingTaskCount,
incomingTaskCount,
taskThrottlingKey
);
return existingTaskCount + incomingTaskCount;
});
} catch (final ClusterManagerThrottlingException e) {
clusterManagerTaskThrottlerListener.onThrottle(taskThrottlingKey, incomingTaskCount);
logger.trace(
"Throwing Throttling Exception for [{}]. Trying to add [{}] tasks to queue, limit is set to [{}]",
taskThrottlingKey,
incomingTaskCount,
threshold
);
throw e;
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,10 @@ public ClusterManagerThrottlingException(String msg, Object... args) {
public ClusterManagerThrottlingException(StreamInput in) throws IOException {
super(in);
}

/**
 * Overrides stack trace capture so that no stack trace is recorded for this exception.
 *
 * @return this exception instance, unchanged
 */
@Override
public Throwable fillInStackTrace() {
// This is on the hot path; stack traces are expensive to compute and not very useful for this exception, so don't fill it.
return this;
}
}
Loading
Loading