Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add internal "deadline remaining" client side metric #2341 #2370

Merged
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
7fe9933
Rebase on HEAD
djyau Oct 16, 2024
bd36596
Add override to CompositeTracer
djyau Oct 4, 2024
965e4bd
Remove blank lines
djyau Oct 4, 2024
2068604
Fix missing imports
djyau Oct 4, 2024
affb852
Only record remaining deadline if it is not null
djyau Oct 7, 2024
67fe252
Add remaining deadline to list of metrics in BigtableCloudMonitoringE…
djyau Oct 7, 2024
64908fb
Export remainingDeadline histogram
djyau Oct 7, 2024
804d223
Fix unit test
djyau Oct 8, 2024
585c64c
Remove print statement
djyau Oct 8, 2024
ff7c311
Fix merge conflict
djyau Oct 16, 2024
a336fa4
Use deadline from callContext, not callOptions
djyau Oct 16, 2024
3f3d273
Fixing merge conflicts
djyau Oct 16, 2024
afb4d50
Fix more merge conflicts
djyau Oct 16, 2024
bf789ab
Fix merge conflicts...
djyau Oct 16, 2024
501e029
Fixing more merge conflicts
djyau Oct 16, 2024
42930a8
Fix more merge conflicts
djyau Oct 16, 2024
0191ab0
Use operation timer
djyau Oct 17, 2024
ec13bd8
Run auto formatter
djyau Oct 21, 2024
1322f44
Address PR comment
djyau Oct 21, 2024
2235f20
Remove print statement, fix fn coment
djyau Oct 21, 2024
b8046a6
Trying to plumb operation timeout thru
djyau Oct 25, 2024
68d2216
Set remaining deadline in tracer callables
djyau Oct 25, 2024
235e73a
Set remaining deadline in tracer unary callable as well
djyau Oct 25, 2024
b878e0b
Remove unused lines
djyau Oct 25, 2024
e9b4b12
Remove unused code
djyau Oct 25, 2024
74b9ae5
Set the operation timeout on the call context for each callable
djyau Oct 25, 2024
7e78857
Remove unnecessary long conversion, format code
djyau Oct 25, 2024
1a66d9e
Make the deadline key a public member of Bigtable Tracer
djyau Oct 25, 2024
439e081
Fix settings
djyau Oct 25, 2024
18ffd59
Missed one setting
djyau Oct 25, 2024
b04b8ca
Address PR comments
djyau Oct 28, 2024
d63d640
Rename variable to make consistent
djyau Oct 28, 2024
f6b6c70
Add nonzero check for remaining deadline
djyau Oct 28, 2024
0fef2b8
Set remaining operation timeout in attemptStart
djyau Oct 30, 2024
4a9ef83
Add clarifying comment
djyau Oct 30, 2024
f409ea7
Format code
djyau Oct 30, 2024
83f0df1
Use only java.time.Duration in BuiltinMetricsTracerTest
djyau Nov 4, 2024
cc88293
Loosen test assertions
djyau Nov 4, 2024
2c54095
Merge branch 'main' into feature-deadline-remaining-metric
mutianf Nov 6, 2024
549fb9a
Use callSettings in UnaryCallable
djyau Nov 6, 2024
4e66bc2
Format code
djyau Nov 6, 2024
bc1dde2
Address PR comments
djyau Nov 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.METER_NAME;
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.OPERATION_LATENCIES_NAME;
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.PER_CONNECTION_ERROR_COUNT_NAME;
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.REMAINING_DEADLINE_NAME;
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.RETRY_COUNT_NAME;
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.SERVER_LATENCIES_NAME;

Expand Down Expand Up @@ -115,7 +116,8 @@ public final class BigtableCloudMonitoringExporter implements MetricExporter {
CLIENT_BLOCKING_LATENCIES_NAME,
APPLICATION_BLOCKING_LATENCIES_NAME,
RETRY_COUNT_NAME,
CONNECTIVITY_ERROR_COUNT_NAME)
CONNECTIVITY_ERROR_COUNT_NAME,
REMAINING_DEADLINE_NAME)
.stream()
.map(m -> METER_NAME + m)
.collect(ImmutableList.toImmutableList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import io.grpc.ClientStreamTracer;
import io.grpc.Metadata;
import org.threeten.bp.Duration;

/**
* Records the time a request is enqueued in a grpc channel queue. This a bridge between gRPC stream
Expand All @@ -26,28 +27,33 @@
class BigtableGrpcStreamTracer extends ClientStreamTracer {

private final BigtableTracer tracer;
private final Duration deadline;

public BigtableGrpcStreamTracer(BigtableTracer tracer) {
public BigtableGrpcStreamTracer(BigtableTracer tracer, Duration deadline) {
this.tracer = tracer;
this.deadline = deadline;
}

@Override
public void outboundMessageSent(int seqNo, long optionalWireSize, long optionalUncompressedSize) {
tracer.grpcMessageSent();
tracer.setRemainingDeadline(deadline.toMillis());
}

static class Factory extends ClientStreamTracer.Factory {

private final BigtableTracer tracer;
private final Duration deadline;

Factory(BigtableTracer tracer) {
Factory(BigtableTracer tracer, Duration deadline) {
this.tracer = tracer;
this.deadline = deadline;
}

@Override
public ClientStreamTracer newClientStreamTracer(
ClientStreamTracer.StreamInfo info, Metadata headers) {
return new BigtableGrpcStreamTracer(tracer);
return new BigtableGrpcStreamTracer(tracer, deadline);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -93,4 +93,12 @@ public void grpcChannelQueuedLatencies(long queuedTimeMs) {
public void grpcMessageSent() {
// noop
}

/**
* Set the remaining customer specified deadline so it can be exported in a metric. This will be
* called in BuiltinMetricsTracer.
*/
public void setRemainingDeadline(long deadline) {
// noop
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public class BuiltinMetricsConstants {
static final String SERVER_LATENCIES_NAME = "server_latencies";
static final String FIRST_RESPONSE_LATENCIES_NAME = "first_response_latencies";
static final String APPLICATION_BLOCKING_LATENCIES_NAME = "application_latencies";
static final String REMAINING_DEADLINE_NAME = "remaining_deadline";
static final String CLIENT_BLOCKING_LATENCIES_NAME = "throttling_latencies";
static final String PER_CONNECTION_ERROR_COUNT_NAME = "per_connection_error_count";

Expand Down Expand Up @@ -214,6 +215,16 @@ public static Map<InstrumentSelector, View> getAllViews() {
ImmutableSet.<AttributeKey>builder()
.add(BIGTABLE_PROJECT_ID_KEY, INSTANCE_ID_KEY, APP_PROFILE_KEY, CLIENT_NAME_KEY)
.build());
defineView(
views,
REMAINING_DEADLINE_NAME,
AGGREGATION_WITH_MILLIS_HISTOGRAM,
InstrumentType.HISTOGRAM,
"ms",
ImmutableSet.<AttributeKey>builder()
.addAll(COMMON_ATTRIBUTES)
.add(STREAMING_KEY, STATUS_KEY)
.build());

return views.build();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ class BuiltinMetricsTracer extends BigtableTracer {
private Long serverLatencies = null;
private final AtomicLong grpcMessageSentDelay = new AtomicLong(0);

private long deadlineRemaining = 0;

// OpenCensus (and server) histogram buckets use [start, end), however OpenTelemetry uses (start,
// end]. To work around this, we measure all the latencies in nanoseconds and convert them
// to milliseconds and use DoubleHistogram. This should minimize the chance of a data
Expand All @@ -96,6 +98,7 @@ class BuiltinMetricsTracer extends BigtableTracer {
private final DoubleHistogram firstResponseLatenciesHistogram;
private final DoubleHistogram clientBlockingLatenciesHistogram;
private final DoubleHistogram applicationBlockingLatenciesHistogram;
private final DoubleHistogram remainingDeadlineHistogram;
private final LongCounter connectivityErrorCounter;
private final LongCounter retryCounter;

Expand All @@ -109,6 +112,7 @@ class BuiltinMetricsTracer extends BigtableTracer {
DoubleHistogram firstResponseLatenciesHistogram,
DoubleHistogram clientBlockingLatenciesHistogram,
DoubleHistogram applicationBlockingLatenciesHistogram,
DoubleHistogram remainingDeadlineHistogram,
LongCounter connectivityErrorCounter,
LongCounter retryCounter) {
this.operationType = operationType;
Expand All @@ -121,6 +125,7 @@ class BuiltinMetricsTracer extends BigtableTracer {
this.firstResponseLatenciesHistogram = firstResponseLatenciesHistogram;
this.clientBlockingLatenciesHistogram = clientBlockingLatenciesHistogram;
this.applicationBlockingLatenciesHistogram = applicationBlockingLatenciesHistogram;
this.remainingDeadlineHistogram = remainingDeadlineHistogram;
this.connectivityErrorCounter = connectivityErrorCounter;
this.retryCounter = retryCounter;
}
Expand Down Expand Up @@ -268,6 +273,12 @@ public void grpcMessageSent() {
grpcMessageSentDelay.set(attemptTimer.elapsed(TimeUnit.NANOSECONDS));
}

@Override
public void setRemainingDeadline(long deadline) {
djyau marked this conversation as resolved.
Show resolved Hide resolved
long timeElapsed = operationTimer.elapsed(TimeUnit.MILLISECONDS);
deadlineRemaining = deadline - timeElapsed;
}

@Override
public void disableFlowControl() {
flowControlIsDisabled = true;
Expand Down Expand Up @@ -306,6 +317,8 @@ private void recordOperationCompletion(@Nullable Throwable status) {

operationLatenciesHistogram.record(convertToMs(operationLatencyNano), attributes);

remainingDeadlineHistogram.record(deadlineRemaining, attributes);

// serverLatencyTimer should already be stopped in recordAttemptCompletion
long applicationLatencyNano = operationLatencyNano - totalServerLatencyNano.get();
applicationBlockingLatenciesHistogram.record(convertToMs(applicationLatencyNano), attributes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.FIRST_RESPONSE_LATENCIES_NAME;
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.METER_NAME;
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.OPERATION_LATENCIES_NAME;
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.REMAINING_DEADLINE_NAME;
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.RETRY_COUNT_NAME;
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.SERVER_LATENCIES_NAME;

Expand Down Expand Up @@ -55,6 +56,7 @@ public class BuiltinMetricsTracerFactory extends BaseApiTracerFactory {
private final DoubleHistogram firstResponseLatenciesHistogram;
private final DoubleHistogram clientBlockingLatenciesHistogram;
private final DoubleHistogram applicationBlockingLatenciesHistogram;
private final DoubleHistogram remainingDeadlineHistogram;
private final LongCounter connectivityErrorCounter;
private final LongCounter retryCounter;

Expand Down Expand Up @@ -108,6 +110,12 @@ public static BuiltinMetricsTracerFactory create(
"The latency of the client application consuming available response data.")
.setUnit(MILLISECOND)
.build();
remainingDeadlineHistogram =
meter
.histogramBuilder(REMAINING_DEADLINE_NAME)
.setDescription("The remaining customer specified deadline at the end of the request.")
.setUnit(MILLISECOND)
.build();
connectivityErrorCounter =
meter
.counterBuilder(CONNECTIVITY_ERROR_COUNT_NAME)
Expand Down Expand Up @@ -135,6 +143,7 @@ public ApiTracer newTracer(ApiTracer parent, SpanName spanName, OperationType op
firstResponseLatenciesHistogram,
clientBlockingLatenciesHistogram,
applicationBlockingLatenciesHistogram,
remainingDeadlineHistogram,
connectivityErrorCounter,
retryCounter);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,4 +232,11 @@ public void grpcMessageSent() {
tracer.grpcMessageSent();
}
}

@Override
public void setRemainingDeadline(long deadline) {
for (BigtableTracer tracer : bigtableTracers) {
tracer.setRemainingDeadline(deadline);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import org.threeten.bp.Duration;

/** Utilities to help integrating with OpenCensus. */
@InternalApi("For internal use only")
Expand Down Expand Up @@ -222,9 +223,11 @@ static GrpcCallContext injectBigtableStreamTracer(
if (context instanceof GrpcCallContext) {
GrpcCallContext callContext = (GrpcCallContext) context;
CallOptions callOptions = callContext.getCallOptions();
Duration deadline = callContext.getTimeout();
return responseMetadata.addHandlers(
callContext.withCallOptions(
callOptions.withStreamTracerFactory(new BigtableGrpcStreamTracer.Factory(tracer))));
callOptions.withStreamTracerFactory(
new BigtableGrpcStreamTracer.Factory(tracer, deadline))));
} else {
// context should always be an instance of GrpcCallContext. If not throw an exception
// so we can see what class context is.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.CONNECTIVITY_ERROR_COUNT_NAME;
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.METHOD_KEY;
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.OPERATION_LATENCIES_NAME;
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.REMAINING_DEADLINE_NAME;
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.RETRY_COUNT_NAME;
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.SERVER_LATENCIES_NAME;
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.STATUS_KEY;
Expand Down Expand Up @@ -94,6 +95,7 @@
import java.io.IOException;
import java.net.SocketAddress;
import java.nio.charset.Charset;
import java.time.Duration;
djyau marked this conversation as resolved.
Show resolved Hide resolved
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
Expand Down Expand Up @@ -211,6 +213,11 @@ public void sendHeaders(Metadata headers) {
.retrySettings()
.setInitialRetryDelayDuration(java.time.Duration.ofMillis(200));

stubSettingsBuilder
.readRowsSettings()
.retrySettings()
.setTotalTimeoutDuration(Duration.ofMillis(3000));

stubSettingsBuilder
.bulkMutateRowsSettings()
.setBatchingSettings(
Expand Down Expand Up @@ -702,6 +709,27 @@ public void testPermanentFailure() {
verifyAttributes(opLatency, expected);
}

@Test
public void testRemainingDeadline() {
mutianf marked this conversation as resolved.
Show resolved Hide resolved
stub.readRowsCallable().all().call(Query.create(TABLE));
MetricData remainingDeadlineMetric = getMetricData(metricReader, REMAINING_DEADLINE_NAME);

Attributes attributes =
baseAttributes
.toBuilder()
.put(STATUS_KEY, "OK")
.put(TABLE_ID_KEY, TABLE)
.put(ZONE_ID_KEY, ZONE)
.put(CLUSTER_ID_KEY, CLUSTER)
.put(METHOD_KEY, "Bigtable.ReadRows")
.put(STREAMING_KEY, true)
.put(CLIENT_NAME_KEY, CLIENT_NAME)
.build();

long remainingDeadline = getAggregatedValue(remainingDeadlineMetric, attributes);
assertThat(remainingDeadline).isIn(Range.closed((long) 2000, (long) 2500));
}

private static class FakeService extends BigtableGrpc.BigtableImplBase {

static List<ReadRowsResponse> createFakeResponse() {
Expand Down
Loading