From ef11de0a7abf3a95658a5fff2df72e00f1f3ab9c Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Mon, 25 Nov 2024 11:55:15 -0800 Subject: [PATCH] Bounded Trie proto representation. --- .../beam/model/pipeline/v1/metrics.proto | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/metrics.proto b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/metrics.proto index 4ec189e4637f..33bb5ae729f8 100644 --- a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/metrics.proto +++ b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/metrics.proto @@ -198,6 +198,17 @@ message MonitoringInfoSpecs { }] }]; + // Represents a set of strings seen across bundles. + USER_BOUNDED_TRIE = 22 [(monitoring_info_spec) = { + urn: "beam:metric:user:bounded_trie:v1", + type: "beam:metrics:bounded_trie:v1", + required_labels: ["PTRANSFORM", "NAMESPACE", "NAME"], + annotations: [{ + key: "description", + value: "URN utilized to report user metric." + }] + }]; + // General monitored state information which contains structured information // which does not fit into a typical metric format. See MonitoringTableData // for more details. @@ -576,6 +587,12 @@ message MonitoringInfoTypeUrns { SET_STRING_TYPE = 11 [(org.apache.beam.model.pipeline.v1.beam_urn) = "beam:metrics:set_string:v1"]; + // Represents a bounded trie of strings. + // + // Encoding: BoundedTrie proto + BOUNDED_TRIE_TYPE = 12 [(org.apache.beam.model.pipeline.v1.beam_urn) = + "beam:metrics:bounded_trie:v1"]; + // General monitored state information which contains structured information // which does not fit into a typical metric format. See MonitoringTableData // for more details. @@ -588,6 +605,30 @@ message MonitoringInfoTypeUrns { } } + +// A single node in a BoundedTrie. +message BoundedTrieNode { + // Whether this node has been truncated. + // A truncated leaf represents possibly many children with the same prefix. + bool truncated = 1; + + // Children of this node. Must be empty if truncated is true. + map children = 2; +} + +// The message type used for encoding metrics of type bounded trie. +message BoundedTrie { + // The maximum number of elements to store before truncation. + int32 bound = 1; + + // A compact representation of all the elements in this trie. + BoundedTrieNode root = 2; + + // A more efficient representation for metrics consisting of a single value. + repeated string singleton = 3; +} + + // General monitored state information which contains structured information // which does not fit into a typical metric format. //