-
Notifications
You must be signed in to change notification settings - Fork 238
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
(PE-38408) Remove expensive Regexes from puppet profiler Java impl #2880
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -5,6 +5,7 @@ | |||
import org.apache.commons.lang.StringUtils; | ||||
|
||||
import java.util.ArrayList; | ||||
import java.util.Arrays; | ||||
import java.util.HashMap; | ||||
import java.util.List; | ||||
import java.util.Map; | ||||
|
@@ -21,17 +22,21 @@ public class MetricsPuppetProfiler implements PuppetProfiler { | |||
private final MetricRegistry registry; | ||||
private final Set<String> metric_ids; | ||||
|
||||
private static final Pattern FUNCTION_PATTERN = Pattern.compile(".*\\.functions\\.([\\w\\d_]+)$"); | ||||
private static final Pattern RESOURCE_PATTERN = Pattern.compile(".*\\.compiler\\.evaluate_resource\\.([\\w\\d_]+\\[([\\w\\d_]+::)*[\\w\\d_]+\\])$"); | ||||
private static final Pattern CATALOG_PATTERN = Pattern.compile(".*\\.compiler\\.(static_compile_postprocessing|static_compile|compile|find_node)$"); | ||||
private static final Pattern INLINING_PATTERN = Pattern.compile(".*\\.compiler\\.static_compile_inlining\\.(.*)$"); | ||||
private static final Pattern PUPPETDB_PATTERN = Pattern.compile(".*\\.puppetdb\\.(resource\\.search|facts\\.encode|command\\.submit\\.replace facts|catalog\\.munge|command\\.submit\\.replace catalog|report\\.convert_to_wire_format_hash|command\\.submit\\.store report|query)$"); | ||||
|
||||
private final Map<String, Timer> function_timers; | ||||
private final Map<String, Timer> resource_timers; | ||||
private final Map<String, Timer> catalog_timers; | ||||
private final Map<String, Timer> inlining_timers; | ||||
private final Map<String, Timer> puppetdb_timers; | ||||
|
||||
/**
 * Builds a profiler bound to the given host name and metric registry.
 *
 * The set of known metric ids and each of the five per-category timer maps
 * start out empty and are backed by {@link ConcurrentHashMap}.
 *
 * @param hostname host name stored on this profiler
 * @param registry registry stored on this profiler
 */
public MetricsPuppetProfiler(String hostname, MetricRegistry registry) {
    this.registry = registry;
    this.hostname = hostname;

    // Concurrent set view over a ConcurrentHashMap; holds every metric name seen.
    this.metric_ids = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());

    // One tracker map per metric category exposed through the get*Timers() accessors.
    this.function_timers = new ConcurrentHashMap<>();
    this.resource_timers = new ConcurrentHashMap<>();
    this.catalog_timers = new ConcurrentHashMap<>();
    this.inlining_timers = new ConcurrentHashMap<>();
    this.puppetdb_timers = new ConcurrentHashMap<>();
}
|
||||
@Override | ||||
|
@@ -43,9 +48,12 @@ public Object start(String message, String[] metric_id) { | |||
public void finish(Object context, String message, String[] metric_id) { | ||||
if (shouldTime(metric_id)) { | ||||
Long elapsed = System.currentTimeMillis() - (Long)context; | ||||
for (Timer t : getTimers(metric_id)) { | ||||
Map<String, Timer> metricsByID = getOrCreateTimersByIDs(metric_id); | ||||
for (Timer t : metricsByID.values()) { | ||||
t.update(elapsed, TimeUnit.MILLISECONDS); | ||||
} | ||||
|
||||
updateMetricsTrackers(metric_id, metricsByID); | ||||
} | ||||
} | ||||
|
||||
|
@@ -54,29 +62,107 @@ public Set<String> getAllMetricIds() { | |||
} | ||||
|
||||
public Map<String, Timer> getFunctionTimers() { | ||||
return getTimers(FUNCTION_PATTERN); | ||||
return this.function_timers; | ||||
} | ||||
|
||||
public Map<String, Timer> getResourceTimers() { | ||||
return getTimers(RESOURCE_PATTERN); | ||||
return this.resource_timers; | ||||
} | ||||
|
||||
public Map<String, Timer> getCatalogTimers() { | ||||
return getTimers(CATALOG_PATTERN); | ||||
return this.catalog_timers; | ||||
} | ||||
|
||||
public Map<String, Timer> getInliningTimers() { | ||||
return getTimers(INLINING_PATTERN); | ||||
return this.inlining_timers; | ||||
} | ||||
|
||||
public Map<String, Timer> getPuppetDBTimers() { | ||||
return getTimers(PUPPETDB_PATTERN); | ||||
return this.puppetdb_timers; | ||||
} | ||||
|
||||
@Override | ||||
public void shutdown() { | ||||
} | ||||
|
||||
private List<String> sliceOfArrayToList(String[] idSegments, int lengthOfID) { | ||||
// Callers expect a mutable List returned, but Arrays.asList() returns a | ||||
// fix length array, which is why we have to create a List and then add to it. | ||||
List<String> idList = new ArrayList<String>(); | ||||
idList.addAll(Arrays.asList(Arrays.copyOf(idSegments, lengthOfID))); | ||||
|
||||
return idList; | ||||
} | ||||
|
||||
private String safeGet(String[] collection, int i) { | ||||
try { | ||||
return collection[i]; | ||||
} catch (IndexOutOfBoundsException _ex) { | ||||
return ""; | ||||
} | ||||
} | ||||
|
||||
private void updateMetricsTrackers(String[] metricId, Map<String, Timer> metricsByID) { | ||||
String firstElement = safeGet(metricId, 0); | ||||
String secondElement = safeGet(metricId, 1); | ||||
|
||||
if ("functions".equals(firstElement)) { | ||||
Timer metric = metricsByID.get(getMetricName(sliceOfArrayToList(metricId, 2))); | ||||
this.function_timers.put(secondElement, metric); | ||||
|
||||
} else if ("compiler".equals(firstElement)) { | ||||
String thirdElemet = safeGet(metricId, 2); | ||||
|
||||
if ("evaluate_resource".equals(secondElement)) { | ||||
Timer metric = metricsByID.get(getMetricName(sliceOfArrayToList(metricId, 3))); | ||||
this.resource_timers.put(thirdElemet, metric); | ||||
|
||||
} else if ("static_compile_inlining".equals(secondElement)) { | ||||
Timer metric = metricsByID.get(getMetricName(sliceOfArrayToList(metricId, 3))); | ||||
this.inlining_timers.put(thirdElemet, metric); | ||||
|
||||
} else { | ||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The previous regex-based selection explicitly grabbed these submetrics: static_compile_postprocessing, static_compile, compile, and find_node. This else will grab all metrics that aren't evaluate_resource or static_compile_inlining; I don't know if that is a good thing or a bad thing. Thoughts @Sharpie? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this mean these more specific metrics will no longer be counted in catalog timers? Does that matter? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. They will still be there, but we'll report a lot more now. I don't know if that will be helpful or just noise. Here are the ones I can easily find with grep:
|
||||
Timer metric = metricsByID.get(getMetricName(sliceOfArrayToList(metricId, 2))); | ||||
this.catalog_timers.put(secondElement, metric); | ||||
} | ||||
|
||||
} else if ("puppetdb".equals(firstElement)) { | ||||
if ("query".equals(secondElement)) { | ||||
Timer metric = metricsByID.get(getMetricName(sliceOfArrayToList(metricId, 2))); | ||||
this.puppetdb_timers.put(secondElement, metric); | ||||
|
||||
} else { | ||||
String thirdElemet = safeGet(metricId, 2); | ||||
|
||||
if ( | ||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's probably overkill, but I wonder if it would be worth comparing metrics from two reference workflows. Matching the regex to this logic is hurting my brain. It may not matter too much though... These are hard to reason about 🫠 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The regex matching definitely hurt my brain as well! I got it wrong a couple of times. I can run the acceptance test and collect what metrics are reported before and after this change. I've run it after to make sure they look right (see this comment #2880 (comment)), but haven't done a full comparison. I'll do that. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here are the previous metrics from 7.17.2:
and here are the metrics returned after this pr:
These aren't pretty printed, not as an f-u, but because I think putting them pretty printed in a github comment would make the content very hard to compare. I put them into files, piped them through jq, and compared two terminals side by side. I think they look correct. |
||||
("resource".equals(secondElement) && "search".equals(thirdElemet)) || | ||||
("payload".equals(secondElement) && "format".equals(thirdElemet)) || | ||||
// Set.of would be preferrable but 7.x still support Java 8, which does not have Set.of | ||||
("facts".equals(secondElement) && Arrays.asList("save", "find", "search", "encode").contains(thirdElemet)) || | ||||
("catalog".equals(secondElement) && Arrays.asList("save", "munge").contains(thirdElemet)) || | ||||
("report".equals(secondElement) && Arrays.asList("convert_to_wire_format_hash", "process").contains(thirdElemet)) | ||||
) { | ||||
String key = String.join(".", secondElement, thirdElemet); | ||||
Timer metric = metricsByID.get(getMetricName(sliceOfArrayToList(metricId, 3))); | ||||
this.puppetdb_timers.put(key, metric); | ||||
|
||||
} else if ("command".equals(secondElement) && "submit".equals(thirdElemet)) { | ||||
String fourthElement = safeGet(metricId, 3); | ||||
|
||||
if ( | ||||
"store report".equals(fourthElement) || | ||||
"replace facts".equals(fourthElement) || | ||||
"replace catalog".equals(fourthElement) | ||||
) { | ||||
String key = String.join(".", secondElement, thirdElemet, fourthElement); | ||||
Timer metric = metricsByID.get(getMetricName(sliceOfArrayToList(metricId, 4))); | ||||
this.puppetdb_timers.put(key, metric); | ||||
} | ||||
} | ||||
} | ||||
} | ||||
} | ||||
|
||||
private boolean shouldTime(String[] metric_id) { | ||||
if (metric_id == null) { | ||||
return false; | ||||
|
@@ -90,8 +176,8 @@ private boolean shouldTime(String[] metric_id) { | |||
return true; | ||||
} | ||||
|
||||
private List<Timer> getTimers(String[] metric_id) { | ||||
List<Timer> timers = new ArrayList<Timer>(); | ||||
private Map<String, Timer> getOrCreateTimersByIDs(String[] metric_id) { | ||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This name is not used anywhere else, right? It's not important for
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure I'm following the "or" in the name here. It seems like this just creates a new map and returns it unconditionally. Am I missing something? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
It's not; the method is private and external callers are calling this method on the MetricRegistry https://www.javadoc.io/doc/io.dropwizard.metrics/metrics-core/4.1.0-rc2/com/codahale/metrics/MetricRegistry.html#getTimers-- I think the two cases of "getTimers()" here were inspired by that method. But the call to MetricRegistry.timer() in this method is actually the place in this class where we create timers (if they don't exist). See https://www.javadoc.io/doc/io.dropwizard.metrics/metrics-core/4.1.0-rc2/com/codahale/metrics/MetricRegistry.html#timer-java.lang.String- The There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, thanks for the reference. I think leaving it as is will be fine! |
||||
Map<String, Timer> timers = new HashMap<String, Timer>(); | ||||
// If this is turns out to be a performance hit, we could cache these in a | ||||
// map or something. | ||||
for (int i = 0; i < metric_id.length; i++) { | ||||
|
@@ -101,7 +187,7 @@ private List<Timer> getTimers(String[] metric_id) { | |||
} | ||||
String metric_name = getMetricName(current_id); | ||||
registerMetricName(metric_name); | ||||
timers.add(registry.timer(metric_name)); | ||||
timers.put(metric_name, registry.timer(metric_name)); | ||||
} | ||||
return timers; | ||||
} | ||||
|
@@ -114,15 +200,4 @@ private String getMetricName(List<String> metric_id) { | |||
private void registerMetricName(String metric_name) { | ||||
this.metric_ids.add(metric_name); | ||||
} | ||||
|
||||
private Map<String, Timer> getTimers(Pattern pattern) { | ||||
Map<String, Timer> rv = new HashMap<>(); | ||||
for (String metric_id : this.metric_ids) { | ||||
Matcher matcher = pattern.matcher(metric_id); | ||||
if (matcher.matches()) { | ||||
rv.put(matcher.group(1), registry.timer(metric_id)); | ||||
} | ||||
} | ||||
return rv; | ||||
} | ||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can't see how `subList` does not work for this? Seems like, for example, `getMetricName` could be called with the result of a `metricId.subList(0,2)`, as the result of `subList` I think would be mutable?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We start with a `String[]`, which doesn't have the `subList()` method defined, but we can turn an array into a List with `Arrays.asList()`.
I originally tried `Arrays.asList(ids).subList(0,2)`. But the problem is that `asList()` returns a fixed-size List and `subList()` simply returns a view of the original fixed-size list. So we end up erroring when `getMetricName` attempts to prepend the hostname to the list here: https://github.com/puppetlabs/puppetserver/pull/2880/files#diff-300731d432bcbd4678957c962c2aa20d068698fff7a4441434c3d1ac5e55fdfdR196
I should probably clean up `getMetricName()`, but I didn't want to lead with a larger refactor than was strictly necessary (at least after I had changed `getTimers`).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I see, I misread that parameter type.