Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid infinite loops due to corrupted flow graphs in some cases and improve resumption error handling #349

Merged
merged 3 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -368,14 +368,19 @@ private static final class ParallelResumer {
LOGGER.log(Level.WARNING, "Could not look up FlowNode for " + se + " so it will not be resumed", x);
}
}
for (FlowNode n : nodes.keySet()) {
LinearBlockHoppingScanner scanner = new LinearBlockHoppingScanner();
scanner.setup(n);
for (FlowNode parent : scanner) {
if (parent != n && nodes.containsKey(parent)) {
enclosing.put(n, parent);
break;
for (Map.Entry<FlowNode, StepExecution> entry : nodes.entrySet()) {
FlowNode n = entry.getKey();
try {
LinearBlockHoppingScanner scanner = new LinearBlockHoppingScanner();
scanner.setup(n);
for (FlowNode parent : scanner) {
if (parent != n && nodes.containsKey(parent)) {
enclosing.put(n, parent);
break;
}
}
} catch (Exception x) {
LOGGER.log(Level.WARNING, x, () -> "Unable to compute enclosing blocks for " + n + ", so " + entry.getValue() + " might not resume successfully");
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

/**
Expand Down Expand Up @@ -119,8 +121,11 @@
/** Do a brute-force scan for the enclosing blocks **/
BlockStartNode bruteForceScanForEnclosingBlock(@NonNull final FlowNode node) {
FlowNode current = node;

Set<String> visited = new HashSet<>();
while (!(current instanceof FlowStartNode)) { // Hunt back for enclosing blocks, a potentially expensive operation
if (!visited.add(current.getId())) {

Check warning on line 126 in src/main/java/org/jenkinsci/plugins/workflow/graph/StandardGraphLookupView.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Partially covered line

Line 126 is only partially covered, one branch is missing
throw new IllegalStateException("Cycle in flow graph for " + node.getExecution() + " involving " + current);

Check warning on line 127 in src/main/java/org/jenkinsci/plugins/workflow/graph/StandardGraphLookupView.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 127 is not covered by tests
}
if (current instanceof BlockEndNode) {
// Hop over the block to the start
BlockStartNode start = ((BlockEndNode) current).getStartNode();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@
import edu.umd.cs.findbugs.annotations.NonNull;
import net.jcip.annotations.NotThreadSafe;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

Expand Down Expand Up @@ -85,9 +87,12 @@ protected void setHeads(@NonNull Collection<FlowNode> heads) {
@CheckForNull
protected FlowNode jumpBlockScan(@CheckForNull FlowNode node, @NonNull Collection<FlowNode> blacklistNodes) {
FlowNode candidate = node;

Set<String> visited = new HashSet<>();
// Find the first candidate node preceding a block... and filtering by blacklist
while (candidate instanceof BlockEndNode) {
if (!visited.add(candidate.getId())) {
throw new IllegalStateException("Cycle in flow graph for " + candidate.getExecution() + " involving " + candidate);
}
candidate = ((BlockEndNode) candidate).getStartNode();
if (blacklistNodes.contains(candidate)) {
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,14 @@
import java.io.Serializable;
import java.lang.ref.WeakReference;
import java.util.Collections;
import java.util.Objects;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.LogRecord;
import java.util.stream.Collectors;
import jenkins.model.Jenkins;
import org.jenkinsci.plugins.workflow.cps.CpsFlowDefinition;
import org.jenkinsci.plugins.workflow.job.WorkflowJob;
Expand All @@ -65,6 +68,7 @@
import org.jvnet.hudson.test.JenkinsSessionRule;
import org.jvnet.hudson.test.MemoryAssert;
import org.jvnet.hudson.test.TestExtension;
import org.jvnet.hudson.test.recipes.LocalData;
import org.kohsuke.stapler.DataBoundConstructor;

public class FlowExecutionListTest {
Expand Down Expand Up @@ -164,6 +168,35 @@ public class FlowExecutionListTest {
});
}

@LocalData
@Test public void resumeStepExecutionsWithCorruptFlowGraphWithCycle() throws Throwable {
// LocalData created using the following snippet while the build was waiting in the _second_ sleep, except
// for build.xml, which was captured during the sleep step. The StepEndNode for the stage was then adjusted to
// have its startId point to the timeout step's StepStartNode, creating a loop.
/*
sessions.then(r -> {
var stuck = r.createProject(WorkflowJob.class);
stuck.setDefinition(new CpsFlowDefinition("stage('stage') { sleep 30 }; timeout(time: 10) { sleep 30 }", true));
var b = stuck.scheduleBuild2(0).waitForStart();
System.out.println(b.getRootDir());
r.waitForCompletion(b);
});
*/
logging.capture(50);
sessions.then(r -> {
var p = r.jenkins.getItemByFullName("test0", WorkflowJob.class);
var b = p.getBuildByNumber(1);
r.waitForCompletion(b);
assertThat(logging.getMessages(), hasItem(containsString("Unable to compute enclosing blocks")));
var loggedExceptions = logging.getRecords().stream()
.map(LogRecord::getThrown)
.filter(Objects::nonNull)
.map(Throwable::toString)
.collect(Collectors.toList());
assertThat(loggedExceptions, hasItem(containsString("Cycle in flow graph")));
});
}

@Test public void stepExecutionIteratorDoesNotLeakBuildsWhenOneIsStuck() throws Throwable {
sessions.then(r -> {
var notStuck = r.createProject(WorkflowJob.class, "not-stuck");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?xml version='1.1' encoding='UTF-8'?>
<flow-build plugin="[email protected]_ec82f">
<queueId>1</queueId>
<timestamp>1722979974600</timestamp>
<startTime>1722979974615</startTime>
<duration>0</duration>
<charset>UTF-8</charset>
<keepLog>false</keepLog>
<execution class="org.jenkinsci.plugins.workflow.cps.CpsFlowExecution">
<result>SUCCESS</result>
<script>stage(&apos;stage&apos;) { sleep 30 }; timeout(time: 10) { sleep 30 }</script>
<loadedScripts class="linked-hash-map"/>
<durabilityHint>MAX_SURVIVABILITY</durabilityHint>
<timings class="map">
<entry>
<string>flowNode</string>
<long>101709541</long>
</entry>
<entry>
<string>classLoad</string>
<long>124446251</long>
</entry>
<entry>
<string>run</string>
<long>200459289</long>
</entry>
<entry>
<string>parse</string>
<long>166818958</long>
</entry>
<entry>
<string>saveProgram</string>
<long>57936125</long>
</entry>
</timings>
<internalCalls class="sorted-set"/>
<sandbox>true</sandbox>
<iota>5</iota>
<head>1:5</head>
<start>2</start>
<done>false</done>
<resumeBlocked>false</resumeBlocked>
</execution>
<completed>false</completed>
<checkouts class="hudson.util.PersistedList"/>
</flow-build>
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Started
ha:////4LSn6sjy3SQJvLr4M0Jcw5zYiu9jzzxAhCQhf5ciKGcGAAAAoh+LCAAAAAAAAP9tjTEOwjAQBM8BClpKHuFItIiK1krDC0x8GCfWnbEdkooX8TX+gCESFVvtrLSa5wtWKcKBo5UdUu8otU4GP9jS5Mixv3geZcdn2TIl9igbHBs2eJyx4YwwR1SwULBGaj0nRzbDRnX6rmuvydanHMu2V1A5c4MHCFXMWcf8hSnC9jqYxPTz/BXAFEIGsfuclm8zQVqFvQAAAA==[Pipeline] Start of Pipeline
ha:////4FLPcBhXXN+T+Uy5Bq+9NjiuG45smS/CK+MQ8sUKcTBsAAAApR+LCAAAAAAAAP9tjTEOwjAUQ3+KOrAycohUghExsUZZOEFIQkgb/d8mKe3EibgadyBQiQlLlmxL1nu+oE4RjhQdby12HpP2vA+jK4lPFLtroIm3dOGaMFGwXNpJkrGnpUrKFhaxClYC1hZ1oOTRZdiIVt1VExS65pxj2Q4CKm8GeAAThZxVzN8yR9jeRpMIf5y/AJj7DGxXvP/86jduZBmjwAAAAA==[Pipeline] stage
ha:////4MPOLP4Go7JuW3NkAKhksIfyE+NroTrcISNM8xfRFLQ8AAAApR+LCAAAAAAAAP9tjTEOwjAUQ3+KOrAycoh0gA0xsUZZOEFIQkgb/d8mKe3EibgadyBQiQlLlmxL1nu+oE4RjhQdby12HpP2vA+jK4lPFLtroIm3dOGaMFGwXNpJkrGnpUrKFhaxClYC1hZ1oOTRZdiIVt1VExS65pxj2Q4CKm8GeAAThZxVzN8yR9jeRpMIf5y/AJj7DGxXvP/86jfoP95RwAAAAA==[Pipeline] { (stage)
ha:////4E53KhegWm+s/q0TJkIC5MI9kTq62Eqnzz2Qdi1URRTJAAAAoh+LCAAAAAAAAP9tjTEOAiEURD9rLGwtPQTbaGWsbAmNJ0AWEZb8zwLrbuWJvJp3kLiJlZNMMm+a93rDOic4UbLcG+wdZu14DKOti0+U+lugiXu6ck2YKRguzSSpM+cFJRUDS1gDKwEbgzpQdmgLbIVXD9UGhba9lFS/o4DGdQM8gYlqLiqVL8wJdvexy4Q/z18BzLEA29ce4gdpL1fxvAAAAA==[Pipeline] sleep
Sleeping for 30 sec
ha:////4G8hLCAAqKEvMe92YhTNPJa4MSOZpWK2lhgTDgEHbCXUAAAAoh+LCAAAAAAAAP9tjTEOAiEURD9rLGwtPQTbGBtjZUtoPAGyiLDkfxZYdytP5NW8g8RNrJxkknnTvNcb1jnBiZLl3mDvMGvHYxhtXXyi1N8CTdzTlWvCTMFwaSZJnTkvKKkYWMIaWAnYGNSBskNbYCu8eqg2KLTtpaT6HQU0rhvgCUxUc1GpfGFOsLuPXSb8ef4KYI6xADvU7j8OXFZ7vAAAAA==[Pipeline] }
ha:////4AnScT3OQumBbV+luAyxvhEcCl/8MozDCcq/aC6iNLpjAAAAox+LCAAAAAAAAP9tjTEOAiEURD9rLGwtPQRbWFgYK1tC4wmQRYQl/7PAult5Iq/mHSRuYuUkk8yb5r3esM4JTpQs9wZ7h1k7HsNo6+ITpf4WaOKerlwTZgqGSzNJ6sx5QUnFwBLWwErAxqAOlB3aAlvh1UO1QaFtLyXV7yigcd0AT2CimotK5Qtzgt197DLhz/NXAHOMBdihdv8BHeBS2LwAAAA=[Pipeline] // stage
ha:////4PTj6M4JscP6Gdk49EfTAaLMCwLZYd9IOq+brFvOiJPAAAAAph+LCAAAAAAAAP9tjUEKwjAURH8qXbh16SFScCWIK7chG08QkxjThv/bJLVdeSKv5h2MFlw5MDAzMLznC+oU4UjR8dZi5zFpz/swupL4RLG7Bpp4SxeuCRMFy6WdJBl7WqqkbGERq2AlYG1RB0oeXYaNaNVdNUGha845lu0goPJmgAcwUchZxfwtc4TtbTSJ8Mf5C4C5z8B2xfvPr34DrZTeycAAAAA=[Pipeline] timeout
Timeout set to expire in 10 min
ha:////4M9FYx/jFzgoF1Ji4m6uzCtxEvJBQzBzYoKBBTbKepUTAAAApR+LCAAAAAAAAP9tjTEOwjAUQ3+KOrAycoh0BSEm1igLJwhJCGmj/9skpZ04EVfjDgQqMWHJkm3Jes8X1CnCkaLjrcXOY9Ke92F0JfGJYncNNPGWLlwTJgqWSztJMva0VEnZwiJWwUrA2qIOlDy6DBvRqrtqgkLXnHMs20FA5c0AD2CikLOK+VvmCNvbaBLhj/MXAHOfge2K959f/QbB16AVwAAAAA==[Pipeline] {
ha:////4O/MG/IybiYM4oG30m877qNjUwTyRLwWY87qTVAOsZwoAAAAoh+LCAAAAAAAAP9tjTEOwjAQBDdBFLSUPMKBEiEqWisNLzCJMU6su2BfSCpexNf4AxGRqNhqZ5p5vbFMEUeOTjWWWk+p8qoLvZueGji218CDaviiKqbEwarSDiXX9jRjyWIxL8ux0FhZqgInT06w1o15mCIYcsVZ4uQOGrmv73gi01NZTJQvjBGbW18npl/nbwBjJ8j2gny37T6VOYoyvQAAAA==[Pipeline] sleep
Sleeping for 30 sec
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1232 5
1252
2157 8
2189
2789 10
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version='1.1' encoding='UTF-8'?>
<Tag plugin="[email protected]_ed8a_573">
<node class="cps.n.StepAtomNode" plugin="[email protected]_2c931e7935">
<parentIds>
<string>9</string>
</parentIds>
<id>10</id>
<descriptorId>org.jenkinsci.plugins.workflow.steps.SleepStep</descriptorId>
</node>
<actions>
<cps.a.ArgumentsActionImpl plugin="[email protected]_2c931e7935">
<arguments>
<entry>
<string>time</string>
<long>30</long>
</entry>
</arguments>
<sensitiveVariables/>
<isUnmodifiedBySanitization>true</isUnmodifiedBySanitization>
</cps.a.ArgumentsActionImpl>
<wf.a.TimingAction>
<startTime>1722980005296</startTime>
</wf.a.TimingAction>
<s.a.LogStorageAction/>
</actions>
</Tag>
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?xml version='1.1' encoding='UTF-8'?>
<Tag plugin="[email protected]_ed8a_573">
<node class="org.jenkinsci.plugins.workflow.graph.FlowStartNode">
<parentIds/>
<id>2</id>
</node>
<actions>
<wf.a.TimingAction>
<startTime>1722979974868</startTime>
</wf.a.TimingAction>
</actions>
</Tag>
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version='1.1' encoding='UTF-8'?>
<Tag plugin="[email protected]_ed8a_573">
<node class="cps.n.StepStartNode" plugin="[email protected]_2c931e7935">
<parentIds>
<string>2</string>
</parentIds>
<id>3</id>
<descriptorId>org.jenkinsci.plugins.workflow.support.steps.StageStep</descriptorId>
</node>
<actions>
<s.a.LogStorageAction/>
<cps.a.ArgumentsActionImpl plugin="[email protected]_2c931e7935">
<arguments>
<entry>
<string>name</string>
<string>stage</string>
</entry>
</arguments>
<sensitiveVariables/>
<isUnmodifiedBySanitization>true</isUnmodifiedBySanitization>
</cps.a.ArgumentsActionImpl>
<wf.a.TimingAction>
<startTime>1722979974972</startTime>
</wf.a.TimingAction>
</actions>
</Tag>
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?xml version='1.1' encoding='UTF-8'?>
<Tag plugin="[email protected]_ed8a_573">
<node class="cps.n.StepStartNode" plugin="[email protected]_2c931e7935">
<parentIds>
<string>3</string>
</parentIds>
<id>4</id>
<descriptorId>org.jenkinsci.plugins.workflow.support.steps.StageStep</descriptorId>
</node>
<actions>
<wf.a.BodyInvocationAction/>
<wf.a.LabelAction>
<displayName>stage</displayName>
</wf.a.LabelAction>
<wf.a.TimingAction>
<startTime>1722979974988</startTime>
</wf.a.TimingAction>
</actions>
</Tag>
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version='1.1' encoding='UTF-8'?>
<Tag plugin="[email protected]_ed8a_573">
<node class="cps.n.StepAtomNode" plugin="[email protected]_2c931e7935">
<parentIds>
<string>4</string>
</parentIds>
<id>5</id>
<descriptorId>org.jenkinsci.plugins.workflow.steps.SleepStep</descriptorId>
</node>
<actions>
<cps.a.ArgumentsActionImpl plugin="[email protected]_2c931e7935">
<arguments>
<entry>
<string>time</string>
<long>30</long>
</entry>
</arguments>
<sensitiveVariables/>
<isUnmodifiedBySanitization>true</isUnmodifiedBySanitization>
</cps.a.ArgumentsActionImpl>
<wf.a.TimingAction>
<startTime>1722979975077</startTime>
</wf.a.TimingAction>
<s.a.LogStorageAction/>
</actions>
</Tag>
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version='1.1' encoding='UTF-8'?>
<Tag plugin="[email protected]_ed8a_573">
<node class="cps.n.StepEndNode" plugin="[email protected]_2c931e7935">
<parentIds>
<string>5</string>
</parentIds>
<id>6</id>
<startId>4</startId>
</node>
<actions>
<wf.a.BodyInvocationAction/>
<wf.a.TimingAction>
<startTime>1722980005117</startTime>
</wf.a.TimingAction>
</actions>
</Tag>
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version='1.1' encoding='UTF-8'?>
<Tag plugin="[email protected]_ed8a_573">
<node class="cps.n.StepEndNode" plugin="[email protected]_2c931e7935">
<parentIds>
<string>6</string>
</parentIds>
<id>7</id>
<startId>8</startId> <!-- Manually edited to simulate corruption observed in the real world. -->
</node>
<actions>
<wf.a.TimingAction>
<startTime>1722980005177</startTime>
</wf.a.TimingAction>
</actions>
</Tag>
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version='1.1' encoding='UTF-8'?>
<Tag plugin="[email protected]_ed8a_573">
<node class="cps.n.StepStartNode" plugin="[email protected]_2c931e7935">
<parentIds>
<string>7</string>
</parentIds>
<id>8</id>
<descriptorId>org.jenkinsci.plugins.workflow.steps.TimeoutStep</descriptorId>
</node>
<actions>
<s.a.LogStorageAction/>
<cps.a.ArgumentsActionImpl plugin="[email protected]_2c931e7935">
<arguments>
<entry>
<string>time</string>
<int>10</int>
</entry>
</arguments>
<sensitiveVariables/>
<isUnmodifiedBySanitization>true</isUnmodifiedBySanitization>
</cps.a.ArgumentsActionImpl>
<wf.a.TimingAction>
<startTime>1722980005227</startTime>
</wf.a.TimingAction>
</actions>
</Tag>
Loading
Loading