diff --git a/clustercontroller-apps/src/test/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerTest.java b/clustercontroller-apps/src/test/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerTest.java new file mode 100644 index 000000000000..cfda92b472dd --- /dev/null +++ b/clustercontroller-apps/src/test/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerTest.java @@ -0,0 +1,70 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.apps.clustercontroller; + +import com.yahoo.jdisc.Metric; +import com.yahoo.vdslib.distribution.ConfiguredNode; +import com.yahoo.vespa.clustercontroller.core.FleetController; +import com.yahoo.vespa.clustercontroller.core.FleetControllerOptions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.Map; +import java.util.Set; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Doesn't really test cluster controller, but runs some lines of code. + * System tests verifies that container can load it.. + */ +public class ClusterControllerTest { + + private FleetControllerOptions options = new FleetControllerOptions("storage", Set.of(new ConfiguredNode(0, false))); + + private final Metric metric = new Metric() { + @Override + public void set(String s, Number number, Context context) {} + @Override + public void add(String s, Number number, Context context) {} + @Override + public Context createContext(Map stringMap) { return null; } + }; + + @BeforeEach + public void setUp() { + options = new FleetControllerOptions("storage", Set.of(new ConfiguredNode(0, false))); + options.zooKeeperServerAddress = null; + options.slobrokConfigId = "raw:"; + options.slobrokConnectionSpecs = null; + } + + @Test + void testSimple() throws Exception { + ClusterController cc = new ClusterController(); + cc.setOptions(options, metric); + cc.setOptions(options, metric); + assertEquals(1, cc.getFleetControllers().size()); + assertNotNull(cc.get("storage")); + assertNull(cc.get("music")); + cc.countdown(); + assertTrue(cc.getController("storage").isRunning()); + cc.countdown(); + assertFalse(cc.getController("storage").isRunning()); + } + + @Test + void testShutdownException() throws Exception { + ClusterController cc = new ClusterController() { + void shutdownController(FleetController controller) throws Exception { + throw new Exception("Foo"); + } + }; + cc.setOptions(options, metric); + cc.countdown(); + } + +} diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGenerator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGenerator.java index 403c5c6089b3..75c6dbe6cecc 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGenerator.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGenerator.java @@ -41,6 +41,7 @@ static class Params { Params() { } + // FIXME de-dupe static Map buildTransitionTimeMap(int distributorTransitionTimeMs, int storageTransitionTimeMs) { Map maxTransitionTime = new TreeMap<>(); maxTransitionTime.put(com.yahoo.vdslib.state.NodeType.DISTRIBUTOR, distributorTransitionTimeMs); @@ -64,7 +65,7 @@ Params transitionTimes(Map timesMs) { this.transitionTimes = timesMs; return this; } - Params currentTimeInMillis(long currentTimeMs) { + Params currentTimeInMilllis(long currentTimeMs) { this.currentTimeInMillis = currentTimeMs; return this; } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 3f3bf62bf4d2..29887666a1b7 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -1006,7 +1006,7 @@ private void scheduleVersionDependentTasksForFutureCompletion(int completeAtVers private AnnotatedClusterState computeCurrentAnnotatedState() { ClusterStateGenerator.Params params = ClusterStateGenerator.Params.fromOptions(options); - params.currentTimeInMillis(timer.getCurrentTimeInMillis()) + params.currentTimeInMilllis(timer.getCurrentTimeInMillis()) .cluster(cluster) .lowestObservedDistributionBitCount(stateVersionTracker.getLowestObservedDistributionBits()); return ClusterStateGenerator.generatedStateFrom(params); @@ -1200,7 +1200,7 @@ public void waitForNodesInSlobrok(int distNodeCount, int storNodeCount, Duration while (true) { int distCount = 0, storCount = 0; for (NodeInfo info : cluster.getNodeInfos()) { - if (info.isInSlobrok()) { + if (!info.isRpcAddressOutdated()) { if (info.isDistributor()) ++distCount; else ++storCount; } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java index 746432f1d382..2993784dba4b 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java @@ -3,6 +3,7 @@ import com.yahoo.collections.Pair; import com.yahoo.jrt.Target; +import java.util.logging.Level; import com.yahoo.vdslib.distribution.Distribution; import com.yahoo.vdslib.distribution.Group; import com.yahoo.vdslib.state.ClusterState; @@ -12,12 +13,12 @@ import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import com.yahoo.vespa.clustercontroller.core.rpc.RPCCommunicator; + import java.io.PrintWriter; import java.io.StringWriter; import java.util.LinkedList; import java.util.List; import java.util.TreeMap; -import java.util.logging.Level; import java.util.logging.Logger; /** @@ -243,13 +244,11 @@ public int getNodeIndex() { public ContentCluster getCluster() { return cluster; } - /** Returns true if the node is registered in slobrok */ - public boolean isInSlobrok() { return lastSeenInSlobrok == null; } - - /** Returns true if the node is NOT registered in slobrok */ - public boolean isNotInSlobrok() { return ! isInSlobrok(); } + /** Returns true if the node is currently registered in slobrok */ + // FIXME why is this called "isRpcAddressOutdated" then??? + public boolean isRpcAddressOutdated() { return lastSeenInSlobrok != null; } - public Long lastSeenInSlobrok() { return lastSeenInSlobrok; } + public Long getRpcAddressOutdatedTimestamp() { return lastSeenInSlobrok; } public void abortCurrentNodeStateRequests() { for(Pair it : pendingNodeStateRequests) { @@ -276,7 +275,7 @@ public NodeState getWantedState() { return wantedState; } - /** Returns the wanted state set directly by a user (i.e. not configured) */ + /** Returns the wanted state set directly by a user (i.e not configured) */ public NodeState getUserWantedState() { return wantedState; } public long getTimeOfFirstFailingConnectionAttempt() { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java index 68e46414c22c..69e97de84f9d 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java @@ -3,13 +3,14 @@ import com.yahoo.jrt.ErrorCode; import com.yahoo.jrt.Target; +import java.util.logging.Level; import com.yahoo.vdslib.state.NodeState; import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import com.yahoo.vespa.clustercontroller.core.listeners.NodeListener; + import java.util.LinkedList; import java.util.List; -import java.util.logging.Level; import java.util.logging.Logger; /** @@ -62,10 +63,10 @@ public boolean sendMessages(ContentCluster cluster, Communicator communicator, N if (requestTime != null && (currentTime - requestTime < nodeStateRequestTimeoutMS)) continue; // pending request if (info.getTimeForNextStateRequestAttempt() > currentTime) continue; // too early - if (info.getRpcAddress() == null || info.isNotInSlobrok()) { // Cannot query state of node without RPC address or not in slobrok + if (info.getRpcAddress() == null || info.isRpcAddressOutdated()) { // Cannot query state of node without RPC address log.log(Level.FINE, () -> "Not sending getNodeState request to node " + info.getNode() + ": Not in slobrok"); NodeState reportedState = info.getReportedState().clone(); - if (( ! reportedState.getState().equals(State.DOWN) && currentTime - info.lastSeenInSlobrok() > maxSlobrokDisconnectGracePeriod) + if (( ! reportedState.getState().equals(State.DOWN) && currentTime - info.getRpcAddressOutdatedTimestamp() > maxSlobrokDisconnectGracePeriod) || reportedState.getState().equals(State.STOPPING)) // Don't wait for grace period if we expect node to be stopping { log.log(Level.FINE, () -> "Setting reported state to DOWN " @@ -74,8 +75,8 @@ public boolean sendMessages(ContentCluster cluster, Communicator communicator, N : "as node has been out of slobrok longer than " + maxSlobrokDisconnectGracePeriod + ".")); if (reportedState.getState().oneOf("iur") || ! reportedState.hasDescription()) { StringBuilder sb = new StringBuilder().append("Set node down as it has been out of slobrok for ") - .append(currentTime - info.lastSeenInSlobrok()).append(" ms which is more than the max limit of ") - .append(maxSlobrokDisconnectGracePeriod).append(" ms."); + .append(currentTime - info.getRpcAddressOutdatedTimestamp()).append(" ms which is more than the max limit of ") + .append(maxSlobrokDisconnectGracePeriod).append(" ms."); reportedState.setDescription(sb.toString()); } reportedState.setState(State.DOWN); @@ -180,7 +181,7 @@ private NodeState handleError(GetNodeStateRequest req, NodeInfo info, long curre newState.setState(State.DOWN); } else if (msg.equals("jrt: Connection closed by peer") || msg.equals("Connection reset by peer")) { msg = "Connection error: Closed at other end. (Node or switch likely shut down)"; - if (info.isNotInSlobrok()) { + if (info.isRpcAddressOutdated()) { msg += " Node is no longer in slobrok."; } if (info.getReportedState().getState().oneOf("ui")) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java index 9897e3cf04c4..4c8325924226 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java @@ -329,7 +329,7 @@ private boolean nodeStillUnavailableAfterTransitionTimeExceeded( { return currentStateInSystem.getState().equals(State.MAINTENANCE) && node.getWantedState().above(new NodeState(node.getNode().getType(), State.DOWN)) - && (lastReportedState.getState().equals(State.DOWN) || node.isNotInSlobrok()) + && (lastReportedState.getState().equals(State.DOWN) || node.isRpcAddressOutdated()) && node.getTransitionTime() + maxTransitionTime.get(node.getNode().getType()) < currentTime; } @@ -339,14 +339,14 @@ private boolean reportDownIfOutdatedSlobrokNode(ClusterState currentClusterState NodeInfo node, NodeState lastReportedState) { - if (node.isNotInSlobrok() + if (node.isRpcAddressOutdated() && !lastReportedState.getState().equals(State.DOWN) - && node.lastSeenInSlobrok() + maxSlobrokDisconnectGracePeriod <= currentTime) + && node.getRpcAddressOutdatedTimestamp() + maxSlobrokDisconnectGracePeriod <= currentTime) { final String desc = String.format( "Set node down as it has been out of slobrok for %d ms which " + "is more than the max limit of %d ms.", - currentTime - node.lastSeenInSlobrok(), + currentTime - node.getRpcAddressOutdatedTimestamp(), maxSlobrokDisconnectGracePeriod); node.abortCurrentNodeStateRequests(); NodeState state = lastReportedState.clone(); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java index 004cefe1e3c5..2359e4d8389a 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java @@ -161,7 +161,7 @@ private void processSetClusterStateResponses() { } private static boolean nodeIsReachable(NodeInfo node) { - if (node.getRpcAddress() == null || node.isNotInSlobrok()) { + if (node.getRpcAddress() == null || node.isRpcAddressOutdated()) { return false; // Can't set state on nodes we don't know where are } if (node.getReportedState().getState() == State.MAINTENANCE || diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RpcServer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RpcServer.java index 06f6777ab803..dc21693dcdb6 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RpcServer.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RpcServer.java @@ -199,7 +199,7 @@ public boolean handleRpcRequests(ContentCluster cluster, ClusterState systemStat if (!e.getMessage().equals(lastConnectError) || time - lastConnectErrorTime > 60 * 1000) { lastConnectError = e.getMessage(); lastConnectErrorTime = time; - log.log(Level.WARNING, "Failed to initialize RPC server socket: " + e.getMessage()); + log.log(Level.WARNING, "Failed to initailize RPC server socket: " + e.getMessage()); } } } @@ -227,8 +227,8 @@ public boolean handleRpcRequests(ContentCluster cluster, ClusterState systemStat } if (req.methodName().equals("getNodeList")) { log.log(Level.FINE, "Resolving RPC getNodeList request"); - List slobrok = new ArrayList<>(); - List rpc = new ArrayList<>(); + List slobrok = new ArrayList(); + List rpc = new ArrayList(); for(NodeInfo node : cluster.getNodeInfos()) { String s1 = node.getSlobrokAddress(); String s2 = node.getRpcAddress(); @@ -236,8 +236,8 @@ public boolean handleRpcRequests(ContentCluster cluster, ClusterState systemStat slobrok.add(s1); rpc.add(s2 == null ? "" : s2); } - req.returnValues().add(new StringArray(slobrok.toArray(new String[0]))); - req.returnValues().add(new StringArray(rpc.toArray(new String[0]))); + req.returnValues().add(new StringArray(slobrok.toArray(new String[slobrok.size()]))); + req.returnValues().add(new StringArray(rpc.toArray(new String[rpc.size()]))); req.returnRequest(); } else if (req.methodName().equals("getSystemState")) { log.log(Level.FINE, "Resolving RPC getSystemState request"); @@ -280,7 +280,7 @@ public boolean handleRpcRequests(ContentCluster cluster, ClusterState systemStat NodeState oldState = node.getUserWantedState(); String message = (nodeState.getState().equals(State.UP) ? "Clearing wanted nodeState for node " + node - : "New wantedstate '" + nodeState + "' stored for node " + node); + : "New wantedstate '" + nodeState.toString() + "' stored for node " + node); if (!oldState.equals(nodeState) || !oldState.getDescription().equals(nodeState.getDescription())) { if (!nodeState.getState().validWantedNodeState(nodeType)) { throw new IllegalStateException("State " + nodeState.getState() @@ -289,7 +289,7 @@ public boolean handleRpcRequests(ContentCluster cluster, ClusterState systemStat node.setWantedState(nodeState); changeListener.handleNewWantedNodeState(node, nodeState); } else { - message = "Node " + node + " already had wanted state " + nodeState; + message = "Node " + node + " already had wanted state " + nodeState.toString(); log.log(Level.FINE, message); } req.returnValues().add(new StringValue(message)); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java index 8393e776fc2d..c88bf71af091 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java @@ -150,7 +150,7 @@ public boolean updateCluster(ContentCluster cluster, SlobrokListener listener) { } cluster.setSlobrokGenerationCount(mirrorVersion); for (NodeInfo nodeInfo : cluster.getNodeInfos()) { - if (slobrokNodes.containsKey(nodeInfo.getNode()) && nodeInfo.isNotInSlobrok()) { + if (slobrokNodes.containsKey(nodeInfo.getNode()) && nodeInfo.isRpcAddressOutdated()) { context.log(log, Level.WARNING, "Node " + nodeInfo @@ -187,7 +187,7 @@ private void detectNewAndMissingNodes( newNext = null; } else if (newNext == null || newNext.node.compareTo(oldNext.getNode()) > 0) { assert(slobrokNodes.get(oldNext.getNode()) == null); - if (oldNext.isInSlobrok() && oldNext.getRpcAddress() != null) { + if (!oldNext.isRpcAddressOutdated() && oldNext.getRpcAddress() != null) { missingNodeInfos.add(oldNext); } oldNext = null; @@ -195,7 +195,7 @@ private void detectNewAndMissingNodes( assert(newNext.rpcAddress != null); if (oldNext.getRpcAddress() == null || !oldNext.getRpcAddress().equals(newNext.rpcAddress)) { alteredRpcAddress.add(newNext); - } else if (oldNext.isNotInSlobrok()) { + } else if (oldNext.isRpcAddressOutdated()) { returningRpcAddressNodeInfos.add(oldNext); } oldNext = null; diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRenderer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRenderer.java index d840529d361b..1081f3e77cd2 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRenderer.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRenderer.java @@ -243,7 +243,7 @@ private void addRpcAddress(NodeInfo nodeInfo, HtmlTable.Row row) { row.addCell(new HtmlTable.Cell("-").addProperties(ERROR_PROPERTY)); } else { row.addCell(new HtmlTable.Cell(HtmlTable.escape(nodeInfo.getRpcAddress()))); - if (nodeInfo.isNotInSlobrok()) { + if (nodeInfo.isRpcAddressOutdated()) { row.getLastCell().addProperties(WARNING_PROPERTY); } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java index eccdb4d7832c..f8d41405e85e 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java @@ -9,10 +9,13 @@ import com.yahoo.vdslib.state.NodeType; import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.listeners.NodeListener; + import java.util.Collection; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; +import java.util.TreeMap; import static org.mockito.Mockito.mock; @@ -21,6 +24,7 @@ public class ClusterFixture { public final ContentCluster cluster; public final Distribution distribution; public final FakeTimer timer; + private final EventLogInterface eventLog; final StateChangeHandler nodeStateChangeHandler; private final ClusterStateGenerator.Params params = new ClusterStateGenerator.Params(); @@ -28,8 +32,9 @@ public ClusterFixture(ContentCluster cluster, Distribution distribution) { this.cluster = cluster; this.distribution = distribution; this.timer = new FakeTimer(); + this.eventLog = mock(EventLogInterface.class); var context = new FleetControllerContextImpl(new FleetControllerId(cluster.getName(), 0)); - this.nodeStateChangeHandler = new StateChangeHandler(context, timer, mock(EventLogInterface.class)); + this.nodeStateChangeHandler = new StateChangeHandler(context, timer, eventLog); this.params.cluster(this.cluster); } @@ -145,6 +150,13 @@ public ClusterFixture assignDummyRpcAddresses() { return this; } + static Map buildTransitionTimeMap(int distributorTransitionTime, int storageTransitionTime) { + Map maxTransitionTime = new TreeMap<>(); + maxTransitionTime.put(NodeType.DISTRIBUTOR, distributorTransitionTime); + maxTransitionTime.put(NodeType.STORAGE, storageTransitionTime); + return maxTransitionTime; + } + void disableTransientMaintenanceModeOnDown() { this.params.transitionTimes(0); } @@ -162,7 +174,7 @@ ClusterFixture markNodeAsConfigRetired(int nodeIndex) { } AnnotatedClusterState annotatedGeneratedClusterState() { - params.currentTimeInMillis(timer.getCurrentTimeInMillis()); + params.currentTimeInMilllis(timer.getCurrentTimeInMillis()); return ClusterStateGenerator.generatedStateFrom(params); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGeneratorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGeneratorTest.java index ef1d676fc4a7..78911c414a2a 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGeneratorTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateGeneratorTest.java @@ -24,7 +24,7 @@ public class ClusterStateGeneratorTest { private static AnnotatedClusterState generateFromFixtureWithDefaultParams(ClusterFixture fixture) { final ClusterStateGenerator.Params params = new ClusterStateGenerator.Params(); params.cluster = fixture.cluster; - params.transitionTimes = ClusterStateGenerator.Params.buildTransitionTimeMap(0, 0); + params.transitionTimes = ClusterFixture.buildTransitionTimeMap(0, 0); params.currentTimeInMillis = 0; return ClusterStateGenerator.generatedStateFrom(params); } @@ -257,7 +257,7 @@ void config_retired_mode_is_overridden_by_worse_wanted_state() { private void do_test_change_within_node_transition_time_window_generates_maintenance(State reportedState) { final ClusterFixture fixture = ClusterFixture.forFlatCluster(5).bringEntireClusterUp(); final ClusterStateGenerator.Params params = fixture.generatorParams() - .currentTimeInMillis(10_000) + .currentTimeInMilllis(10_000) .transitionTimes(2000); fixture.reportStorageNodeState(1, reportedState); @@ -292,7 +292,7 @@ void reported_stopping_node_within_transition_time_has_maintenance_generated_sta void reported_node_down_after_transition_time_has_down_generated_state() { final ClusterFixture fixture = ClusterFixture.forFlatCluster(5).bringEntireClusterUp(); final ClusterStateGenerator.Params params = fixture.generatorParams() - .currentTimeInMillis(11_000) + .currentTimeInMilllis(11_000) .transitionTimes(2000); fixture.reportStorageNodeState(1, State.DOWN); @@ -309,7 +309,7 @@ void reported_node_down_after_transition_time_has_down_generated_state() { void distributor_nodes_are_not_implicitly_transitioned_to_maintenance_mode() { final ClusterFixture fixture = ClusterFixture.forFlatCluster(5).bringEntireClusterUp(); final ClusterStateGenerator.Params params = fixture.generatorParams() - .currentTimeInMillis(10_000) + .currentTimeInMilllis(10_000) .transitionTimes(2000); fixture.reportDistributorNodeState(2, State.DOWN); @@ -326,7 +326,7 @@ void distributor_nodes_are_not_implicitly_transitioned_to_maintenance_mode() { void transient_maintenance_mode_does_not_override_wanted_down_state() { final ClusterFixture fixture = ClusterFixture.forFlatCluster(5).bringEntireClusterUp(); final ClusterStateGenerator.Params params = fixture.generatorParams() - .currentTimeInMillis(10_000) + .currentTimeInMilllis(10_000) .transitionTimes(2000); fixture.proposeStorageNodeWantedState(2, State.DOWN); @@ -343,7 +343,7 @@ void transient_maintenance_mode_does_not_override_wanted_down_state() { void reported_down_retired_node_within_transition_time_transitions_to_maintenance() { final ClusterFixture fixture = ClusterFixture.forFlatCluster(5).bringEntireClusterUp(); final ClusterStateGenerator.Params params = fixture.generatorParams() - .currentTimeInMillis(10_000) + .currentTimeInMilllis(10_000) .transitionTimes(2000); fixture.proposeStorageNodeWantedState(2, State.RETIRED); @@ -787,7 +787,7 @@ private String do_test_storage_node_with_no_init_progress(State wantedState) { final ClusterStateGenerator.Params params = fixture.generatorParams() .maxInitProgressTime(1000) - .currentTimeInMillis(11_000); + .currentTimeInMilllis(11_000); final AnnotatedClusterState state = ClusterStateGenerator.generatedStateFrom(params); return state.toString(); } @@ -891,7 +891,7 @@ void configured_zero_init_progress_time_disables_auto_init_to_down_feature() { final ClusterStateGenerator.Params params = fixture.generatorParams() .maxInitProgressTime(0) - .currentTimeInMillis(11_000); + .currentTimeInMilllis(11_000); final AnnotatedClusterState state = ClusterStateGenerator.generatedStateFrom(params); assertThat(state.toString(), equalTo("distributor:3 storage:3 .0.s:i .0.i:0.5")); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java index 91a348ab1ba9..c39a5c528362 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java @@ -176,9 +176,12 @@ FleetController createFleetController(boolean useFakeTimer, FleetControllerOptio } protected void setUpFleetController(boolean useFakeTimer, FleetControllerOptions options) throws Exception { - if (slobrok == null) - setUpSystem(options); - startFleetController(useFakeTimer); + if (slobrok == null) setUpSystem(options); + if (fleetController == null) { + fleetController = createFleetController(useFakeTimer, options); + } else { + throw new Exception("called setUpFleetcontroller but it was already setup"); + } } void stopFleetController() throws Exception { @@ -189,10 +192,11 @@ void stopFleetController() throws Exception { } void startFleetController(boolean useFakeTimer) throws Exception { - if (fleetController == null) + if (fleetController == null) { fleetController = createFleetController(useFakeTimer, options); - else + } else { log.log(Level.WARNING, "already started fleetcontroller, not starting another"); + } } protected void setUpVdsNodes(boolean useFakeTimer, DummyVdsNodeOptions options) throws Exception { diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java index e4535076e071..16a3e41f1497 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java @@ -18,7 +18,6 @@ import java.time.Instant; import java.util.ArrayList; import java.util.List; -import java.util.Objects; import java.util.concurrent.TimeoutException; import java.util.logging.Level; import java.util.logging.Logger; @@ -39,7 +38,7 @@ public class MasterElectionTest extends FleetControllerTest { private static int defaultZkSessionTimeoutInMillis() { return 30_000; } - protected void setUpFleetControllers(boolean useFakeTimer, FleetControllerOptions options) throws Exception { + protected void setUpFleetController(int count, boolean useFakeTimer, FleetControllerOptions options) throws Exception { if (zooKeeperServer == null) { zooKeeperServer = new ZooKeeperTestServer(); } @@ -48,20 +47,21 @@ protected void setUpFleetControllers(boolean useFakeTimer, FleetControllerOption this.options.zooKeeperSessionTimeout = defaultZkSessionTimeoutInMillis(); this.options.zooKeeperServerAddress = zooKeeperServer.getAddress(); this.options.slobrokConnectionSpecs = getSlobrokConnectionSpecs(slobrok); - this.options.fleetControllerCount = 3; - for (int i = 0; i< this.options.fleetControllerCount; ++i) { + this.options.fleetControllerCount = count; + for (int i=0; i connections, int[] nodes) { - Objects.requireNonNull(reason, "reason cannot be null"); + private void waitForMasterReason(String reason, Integer master, List connections, int[] nodes) { Instant endTime = Instant.now().plus(timeout()); while (Instant.now().isBefore(endTime)) { boolean allOk = true; @@ -298,17 +318,17 @@ private void waitForNoMasterWithExpectedReason(String reason, List conne allOk = false; break; } - if (req.returnValues().get(0).asInt32() != -1) { // -1 means no master, which we are waiting for + if (master != null && master != req.returnValues().get(0).asInt32()) { allOk = false; break; } - if ( ! reason.equals(req.returnValues().get(1).asString())) { + if (reason != null && ! reason.equals(req.returnValues().get(1).asString())) { allOk = false; break; } } if (allOk) return; - try { Thread.sleep(100); } catch (InterruptedException e) { /* ignore */ } + try{ Thread.sleep(100); } catch (InterruptedException e) { /* ignore */ } } throw new IllegalStateException("Did not get master reason '" + reason + "' within timeout of " + timeout()); } @@ -318,7 +338,7 @@ void testGetMaster() throws Exception { startingTest("MasterElectionTest::testGetMaster"); FleetControllerOptions options = defaultOptions("mycluster"); options.masterZooKeeperCooldownPeriod = 3600 * 1000; // An hour - setUpFleetControllers(true, options); + setUpFleetController(3, true, options); waitForMaster(0); supervisor = new Supervisor(new Transport()); @@ -359,10 +379,9 @@ void testGetMaster() throws Exception { timer.advanceTime(300 * 1000); // 5 minutes int[] remainingNodes = {1, 2}; - waitForNoMasterWithExpectedReason( + waitForMasterReason( "2 of 3 nodes agree 1 should be master, but old master cooldown period of 3600000 ms has not passed yet. To ensure it has got time to realize it is no longer master before we elect a new one, currently there is no master.", - connections, - remainingNodes); + -1, connections, remainingNodes); // Verify that fc 1 is not master, and the correct reasons for why not assertFalse(fleetControllers.get(1).isMaster()); @@ -400,12 +419,12 @@ void testReconfigure() throws Exception { startingTest("MasterElectionTest::testReconfigure"); FleetControllerOptions options = defaultOptions("mycluster"); options.masterZooKeeperCooldownPeriod = 1; - setUpFleetControllers(false, options); + setUpFleetController(3, false, options); waitForMaster(0); FleetControllerOptions newOptions = options.clone(); for (int i = 0; i < fleetControllers.size(); ++i) { - FleetControllerOptions nodeOptions = adjustConfig(newOptions, i); + FleetControllerOptions nodeOptions = adjustConfig(newOptions, i, fleetControllers.size()); fleetControllers.get(i).updateOptions(nodeOptions); } waitForMaster(0); @@ -429,7 +448,7 @@ void cluster_state_version_written_to_zookeeper_even_with_empty_send_set() throw options.minRatioOfStorageNodesUp = 0; options.minDistributorNodesUp = 0; options.minStorageNodesUp = 1; - setUpFleetControllers(false, options); + setUpFleetController(3, false, options); setUpVdsNodes(false, new DummyVdsNodeOptions()); fleetController = fleetControllers.get(0); // Required to prevent waitForStableSystem from NPE'ing waitForStableSystem(); @@ -474,7 +493,7 @@ void previously_published_state_is_taken_into_account_for_default_space_when_con options.masterZooKeeperCooldownPeriod = 1; options.minTimeBeforeFirstSystemStateBroadcast = 100000; boolean useFakeTimer = false; - setUpFleetControllers(useFakeTimer, options); + setUpFleetController(3, useFakeTimer, options); setUpVdsNodes(false, new DummyVdsNodeOptions()); fleetController = fleetControllers.get(0); // Required to prevent waitForStableSystem from NPE'ing waitForMaster(0); @@ -517,7 +536,7 @@ void default_space_nodes_not_marked_as_maintenance_when_cluster_has_no_global_do options.clusterHasGlobalDocumentTypes = false; options.masterZooKeeperCooldownPeriod = 1; options.minTimeBeforeFirstSystemStateBroadcast = 100000; - setUpFleetControllers(false, options); + setUpFleetController(3, false, options); setUpVdsNodes(false, new DummyVdsNodeOptions()); fleetController = fleetControllers.get(0); // Required to prevent waitForStableSystem from NPE'ing waitForMaster(0); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java index 32478bf7b4f1..2bea95ab8c5e 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandlerTest.java @@ -97,7 +97,7 @@ private void initialize(Config config) { } private ClusterState currentClusterState() { - params.currentTimeInMillis(clock.getCurrentTimeInMillis()); + params.currentTimeInMilllis(clock.getCurrentTimeInMillis()); return ClusterStateGenerator.generatedStateFrom(params).getClusterState(); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java index deccaf282d63..d6d3e2876a51 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java @@ -9,10 +9,11 @@ import com.yahoo.vespa.clustercontroller.core.DummyVdsNode; import com.yahoo.vespa.clustercontroller.core.FleetController; import com.yahoo.vespa.clustercontroller.core.listeners.SystemStateListener; + import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.List; -import java.util.Objects; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -64,10 +65,10 @@ ClusterState getCurrentState() { class RegexStateMatcher extends StateWait { private final Pattern pattern; - private Collection nodesToCheck = Set.of(); + private Collection nodesToCheck; private ClusterState lastCheckedState; private boolean checkAllSpaces = false; - private Set checkSpaceSubset = Set.of(); + private Set checkSpaceSubset = Collections.emptySet(); RegexStateMatcher(String regex, FleetController fc, Object monitor) { super(fc, monitor); @@ -75,7 +76,6 @@ class RegexStateMatcher extends StateWait { } RegexStateMatcher includeNotifyingNodes(Collection nodes) { - Objects.requireNonNull(nodes, "nodes must be non-null"); nodesToCheck = nodes; return this; } @@ -86,7 +86,6 @@ RegexStateMatcher checkAllSpaces(boolean checkAllSpaces) { } RegexStateMatcher checkSpaceSubset(Set spaces) { - Objects.requireNonNull(spaces, "spaces must be non-null"); this.checkSpaceSubset = spaces; return this; } @@ -100,39 +99,45 @@ private static List statesInBundle(ClusterStateBundle bundle) { @Override public String isConditionMet() { - if (convergedState == null) return "No cluster state defined yet"; - - lastCheckedState = convergedState; - Matcher m = pattern.matcher(lastCheckedState.toString()); - if (!m.matches() && checkSpaceSubset.isEmpty()) return "Cluster state mismatch"; - - for (DummyVdsNode node : nodesToCheck) { - if (node.getClusterState() == null) return "Node " + node + " has not received a cluster state yet"; - - boolean match; - if (checkAllSpaces) { - match = statesInBundle(node.getClusterStateBundle()).stream() - .allMatch(state -> pattern - .matcher(withoutTimestamps(state.toString())) - .matches()); - } else if (!checkSpaceSubset.isEmpty()) { - match = checkSpaceSubset.stream().allMatch(space -> { - String state = node.getClusterStateBundle().getDerivedBucketSpaceStates() - .getOrDefault(space, AnnotatedClusterState.emptyState()).getClusterState().toString(); - return pattern.matcher(withoutTimestamps(state)).matches(); - }); - } else { - match = pattern.matcher(withoutTimestamps(node.getClusterState().toString())).matches(); - } - - if (!match) { - return "Node " + node + " state mismatch.\n wanted: " + pattern + "\n is: " + node.getClusterStateBundle().toString(); - } - if (node.getStateCommunicationVersion() > 0 && !node.hasPendingGetNodeStateRequest()) { - return "Node " + node + " has not received another get node state request yet"; + if (convergedState != null) { + lastCheckedState = convergedState; + Matcher m = pattern.matcher(lastCheckedState.toString()); + if (m.matches() || !checkSpaceSubset.isEmpty()) { + if (nodesToCheck != null) { + for (DummyVdsNode node : nodesToCheck) { + if (node.getClusterState() == null) { + return "Node " + node + " has not received a cluster state yet"; + } + // TODO refactor, simplify + boolean match; + if (checkAllSpaces) { + match = statesInBundle(node.getClusterStateBundle()).stream() + .allMatch(state -> pattern.matcher(withoutTimestamps(state.toString())).matches()); + } else if (!checkSpaceSubset.isEmpty()) { + match = checkSpaceSubset.stream().allMatch(space -> { + String state = node.getClusterStateBundle().getDerivedBucketSpaceStates() + .getOrDefault(space, AnnotatedClusterState.emptyState()).getClusterState().toString(); + return pattern.matcher(withoutTimestamps(state)).matches(); + }); + } else { + match = pattern.matcher(withoutTimestamps(node.getClusterState().toString())).matches(); + } + + if (!match) { + return "Node " + node + " state mismatch.\n wanted: " + pattern + "\n is: " + node.getClusterStateBundle().toString(); + } + if (node.getStateCommunicationVersion() > 0) { + if (!node.hasPendingGetNodeStateRequest()) { + return "Node " + node + " has not received another get node state request yet"; + } + } + } + } + return null; } + return "Cluster state mismatch"; } - return null; + return "No cluster state defined yet"; } /** Returns the given state string with timestamps removed */