Madan Jampani
Committed by Gerrit Code Review

Fix balance-masters functionality in the new LeadershipService based device mastership store

Change-Id: I9f64d514cee7d5a5383fd4c2fa30a8616c97785c
...@@ -67,6 +67,32 @@ public interface LeadershipService { ...@@ -67,6 +67,32 @@ public interface LeadershipService {
67 void withdraw(String path); 67 void withdraw(String path);
68 68
69 /** 69 /**
70 + * If the local nodeId is the leader for specified topic, this method causes it to
71 + * step down temporarily from leadership.
72 + * <p>
73 + * The node will continue to be in contention for leadership and can
74 + * potentially become the leader again if and when it becomes the highest
75 + * priority candidate
76 + * <p>
77 + * If the local nodeId is not the leader, this method will be a noop.
78 + *
79 + * @param path topic for which this controller node should give up leadership
80 + * @return true if this node stepped down from leadership, false otherwise
81 + */
82 + boolean stepdown(String path);
83 +
84 + /**
85 + * Moves the specified nodeId to the top of the candidates list for the topic.
86 + * <p>
87 + * If the node is not a candidate for this topic, this method will be a noop.
88 + *
89 + * @param path leadership topic
90 + * @param nodeId nodeId to make the top candidate
91 + * @return true if nodeId is now the top candidate, false otherwise
92 + */
93 + boolean makeTopCandidate(String path, NodeId nodeId);
94 +
95 + /**
70 * Returns the current leader board. 96 * Returns the current leader board.
71 * 97 *
72 * @return mapping from topic to leadership info. 98 * @return mapping from topic to leadership info.
......
...@@ -73,4 +73,14 @@ public class LeadershipServiceAdapter implements LeadershipService { ...@@ -73,4 +73,14 @@ public class LeadershipServiceAdapter implements LeadershipService {
73 public List<NodeId> getCandidates(String path) { 73 public List<NodeId> getCandidates(String path) {
74 return null; 74 return null;
75 } 75 }
76 +
77 + @Override
78 + public boolean stepdown(String path) {
79 + return false;
80 + }
81 +
82 + @Override
83 + public boolean makeTopCandidate(String path, NodeId nodeId) {
84 + return false;
85 + }
76 } 86 }
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -24,8 +24,6 @@ import org.apache.felix.scr.annotations.Reference; ...@@ -24,8 +24,6 @@ import org.apache.felix.scr.annotations.Reference;
24 import org.apache.felix.scr.annotations.ReferenceCardinality; 24 import org.apache.felix.scr.annotations.ReferenceCardinality;
25 import org.apache.felix.scr.annotations.Service; 25 import org.apache.felix.scr.annotations.Service;
26 import org.onlab.metrics.MetricsService; 26 import org.onlab.metrics.MetricsService;
27 -import org.onosproject.cluster.ClusterEvent;
28 -import org.onosproject.cluster.ClusterEventListener;
29 import org.onosproject.cluster.ClusterService; 27 import org.onosproject.cluster.ClusterService;
30 import org.onosproject.cluster.ControllerNode; 28 import org.onosproject.cluster.ControllerNode;
31 import org.onosproject.cluster.NodeId; 29 import org.onosproject.cluster.NodeId;
...@@ -52,8 +50,6 @@ import java.util.Iterator; ...@@ -52,8 +50,6 @@ import java.util.Iterator;
52 import java.util.List; 50 import java.util.List;
53 import java.util.Map; 51 import java.util.Map;
54 import java.util.Set; 52 import java.util.Set;
55 -import java.util.concurrent.atomic.AtomicInteger;
56 -
57 import static com.google.common.base.Preconditions.checkNotNull; 53 import static com.google.common.base.Preconditions.checkNotNull;
58 import static com.google.common.collect.Lists.newArrayList; 54 import static com.google.common.collect.Lists.newArrayList;
59 import static org.onlab.metrics.MetricsUtil.startTimer; 55 import static org.onlab.metrics.MetricsUtil.startTimer;
...@@ -91,7 +87,6 @@ public class MastershipManager ...@@ -91,7 +87,6 @@ public class MastershipManager
91 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) 87 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
92 protected MetricsService metricsService; 88 protected MetricsService metricsService;
93 89
94 - private ClusterEventListener clusterListener = new InternalClusterEventListener();
95 private Timer requestRoleTimer; 90 private Timer requestRoleTimer;
96 91
97 @Activate 92 @Activate
...@@ -99,7 +94,6 @@ public class MastershipManager ...@@ -99,7 +94,6 @@ public class MastershipManager
99 requestRoleTimer = createTimer("Mastership", "requestRole", "responseTime"); 94 requestRoleTimer = createTimer("Mastership", "requestRole", "responseTime");
100 95
101 eventDispatcher.addSink(MastershipEvent.class, listenerRegistry); 96 eventDispatcher.addSink(MastershipEvent.class, listenerRegistry);
102 - clusterService.addListener(clusterListener);
103 store.setDelegate(delegate); 97 store.setDelegate(delegate);
104 log.info("Started"); 98 log.info("Started");
105 } 99 }
...@@ -107,7 +101,6 @@ public class MastershipManager ...@@ -107,7 +101,6 @@ public class MastershipManager
107 @Deactivate 101 @Deactivate
108 public void deactivate() { 102 public void deactivate() {
109 eventDispatcher.removeSink(MastershipEvent.class); 103 eventDispatcher.removeSink(MastershipEvent.class);
110 - clusterService.removeListener(clusterListener);
111 store.unsetDelegate(delegate); 104 store.unsetDelegate(delegate);
112 log.info("Stopped"); 105 log.info("Stopped");
113 } 106 }
...@@ -282,52 +275,6 @@ public class MastershipManager ...@@ -282,52 +275,6 @@ public class MastershipManager
282 } 275 }
283 } 276 }
284 277
285 - //callback for reacting to cluster events
286 - private class InternalClusterEventListener implements ClusterEventListener {
287 -
288 - // A notion of a local maximum cluster size, used to tie-break.
289 - // Think of a better way to do this.
290 - private AtomicInteger clusterSize;
291 -
292 - InternalClusterEventListener() {
293 - clusterSize = new AtomicInteger(0);
294 - }
295 -
296 - @Override
297 - public void event(ClusterEvent event) {
298 - switch (event.type()) {
299 - case INSTANCE_ADDED:
300 - case INSTANCE_ACTIVATED:
301 - clusterSize.incrementAndGet();
302 - log.info("instance {} added/activated", event.subject());
303 - break;
304 - case INSTANCE_REMOVED:
305 - case INSTANCE_DEACTIVATED:
306 - ControllerNode node = event.subject();
307 - log.info("instance {} removed/deactivated", node);
308 - store.relinquishAllRole(node.id());
309 -
310 - clusterSize.decrementAndGet();
311 - break;
312 - default:
313 - log.warn("unknown cluster event {}", event);
314 - }
315 - }
316 -
317 - // Can be removed if we go with naive split-brain handling: only majority
318 - // assigns mastership
319 - private boolean isInMajority() {
320 - if (clusterService.getNodes().size() > (clusterSize.intValue() / 2)) {
321 - return true;
322 - }
323 -
324 - //FIXME: break tie for equal-sized clusters,
325 -
326 - return false;
327 - }
328 -
329 - }
330 -
331 public class InternalDelegate implements MastershipStoreDelegate { 278 public class InternalDelegate implements MastershipStoreDelegate {
332 279
333 @Override 280 @Override
......
...@@ -536,7 +536,7 @@ public class DeviceManager ...@@ -536,7 +536,7 @@ public class DeviceManager
536 if (myNextRole == NONE) { 536 if (myNextRole == NONE) {
537 mastershipService.requestRoleFor(did); 537 mastershipService.requestRoleFor(did);
538 MastershipTerm term = termService.getMastershipTerm(did); 538 MastershipTerm term = termService.getMastershipTerm(did);
539 - if (myNodeId.equals(term.master())) { 539 + if (term != null && myNodeId.equals(term.master())) {
540 myNextRole = MASTER; 540 myNextRole = MASTER;
541 } else { 541 } else {
542 myNextRole = STANDBY; 542 myNextRole = STANDBY;
......
...@@ -584,4 +584,14 @@ public class HazelcastLeadershipService implements LeadershipService { ...@@ -584,4 +584,14 @@ public class HazelcastLeadershipService implements LeadershipService {
584 public List<NodeId> getCandidates(String path) { 584 public List<NodeId> getCandidates(String path) {
585 return null; 585 return null;
586 } 586 }
587 +
588 + @Override
589 + public boolean stepdown(String path) {
590 + throw new UnsupportedOperationException();
591 + }
592 +
593 + @Override
594 + public boolean makeTopCandidate(String path, NodeId nodeId) {
595 + throw new UnsupportedOperationException();
596 + }
587 } 597 }
......
...@@ -34,6 +34,7 @@ import org.onosproject.store.service.StorageService; ...@@ -34,6 +34,7 @@ import org.onosproject.store.service.StorageService;
34 import org.onosproject.store.service.Versioned; 34 import org.onosproject.store.service.Versioned;
35 import org.slf4j.Logger; 35 import org.slf4j.Logger;
36 36
37 +import java.util.ArrayList;
37 import java.util.Map; 38 import java.util.Map;
38 import java.util.Map.Entry; 39 import java.util.Map.Entry;
39 import java.util.Objects; 40 import java.util.Objects;
...@@ -48,7 +49,6 @@ import java.util.stream.Collectors; ...@@ -48,7 +49,6 @@ import java.util.stream.Collectors;
48 import static com.google.common.base.Preconditions.checkArgument; 49 import static com.google.common.base.Preconditions.checkArgument;
49 import static org.onlab.util.Tools.groupedThreads; 50 import static org.onlab.util.Tools.groupedThreads;
50 import static org.slf4j.LoggerFactory.getLogger; 51 import static org.slf4j.LoggerFactory.getLogger;
51 -
52 import static org.onosproject.cluster.ControllerNode.State.ACTIVE; 52 import static org.onosproject.cluster.ControllerNode.State.ACTIVE;
53 import static org.onosproject.cluster.ControllerNode.State.INACTIVE; 53 import static org.onosproject.cluster.ControllerNode.State.INACTIVE;
54 54
...@@ -210,7 +210,7 @@ public class DistributedLeadershipManager implements LeadershipService { ...@@ -210,7 +210,7 @@ public class DistributedLeadershipManager implements LeadershipService {
210 candidateList.add(localNodeId); 210 candidateList.add(localNodeId);
211 if (candidateMap.replace(path, candidates.version(), candidateList)) { 211 if (candidateMap.replace(path, candidates.version(), candidateList)) {
212 Versioned<List<NodeId>> newCandidates = candidateMap.get(path); 212 Versioned<List<NodeId>> newCandidates = candidateMap.get(path);
213 - notifyCandidateAdded( 213 + notifyCandidateUpdated(
214 path, candidateList, newCandidates.version(), newCandidates.creationTime()); 214 path, candidateList, newCandidates.version(), newCandidates.creationTime());
215 } else { 215 } else {
216 rerunForLeadership(path); 216 rerunForLeadership(path);
...@@ -221,7 +221,7 @@ public class DistributedLeadershipManager implements LeadershipService { ...@@ -221,7 +221,7 @@ public class DistributedLeadershipManager implements LeadershipService {
221 List<NodeId> candidateList = ImmutableList.of(localNodeId); 221 List<NodeId> candidateList = ImmutableList.of(localNodeId);
222 if ((candidateMap.putIfAbsent(path, candidateList) == null)) { 222 if ((candidateMap.putIfAbsent(path, candidateList) == null)) {
223 Versioned<List<NodeId>> newCandidates = candidateMap.get(path); 223 Versioned<List<NodeId>> newCandidates = candidateMap.get(path);
224 - notifyCandidateAdded(path, candidateList, newCandidates.version(), newCandidates.creationTime()); 224 + notifyCandidateUpdated(path, candidateList, newCandidates.version(), newCandidates.creationTime());
225 } else { 225 } else {
226 rerunForLeadership(path); 226 rerunForLeadership(path);
227 return; 227 return;
...@@ -270,6 +270,27 @@ public class DistributedLeadershipManager implements LeadershipService { ...@@ -270,6 +270,27 @@ public class DistributedLeadershipManager implements LeadershipService {
270 } 270 }
271 271
272 @Override 272 @Override
273 + public boolean stepdown(String path) {
274 + if (!activeTopics.contains(path)) {
275 + return false;
276 + }
277 +
278 + try {
279 + Versioned<NodeId> leader = leaderMap.get(path);
280 + if (leader != null && Objects.equals(leader.value(), localNodeId)) {
281 + if (leaderMap.remove(path, leader.version())) {
282 + log.info("Gave up leadership for {}", path);
283 + notifyRemovedLeader(path, localNodeId, leader.version(), leader.creationTime());
284 + return true;
285 + }
286 + }
287 + } catch (Exception e) {
288 + log.warn("Error executing stepdown for {}", path, e);
289 + }
290 + return false;
291 + }
292 +
293 + @Override
273 public void addListener(LeadershipEventListener listener) { 294 public void addListener(LeadershipEventListener listener) {
274 listenerRegistry.addListener(listener); 295 listenerRegistry.addListener(listener);
275 } 296 }
...@@ -279,6 +300,28 @@ public class DistributedLeadershipManager implements LeadershipService { ...@@ -279,6 +300,28 @@ public class DistributedLeadershipManager implements LeadershipService {
279 listenerRegistry.removeListener(listener); 300 listenerRegistry.removeListener(listener);
280 } 301 }
281 302
303 + @Override
304 + public boolean makeTopCandidate(String path, NodeId nodeId) {
305 + Versioned<List<NodeId>> candidates = candidateMap.get(path);
306 + if (candidates == null || !candidates.value().contains(nodeId)) {
307 + return false;
308 + }
309 + if (nodeId.equals(candidates.value().get(0))) {
310 + return true;
311 + }
312 + List<NodeId> currentRoster = candidates.value();
313 + List<NodeId> newRoster = new ArrayList<>(currentRoster.size());
314 + newRoster.add(nodeId);
315 + currentRoster.stream().filter(id -> !nodeId.equals(id)).forEach(newRoster::add);
316 + boolean updated = candidateMap.replace(path, candidates.version(), newRoster);
317 + if (updated) {
318 + Versioned<List<NodeId>> newCandidates = candidateMap.get(path);
319 + notifyCandidateUpdated(
320 + path, newCandidates.value(), newCandidates.version(), newCandidates.creationTime());
321 + }
322 + return updated;
323 + }
324 +
282 private void tryLeaderLock(String path) { 325 private void tryLeaderLock(String path) {
283 if (!activeTopics.contains(path)) { 326 if (!activeTopics.contains(path)) {
284 return; 327 return;
...@@ -334,7 +377,7 @@ public class DistributedLeadershipManager implements LeadershipService { ...@@ -334,7 +377,7 @@ public class DistributedLeadershipManager implements LeadershipService {
334 } 377 }
335 } 378 }
336 379
337 - private void notifyCandidateAdded( 380 + private void notifyCandidateUpdated(
338 String path, List<NodeId> candidates, long epoch, long electedTime) { 381 String path, List<NodeId> candidates, long epoch, long electedTime) {
339 Leadership newInfo = new Leadership(path, candidates, epoch, electedTime); 382 Leadership newInfo = new Leadership(path, candidates, epoch, electedTime);
340 final MutableBoolean updated = new MutableBoolean(false); 383 final MutableBoolean updated = new MutableBoolean(false);
......
...@@ -93,6 +93,8 @@ public class ConsistentDeviceMastershipStore ...@@ -93,6 +93,8 @@ public class ConsistentDeviceMastershipStore
93 new MessageSubject("mastership-store-device-role-query"); 93 new MessageSubject("mastership-store-device-role-query");
94 private static final MessageSubject ROLE_RELINQUISH_SUBJECT = 94 private static final MessageSubject ROLE_RELINQUISH_SUBJECT =
95 new MessageSubject("mastership-store-device-role-relinquish"); 95 new MessageSubject("mastership-store-device-role-relinquish");
96 + private static final MessageSubject MASTERSHIP_RELINQUISH_SUBJECT =
97 + new MessageSubject("mastership-store-device-mastership-relinquish");
96 98
97 private static final Pattern DEVICE_MASTERSHIP_TOPIC_PATTERN = 99 private static final Pattern DEVICE_MASTERSHIP_TOPIC_PATTERN =
98 Pattern.compile("device:(.*)"); 100 Pattern.compile("device:(.*)");
...@@ -111,6 +113,7 @@ public class ConsistentDeviceMastershipStore ...@@ -111,6 +113,7 @@ public class ConsistentDeviceMastershipStore
111 .register(KryoNamespaces.API) 113 .register(KryoNamespaces.API)
112 .register(MastershipRole.class) 114 .register(MastershipRole.class)
113 .register(MastershipEvent.class) 115 .register(MastershipEvent.class)
116 + .register(MastershipEvent.Type.class)
114 .build(); 117 .build();
115 } 118 }
116 }; 119 };
...@@ -125,6 +128,11 @@ public class ConsistentDeviceMastershipStore ...@@ -125,6 +128,11 @@ public class ConsistentDeviceMastershipStore
125 clusterCommunicator.addSubscriber(ROLE_RELINQUISH_SUBJECT, 128 clusterCommunicator.addSubscriber(ROLE_RELINQUISH_SUBJECT,
126 new RoleRelinquishHandler(), 129 new RoleRelinquishHandler(),
127 messageHandlingExecutor); 130 messageHandlingExecutor);
131 + clusterCommunicator.addSubscriber(MASTERSHIP_RELINQUISH_SUBJECT,
132 + SERIALIZER::decode,
133 + this::relinquishMastership,
134 + SERIALIZER::encode,
135 + messageHandlingExecutor);
128 localNodeId = clusterService.getLocalNode().id(); 136 localNodeId = clusterService.getLocalNode().id();
129 leadershipService.addListener(leadershipEventListener); 137 leadershipService.addListener(leadershipEventListener);
130 138
...@@ -135,6 +143,7 @@ public class ConsistentDeviceMastershipStore ...@@ -135,6 +143,7 @@ public class ConsistentDeviceMastershipStore
135 public void deactivate() { 143 public void deactivate() {
136 clusterCommunicator.removeSubscriber(ROLE_QUERY_SUBJECT); 144 clusterCommunicator.removeSubscriber(ROLE_QUERY_SUBJECT);
137 clusterCommunicator.removeSubscriber(ROLE_RELINQUISH_SUBJECT); 145 clusterCommunicator.removeSubscriber(ROLE_RELINQUISH_SUBJECT);
146 + clusterCommunicator.removeSubscriber(MASTERSHIP_RELINQUISH_SUBJECT);
138 messageHandlingExecutor.shutdown(); 147 messageHandlingExecutor.shutdown();
139 leadershipService.removeListener(leadershipEventListener); 148 leadershipService.removeListener(leadershipEventListener);
140 149
...@@ -237,7 +246,22 @@ public class ConsistentDeviceMastershipStore ...@@ -237,7 +246,22 @@ public class ConsistentDeviceMastershipStore
237 checkArgument(nodeId != null, NODE_ID_NULL); 246 checkArgument(nodeId != null, NODE_ID_NULL);
238 checkArgument(deviceId != null, DEVICE_ID_NULL); 247 checkArgument(deviceId != null, DEVICE_ID_NULL);
239 248
240 - throw new UnsupportedOperationException("This operation is not supported in " + this.getClass().getName()); 249 + NodeId currentMaster = getMaster(deviceId);
250 + if (nodeId.equals(currentMaster)) {
251 + return null;
252 + } else {
253 + String leadershipTopic = createDeviceMastershipTopic(deviceId);
254 + List<NodeId> candidates = leadershipService.getCandidates(leadershipTopic);
255 + if (candidates.isEmpty()) {
256 + return null;
257 + }
258 + if (leadershipService.makeTopCandidate(leadershipTopic, nodeId)) {
259 + return relinquishMastership(deviceId);
260 + } else {
261 + log.warn("Failed to promote {} to mastership for {}", nodeId, deviceId);
262 + }
263 + }
264 + return null;
241 } 265 }
242 266
243 @Override 267 @Override
...@@ -254,7 +278,13 @@ public class ConsistentDeviceMastershipStore ...@@ -254,7 +278,13 @@ public class ConsistentDeviceMastershipStore
254 checkArgument(nodeId != null, NODE_ID_NULL); 278 checkArgument(nodeId != null, NODE_ID_NULL);
255 checkArgument(deviceId != null, DEVICE_ID_NULL); 279 checkArgument(deviceId != null, DEVICE_ID_NULL);
256 280
257 - throw new UnsupportedOperationException("This operation is not supported in " + this.getClass().getName()); 281 + NodeId currentMaster = getMaster(deviceId);
282 + if (!nodeId.equals(currentMaster)) {
283 + return null;
284 + }
285 + // FIXME: This can becomes the master again unless it
286 + // is demoted to the end of candidates list.
287 + return relinquishMastership(deviceId);
258 } 288 }
259 289
260 @Override 290 @Override
...@@ -294,6 +324,37 @@ public class ConsistentDeviceMastershipStore ...@@ -294,6 +324,37 @@ public class ConsistentDeviceMastershipStore
294 return new MastershipEvent(eventType, deviceId, getNodes(deviceId)); 324 return new MastershipEvent(eventType, deviceId, getNodes(deviceId));
295 } 325 }
296 326
327 + private MastershipEvent relinquishMastership(DeviceId deviceId) {
328 + checkArgument(deviceId != null, DEVICE_ID_NULL);
329 +
330 + NodeId currentMaster = getMaster(deviceId);
331 + if (currentMaster == null) {
332 + return null;
333 + }
334 +
335 + if (!currentMaster.equals(localNodeId)) {
336 + log.info("Forwarding request to relinquish "
337 + + "mastership for device {} to {}", deviceId, currentMaster);
338 + return futureGetOrElse(clusterCommunicator.sendAndReceive(
339 + deviceId,
340 + MASTERSHIP_RELINQUISH_SUBJECT,
341 + SERIALIZER::encode,
342 + SERIALIZER::decode,
343 + currentMaster), null);
344 + }
345 +
346 + String leadershipTopic = createDeviceMastershipTopic(deviceId);
347 + Leadership currentLeadership = leadershipService.getLeadership(leadershipTopic);
348 +
349 + MastershipEvent.Type eventType = null;
350 + if (currentLeadership != null && currentLeadership.leader().equals(localNodeId)) {
351 + eventType = MastershipEvent.Type.MASTER_CHANGED;
352 + }
353 +
354 + return leadershipService.stepdown(leadershipTopic)
355 + ? new MastershipEvent(eventType, deviceId, getNodes(deviceId)) : null;
356 + }
357 +
297 private class RoleQueryHandler implements ClusterMessageHandler { 358 private class RoleQueryHandler implements ClusterMessageHandler {
298 @Override 359 @Override
299 public void handle(ClusterMessage message) { 360 public void handle(ClusterMessage message) {
......
...@@ -119,4 +119,14 @@ public class SimpleLeadershipManager implements LeadershipService { ...@@ -119,4 +119,14 @@ public class SimpleLeadershipManager implements LeadershipService {
119 public List<NodeId> getCandidates(String path) { 119 public List<NodeId> getCandidates(String path) {
120 return null; 120 return null;
121 } 121 }
122 +
123 + @Override
124 + public boolean stepdown(String path) {
125 + throw new UnsupportedOperationException();
126 + }
127 +
128 + @Override
129 + public boolean makeTopCandidate(String path, NodeId nodeId) {
130 + throw new UnsupportedOperationException();
131 + }
122 } 132 }
......