Madan Jampani
Committed by Gerrit Code Review

A couple of fixes:

1. Retry acquiring the leadership lock after a successful stepdown.
2. setStandby should adjust the candidates list to ensure that another node steps up to become the master.

Change-Id: I8dc5da82c9b8b9e99d4118ec33a63037543927f0
...@@ -53,8 +53,6 @@ import org.onosproject.net.DeviceId; ...@@ -53,8 +53,6 @@ import org.onosproject.net.DeviceId;
53 import org.onosproject.net.MastershipRole; 53 import org.onosproject.net.MastershipRole;
54 import org.onosproject.store.AbstractStore; 54 import org.onosproject.store.AbstractStore;
55 import org.onosproject.store.cluster.messaging.ClusterCommunicationService; 55 import org.onosproject.store.cluster.messaging.ClusterCommunicationService;
56 -import org.onosproject.store.cluster.messaging.ClusterMessage;
57 -import org.onosproject.store.cluster.messaging.ClusterMessageHandler;
58 import org.onosproject.store.cluster.messaging.MessageSubject; 56 import org.onosproject.store.cluster.messaging.MessageSubject;
59 import org.onosproject.store.serializers.KryoNamespaces; 57 import org.onosproject.store.serializers.KryoNamespaces;
60 import org.onosproject.store.serializers.KryoSerializer; 58 import org.onosproject.store.serializers.KryoSerializer;
...@@ -122,12 +120,16 @@ public class ConsistentDeviceMastershipStore ...@@ -122,12 +120,16 @@ public class ConsistentDeviceMastershipStore
122 public void activate() { 120 public void activate() {
123 messageHandlingExecutor = 121 messageHandlingExecutor =
124 Executors.newSingleThreadExecutor(groupedThreads("onos/store/device/mastership", "message-handler")); 122 Executors.newSingleThreadExecutor(groupedThreads("onos/store/device/mastership", "message-handler"));
125 - clusterCommunicator.addSubscriber(ROLE_QUERY_SUBJECT, 123 + clusterCommunicator.<DeviceId, MastershipRole>addSubscriber(ROLE_QUERY_SUBJECT,
126 - new RoleQueryHandler(), 124 + SERIALIZER::decode,
125 + deviceId -> getRole(localNodeId, deviceId),
126 + SERIALIZER::encode,
127 + messageHandlingExecutor);
128 + clusterCommunicator.<DeviceId, MastershipEvent>addSubscriber(ROLE_RELINQUISH_SUBJECT,
129 + SERIALIZER::decode,
130 + deviceId -> relinquishRole(localNodeId, deviceId),
131 + SERIALIZER::encode,
127 messageHandlingExecutor); 132 messageHandlingExecutor);
128 - clusterCommunicator.addSubscriber(ROLE_RELINQUISH_SUBJECT,
129 - new RoleRelinquishHandler(),
130 - messageHandlingExecutor);
131 clusterCommunicator.addSubscriber(TRANSITION_FROM_MASTER_TO_STANDBY_SUBJECT, 133 clusterCommunicator.addSubscriber(TRANSITION_FROM_MASTER_TO_STANDBY_SUBJECT,
132 SERIALIZER::decode, 134 SERIALIZER::decode,
133 this::transitionFromMasterToStandby, 135 this::transitionFromMasterToStandby,
...@@ -211,8 +213,6 @@ public class ConsistentDeviceMastershipStore ...@@ -211,8 +213,6 @@ public class ConsistentDeviceMastershipStore
211 Map<NodeId, MastershipRole> roles = Maps.newHashMap(); 213 Map<NodeId, MastershipRole> roles = Maps.newHashMap();
212 clusterService 214 clusterService
213 .getNodes() 215 .getNodes()
214 - .stream()
215 - .parallel()
216 .forEach((node) -> roles.put(node.id(), getRole(node.id(), deviceId))); 216 .forEach((node) -> roles.put(node.id(), getRole(node.id(), deviceId)));
217 217
218 NodeId master = null; 218 NodeId master = null;
...@@ -282,9 +282,21 @@ public class ConsistentDeviceMastershipStore ...@@ -282,9 +282,21 @@ public class ConsistentDeviceMastershipStore
282 if (!nodeId.equals(currentMaster)) { 282 if (!nodeId.equals(currentMaster)) {
283 return null; 283 return null;
284 } 284 }
285 - // FIXME: This can become the master again unless it 285 +
286 - // is first demoted to the end of candidates list. 286 + String leadershipTopic = createDeviceMastershipTopic(deviceId);
287 - return transitionFromMasterToStandby(deviceId); 287 + List<NodeId> candidates = leadershipService.getCandidates(leadershipTopic);
288 +
289 + NodeId newMaster = candidates.stream()
290 + .filter(candidate -> !Objects.equal(nodeId, candidate))
291 + .findFirst()
292 + .orElse(null);
293 + log.info("Transitioning to role {} for {}. Next master: {}",
294 + newMaster != null ? MastershipRole.STANDBY : MastershipRole.NONE, deviceId, newMaster);
295 +
296 + if (newMaster != null) {
297 + return setMaster(newMaster, deviceId);
298 + }
299 + return relinquishRole(nodeId, deviceId);
288 } 300 }
289 301
290 @Override 302 @Override
...@@ -344,28 +356,11 @@ public class ConsistentDeviceMastershipStore ...@@ -344,28 +356,11 @@ public class ConsistentDeviceMastershipStore
344 ? new MastershipEvent(MastershipEvent.Type.MASTER_CHANGED, deviceId, getNodes(deviceId)) : null; 356 ? new MastershipEvent(MastershipEvent.Type.MASTER_CHANGED, deviceId, getNodes(deviceId)) : null;
345 } 357 }
346 358
347 - private class RoleQueryHandler implements ClusterMessageHandler {
348 - @Override
349 - public void handle(ClusterMessage message) {
350 - DeviceId deviceId = SERIALIZER.decode(message.payload());
351 - message.respond(SERIALIZER.encode(getRole(localNodeId, deviceId)));
352 - }
353 - }
354 -
355 -
356 @Override 359 @Override
357 public void relinquishAllRole(NodeId nodeId) { 360 public void relinquishAllRole(NodeId nodeId) {
358 // Noop. LeadershipService already takes care of detecting and purging deadlocks. 361 // Noop. LeadershipService already takes care of detecting and purging deadlocks.
359 } 362 }
360 363
361 - private class RoleRelinquishHandler implements ClusterMessageHandler {
362 - @Override
363 - public void handle(ClusterMessage message) {
364 - DeviceId deviceId = SERIALIZER.decode(message.payload());
365 - message.respond(SERIALIZER.encode(relinquishRole(localNodeId, deviceId)));
366 - }
367 - }
368 -
369 private class InternalDeviceMastershipEventListener implements LeadershipEventListener { 364 private class InternalDeviceMastershipEventListener implements LeadershipEventListener {
370 @Override 365 @Override
371 public void event(LeadershipEvent event) { 366 public void event(LeadershipEvent event) {
......