Jon Hall

[ONOS-4460] Relinquish device role when partitioned away from cluster

Change-Id: I578029614cced96a2d4503e4fe3052c927f051ab
...@@ -41,13 +41,23 @@ public class LeadershipEvent extends AbstractEvent<LeadershipEvent.Type, Leaders ...@@ -41,13 +41,23 @@ public class LeadershipEvent extends AbstractEvent<LeadershipEvent.Type, Leaders
41 * Signifies that the leader for a topic has changed. 41 * Signifies that the leader for a topic has changed.
42 */ 42 */
43 // TODO: We may not need this. We currently do not support a way for a current leader to step down 43 // TODO: We may not need this. We currently do not support a way for a current leader to step down
44 - // while still reamining a candidate 44 + // while still remaining a candidate
45 LEADER_CHANGED, 45 LEADER_CHANGED,
46 46
47 /** 47 /**
48 * Signifies a change in the list of candidates for a topic. 48 * Signifies a change in the list of candidates for a topic.
49 */ 49 */
50 - CANDIDATES_CHANGED 50 + CANDIDATES_CHANGED,
51 +
52 + /**
53 + * Signifies the Leadership Elector is unavailable.
54 + */
55 + SERVICE_DISRUPTED,
56 +
57 + /**
58 + * Signifies the Leadership Elector is available again.
59 + */
60 + SERVICE_RESTORED
51 } 61 }
52 62
53 /** 63 /**
......
...@@ -24,7 +24,7 @@ import org.onosproject.store.Store; ...@@ -24,7 +24,7 @@ import org.onosproject.store.Store;
24 public interface LeadershipStore extends Store<LeadershipEvent, LeadershipStoreDelegate> { 24 public interface LeadershipStore extends Store<LeadershipEvent, LeadershipStoreDelegate> {
25 25
26 /** 26 /**
27 - * Adds registration for the local instance to be leader for topic. 27 + * Adds registration for the local instance to be part of the leadership contest for topic.
28 * 28 *
29 * @param topic leadership topic 29 * @param topic leadership topic
30 * @return Updated leadership after operation is completed 30 * @return Updated leadership after operation is completed
......
...@@ -44,7 +44,13 @@ public class MastershipEvent extends AbstractEvent<MastershipEvent.Type, DeviceI ...@@ -44,7 +44,13 @@ public class MastershipEvent extends AbstractEvent<MastershipEvent.Type, DeviceI
44 * the change in the backups list is accompanied by a change in 44 * the change in the backups list is accompanied by a change in
45 * master, the event is subsumed by MASTER_CHANGED. 45 * master, the event is subsumed by MASTER_CHANGED.
46 */ 46 */
47 - BACKUPS_CHANGED 47 + BACKUPS_CHANGED,
48 +
49 + /**
50 + * Signifies that the underlying storage for the Mastership state
51 + * of this device is unavailable.
52 + */
53 + SUSPENDED
48 } 54 }
49 55
50 /** 56 /**
......
...@@ -729,16 +729,17 @@ public class DeviceManager ...@@ -729,16 +729,17 @@ public class DeviceManager
729 } 729 }
730 730
731 private void handleMastershipEvent(MastershipEvent event) { 731 private void handleMastershipEvent(MastershipEvent event) {
732 - if (event.type() != MastershipEvent.Type.MASTER_CHANGED) { 732 + if (event.type() == MastershipEvent.Type.BACKUPS_CHANGED) {
733 // Don't care if backup list changed. 733 // Don't care if backup list changed.
734 return; 734 return;
735 } 735 }
736 -
737 final DeviceId did = event.subject(); 736 final DeviceId did = event.subject();
738 737
739 // myRole suggested by MastershipService 738 // myRole suggested by MastershipService
740 MastershipRole myNextRole; 739 MastershipRole myNextRole;
741 - if (localNodeId.equals(event.roleInfo().master())) { 740 + if (event.type() == MastershipEvent.Type.SUSPENDED) {
741 + myNextRole = NONE; // FIXME: should the role be STANDBY or NONE while the mastership storage is unavailable?
742 + } else if (localNodeId.equals(event.roleInfo().master())) {
742 // confirm latest info 743 // confirm latest info
743 MastershipTerm term = termService.getMastershipTerm(did); 744 MastershipTerm term = termService.getMastershipTerm(did);
744 final boolean iHaveControl = term != null && localNodeId.equals(term.master()); 745 final boolean iHaveControl = term != null && localNodeId.equals(term.master());
......
...@@ -15,12 +15,16 @@ ...@@ -15,12 +15,16 @@
15 */ 15 */
16 package org.onosproject.store.cluster.impl; 16 package org.onosproject.store.cluster.impl;
17 17
18 +import static org.onlab.util.Tools.groupedThreads;
18 import static org.slf4j.LoggerFactory.getLogger; 19 import static org.slf4j.LoggerFactory.getLogger;
19 20
20 import java.util.Map; 21 import java.util.Map;
21 import java.util.Objects; 22 import java.util.Objects;
23 +import java.util.concurrent.ExecutorService;
24 +import java.util.concurrent.Executors;
22 import java.util.function.Consumer; 25 import java.util.function.Consumer;
23 26
27 +import com.google.common.collect.Maps;
24 import org.apache.felix.scr.annotations.Activate; 28 import org.apache.felix.scr.annotations.Activate;
25 import org.apache.felix.scr.annotations.Component; 29 import org.apache.felix.scr.annotations.Component;
26 import org.apache.felix.scr.annotations.Deactivate; 30 import org.apache.felix.scr.annotations.Deactivate;
...@@ -35,6 +39,7 @@ import org.onosproject.cluster.LeadershipStoreDelegate; ...@@ -35,6 +39,7 @@ import org.onosproject.cluster.LeadershipStoreDelegate;
35 import org.onosproject.cluster.NodeId; 39 import org.onosproject.cluster.NodeId;
36 import org.onosproject.event.Change; 40 import org.onosproject.event.Change;
37 import org.onosproject.store.AbstractStore; 41 import org.onosproject.store.AbstractStore;
42 +import org.onosproject.store.service.DistributedPrimitive.Status;
38 import org.onosproject.store.service.LeaderElector; 43 import org.onosproject.store.service.LeaderElector;
39 import org.onosproject.store.service.StorageService; 44 import org.onosproject.store.service.StorageService;
40 import org.slf4j.Logger; 45 import org.slf4j.Logger;
...@@ -57,8 +62,10 @@ public class DistributedLeadershipStore ...@@ -57,8 +62,10 @@ public class DistributedLeadershipStore
57 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) 62 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
58 protected StorageService storageService; 63 protected StorageService storageService;
59 64
65 + private ExecutorService statusChangeHandler;
60 private NodeId localNodeId; 66 private NodeId localNodeId;
61 private LeaderElector leaderElector; 67 private LeaderElector leaderElector;
68 + private final Map<String, Leadership> localLeaderCache = Maps.newConcurrentMap();
62 69
63 private final Consumer<Change<Leadership>> leadershipChangeListener = 70 private final Consumer<Change<Leadership>> leadershipChangeListener =
64 change -> { 71 change -> {
...@@ -77,22 +84,54 @@ public class DistributedLeadershipStore ...@@ -77,22 +84,54 @@ public class DistributedLeadershipStore
77 eventType = LeadershipEvent.Type.CANDIDATES_CHANGED; 84 eventType = LeadershipEvent.Type.CANDIDATES_CHANGED;
78 } 85 }
79 notifyDelegate(new LeadershipEvent(eventType, change.newValue())); 86 notifyDelegate(new LeadershipEvent(eventType, change.newValue()));
87 + // Update local cache of currently held leaderships
88 + if (Objects.equals(newValue.leaderNodeId(), localNodeId)) {
89 + localLeaderCache.put(newValue.topic(), newValue);
90 + } else {
91 + localLeaderCache.remove(newValue.topic());
92 + }
80 }; 93 };
81 94
95 + private final Consumer<Status> clientStatusListener = status ->
96 + statusChangeHandler.execute(() -> handleStatusChange(status));
97 +
98 + private void handleStatusChange(Status status) {
99 + // Notify the mastership service of disconnects and reconnects
100 + if (status == Status.ACTIVE) {
101 + // Service Restored
102 + localLeaderCache.forEach((topic, leadership) -> leaderElector.run(topic, localNodeId));
103 + leaderElector.getLeaderships().forEach((topic, leadership) ->
104 + notifyDelegate(new LeadershipEvent(LeadershipEvent.Type.SERVICE_RESTORED, leadership)));
105 + } else if (status == Status.SUSPENDED) {
106 + // Service Suspended
107 + localLeaderCache.forEach((topic, leadership) ->
108 + notifyDelegate(new LeadershipEvent(LeadershipEvent.Type.SERVICE_DISRUPTED, leadership)));
109 + } else {
110 + // Only the INACTIVE status should reach this branch; nothing to do
111 + return;
112 + }
113 + }
114 +
115 +
82 @Activate 116 @Activate
83 public void activate() { 117 public void activate() {
118 + statusChangeHandler = Executors.newSingleThreadExecutor(
119 + groupedThreads("onos/store/dist/cluster/leadership", "status-change-handler", log));
84 localNodeId = clusterService.getLocalNode().id(); 120 localNodeId = clusterService.getLocalNode().id();
85 leaderElector = storageService.leaderElectorBuilder() 121 leaderElector = storageService.leaderElectorBuilder()
86 .withName("onos-leadership-elections") 122 .withName("onos-leadership-elections")
87 .build() 123 .build()
88 .asLeaderElector(); 124 .asLeaderElector();
89 leaderElector.addChangeListener(leadershipChangeListener); 125 leaderElector.addChangeListener(leadershipChangeListener);
126 + leaderElector.addStatusChangeListener(clientStatusListener);
90 log.info("Started"); 127 log.info("Started");
91 } 128 }
92 129
93 @Deactivate 130 @Deactivate
94 public void deactivate() { 131 public void deactivate() {
95 leaderElector.removeChangeListener(leadershipChangeListener); 132 leaderElector.removeChangeListener(leadershipChangeListener);
133 + leaderElector.removeStatusChangeListener(clientStatusListener);
134 + statusChangeHandler.shutdown();
96 log.info("Stopped"); 135 log.info("Stopped");
97 } 136 }
98 137
......
...@@ -18,6 +18,7 @@ package org.onosproject.store.mastership.impl; ...@@ -18,6 +18,7 @@ package org.onosproject.store.mastership.impl;
18 import static org.onlab.util.Tools.groupedThreads; 18 import static org.onlab.util.Tools.groupedThreads;
19 import static org.onosproject.mastership.MastershipEvent.Type.BACKUPS_CHANGED; 19 import static org.onosproject.mastership.MastershipEvent.Type.BACKUPS_CHANGED;
20 import static org.onosproject.mastership.MastershipEvent.Type.MASTER_CHANGED; 20 import static org.onosproject.mastership.MastershipEvent.Type.MASTER_CHANGED;
21 +import static org.onosproject.mastership.MastershipEvent.Type.SUSPENDED;
21 import static org.slf4j.LoggerFactory.getLogger; 22 import static org.slf4j.LoggerFactory.getLogger;
22 import static com.google.common.base.Preconditions.checkArgument; 23 import static com.google.common.base.Preconditions.checkArgument;
23 24
...@@ -319,7 +320,8 @@ public class ConsistentDeviceMastershipStore ...@@ -319,7 +320,8 @@ public class ConsistentDeviceMastershipStore
319 private void handleEvent(LeadershipEvent event) { 320 private void handleEvent(LeadershipEvent event) {
320 Leadership leadership = event.subject(); 321 Leadership leadership = event.subject();
321 DeviceId deviceId = extractDeviceIdFromTopic(leadership.topic()); 322 DeviceId deviceId = extractDeviceIdFromTopic(leadership.topic());
322 - RoleInfo roleInfo = getNodes(deviceId); 323 + RoleInfo roleInfo = event.type() != LeadershipEvent.Type.SERVICE_DISRUPTED ?
324 + getNodes(deviceId) : new RoleInfo();
323 switch (event.type()) { 325 switch (event.type()) {
324 case LEADER_AND_CANDIDATES_CHANGED: 326 case LEADER_AND_CANDIDATES_CHANGED:
325 notifyDelegate(new MastershipEvent(BACKUPS_CHANGED, deviceId, roleInfo)); 327 notifyDelegate(new MastershipEvent(BACKUPS_CHANGED, deviceId, roleInfo));
...@@ -331,6 +333,12 @@ public class ConsistentDeviceMastershipStore ...@@ -331,6 +333,12 @@ public class ConsistentDeviceMastershipStore
331 case CANDIDATES_CHANGED: 333 case CANDIDATES_CHANGED:
332 notifyDelegate(new MastershipEvent(BACKUPS_CHANGED, deviceId, roleInfo)); 334 notifyDelegate(new MastershipEvent(BACKUPS_CHANGED, deviceId, roleInfo));
333 break; 335 break;
336 + case SERVICE_DISRUPTED:
337 + notifyDelegate(new MastershipEvent(SUSPENDED, deviceId, roleInfo));
338 + break;
339 + case SERVICE_RESTORED:
340 + // Do nothing, wait for updates from peers
341 + break;
334 default: 342 default:
335 return; 343 return;
336 } 344 }
......
...@@ -26,6 +26,7 @@ import java.util.Properties; ...@@ -26,6 +26,7 @@ import java.util.Properties;
26 import java.util.Set; 26 import java.util.Set;
27 import java.util.concurrent.CompletableFuture; 27 import java.util.concurrent.CompletableFuture;
28 import java.util.function.Consumer; 28 import java.util.function.Consumer;
29 +import java.util.function.Function;
29 30
30 import org.onosproject.cluster.Leadership; 31 import org.onosproject.cluster.Leadership;
31 import org.onosproject.cluster.NodeId; 32 import org.onosproject.cluster.NodeId;
...@@ -63,6 +64,19 @@ public class AtomixLeaderElector extends AbstractResource<AtomixLeaderElector> ...@@ -63,6 +64,19 @@ public class AtomixLeaderElector extends AbstractResource<AtomixLeaderElector>
63 public static final String CHANGE_SUBJECT = "leadershipChangeEvents"; 64 public static final String CHANGE_SUBJECT = "leadershipChangeEvents";
64 private final LoadingCache<String, CompletableFuture<Leadership>> cache; 65 private final LoadingCache<String, CompletableFuture<Leadership>> cache;
65 66
67 + Function<CopycatClient.State, Status> mapper = state -> {
68 + switch (state) {
69 + case CONNECTED:
70 + return Status.ACTIVE;
71 + case SUSPENDED:
72 + return Status.SUSPENDED;
73 + case CLOSED:
74 + return Status.INACTIVE;
75 + default:
76 + throw new IllegalStateException("Unknown state " + state);
77 + }
78 + };
79 +
66 public AtomixLeaderElector(CopycatClient client, Properties properties) { 80 public AtomixLeaderElector(CopycatClient client, Properties properties) {
67 super(client, properties); 81 super(client, properties);
68 cache = CacheBuilder.newBuilder() 82 cache = CacheBuilder.newBuilder()
...@@ -79,6 +93,7 @@ public class AtomixLeaderElector extends AbstractResource<AtomixLeaderElector> ...@@ -79,6 +93,7 @@ public class AtomixLeaderElector extends AbstractResource<AtomixLeaderElector>
79 } 93 }
80 }; 94 };
81 addStatusChangeListener(statusListener); 95 addStatusChangeListener(statusListener);
96 + client.onStateChange(this::handleStateChange);
82 } 97 }
83 98
84 @Override 99 @Override
...@@ -193,4 +208,8 @@ public class AtomixLeaderElector extends AbstractResource<AtomixLeaderElector> ...@@ -193,4 +208,8 @@ public class AtomixLeaderElector extends AbstractResource<AtomixLeaderElector>
193 private boolean isListening() { 208 private boolean isListening() {
194 return !leadershipChangeListeners.isEmpty(); 209 return !leadershipChangeListeners.isEmpty();
195 } 210 }
211 +
212 + private void handleStateChange(CopycatClient.State state) {
213 + statusChangeListeners().forEach(listener -> listener.accept(mapper.apply(state)));
214 + }
196 } 215 }
......
...@@ -542,22 +542,20 @@ public class LldpLinkProvider extends AbstractProvider implements ProbedLinkProv ...@@ -542,22 +542,20 @@ public class LldpLinkProvider extends AbstractProvider implements ProbedLinkProv
542 private class InternalRoleListener implements MastershipListener { 542 private class InternalRoleListener implements MastershipListener {
543 @Override 543 @Override
544 public void event(MastershipEvent event) { 544 public void event(MastershipEvent event) {
545 - if (MastershipEvent.Type.BACKUPS_CHANGED.equals(event.type())) { 545 + if (MastershipEvent.Type.MASTER_CHANGED.equals(event.type())) {
546 // only need new master events 546 // only need new master events
547 - return; 547 + eventExecutor.execute(() -> {
548 + DeviceId deviceId = event.subject();
549 + Device device = deviceService.getDevice(deviceId);
550 + if (device == null) {
551 + log.debug("Device {} doesn't exist, or isn't there yet", deviceId);
552 + return;
553 + }
554 + if (clusterService.getLocalNode().id().equals(event.roleInfo().master())) {
555 + updateDevice(device).ifPresent(ld -> updatePorts(ld, device.id()));
556 + }
557 + });
548 } 558 }
549 -
550 - eventExecutor.execute(() -> {
551 - DeviceId deviceId = event.subject();
552 - Device device = deviceService.getDevice(deviceId);
553 - if (device == null) {
554 - log.debug("Device {} doesn't exist, or isn't there yet", deviceId);
555 - return;
556 - }
557 - if (clusterService.getLocalNode().id().equals(event.roleInfo().master())) {
558 - updateDevice(device).ifPresent(ld -> updatePorts(ld, device.id()));
559 - }
560 - });
561 } 559 }
562 } 560 }
563 561
......