[ONOS-4460] Relinquish device role when partitioned away from cluster
Change-Id: I578029614cced96a2d4503e4fe3052c927f051ab
Showing 8 changed files with 103 additions and 22 deletions
... | @@ -41,13 +41,23 @@ public class LeadershipEvent extends AbstractEvent<LeadershipEvent.Type, Leaders | ... | @@ -41,13 +41,23 @@ public class LeadershipEvent extends AbstractEvent<LeadershipEvent.Type, Leaders |
41 | * Signifies that the leader for a topic has changed. | 41 | * Signifies that the leader for a topic has changed. |
42 | */ | 42 | */ |
43 | // TODO: We may not need this. We currently do not support a way for a current leader to step down | 43 | // TODO: We may not need this. We currently do not support a way for a current leader to step down |
44 | - // while still reamining a candidate | 44 | + // while still remaining a candidate |
45 | LEADER_CHANGED, | 45 | LEADER_CHANGED, |
46 | 46 | ||
47 | /** | 47 | /** |
48 | * Signifies a change in the list of candidates for a topic. | 48 | * Signifies a change in the list of candidates for a topic. |
49 | */ | 49 | */ |
50 | - CANDIDATES_CHANGED | 50 | + CANDIDATES_CHANGED, |
51 | + | ||
52 | + /** | ||
53 | + * Signifies the Leadership Elector is unavailable. | ||
54 | + */ | ||
55 | + SERVICE_DISRUPTED, | ||
56 | + | ||
57 | + /** | ||
58 | + * Signifies the Leadership Elector is available again. | ||
59 | + */ | ||
60 | + SERVICE_RESTORED | ||
51 | } | 61 | } |
52 | 62 | ||
53 | /** | 63 | /** | ... | ... |
... | @@ -24,7 +24,7 @@ import org.onosproject.store.Store; | ... | @@ -24,7 +24,7 @@ import org.onosproject.store.Store; |
24 | public interface LeadershipStore extends Store<LeadershipEvent, LeadershipStoreDelegate> { | 24 | public interface LeadershipStore extends Store<LeadershipEvent, LeadershipStoreDelegate> { |
25 | 25 | ||
26 | /** | 26 | /** |
27 | - * Adds registration for the local instance to be leader for topic. | 27 | + * Adds registration for the local instance to be part of the leadership contest for topic. |
28 | * | 28 | * |
29 | * @param topic leadership topic | 29 | * @param topic leadership topic |
30 | * @return Updated leadership after operation is completed | 30 | * @return Updated leadership after operation is completed | ... | ... |
... | @@ -44,7 +44,13 @@ public class MastershipEvent extends AbstractEvent<MastershipEvent.Type, DeviceI | ... | @@ -44,7 +44,13 @@ public class MastershipEvent extends AbstractEvent<MastershipEvent.Type, DeviceI |
44 | * the change in the backups list is accompanied by a change in | 44 | * the change in the backups list is accompanied by a change in |
45 | * master, the event is subsumed by MASTER_CHANGED. | 45 | * master, the event is subsumed by MASTER_CHANGED. |
46 | */ | 46 | */ |
47 | - BACKUPS_CHANGED | 47 | + BACKUPS_CHANGED, |
48 | + | ||
49 | + /** | ||
50 | + * Signifies that the underlying storage for the Mastership state | ||
51 | + * of this device is unavailable. | ||
52 | + */ | ||
53 | + SUSPENDED | ||
48 | } | 54 | } |
49 | 55 | ||
50 | /** | 56 | /** | ... | ... |
... | @@ -729,16 +729,17 @@ public class DeviceManager | ... | @@ -729,16 +729,17 @@ public class DeviceManager |
729 | } | 729 | } |
730 | 730 | ||
731 | private void handleMastershipEvent(MastershipEvent event) { | 731 | private void handleMastershipEvent(MastershipEvent event) { |
732 | - if (event.type() != MastershipEvent.Type.MASTER_CHANGED) { | 732 | + if (event.type() == MastershipEvent.Type.BACKUPS_CHANGED) { |
733 | // Don't care if backup list changed. | 733 | // Don't care if backup list changed. |
734 | return; | 734 | return; |
735 | } | 735 | } |
736 | - | ||
737 | final DeviceId did = event.subject(); | 736 | final DeviceId did = event.subject(); |
738 | 737 | ||
739 | // myRole suggested by MastershipService | 738 | // myRole suggested by MastershipService |
740 | MastershipRole myNextRole; | 739 | MastershipRole myNextRole; |
741 | - if (localNodeId.equals(event.roleInfo().master())) { | 740 | + if (event.type() == MastershipEvent.Type.SUSPENDED) { |
741 | + myNextRole = NONE; // FIXME STANDBY OR NONE? | ||
742 | + } else if (localNodeId.equals(event.roleInfo().master())) { | ||
742 | // confirm latest info | 743 | // confirm latest info |
743 | MastershipTerm term = termService.getMastershipTerm(did); | 744 | MastershipTerm term = termService.getMastershipTerm(did); |
744 | final boolean iHaveControl = term != null && localNodeId.equals(term.master()); | 745 | final boolean iHaveControl = term != null && localNodeId.equals(term.master()); | ... | ... |
... | @@ -15,12 +15,16 @@ | ... | @@ -15,12 +15,16 @@ |
15 | */ | 15 | */ |
16 | package org.onosproject.store.cluster.impl; | 16 | package org.onosproject.store.cluster.impl; |
17 | 17 | ||
18 | +import static org.onlab.util.Tools.groupedThreads; | ||
18 | import static org.slf4j.LoggerFactory.getLogger; | 19 | import static org.slf4j.LoggerFactory.getLogger; |
19 | 20 | ||
20 | import java.util.Map; | 21 | import java.util.Map; |
21 | import java.util.Objects; | 22 | import java.util.Objects; |
23 | +import java.util.concurrent.ExecutorService; | ||
24 | +import java.util.concurrent.Executors; | ||
22 | import java.util.function.Consumer; | 25 | import java.util.function.Consumer; |
23 | 26 | ||
27 | +import com.google.common.collect.Maps; | ||
24 | import org.apache.felix.scr.annotations.Activate; | 28 | import org.apache.felix.scr.annotations.Activate; |
25 | import org.apache.felix.scr.annotations.Component; | 29 | import org.apache.felix.scr.annotations.Component; |
26 | import org.apache.felix.scr.annotations.Deactivate; | 30 | import org.apache.felix.scr.annotations.Deactivate; |
... | @@ -35,6 +39,7 @@ import org.onosproject.cluster.LeadershipStoreDelegate; | ... | @@ -35,6 +39,7 @@ import org.onosproject.cluster.LeadershipStoreDelegate; |
35 | import org.onosproject.cluster.NodeId; | 39 | import org.onosproject.cluster.NodeId; |
36 | import org.onosproject.event.Change; | 40 | import org.onosproject.event.Change; |
37 | import org.onosproject.store.AbstractStore; | 41 | import org.onosproject.store.AbstractStore; |
42 | +import org.onosproject.store.service.DistributedPrimitive.Status; | ||
38 | import org.onosproject.store.service.LeaderElector; | 43 | import org.onosproject.store.service.LeaderElector; |
39 | import org.onosproject.store.service.StorageService; | 44 | import org.onosproject.store.service.StorageService; |
40 | import org.slf4j.Logger; | 45 | import org.slf4j.Logger; |
... | @@ -57,8 +62,10 @@ public class DistributedLeadershipStore | ... | @@ -57,8 +62,10 @@ public class DistributedLeadershipStore |
57 | @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) | 62 | @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) |
58 | protected StorageService storageService; | 63 | protected StorageService storageService; |
59 | 64 | ||
65 | + private ExecutorService statusChangeHandler; | ||
60 | private NodeId localNodeId; | 66 | private NodeId localNodeId; |
61 | private LeaderElector leaderElector; | 67 | private LeaderElector leaderElector; |
68 | + private final Map<String, Leadership> localLeaderCache = Maps.newConcurrentMap(); | ||
62 | 69 | ||
63 | private final Consumer<Change<Leadership>> leadershipChangeListener = | 70 | private final Consumer<Change<Leadership>> leadershipChangeListener = |
64 | change -> { | 71 | change -> { |
... | @@ -77,22 +84,54 @@ public class DistributedLeadershipStore | ... | @@ -77,22 +84,54 @@ public class DistributedLeadershipStore |
77 | eventType = LeadershipEvent.Type.CANDIDATES_CHANGED; | 84 | eventType = LeadershipEvent.Type.CANDIDATES_CHANGED; |
78 | } | 85 | } |
79 | notifyDelegate(new LeadershipEvent(eventType, change.newValue())); | 86 | notifyDelegate(new LeadershipEvent(eventType, change.newValue())); |
87 | + // Update local cache of currently held leaderships | ||
88 | + if (Objects.equals(newValue.leaderNodeId(), localNodeId)) { | ||
89 | + localLeaderCache.put(newValue.topic(), newValue); | ||
90 | + } else { | ||
91 | + localLeaderCache.remove(newValue.topic()); | ||
92 | + } | ||
80 | }; | 93 | }; |
81 | 94 | ||
95 | + private final Consumer<Status> clientStatusListener = status -> | ||
96 | + statusChangeHandler.execute(() -> handleStatusChange(status)); | ||
97 | + | ||
98 | + private void handleStatusChange(Status status) { | ||
99 | + // Notify mastership Service of disconnect and reconnect | ||
100 | + if (status == Status.ACTIVE) { | ||
101 | + // Service Restored | ||
102 | + localLeaderCache.forEach((topic, leadership) -> leaderElector.run(topic, localNodeId)); | ||
103 | + leaderElector.getLeaderships().forEach((topic, leadership) -> | ||
104 | + notifyDelegate(new LeadershipEvent(LeadershipEvent.Type.SERVICE_RESTORED, leadership))); | ||
105 | + } else if (status == Status.SUSPENDED) { | ||
106 | + // Service Suspended | ||
107 | + localLeaderCache.forEach((topic, leadership) -> | ||
108 | + notifyDelegate(new LeadershipEvent(LeadershipEvent.Type.SERVICE_DISRUPTED, leadership))); | ||
109 | + } else { | ||
110 | + // Should be only inactive state | ||
111 | + return; | ||
112 | + } | ||
113 | + } | ||
114 | + | ||
115 | + | ||
82 | @Activate | 116 | @Activate |
83 | public void activate() { | 117 | public void activate() { |
118 | + statusChangeHandler = Executors.newSingleThreadExecutor( | ||
119 | + groupedThreads("onos/store/dist/cluster/leadership", "status-change-handler", log)); | ||
84 | localNodeId = clusterService.getLocalNode().id(); | 120 | localNodeId = clusterService.getLocalNode().id(); |
85 | leaderElector = storageService.leaderElectorBuilder() | 121 | leaderElector = storageService.leaderElectorBuilder() |
86 | .withName("onos-leadership-elections") | 122 | .withName("onos-leadership-elections") |
87 | .build() | 123 | .build() |
88 | .asLeaderElector(); | 124 | .asLeaderElector(); |
89 | leaderElector.addChangeListener(leadershipChangeListener); | 125 | leaderElector.addChangeListener(leadershipChangeListener); |
126 | + leaderElector.addStatusChangeListener(clientStatusListener); | ||
90 | log.info("Started"); | 127 | log.info("Started"); |
91 | } | 128 | } |
92 | 129 | ||
93 | @Deactivate | 130 | @Deactivate |
94 | public void deactivate() { | 131 | public void deactivate() { |
95 | leaderElector.removeChangeListener(leadershipChangeListener); | 132 | leaderElector.removeChangeListener(leadershipChangeListener); |
133 | + leaderElector.removeStatusChangeListener(clientStatusListener); | ||
134 | + statusChangeHandler.shutdown(); | ||
96 | log.info("Stopped"); | 135 | log.info("Stopped"); |
97 | } | 136 | } |
98 | 137 | ... | ... |
... | @@ -18,6 +18,7 @@ package org.onosproject.store.mastership.impl; | ... | @@ -18,6 +18,7 @@ package org.onosproject.store.mastership.impl; |
18 | import static org.onlab.util.Tools.groupedThreads; | 18 | import static org.onlab.util.Tools.groupedThreads; |
19 | import static org.onosproject.mastership.MastershipEvent.Type.BACKUPS_CHANGED; | 19 | import static org.onosproject.mastership.MastershipEvent.Type.BACKUPS_CHANGED; |
20 | import static org.onosproject.mastership.MastershipEvent.Type.MASTER_CHANGED; | 20 | import static org.onosproject.mastership.MastershipEvent.Type.MASTER_CHANGED; |
21 | +import static org.onosproject.mastership.MastershipEvent.Type.SUSPENDED; | ||
21 | import static org.slf4j.LoggerFactory.getLogger; | 22 | import static org.slf4j.LoggerFactory.getLogger; |
22 | import static com.google.common.base.Preconditions.checkArgument; | 23 | import static com.google.common.base.Preconditions.checkArgument; |
23 | 24 | ||
... | @@ -319,7 +320,8 @@ public class ConsistentDeviceMastershipStore | ... | @@ -319,7 +320,8 @@ public class ConsistentDeviceMastershipStore |
319 | private void handleEvent(LeadershipEvent event) { | 320 | private void handleEvent(LeadershipEvent event) { |
320 | Leadership leadership = event.subject(); | 321 | Leadership leadership = event.subject(); |
321 | DeviceId deviceId = extractDeviceIdFromTopic(leadership.topic()); | 322 | DeviceId deviceId = extractDeviceIdFromTopic(leadership.topic()); |
322 | - RoleInfo roleInfo = getNodes(deviceId); | 323 | + RoleInfo roleInfo = event.type() != LeadershipEvent.Type.SERVICE_DISRUPTED ? |
324 | + getNodes(deviceId) : new RoleInfo(); | ||
323 | switch (event.type()) { | 325 | switch (event.type()) { |
324 | case LEADER_AND_CANDIDATES_CHANGED: | 326 | case LEADER_AND_CANDIDATES_CHANGED: |
325 | notifyDelegate(new MastershipEvent(BACKUPS_CHANGED, deviceId, roleInfo)); | 327 | notifyDelegate(new MastershipEvent(BACKUPS_CHANGED, deviceId, roleInfo)); |
... | @@ -331,6 +333,12 @@ public class ConsistentDeviceMastershipStore | ... | @@ -331,6 +333,12 @@ public class ConsistentDeviceMastershipStore |
331 | case CANDIDATES_CHANGED: | 333 | case CANDIDATES_CHANGED: |
332 | notifyDelegate(new MastershipEvent(BACKUPS_CHANGED, deviceId, roleInfo)); | 334 | notifyDelegate(new MastershipEvent(BACKUPS_CHANGED, deviceId, roleInfo)); |
333 | break; | 335 | break; |
336 | + case SERVICE_DISRUPTED: | ||
337 | + notifyDelegate(new MastershipEvent(SUSPENDED, deviceId, roleInfo)); | ||
338 | + break; | ||
339 | + case SERVICE_RESTORED: | ||
340 | + // Do nothing, wait for updates from peers | ||
341 | + break; | ||
334 | default: | 342 | default: |
335 | return; | 343 | return; |
336 | } | 344 | } | ... | ... |
... | @@ -26,6 +26,7 @@ import java.util.Properties; | ... | @@ -26,6 +26,7 @@ import java.util.Properties; |
26 | import java.util.Set; | 26 | import java.util.Set; |
27 | import java.util.concurrent.CompletableFuture; | 27 | import java.util.concurrent.CompletableFuture; |
28 | import java.util.function.Consumer; | 28 | import java.util.function.Consumer; |
29 | +import java.util.function.Function; | ||
29 | 30 | ||
30 | import org.onosproject.cluster.Leadership; | 31 | import org.onosproject.cluster.Leadership; |
31 | import org.onosproject.cluster.NodeId; | 32 | import org.onosproject.cluster.NodeId; |
... | @@ -63,6 +64,19 @@ public class AtomixLeaderElector extends AbstractResource<AtomixLeaderElector> | ... | @@ -63,6 +64,19 @@ public class AtomixLeaderElector extends AbstractResource<AtomixLeaderElector> |
63 | public static final String CHANGE_SUBJECT = "leadershipChangeEvents"; | 64 | public static final String CHANGE_SUBJECT = "leadershipChangeEvents"; |
64 | private final LoadingCache<String, CompletableFuture<Leadership>> cache; | 65 | private final LoadingCache<String, CompletableFuture<Leadership>> cache; |
65 | 66 | ||
67 | + Function<CopycatClient.State, Status> mapper = state -> { | ||
68 | + switch (state) { | ||
69 | + case CONNECTED: | ||
70 | + return Status.ACTIVE; | ||
71 | + case SUSPENDED: | ||
72 | + return Status.SUSPENDED; | ||
73 | + case CLOSED: | ||
74 | + return Status.INACTIVE; | ||
75 | + default: | ||
76 | + throw new IllegalStateException("Unknown state " + state); | ||
77 | + } | ||
78 | + }; | ||
79 | + | ||
66 | public AtomixLeaderElector(CopycatClient client, Properties properties) { | 80 | public AtomixLeaderElector(CopycatClient client, Properties properties) { |
67 | super(client, properties); | 81 | super(client, properties); |
68 | cache = CacheBuilder.newBuilder() | 82 | cache = CacheBuilder.newBuilder() |
... | @@ -79,6 +93,7 @@ public class AtomixLeaderElector extends AbstractResource<AtomixLeaderElector> | ... | @@ -79,6 +93,7 @@ public class AtomixLeaderElector extends AbstractResource<AtomixLeaderElector> |
79 | } | 93 | } |
80 | }; | 94 | }; |
81 | addStatusChangeListener(statusListener); | 95 | addStatusChangeListener(statusListener); |
96 | + client.onStateChange(this::handleStateChange); | ||
82 | } | 97 | } |
83 | 98 | ||
84 | @Override | 99 | @Override |
... | @@ -193,4 +208,8 @@ public class AtomixLeaderElector extends AbstractResource<AtomixLeaderElector> | ... | @@ -193,4 +208,8 @@ public class AtomixLeaderElector extends AbstractResource<AtomixLeaderElector> |
193 | private boolean isListening() { | 208 | private boolean isListening() { |
194 | return !leadershipChangeListeners.isEmpty(); | 209 | return !leadershipChangeListeners.isEmpty(); |
195 | } | 210 | } |
211 | + | ||
212 | + private void handleStateChange(CopycatClient.State state) { | ||
213 | + statusChangeListeners().forEach(listener -> listener.accept(mapper.apply(state))); | ||
214 | + } | ||
196 | } | 215 | } | ... | ... |
... | @@ -542,22 +542,20 @@ public class LldpLinkProvider extends AbstractProvider implements ProbedLinkProv | ... | @@ -542,22 +542,20 @@ public class LldpLinkProvider extends AbstractProvider implements ProbedLinkProv |
542 | private class InternalRoleListener implements MastershipListener { | 542 | private class InternalRoleListener implements MastershipListener { |
543 | @Override | 543 | @Override |
544 | public void event(MastershipEvent event) { | 544 | public void event(MastershipEvent event) { |
545 | - if (MastershipEvent.Type.BACKUPS_CHANGED.equals(event.type())) { | 545 | + if (MastershipEvent.Type.MASTER_CHANGED.equals(event.type())) { |
546 | // only need new master events | 546 | // only need new master events |
547 | - return; | 547 | + eventExecutor.execute(() -> { |
548 | + DeviceId deviceId = event.subject(); | ||
549 | + Device device = deviceService.getDevice(deviceId); | ||
550 | + if (device == null) { | ||
551 | + log.debug("Device {} doesn't exist, or isn't there yet", deviceId); | ||
552 | + return; | ||
553 | + } | ||
554 | + if (clusterService.getLocalNode().id().equals(event.roleInfo().master())) { | ||
555 | + updateDevice(device).ifPresent(ld -> updatePorts(ld, device.id())); | ||
556 | + } | ||
557 | + }); | ||
548 | } | 558 | } |
549 | - | ||
550 | - eventExecutor.execute(() -> { | ||
551 | - DeviceId deviceId = event.subject(); | ||
552 | - Device device = deviceService.getDevice(deviceId); | ||
553 | - if (device == null) { | ||
554 | - log.debug("Device {} doesn't exist, or isn't there yet", deviceId); | ||
555 | - return; | ||
556 | - } | ||
557 | - if (clusterService.getLocalNode().id().equals(event.roleInfo().master())) { | ||
558 | - updateDevice(device).ifPresent(ld -> updatePorts(ld, device.id())); | ||
559 | - } | ||
560 | - }); | ||
561 | } | 559 | } |
562 | } | 560 | } |
563 | 561 | ... | ... |
-
Please register or login to post a comment