Committed by
Gerrit Code Review
Remove all the mastership roles when a Node leaves the cluster
- Fix for ONOS-1189 Change-Id: I695ccd6bf2ff12da3702d1a982e377b7082c9341
Showing
4 changed files
with
58 additions
and
24 deletions
... | @@ -113,4 +113,12 @@ public interface MastershipStore extends Store<MastershipEvent, MastershipStoreD | ... | @@ -113,4 +113,12 @@ public interface MastershipStore extends Store<MastershipEvent, MastershipStoreD |
113 | */ | 113 | */ |
114 | MastershipEvent relinquishRole(NodeId nodeId, DeviceId deviceId); | 114 | MastershipEvent relinquishRole(NodeId nodeId, DeviceId deviceId); |
115 | 115 | ||
116 | + /** | ||
117 | + * Removes all the roles for the specified controller instance. | ||
118 | + * If the instance was MASTER for a device, another controller instance | |
119 | + * will be selected as the candidate master for that device. | |
120 | + * | ||
121 | + * @param nodeId the controller instance identifier | ||
122 | + */ | ||
123 | + void relinquishAllRole(NodeId nodeId); | ||
116 | } | 124 | } | ... | ... |
... | @@ -304,38 +304,18 @@ public class MastershipManager | ... | @@ -304,38 +304,18 @@ public class MastershipManager |
304 | case INSTANCE_REMOVED: | 304 | case INSTANCE_REMOVED: |
305 | case INSTANCE_DEACTIVATED: | 305 | case INSTANCE_DEACTIVATED: |
306 | ControllerNode node = event.subject(); | 306 | ControllerNode node = event.subject(); |
307 | + log.info("instance {} removed/deactivated", node); | ||
308 | + store.relinquishAllRole(node.id()); | ||
307 | 309 | ||
308 | - if (node.equals(clusterService.getLocalNode())) { | ||
309 | - //If we are in smaller cluster, relinquish and return | ||
310 | - for (DeviceId device : getDevicesOf(node.id())) { | ||
311 | - if (!isInMajority()) { | ||
312 | - //own DeviceManager should catch event and tell switch | ||
313 | - store.relinquishRole(node.id(), device); | ||
314 | - } | ||
315 | - } | ||
316 | - log.info("broke off from cluster, relinquished devices"); | ||
317 | - break; | ||
318 | - } | ||
319 | - | ||
320 | - // if we are the larger one and the removed node(s) are brain dead, | ||
321 | - // force relinquish on behalf of disabled node. | ||
322 | - // check network channel to do this? | ||
323 | - for (DeviceId device : getDevicesOf(node.id())) { | ||
324 | - //some things to check: | ||
325 | - // 1. we didn't break off as well while we're at it | ||
326 | - // 2. others don't pile in and try too - maybe a lock | ||
327 | - if (isInMajority()) { | ||
328 | - store.relinquishRole(node.id(), device); | ||
329 | - } | ||
330 | - } | ||
331 | clusterSize.decrementAndGet(); | 310 | clusterSize.decrementAndGet(); |
332 | - log.info("instance {} removed/deactivated", event.subject()); | ||
333 | break; | 311 | break; |
334 | default: | 312 | default: |
335 | log.warn("unknown cluster event {}", event); | 313 | log.warn("unknown cluster event {}", event); |
336 | } | 314 | } |
337 | } | 315 | } |
338 | 316 | ||
317 | // Can be removed if we go with naive split-brain handling, where only | |
318 | // the majority assigns mastership. | |
339 | private boolean isInMajority() { | 319 | private boolean isInMajority() { |
340 | if (clusterService.getNodes().size() > (clusterSize.intValue() / 2)) { | 320 | if (clusterService.getNodes().size() > (clusterSize.intValue() / 2)) { |
341 | return true; | 321 | return true; | ... | ... |
... | @@ -19,8 +19,11 @@ import static org.onosproject.mastership.MastershipEvent.Type.MASTER_CHANGED; | ... | @@ -19,8 +19,11 @@ import static org.onosproject.mastership.MastershipEvent.Type.MASTER_CHANGED; |
19 | import static org.onosproject.mastership.MastershipEvent.Type.BACKUPS_CHANGED; | 19 | import static org.onosproject.mastership.MastershipEvent.Type.BACKUPS_CHANGED; |
20 | import static org.apache.commons.lang3.concurrent.ConcurrentUtils.putIfAbsent; | 20 | import static org.apache.commons.lang3.concurrent.ConcurrentUtils.putIfAbsent; |
21 | 21 | ||
22 | +import java.util.ArrayList; | ||
22 | import java.util.HashSet; | 23 | import java.util.HashSet; |
24 | +import java.util.List; | ||
23 | import java.util.Map; | 25 | import java.util.Map; |
26 | +import java.util.Map.Entry; | ||
24 | import java.util.Set; | 27 | import java.util.Set; |
25 | 28 | ||
26 | import org.apache.felix.scr.annotations.Activate; | 29 | import org.apache.felix.scr.annotations.Activate; |
... | @@ -360,6 +363,26 @@ public class DistributedMastershipStore | ... | @@ -360,6 +363,26 @@ public class DistributedMastershipStore |
360 | } | 363 | } |
361 | } | 364 | } |
362 | 365 | ||
366 | + @Override | ||
367 | + public void relinquishAllRole(NodeId nodeId) { | ||
368 | + | ||
369 | + List<MastershipEvent> events = new ArrayList<>(); | ||
370 | + for (Entry<DeviceId, RoleValue> entry : roleMap.entrySet()) { | ||
371 | + final DeviceId deviceId = entry.getKey(); | ||
372 | + final RoleValue roleValue = entry.getValue(); | ||
373 | + | ||
374 | + if (roleValue.contains(MASTER, nodeId) || | ||
375 | + roleValue.contains(STANDBY, nodeId)) { | ||
376 | + | ||
377 | + MastershipEvent event = relinquishRole(nodeId, deviceId); | ||
378 | + if (event != null) { | ||
379 | + events.add(event); | ||
380 | + } | ||
381 | + } | ||
382 | + } | ||
383 | + notifyDelegate(events); | ||
384 | + } | ||
385 | + | ||
363 | // TODO: Consider moving this to RoleValue method | 386 | // TODO: Consider moving this to RoleValue method |
364 | //helper to fetch a new master candidate for a given device. | 387 | //helper to fetch a new master candidate for a given device. |
365 | private NodeId reelect( | 388 | private NodeId reelect( | ... | ... |
... | @@ -352,4 +352,27 @@ public class SimpleMastershipStore | ... | @@ -352,4 +352,27 @@ public class SimpleMastershipStore |
352 | } | 352 | } |
353 | return null; | 353 | return null; |
354 | } | 354 | } |
355 | + | ||
356 | + @Override | ||
357 | + public synchronized void relinquishAllRole(NodeId nodeId) { | ||
358 | + List<MastershipEvent> events = new ArrayList<>(); | ||
359 | + Set<DeviceId> toRelinquish = new HashSet<>(); | ||
360 | + | ||
361 | + masterMap.entrySet().stream() | ||
362 | + .filter(entry -> nodeId.equals(entry.getValue())) | ||
363 | + .forEach(entry -> toRelinquish.add(entry.getKey())); | ||
364 | + | ||
365 | + backups.entrySet().stream() | ||
366 | + .filter(entry -> entry.getValue().contains(nodeId)) | ||
367 | + .forEach(entry -> toRelinquish.add(entry.getKey())); | ||
368 | + | ||
369 | + toRelinquish.forEach(deviceId -> { | ||
370 | + MastershipEvent event = relinquishRole(nodeId, deviceId); | ||
371 | + if (event != null) { | ||
372 | + events.add(event); | ||
373 | + } | ||
374 | + }); | ||
375 | + | ||
376 | + notifyDelegate(events); | ||
377 | + } | ||
355 | } | 378 | } | ... | ... |
-
Please register or login to post a comment