HIGUCHI Yuta
Committed by Gerrit Code Review

Remove all mastership roles held by a node when it leaves the cluster

- Fix for ONOS-1189

Change-Id: I695ccd6bf2ff12da3702d1a982e377b7082c9341
...@@ -113,4 +113,12 @@ public interface MastershipStore extends Store<MastershipEvent, MastershipStoreD ...@@ -113,4 +113,12 @@ public interface MastershipStore extends Store<MastershipEvent, MastershipStoreD
113 */ 113 */
114 MastershipEvent relinquishRole(NodeId nodeId, DeviceId deviceId); 114 MastershipEvent relinquishRole(NodeId nodeId, DeviceId deviceId);
115 115
116 + /**
117 + * Removes all the roles for the specified controller instance.
118 + * For each device on which the instance's role was MASTER, another
119 + * controller instance will be selected as a candidate master.
120 + *
121 + * @param nodeId the controller instance identifier
122 + */
123 + void relinquishAllRole(NodeId nodeId);
116 } 124 }
......
...@@ -304,38 +304,18 @@ public class MastershipManager ...@@ -304,38 +304,18 @@ public class MastershipManager
304 case INSTANCE_REMOVED: 304 case INSTANCE_REMOVED:
305 case INSTANCE_DEACTIVATED: 305 case INSTANCE_DEACTIVATED:
306 ControllerNode node = event.subject(); 306 ControllerNode node = event.subject();
307 + log.info("instance {} removed/deactivated", node);
308 + store.relinquishAllRole(node.id());
307 309
308 - if (node.equals(clusterService.getLocalNode())) {
309 - //If we are in smaller cluster, relinquish and return
310 - for (DeviceId device : getDevicesOf(node.id())) {
311 - if (!isInMajority()) {
312 - //own DeviceManager should catch event and tell switch
313 - store.relinquishRole(node.id(), device);
314 - }
315 - }
316 - log.info("broke off from cluster, relinquished devices");
317 - break;
318 - }
319 -
320 - // if we are the larger one and the removed node(s) are brain dead,
321 - // force relinquish on behalf of disabled node.
322 - // check network channel to do this?
323 - for (DeviceId device : getDevicesOf(node.id())) {
324 - //some things to check:
325 - // 1. we didn't break off as well while we're at it
326 - // 2. others don't pile in and try too - maybe a lock
327 - if (isInMajority()) {
328 - store.relinquishRole(node.id(), device);
329 - }
330 - }
331 clusterSize.decrementAndGet(); 310 clusterSize.decrementAndGet();
332 - log.info("instance {} removed/deactivated", event.subject());
333 break; 311 break;
334 default: 312 default:
335 log.warn("unknown cluster event {}", event); 313 log.warn("unknown cluster event {}", event);
336 } 314 }
337 } 315 }
338 316
317 + // isInMajority() can be removed if we go with naive split-brain handling,
318 + // in which only the majority side of the cluster assigns mastership.
339 private boolean isInMajority() { 319 private boolean isInMajority() {
340 if (clusterService.getNodes().size() > (clusterSize.intValue() / 2)) { 320 if (clusterService.getNodes().size() > (clusterSize.intValue() / 2)) {
341 return true; 321 return true;
......
...@@ -19,8 +19,11 @@ import static org.onosproject.mastership.MastershipEvent.Type.MASTER_CHANGED; ...@@ -19,8 +19,11 @@ import static org.onosproject.mastership.MastershipEvent.Type.MASTER_CHANGED;
19 import static org.onosproject.mastership.MastershipEvent.Type.BACKUPS_CHANGED; 19 import static org.onosproject.mastership.MastershipEvent.Type.BACKUPS_CHANGED;
20 import static org.apache.commons.lang3.concurrent.ConcurrentUtils.putIfAbsent; 20 import static org.apache.commons.lang3.concurrent.ConcurrentUtils.putIfAbsent;
21 21
22 +import java.util.ArrayList;
22 import java.util.HashSet; 23 import java.util.HashSet;
24 +import java.util.List;
23 import java.util.Map; 25 import java.util.Map;
26 +import java.util.Map.Entry;
24 import java.util.Set; 27 import java.util.Set;
25 28
26 import org.apache.felix.scr.annotations.Activate; 29 import org.apache.felix.scr.annotations.Activate;
...@@ -360,6 +363,26 @@ public class DistributedMastershipStore ...@@ -360,6 +363,26 @@ public class DistributedMastershipStore
360 } 363 }
361 } 364 }
362 365
366 + @Override
367 + public void relinquishAllRole(NodeId nodeId) {
368 +
369 + List<MastershipEvent> events = new ArrayList<>();
370 + for (Entry<DeviceId, RoleValue> entry : roleMap.entrySet()) {
371 + final DeviceId deviceId = entry.getKey();
372 + final RoleValue roleValue = entry.getValue();
373 +
374 + if (roleValue.contains(MASTER, nodeId) ||
375 + roleValue.contains(STANDBY, nodeId)) {
376 +
377 + MastershipEvent event = relinquishRole(nodeId, deviceId);
378 + if (event != null) {
379 + events.add(event);
380 + }
381 + }
382 + }
383 + notifyDelegate(events);
384 + }
385 +
363 // TODO: Consider moving this to RoleValue method 386 // TODO: Consider moving this to RoleValue method
364 //helper to fetch a new master candidate for a given device. 387 //helper to fetch a new master candidate for a given device.
365 private NodeId reelect( 388 private NodeId reelect(
......
...@@ -352,4 +352,27 @@ public class SimpleMastershipStore ...@@ -352,4 +352,27 @@ public class SimpleMastershipStore
352 } 352 }
353 return null; 353 return null;
354 } 354 }
355 +
356 + @Override
357 + public synchronized void relinquishAllRole(NodeId nodeId) {
358 + List<MastershipEvent> events = new ArrayList<>();
359 + Set<DeviceId> toRelinquish = new HashSet<>();
360 +
361 + masterMap.entrySet().stream()
362 + .filter(entry -> nodeId.equals(entry.getValue()))
363 + .forEach(entry -> toRelinquish.add(entry.getKey()));
364 +
365 + backups.entrySet().stream()
366 + .filter(entry -> entry.getValue().contains(nodeId))
367 + .forEach(entry -> toRelinquish.add(entry.getKey()));
368 +
369 + toRelinquish.forEach(deviceId -> {
370 + MastershipEvent event = relinquishRole(nodeId, deviceId);
371 + if (event != null) {
372 + events.add(event);
373 + }
374 + });
375 +
376 + notifyDelegate(events);
377 + }
355 } 378 }
......